aa3efe9715
This change adds a feature to launch the Prometheus process using a custom script, which should be stored in override values. Because the known issue https://github.com/prometheus/prometheus/issues/6934 has been open for many years, we are going to deal with growing WAL files using our custom downstream wrapper script, which stops the Prometheus process and deletes the WAL files. This solution does not fit all customers because it completely discards the data cached in the WAL, but it is acceptable for our purposes. For that reason I only added the ability to use a custom script to launch Prometheus and left the original functionality as the default. The default/custom mode is selected in 'values.yaml', which also holds the body of the custom launcher script. Change-Id: Ie02ea1d6a7de5c676e2e96f3dcd6aca172af4afb
1137 lines
42 KiB
YAML
1137 lines
42 KiB
YAML
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Default values for prometheus.
|
|
# This is a YAML-formatted file.
|
|
# Declare name/value pairs to be passed into your templates.
|
|
# name: value
|
|
|
|
---
|
|
# Container image references, one entry per role used by this chart.
images:
  tags:
    apache_proxy: docker.io/library/httpd:2.4
    prometheus: docker.io/prom/prometheus:v2.25.0
    helm_tests: docker.io/openstackhelm/heat:wallaby-ubuntu_focal
    dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
    image_repo_sync: docker.io/library/docker:17.07.0
  pull_policy: IfNotPresent
  local_registry:
    active: false
    # NOTE(review): presumably the tags that are NOT served through the
    # local registry when it is active - confirm against the templates.
    exclude:
      - dep_check
      - image_repo_sync
|
|
|
|
# Node selector key/value pairs, grouped by workload type
# (the prometheus service itself, jobs, and helm tests).
labels:
  prometheus:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
  job:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
  test:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
|
|
|
|
# Pod-level settings: environment, security contexts, scheduling,
# mounts, replica count, upgrade lifecycle, resources and probes.
pod:
  env:
    prometheus: null
  security_context:
    api:
      pod:
        runAsUser: 65534
      container:
        prometheus_perms:
          runAsUser: 0
          readOnlyRootFilesystem: false
        apache_proxy:
          runAsUser: 0
          readOnlyRootFilesystem: false
        prometheus:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
    test:
      pod:
        runAsUser: 65534
      container:
        prometheus_helm_tests:
          readOnlyRootFilesystem: true
          allowPrivilegeEscalation: false
  affinity:
    anti:
      type:
        default: preferredDuringSchedulingIgnoredDuringExecution
      topologyKey:
        default: kubernetes.io/hostname
      weight:
        default: 10
  mounts:
    prometheus:
      # NOTE(review): nesting reconstructed - `init_container` is assumed
      # to be a sibling of the (empty) `prometheus` key; confirm against
      # the chart templates.
      prometheus: null
      init_container: null
  replicas:
    prometheus: 1
  lifecycle:
    upgrades:
      statefulsets:
        pod_replacement_strategy: RollingUpdate
    termination_grace_period:
      prometheus:
        timeout: 30
  resources:
    # Resource requests/limits below are only meaningful when enabled.
    enabled: false
    prometheus:
      limits:
        memory: "1024Mi"
        cpu: "2000m"
      requests:
        memory: "128Mi"
        cpu: "500m"
    jobs:
      image_repo_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      tests:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
  probes:
    prometheus:
      prometheus:
        readiness:
          enabled: true
          params:
            initialDelaySeconds: 30
            timeoutSeconds: 30
        liveness:
          enabled: false
          params:
            initialDelaySeconds: 120
            timeoutSeconds: 30
|
|
# Service endpoint definitions (hosts, ports, schemes, credentials)
# for every service this chart talks to or exposes.
endpoints:
  cluster_domain_suffix: cluster.local
  local_image_registry:
    name: docker-registry
    namespace: docker-registry
    hosts:
      default: localhost
      internal: docker-registry
      node: localhost
    host_fqdn_override:
      default: null
    port:
      registry:
        node: 5000
  oci_image_registry:
    name: oci-image-registry
    namespace: oci-image-registry
    auth:
      enabled: false
      prometheus:
        username: prometheus
        password: password
    hosts:
      default: localhost
    host_fqdn_override:
      default: null
    port:
      registry:
        default: null
  monitoring:
    name: prometheus
    namespace: null
    auth:
      admin:
        username: admin
        password: changeme
      federate:
        username: federate
        password: changeme
    hosts:
      default: prom-metrics
      public: prometheus
    host_fqdn_override:
      default: null
      # NOTE(srwilkers): TLS is supported for FQDN-overridden public
      # endpoints; supply it in the following format:
      # public:
      #   host: null
      #   tls:
      #     crt: null
      #     key: null
    path:
      default: null
    scheme:
      default: 'http'
    port:
      api:
        default: 9090
      http:
        default: 80
  alertmanager:
    name: prometheus-alertmanager
    namespace: null
    hosts:
      default: alerts-engine
      public: prometheus-alertmanager
      discovery: prometheus-alertmanager-discovery
    host_fqdn_override:
      default: null
    path:
      default: null
    scheme:
      default: 'http'
    port:
      api:
        default: 9093
        public: 80
      mesh:
        default: 9094
  ldap:
    hosts:
      default: ldap
    auth:
      admin:
        bind: "cn=admin,dc=cluster,dc=local"
        password: password
    host_fqdn_override:
      default: null
    path:
      default: "/ou=People,dc=cluster,dc=local"
    scheme:
      default: ldap
    port:
      ldap:
        default: 389
|
|
|
|
# Jobs and service endpoints each component waits on.
# `dynamic` entries are grouped under a feature name; `static` entries
# are grouped per component.
dependencies:
  dynamic:
    common:
      local_image_registry:
        jobs:
          - prometheus-image-repo-sync
        services:
          - endpoint: node
            service: local_image_registry
  static:
    image_repo_sync:
      services:
        - endpoint: internal
          service: local_image_registry
    prometheus:
      services: null
    tests:
      services:
        - endpoint: internal
          service: monitoring
|
|
|
|
# Monitoring toggles for this chart's own Prometheus service.
monitoring:
  prometheus:
    enabled: true
    prometheus:
      scrape: true
|
|
|
|
# Exposure of the Prometheus service: ingress settings and an optional
# NodePort.
network:
  prometheus:
    ingress:
      public: true
      classes:
        namespace: "nginx"
        cluster: "nginx-cluster"
      annotations:
        nginx.ingress.kubernetes.io/rewrite-target: /
        nginx.ingress.kubernetes.io/affinity: cookie
        nginx.ingress.kubernetes.io/session-cookie-name: kube-ingress-session-prometheus
        nginx.ingress.kubernetes.io/session-cookie-hash: sha1
        # Quoted so the values stay strings rather than integers.
        nginx.ingress.kubernetes.io/session-cookie-expires: "600"
        nginx.ingress.kubernetes.io/session-cookie-max-age: "600"
    node_port:
      enabled: false
      port: 30900
|
|
|
|
# Network policy rules for the prometheus pods. Each direction holds a
# single empty rule object.
network_policy:
  prometheus:
    ingress:
      - {}
    egress:
      - {}
|
|
|
|
# How the Prometheus process is launched.
# With `default: true` the chart's original launch behaviour is used;
# with `default: false` the shell snippet in `custom_launch` is used
# instead (see the placeholder text inside the snippet).
proc_launch:
  prometheus:
    default: true
    # NOTE(review): indentation of the loop body was not recoverable from
    # the source view; it does not affect how the shell runs the snippet.
    custom_launch: |
      while true
      do
        echo "If 'proc_launch.prometheus.default: false'."
        echo "Your custom shell script code you can put here."
        sleep 10
      done
|
|
|
|
# Names of the Kubernetes secrets used by the chart.
secrets:
  oci_image_registry:
    prometheus: prometheus-oci-image-registry-key
  tls:
    monitoring:
      prometheus:
        public: prometheus-tls-public
        internal: prometheus-tls-api
|
|
|
|
tls_configs:
  # Client certificates needed to connect to metrics endpoints can be
  # configured here. They are mounted in the pod under /tls_configs and
  # can be referenced in scrape configs.
  # Each filename is the key and subkey concatenated with a ".", e.g.:
  #   /tls_configs/kubernetes-etcd.ca.pem
  #   /tls_configs/kubernetes-etcd.crt.pem
  #   /tls_configs/kubernetes-etcd.key.pem
  # which result from the following:
  # kubernetes-etcd:
  #   ca.pem: |
  #     -----BEGIN CERTIFICATE-----
  #     -----END CERTIFICATE-----
  #   crt.pem: |
  #     -----BEGIN CERTIFICATE-----
  #     -----END CERTIFICATE-----
  #   key.pem: |
  #     -----BEGIN RSA PRIVATE KEY-----
  #     -----END RSA PRIVATE KEY-----
|
|
|
|
# Persistent storage settings for Prometheus data.
storage:
  enabled: true
  pvc:
    name: prometheus-pvc
    access_mode:
      - "ReadWriteOnce"
  requests:
    storage: 5Gi
  storage_class: general
|
|
|
|
# Per-manifest switches: each key names a chart manifest and the boolean
# says whether it is enabled.
manifests:
  certificates: false
  configmap_bin: true
  configmap_etc: true
  ingress: true
  helm_tests: true
  job_image_repo_sync: true
  network_policy: true
  secret_ingress_tls: true
  secret_prometheus: true
  secret_registry: true
  service_ingress: true
  service: true
  statefulset_prometheus: true
|
|
|
|
conf:
|
|
httpd: |
|
|
ServerRoot "/usr/local/apache2"
|
|
|
|
Listen 80
|
|
|
|
LoadModule mpm_event_module modules/mod_mpm_event.so
|
|
LoadModule authn_file_module modules/mod_authn_file.so
|
|
LoadModule authn_core_module modules/mod_authn_core.so
|
|
LoadModule authz_host_module modules/mod_authz_host.so
|
|
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
|
|
LoadModule authz_user_module modules/mod_authz_user.so
|
|
LoadModule authz_core_module modules/mod_authz_core.so
|
|
LoadModule access_compat_module modules/mod_access_compat.so
|
|
LoadModule auth_basic_module modules/mod_auth_basic.so
|
|
LoadModule ldap_module modules/mod_ldap.so
|
|
LoadModule authnz_ldap_module modules/mod_authnz_ldap.so
|
|
LoadModule reqtimeout_module modules/mod_reqtimeout.so
|
|
LoadModule filter_module modules/mod_filter.so
|
|
LoadModule proxy_html_module modules/mod_proxy_html.so
|
|
LoadModule log_config_module modules/mod_log_config.so
|
|
LoadModule env_module modules/mod_env.so
|
|
LoadModule headers_module modules/mod_headers.so
|
|
LoadModule setenvif_module modules/mod_setenvif.so
|
|
LoadModule version_module modules/mod_version.so
|
|
LoadModule proxy_module modules/mod_proxy.so
|
|
LoadModule proxy_connect_module modules/mod_proxy_connect.so
|
|
LoadModule proxy_http_module modules/mod_proxy_http.so
|
|
LoadModule proxy_balancer_module modules/mod_proxy_balancer.so
|
|
LoadModule slotmem_shm_module modules/mod_slotmem_shm.so
|
|
LoadModule slotmem_plain_module modules/mod_slotmem_plain.so
|
|
LoadModule unixd_module modules/mod_unixd.so
|
|
LoadModule status_module modules/mod_status.so
|
|
LoadModule autoindex_module modules/mod_autoindex.so
|
|
|
|
<IfModule unixd_module>
|
|
User daemon
|
|
Group daemon
|
|
</IfModule>
|
|
|
|
<Directory />
|
|
AllowOverride none
|
|
Require all denied
|
|
</Directory>
|
|
|
|
<Files ".ht*">
|
|
Require all denied
|
|
</Files>
|
|
|
|
ErrorLog /dev/stderr
|
|
|
|
LogLevel warn
|
|
|
|
<IfModule log_config_module>
|
|
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
|
|
LogFormat "%{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" proxy
|
|
LogFormat "%h %l %u %t \"%r\" %>s %b" common
|
|
|
|
<IfModule logio_module>
|
|
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
|
|
</IfModule>
|
|
|
|
SetEnvIf X-Forwarded-For "^.*\..*\..*\..*" forwarded
|
|
CustomLog /dev/stdout common
|
|
CustomLog /dev/stdout combined
|
|
CustomLog /dev/stdout proxy env=forwarded
|
|
</IfModule>
|
|
|
|
<Directory "/usr/local/apache2/cgi-bin">
|
|
AllowOverride None
|
|
Options None
|
|
Require all granted
|
|
</Directory>
|
|
|
|
<IfModule headers_module>
|
|
RequestHeader unset Proxy early
|
|
</IfModule>
|
|
|
|
<IfModule proxy_html_module>
|
|
Include conf/extra/proxy-html.conf
|
|
</IfModule>
|
|
|
|
<VirtualHost *:80>
|
|
# Expose metrics to all users, as this is not sensitive information and
|
|
# circumvents the inability of Prometheus to interpolate environment vars
|
|
# in its configuration file
|
|
<Location /metrics>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
|
|
Satisfy Any
|
|
Allow from all
|
|
</Location>
|
|
# Expose the /federate endpoint to all users, as this is also not
# sensitive information and circumvents the inability of Prometheus to
# interpolate environment vars in its configuration file.
# The backend path must be /federate: forwarding to /metrics (as the
# /metrics block does) would return Prometheus' own metrics instead of
# the federated series requested by the client.
<Location /federate>
  ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/federate
  ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/federate
  # No authentication is required for this location.
  Satisfy Any
  Allow from all
</Location>
|
|
# Restrict general user (LDAP) access to the /graph endpoint, as general trusted
|
|
# users should only be able to query Prometheus for metrics and not have access
|
|
# to information like targets, configuration, flags or build info for Prometheus
|
|
<Location />
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file ldap
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
|
|
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
|
|
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
|
|
Require valid-user
|
|
</Location>
|
|
<Location /graph>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file ldap
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
|
|
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
|
|
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /config (dashboard) and /api/v1/status/config (http) endpoints
|
|
# to the admin user
|
|
<Location /config>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
<Location /api/v1/status/config>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /flags (dashboard) and /api/v1/status/flags (http) endpoints
|
|
# to the admin user
|
|
<Location /flags>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
<Location /api/v1/status/flags>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /status (dashboard) endpoint to the admin user
|
|
<Location /status>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /rules (dashboard) endpoint to the admin user
|
|
<Location /rules>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /targets (dashboard) and /api/v1/targets (http) endpoints
|
|
# to the admin user
|
|
<Location /targets>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
<Location /api/v1/targets>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /api/v1/admin/tsdb/ endpoints (http) to the admin user.
|
|
# These endpoints are disabled by default, but are included here to ensure only
|
|
# an admin user has access to these endpoints when enabled
|
|
<Location /api/v1/admin/tsdb/>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
</VirtualHost>
|
|
prometheus:
|
|
# Consumed by a prometheus helper function to generate the command line flags
|
|
# for configuring the prometheus service
|
|
command_line_flags:
|
|
log.level: info
|
|
query.max_concurrency: 20
|
|
query.timeout: 2m
|
|
storage.tsdb.path: /var/lib/prometheus/data
|
|
storage.tsdb.retention.time: 7d
|
|
# NOTE(srwilkers): These settings default to false, but they are
|
|
# exposed here to allow enabling if desired. Please note the security
|
|
# impacts of enabling these flags. More information regarding the impacts
|
|
# can be found here: https://prometheus.io/docs/operating/security/
|
|
#
|
|
# If set to true, all administrative functionality is exposed via the http
|
|
# /api/*/admin/ path
|
|
web.enable_admin_api: false
|
|
# If set to true, allows for http reloads and shutdown of Prometheus
|
|
web.enable_lifecycle: false
|
|
scrape_configs:
|
|
template: |
|
|
{{- $promHost := tuple "monitoring" "public" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}
|
|
{{- if not (empty .Values.conf.prometheus.rules)}}
|
|
rule_files:
|
|
{{- $rulesKeys := keys .Values.conf.prometheus.rules -}}
|
|
{{- range $rule := $rulesKeys }}
|
|
{{ printf "- /etc/config/rules/%s.rules" $rule }}
|
|
{{- end }}
|
|
{{- end }}
|
|
global:
|
|
scrape_interval: 60s
|
|
evaluation_interval: 60s
|
|
external_labels:
|
|
prometheus_host: {{$promHost}}
|
|
scrape_configs:
|
|
- job_name: kubelet
|
|
scheme: https
|
|
# This TLS & bearer token file config is used to connect to the actual scrape
|
|
# endpoints for cluster components. This is separate to discovery auth
|
|
# configuration because discovery & scraping are two separate concerns in
|
|
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
# the cluster. Otherwise, more config options have to be provided within the
|
|
# <kubernetes_sd_config>.
|
|
tls_config:
|
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
kubernetes_sd_configs:
|
|
- role: node
|
|
scrape_interval: 45s
|
|
relabel_configs:
|
|
- action: labelmap
|
|
regex: __meta_kubernetes_node_label_(.+)
|
|
- target_label: __address__
|
|
replacement: kubernetes.default.svc:443
|
|
- source_labels:
|
|
- __meta_kubernetes_node_name
|
|
regex: (.+)
|
|
target_label: __metrics_path__
|
|
replacement: /api/v1/nodes/${1}/proxy/metrics
|
|
- source_labels:
|
|
- __meta_kubernetes_node_name
|
|
action: replace
|
|
target_label: kubernetes_io_hostname
|
|
# Scrape config for Kubelet cAdvisor.
|
|
#
|
|
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
|
|
# (those whose names begin with 'container_') have been removed from the
|
|
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
|
|
# retrieve those metrics.
|
|
#
|
|
# In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
|
|
# HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
|
|
# in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
|
|
# the --cadvisor-port=0 Kubelet flag).
|
|
#
|
|
# This job is not necessary and should be removed in Kubernetes 1.6 and
|
|
# earlier versions, or it will cause the metrics to be scraped twice.
|
|
- job_name: 'kubernetes-cadvisor'
|
|
|
|
# Default to scraping over https. If required, just disable this or change to
|
|
# `http`.
|
|
scheme: https
|
|
|
|
# This TLS & bearer token file config is used to connect to the actual scrape
|
|
# endpoints for cluster components. This is separate to discovery auth
|
|
# configuration because discovery & scraping are two separate concerns in
|
|
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
# the cluster. Otherwise, more config options have to be provided within the
|
|
# <kubernetes_sd_config>.
|
|
tls_config:
|
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
|
|
kubernetes_sd_configs:
|
|
- role: node
|
|
|
|
relabel_configs:
|
|
- action: labelmap
|
|
regex: __meta_kubernetes_node_label_(.+)
|
|
- target_label: __address__
|
|
replacement: kubernetes.default.svc:443
|
|
- source_labels:
|
|
- __meta_kubernetes_node_name
|
|
regex: (.+)
|
|
target_label: __metrics_path__
|
|
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
|
|
metric_relabel_configs:
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_tcp_usage_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_tasks_state'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_udp_usage_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_failures_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_cpu_load_average_10s'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_cpu_system_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_cpu_user_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_inodes_free'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_inodes_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_io_current'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_io_time_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_io_time_weighted_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_read_seconds_total'
|
|
action: drop
|
|
# Drop the container_fs_reads_merged_total series.
# NOTE(review): this rule was listed twice in a row; the duplicate is
# removed here with no change in behaviour. Going by the parallel
# container_fs_writes_* rules, one of the two may have been meant to be
# 'container_fs_reads_bytes_total' - confirm before adding it.
- source_labels:
    - __name__
  regex: 'container_fs_reads_merged_total'
  action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_reads_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_sector_reads_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_sector_writes_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_write_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_writes_bytes_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_writes_merged_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_writes_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_last_seen'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_cache'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_failcnt'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_max_usage_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_rss'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_swap'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_usage_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_receive_errors_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_receive_packets_dropped_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_receive_packets_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_transmit_errors_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_transmit_packets_dropped_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_transmit_packets_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_cpu_period'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_cpu_shares'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_memory_limit_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_memory_reservation_limit_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_memory_swap_limit_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_start_time_seconds'
|
|
action: drop
|
|
# Scrape config for API servers.
|
|
#
|
|
# Kubernetes exposes API servers as endpoints to the default/kubernetes
|
|
# service so this uses `endpoints` role and uses relabelling to only keep
|
|
# the endpoints associated with the default/kubernetes service using the
|
|
# default named port `https`. This works for single API server deployments as
|
|
# well as HA API server deployments.
|
|
- job_name: 'apiserver'
|
|
kubernetes_sd_configs:
|
|
- role: endpoints
|
|
scrape_interval: 45s
|
|
# Default to scraping over https. If required, just disable this or change to
|
|
# `http`.
|
|
scheme: https
|
|
# This TLS & bearer token file config is used to connect to the actual scrape
|
|
# endpoints for cluster components. This is separate to discovery auth
|
|
# configuration because discovery & scraping are two separate concerns in
|
|
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
# the cluster. Otherwise, more config options have to be provided within the
|
|
# <kubernetes_sd_config>.
|
|
tls_config:
|
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
# If your node certificates are self-signed or use a different CA to the
|
|
# master CA, then disable certificate verification below. Note that
|
|
# certificate verification is an integral part of a secure infrastructure
|
|
# so this should only be disabled in a controlled environment. You can
|
|
# disable certificate verification by uncommenting the line below.
|
|
#
|
|
# insecure_skip_verify: true
|
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
# Keep only the default/kubernetes service endpoints for the https port. This
|
|
# will add targets for each API server which Kubernetes adds an endpoint to
|
|
# the default/kubernetes service.
|
|
relabel_configs:
|
|
- source_labels:
|
|
- __meta_kubernetes_namespace
|
|
- __meta_kubernetes_service_name
|
|
- __meta_kubernetes_endpoint_port_name
|
|
action: keep
|
|
regex: default;kubernetes;https
|
|
metric_relabel_configs:
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_admission_controller_admission_latencies_seconds_bucket'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'rest_client_request_latency_seconds_bucket'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_response_sizes_bucket'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_admission_step_admission_latencies_seconds_bucket'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_admission_controller_admission_latencies_seconds_count'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_admission_controller_admission_latencies_seconds_sum'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_request_latencies_summary'
|
|
action: drop
|
|
# Scrape config for service endpoints.
|
|
#
|
|
# The relabeling allows the actual service scrape endpoint to be configured
|
|
# via the following annotations:
|
|
#
|
|
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
|
|
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
|
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
|
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
|
# service then set this appropriately.
|
|
# Scrapes the OpenStack exporter: endpoints are discovered through the
# Kubernetes API and narrowed down to the openstack-metrics service.
- job_name: 'openstack-exporter'
  kubernetes_sd_configs:
    - role: endpoints
  scrape_interval: 60s
  relabel_configs:
    # Only endpoints backing the openstack-metrics service are scraped.
    - source_labels:
        - __meta_kubernetes_service_name
      action: keep
      regex: "openstack-metrics"
    # Honour the prometheus.io/scrape annotation. The pattern is quoted so
    # that it is always read as the string "true", never as a YAML boolean.
    - source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scrape
      action: keep
      regex: 'true'
    # prometheus.io/scheme, when it is http or https, selects the scheme.
    - source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scheme
      action: replace
      target_label: __scheme__
      regex: (https?)
    # prometheus.io/path, when non-empty, overrides the metrics path.
    - source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_path
      action: replace
      target_label: __metrics_path__
      regex: (.+)
    # prometheus.io/port, when numeric, replaces the port of the discovered
    # address; the pattern discards any port already present in __address__.
    - source_labels:
        - __address__
        - __meta_kubernetes_service_annotation_prometheus_io_port
      action: replace
      target_label: __address__
      regex: ([^:]+)(?::\d+)?;(\d+)
      replacement: $1:$2
    # Copy every Kubernetes service label onto the target.
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    - source_labels:
        - __meta_kubernetes_namespace
      action: replace
      target_label: kubernetes_namespace
    # The service name is exposed as instance, kubernetes_name and job.
    - source_labels:
        - __meta_kubernetes_service_name
      action: replace
      target_label: instance
    - source_labels:
        - __meta_kubernetes_service_name
      action: replace
      target_label: kubernetes_name
    - source_labels:
        - __meta_kubernetes_service_name
      target_label: job
      replacement: ${1}
# Node exporter targets, found through the Kubernetes endpoints role.
- job_name: 'node-exporter'
  scrape_interval: 60s
  kubernetes_sd_configs:
    - role: endpoints
  relabel_configs:
    # Discard every endpoint that does not belong to the node-exporter
    # service.
    - action: keep
      source_labels: [__meta_kubernetes_service_name]
      regex: 'node-exporter'
    # Publish the value of __meta_kubernetes_pod_node_name as "hostname".
    - action: replace
      source_labels: [__meta_kubernetes_pod_node_name]
      target_label: hostname
# Generic job for annotated services, discovered through the Kubernetes
# endpoints role. Behaviour is driven by the prometheus.io/* annotations.
- job_name: 'kubernetes-service-endpoints'
  kubernetes_sd_configs:
    - role: endpoints
  scrape_interval: 60s
  relabel_configs:
    # Services listed here are excluded from this generic job by name.
    - source_labels:
        - __meta_kubernetes_service_name
      action: drop
      regex: '(openstack-metrics|prom-metrics|ceph-mgr|node-exporter)'
    # Honour the prometheus.io/scrape annotation. The pattern is quoted so
    # that it is always read as the string "true", never as a YAML boolean.
    - source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scrape
      action: keep
      regex: 'true'
    # prometheus.io/scheme, when it is http or https, selects the scheme.
    - source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scheme
      action: replace
      target_label: __scheme__
      regex: (https?)
    # prometheus.io/path, when non-empty, overrides the metrics path.
    - source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_path
      action: replace
      target_label: __metrics_path__
      regex: (.+)
    # prometheus.io/port, when numeric, replaces the port of the discovered
    # address; the pattern discards any port already present in __address__.
    - source_labels:
        - __address__
        - __meta_kubernetes_service_annotation_prometheus_io_port
      action: replace
      target_label: __address__
      regex: ([^:]+)(?::\d+)?;(\d+)
      replacement: $1:$2
    # Copy every Kubernetes service label onto the target.
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    - source_labels:
        - __meta_kubernetes_namespace
      action: replace
      target_label: kubernetes_namespace
    # The service name is exposed as both kubernetes_name and job.
    - source_labels:
        - __meta_kubernetes_service_name
      action: replace
      target_label: kubernetes_name
    - source_labels:
        - __meta_kubernetes_service_name
      target_label: job
      replacement: ${1}
# Example scrape config for pods
|
|
#
|
|
# The relabeling allows the actual pod scrape endpoint to be configured via the
|
|
# following annotations:
|
|
#
|
|
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
|
|
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the
|
|
# pod's declared ports (default is a port-free target if none are declared).
|
|
# Generic job for annotated pods, discovered through the Kubernetes pod role.
- job_name: 'kubernetes-pods'
  kubernetes_sd_configs:
    - role: pod
  relabel_configs:
    # Honour the prometheus.io/scrape annotation. The pattern is quoted so
    # that it is always read as the string "true", never as a YAML boolean.
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
      action: keep
      regex: 'true'
    # prometheus.io/path, when non-empty, overrides the metrics path.
    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
      action: replace
      target_label: __metrics_path__
      regex: (.+)
    # prometheus.io/port, when numeric, replaces the port of the discovered
    # address; the pattern discards any port already present in __address__.
    - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
      action: replace
      regex: ([^:]+)(?::\d+)?;(\d+)
      replacement: $1:$2
      target_label: __address__
    # Copy every Kubernetes pod label onto the target.
    - action: labelmap
      regex: __meta_kubernetes_pod_label_(.+)
    - source_labels: [__meta_kubernetes_namespace]
      action: replace
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_pod_name]
      action: replace
      target_label: kubernetes_pod_name
# Scrapes the calico-etcd service in the kube-system namespace, discovered
# through the Kubernetes service role.
- job_name: calico-etcd
  kubernetes_sd_configs:
    - role: service
  scrape_interval: 20s
  relabel_configs:
    # Copy every Kubernetes service label onto the target.
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    - action: keep
      source_labels:
        - __meta_kubernetes_service_name
      regex: "calico-etcd"
    # A keep rule only filters targets and never writes a label, so it takes
    # no target_label.
    - action: keep
      source_labels:
        - __meta_kubernetes_namespace
      regex: kube-system
    # NOTE(review): pod metadata is presumably not populated for the service
    # role, which would leave this label empty - confirm.
    - source_labels:
        - __meta_kubernetes_pod_name
      target_label: pod
    - source_labels:
        - __meta_kubernetes_service_name
      target_label: service
    # job is taken from the service name. No further rule rewrites it: a
    # pattern without a capture group combined with a ${1} replacement could
    # only ever blank the label.
    - source_labels:
        - __meta_kubernetes_service_name
      target_label: job
      replacement: ${1}
    # Static endpoint label.
    - target_label: endpoint
      replacement: "calico-etcd"
# Scrapes the ceph-mgr service in the ceph namespace, discovered through the
# Kubernetes service role.
- job_name: ceph-mgr
  kubernetes_sd_configs:
    - role: service
  scrape_interval: 20s
  relabel_configs:
    # Copy every Kubernetes service label onto the target.
    - action: labelmap
      regex: __meta_kubernetes_service_label_(.+)
    - action: keep
      source_labels:
        - __meta_kubernetes_service_name
      regex: "ceph-mgr"
    # Skip the service port that is itself named ceph-mgr.
    # NOTE(review): presumably the non-metrics port - confirm.
    - source_labels:
        - __meta_kubernetes_service_port_name
      action: drop
      regex: 'ceph-mgr'
    # A keep rule only filters targets and never writes a label, so it takes
    # no target_label.
    - action: keep
      source_labels:
        - __meta_kubernetes_namespace
      regex: ceph
    # NOTE(review): pod metadata is presumably not populated for the service
    # role, which would leave this label empty - confirm.
    - source_labels:
        - __meta_kubernetes_pod_name
      target_label: pod
    - source_labels:
        - __meta_kubernetes_service_name
      target_label: service
    # job is taken from the service name. No further rule rewrites it: a
    # pattern without a capture group combined with a ${1} replacement could
    # only ever blank the label.
    - source_labels:
        - __meta_kubernetes_service_name
      target_label: job
      replacement: ${1}
    # Static endpoint label.
    - target_label: endpoint
      replacement: "ceph-mgr"
# Alertmanager discovery: candidates are found among Kubernetes pods
# (role: pod) and narrowed down by the relabel rules below.
alerting:
|
|
alertmanagers:
|
|
- kubernetes_sd_configs:
|
|
- role: pod
|
|
# Service-account CA certificate and token mounted into the pod.
tls_config:
|
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
relabel_configs:
|
|
# Keep only pods whose "application" label is prometheus-alertmanager.
- source_labels: [__meta_kubernetes_pod_label_application]
|
|
regex: prometheus-alertmanager
|
|
action: keep
|
|
# Keep only the container port named alerts-api.
- source_labels: [__meta_kubernetes_pod_container_port_name]
|
|
regex: alerts-api
|
|
action: keep
|
|
# Drop the container port named peer-mesh.
# NOTE(review): the keep rule above already leaves only alerts-api ports,
# so this rule looks redundant - confirm before removing.
- source_labels: [__meta_kubernetes_pod_container_port_name]
|
|
regex: peer-mesh
|
|
action: drop
|
|
# Empty list: no rules are defined by default.
# NOTE(review): presumably filled in through value overrides - confirm
# against the chart templates that consume this key.
rules: []
|
|
...
|