Terekhin, Alexey (at4945) aa3efe9715 Adding the feature to launch Prometheus process with custom script
This change adds feature to launch Prometheus process using a custom script which should be stored in override values. Because the known issue https://github.com/prometheus/prometheus/issues/6934 is still open many years, we are going to struggle with growing WAL files using our custom downstream wrapper script which stops Prometheus process and deletes WALs.
This solution can not fit all customers because completely kills wal cached data but it is ok for our purposes. Such way I just added the feature to use another custom script to launch Prometheus and left original functionality by default. Default/custom mode are defined in 'values.yaml' as the body of the custom launcher script.

Change-Id: Ie02ea1d6a7de5c676e2e96f3dcd6aca172af4afb
2022-12-29 16:09:22 -08:00

1137 lines
42 KiB
YAML

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for prometheus.
# This is a YAML-formatted file.
# Declare name/value pairs to be passed into your templates.
# name: value
---
images:
tags:
apache_proxy: docker.io/library/httpd:2.4
prometheus: docker.io/prom/prometheus:v2.25.0
helm_tests: docker.io/openstackhelm/heat:wallaby-ubuntu_focal
dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
image_repo_sync: docker.io/library/docker:17.07.0
pull_policy: IfNotPresent
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
prometheus:
node_selector_key: openstack-control-plane
node_selector_value: enabled
job:
node_selector_key: openstack-control-plane
node_selector_value: enabled
test:
node_selector_key: openstack-control-plane
node_selector_value: enabled
pod:
env:
prometheus: null
security_context:
api:
pod:
runAsUser: 65534
container:
prometheus_perms:
runAsUser: 0
readOnlyRootFilesystem: false
apache_proxy:
runAsUser: 0
readOnlyRootFilesystem: false
prometheus:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
test:
pod:
runAsUser: 65534
container:
prometheus_helm_tests:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
weight:
default: 10
mounts:
prometheus:
prometheus:
init_container: null
replicas:
prometheus: 1
lifecycle:
upgrades:
statefulsets:
pod_replacement_strategy: RollingUpdate
termination_grace_period:
prometheus:
timeout: 30
resources:
enabled: false
prometheus:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "500m"
jobs:
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
tests:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
probes:
prometheus:
prometheus:
readiness:
enabled: true
params:
initialDelaySeconds: 30
timeoutSeconds: 30
liveness:
enabled: false
params:
initialDelaySeconds: 120
timeoutSeconds: 30
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
oci_image_registry:
name: oci-image-registry
namespace: oci-image-registry
auth:
enabled: false
prometheus:
username: prometheus
password: password
hosts:
default: localhost
host_fqdn_override:
default: null
port:
registry:
default: null
monitoring:
name: prometheus
namespace: null
auth:
admin:
username: admin
password: changeme
federate:
username: federate
password: changeme
hosts:
default: prom-metrics
public: prometheus
host_fqdn_override:
default: null
# NOTE(srwilkers): this chart supports TLS for fqdn over-ridden public
# endpoints using the following format:
# public:
# host: null
# tls:
# crt: null
# key: null
path:
default: null
scheme:
default: 'http'
port:
api:
default: 9090
http:
default: 80
alertmanager:
name: prometheus-alertmanager
namespace: null
hosts:
default: alerts-engine
public: prometheus-alertmanager
discovery: prometheus-alertmanager-discovery
host_fqdn_override:
default: null
path:
default: null
scheme:
default: 'http'
port:
api:
default: 9093
public: 80
mesh:
default: 9094
ldap:
hosts:
default: ldap
auth:
admin:
bind: "cn=admin,dc=cluster,dc=local"
password: password
host_fqdn_override:
default: null
path:
default: "/ou=People,dc=cluster,dc=local"
scheme:
default: ldap
port:
ldap:
default: 389
dependencies:
dynamic:
common:
local_image_registry:
jobs:
- prometheus-image-repo-sync
services:
- endpoint: node
service: local_image_registry
static:
image_repo_sync:
services:
- endpoint: internal
service: local_image_registry
prometheus:
services: null
tests:
services:
- endpoint: internal
service: monitoring
monitoring:
prometheus:
enabled: true
prometheus:
scrape: true
network:
prometheus:
ingress:
public: true
classes:
namespace: "nginx"
cluster: "nginx-cluster"
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
nginx.ingress.kubernetes.io/affinity: cookie
nginx.ingress.kubernetes.io/session-cookie-name: kube-ingress-session-prometheus
nginx.ingress.kubernetes.io/session-cookie-hash: sha1
nginx.ingress.kubernetes.io/session-cookie-expires: "600"
nginx.ingress.kubernetes.io/session-cookie-max-age: "600"
node_port:
enabled: false
port: 30900
network_policy:
prometheus:
ingress:
- {}
egress:
- {}
proc_launch:
prometheus:
default: true
custom_launch: |
while true
do
echo "If 'proc_launch.prometheus.default: false'."
echo "Your custom shell script code you can put here."
sleep 10
done
secrets:
oci_image_registry:
prometheus: prometheus-oci-image-registry-key
tls:
monitoring:
prometheus:
public: prometheus-tls-public
internal: prometheus-tls-api
tls_configs:
# If client certificates are required to connect to metrics endpoints, they
# can be configured here. They will be mounted in the pod under /tls_configs
# and can be referenced in scrape configs.
# The filenames will be the key and subkey concatenanted with a ".", e.g.:
# /tls_configs/kubernetes-etcd.ca.pem
# /tls_configs/kubernetes-etcd.crt.pem
# /tls_configs/kubernetes-etcd.key.pem
# From the following:
# kubernetes-etcd:
# ca.pem: |
# -----BEGIN CERTIFICATE-----
# -----END CERTIFICATE-----
# crt.pem: |
# -----BEGIN CERTIFICATE-----
# -----END CERTIFICATE-----
# key.pem: |
# -----BEGIN RSA PRIVATE KEY-----
# -----END RSA PRIVATE KEY-----
storage:
enabled: true
pvc:
name: prometheus-pvc
access_mode: ["ReadWriteOnce"]
requests:
storage: 5Gi
storage_class: general
manifests:
certificates: false
configmap_bin: true
configmap_etc: true
ingress: true
helm_tests: true
job_image_repo_sync: true
network_policy: true
secret_ingress_tls: true
secret_prometheus: true
secret_registry: true
service_ingress: true
service: true
statefulset_prometheus: true
conf:
httpd: |
ServerRoot "/usr/local/apache2"
Listen 80
LoadModule mpm_event_module modules/mod_mpm_event.so
LoadModule authn_file_module modules/mod_authn_file.so
LoadModule authn_core_module modules/mod_authn_core.so
LoadModule authz_host_module modules/mod_authz_host.so
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
LoadModule authz_user_module modules/mod_authz_user.so
LoadModule authz_core_module modules/mod_authz_core.so
LoadModule access_compat_module modules/mod_access_compat.so
LoadModule auth_basic_module modules/mod_auth_basic.so
LoadModule ldap_module modules/mod_ldap.so
LoadModule authnz_ldap_module modules/mod_authnz_ldap.so
LoadModule reqtimeout_module modules/mod_reqtimeout.so
LoadModule filter_module modules/mod_filter.so
LoadModule proxy_html_module modules/mod_proxy_html.so
LoadModule log_config_module modules/mod_log_config.so
LoadModule env_module modules/mod_env.so
LoadModule headers_module modules/mod_headers.so
LoadModule setenvif_module modules/mod_setenvif.so
LoadModule version_module modules/mod_version.so
LoadModule proxy_module modules/mod_proxy.so
LoadModule proxy_connect_module modules/mod_proxy_connect.so
LoadModule proxy_http_module modules/mod_proxy_http.so
LoadModule proxy_balancer_module modules/mod_proxy_balancer.so
LoadModule slotmem_shm_module modules/mod_slotmem_shm.so
LoadModule slotmem_plain_module modules/mod_slotmem_plain.so
LoadModule unixd_module modules/mod_unixd.so
LoadModule status_module modules/mod_status.so
LoadModule autoindex_module modules/mod_autoindex.so
<IfModule unixd_module>
User daemon
Group daemon
</IfModule>
<Directory />
AllowOverride none
Require all denied
</Directory>
<Files ".ht*">
Require all denied
</Files>
ErrorLog /dev/stderr
LogLevel warn
<IfModule log_config_module>
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
LogFormat "%{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" proxy
LogFormat "%h %l %u %t \"%r\" %>s %b" common
<IfModule logio_module>
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
</IfModule>
SetEnvIf X-Forwarded-For "^.*\..*\..*\..*" forwarded
CustomLog /dev/stdout common
CustomLog /dev/stdout combined
CustomLog /dev/stdout proxy env=forwarded
</IfModule>
<Directory "/usr/local/apache2/cgi-bin">
AllowOverride None
Options None
Require all granted
</Directory>
<IfModule headers_module>
RequestHeader unset Proxy early
</IfModule>
<IfModule proxy_html_module>
Include conf/extra/proxy-html.conf
</IfModule>
<VirtualHost *:80>
# Expose metrics to all users, as this is not sensitive information and
# circumvents the inability of Prometheus to interpolate environment vars
# in its configuration file
<Location /metrics>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
Satisfy Any
Allow from all
</Location>
# Expose the /federate endpoint to all users, as this is also not
# sensitive information and circumvents the inability of Prometheus to
# interpolate environment vars in its configuration file
<Location /federate>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
Satisfy Any
Allow from all
</Location>
# Restrict general user (LDAP) access to the /graph endpoint, as general trusted
# users should only be able to query Prometheus for metrics and not have access
# to information like targets, configuration, flags or build info for Prometheus
<Location />
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file ldap
AuthUserFile /usr/local/apache2/conf/.htpasswd
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
Require valid-user
</Location>
<Location /graph>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file ldap
AuthUserFile /usr/local/apache2/conf/.htpasswd
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
Require valid-user
</Location>
# Restrict access to the /config (dashboard) and /api/v1/status/config (http) endpoints
# to the admin user
<Location /config>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
<Location /api/v1/status/config>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /flags (dashboard) and /api/v1/status/flags (http) endpoints
# to the admin user
<Location /flags>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
<Location /api/v1/status/flags>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /status (dashboard) endpoint to the admin user
<Location /status>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /rules (dashboard) endpoint to the admin user
<Location /rules>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /targets (dashboard) and /api/v1/targets (http) endpoints
# to the admin user
<Location /targets>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
<Location /api/v1/targets>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /api/v1/admin/tsdb/ endpoints (http) to the admin user.
# These endpoints are disabled by default, but are included here to ensure only
# an admin user has access to these endpoints when enabled
<Location /api/v1/admin/tsdb/>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
</VirtualHost>
prometheus:
# Consumed by a prometheus helper function to generate the command line flags
# for configuring the prometheus service
command_line_flags:
log.level: info
query.max_concurrency: 20
query.timeout: 2m
storage.tsdb.path: /var/lib/prometheus/data
storage.tsdb.retention.time: 7d
# NOTE(srwilkers): These settings default to false, but they are
# exposed here to allow enabling if desired. Please note the security
# impacts of enabling these flags. More information regarding the impacts
# can be found here: https://prometheus.io/docs/operating/security/
#
# If set to true, all administrative functionality is exposed via the http
# /api/*/admin/ path
web.enable_admin_api: false
# If set to true, allows for http reloads and shutdown of Prometheus
web.enable_lifecycle: false
scrape_configs:
template: |
{{- $promHost := tuple "monitoring" "public" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}
{{- if not (empty .Values.conf.prometheus.rules)}}
rule_files:
{{- $rulesKeys := keys .Values.conf.prometheus.rules -}}
{{- range $rule := $rulesKeys }}
{{ printf "- /etc/config/rules/%s.rules" $rule }}
{{- end }}
{{- end }}
global:
scrape_interval: 60s
evaluation_interval: 60s
external_labels:
prometheus_host: {{$promHost}}
scrape_configs:
- job_name: kubelet
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
scrape_interval: 45s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels:
- __meta_kubernetes_node_name
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels:
- __meta_kubernetes_node_name
action: replace
target_label: kubernetes_io_hostname
# Scrape config for Kubelet cAdvisor.
#
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
# (those whose names begin with 'container_') have been removed from the
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
# retrieve those metrics.
#
# In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
# HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
# in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
# the --cadvisor-port=0 Kubelet flag).
#
# This job is not necessary and should be removed in Kubernetes 1.6 and
# earlier versions, or it will cause the metrics to be scraped twice.
- job_name: 'kubernetes-cadvisor'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels:
- __meta_kubernetes_node_name
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
metric_relabel_configs:
- source_labels:
- __name__
regex: 'container_network_tcp_usage_total'
action: drop
- source_labels:
- __name__
regex: 'container_tasks_state'
action: drop
- source_labels:
- __name__
regex: 'container_network_udp_usage_total'
action: drop
- source_labels:
- __name__
regex: 'container_memory_failures_total'
action: drop
- source_labels:
- __name__
regex: 'container_cpu_load_average_10s'
action: drop
- source_labels:
- __name__
regex: 'container_cpu_system_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_cpu_user_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_inodes_free'
action: drop
- source_labels:
- __name__
regex: 'container_fs_inodes_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_io_current'
action: drop
- source_labels:
- __name__
regex: 'container_fs_io_time_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_io_time_weighted_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_read_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_reads_merged_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_reads_merged_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_reads_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_sector_reads_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_sector_writes_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_write_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_writes_bytes_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_writes_merged_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_writes_total'
action: drop
- source_labels:
- __name__
regex: 'container_last_seen'
action: drop
- source_labels:
- __name__
regex: 'container_memory_cache'
action: drop
- source_labels:
- __name__
regex: 'container_memory_failcnt'
action: drop
- source_labels:
- __name__
regex: 'container_memory_max_usage_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_memory_rss'
action: drop
- source_labels:
- __name__
regex: 'container_memory_swap'
action: drop
- source_labels:
- __name__
regex: 'container_memory_usage_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_network_receive_errors_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_receive_packets_dropped_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_receive_packets_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_transmit_errors_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_transmit_packets_dropped_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_transmit_packets_total'
action: drop
- source_labels:
- __name__
regex: 'container_spec_cpu_period'
action: drop
- source_labels:
- __name__
regex: 'container_spec_cpu_shares'
action: drop
- source_labels:
- __name__
regex: 'container_spec_memory_limit_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_spec_memory_reservation_limit_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_spec_memory_swap_limit_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_start_time_seconds'
action: drop
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
- job_name: 'apiserver'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 45s
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
# insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# Keep only the default/kubernetes service endpoints for the https port. This
# will add targets for each API server which Kubernetes adds an endpoint to
# the default/kubernetes service.
relabel_configs:
- source_labels:
- __meta_kubernetes_namespace
- __meta_kubernetes_service_name
- __meta_kubernetes_endpoint_port_name
action: keep
regex: default;kubernetes;https
metric_relabel_configs:
- source_labels:
- __name__
regex: 'apiserver_admission_controller_admission_latencies_seconds_bucket'
action: drop
- source_labels:
- __name__
regex: 'rest_client_request_latency_seconds_bucket'
action: drop
- source_labels:
- __name__
regex: 'apiserver_response_sizes_bucket'
action: drop
- source_labels:
- __name__
regex: 'apiserver_admission_step_admission_latencies_seconds_bucket'
action: drop
- source_labels:
- __name__
regex: 'apiserver_admission_controller_admission_latencies_seconds_count'
action: drop
- source_labels:
- __name__
regex: 'apiserver_admission_controller_admission_latencies_seconds_sum'
action: drop
- source_labels:
- __name__
regex: 'apiserver_request_latencies_summary'
action: drop
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
- job_name: 'openstack-exporter'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels:
- __meta_kubernetes_service_name
action: keep
regex: "openstack-metrics"
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scrape
action: keep
regex: true
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scheme
action: replace
target_label: __scheme__
regex: (https?)
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_path
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels:
- __address__
- __meta_kubernetes_service_annotation_prometheus_io_port
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels:
- __meta_kubernetes_namespace
action: replace
target_label: kubernetes_namespace
- source_labels:
- __meta_kubernetes_service_name
action: replace
target_label: instance
- source_labels:
- __meta_kubernetes_service_name
action: replace
target_label: kubernetes_name
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- job_name: 'node-exporter'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels:
- __meta_kubernetes_service_name
action: keep
regex: 'node-exporter'
- source_labels:
- __meta_kubernetes_pod_node_name
action: replace
target_label: hostname
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels:
- __meta_kubernetes_service_name
action: drop
regex: '(openstack-metrics|prom-metrics|ceph-mgr|node-exporter)'
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scrape
action: keep
regex: true
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scheme
action: replace
target_label: __scheme__
regex: (https?)
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_path
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels:
- __address__
- __meta_kubernetes_service_annotation_prometheus_io_port
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels:
- __meta_kubernetes_namespace
action: replace
target_label: kubernetes_namespace
- source_labels:
- __meta_kubernetes_service_name
action: replace
target_label: kubernetes_name
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
# Example scrape config for pods
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the
# pod's declared ports (default is a port-free target if none are declared).
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
- job_name: calico-etcd
kubernetes_sd_configs:
- role: service
scrape_interval: 20s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- action: keep
source_labels:
- __meta_kubernetes_service_name
regex: "calico-etcd"
- action: keep
source_labels:
- __meta_kubernetes_namespace
regex: kube-system
target_label: namespace
- source_labels:
- __meta_kubernetes_pod_name
target_label: pod
- source_labels:
- __meta_kubernetes_service_name
target_label: service
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- source_labels:
- __meta_kubernetes_service_label
target_label: job
regex: calico-etcd
replacement: ${1}
- target_label: endpoint
replacement: "calico-etcd"
- job_name: ceph-mgr
kubernetes_sd_configs:
- role: service
scrape_interval: 20s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- action: keep
source_labels:
- __meta_kubernetes_service_name
regex: "ceph-mgr"
- source_labels:
- __meta_kubernetes_service_port_name
action: drop
regex: 'ceph-mgr'
- action: keep
source_labels:
- __meta_kubernetes_namespace
regex: ceph
target_label: namespace
- source_labels:
- __meta_kubernetes_pod_name
target_label: pod
- source_labels:
- __meta_kubernetes_service_name
target_label: service
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- source_labels:
- __meta_kubernetes_service_label
target_label: job
regex: ceph-mgr
replacement: ${1}
- target_label: endpoint
replacement: "ceph-mgr"
alerting:
alertmanagers:
- kubernetes_sd_configs:
- role: pod
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_application]
regex: prometheus-alertmanager
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_name]
regex: alerts-api
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_name]
regex: peer-mesh
action: drop
rules: []
...