openstack-helm-infra/prometheus/values.yaml

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for prometheus.
# This is a YAML-formatted file.
# Declare name/value pairs to be passed into your templates.
# name: value
---
images:
tags:
apache_proxy: docker.io/httpd:2.4
prometheus: docker.io/prom/prometheus:v2.12.0
helm_tests: docker.io/openstackhelm/heat:newton-ubuntu_xenial
dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
image_repo_sync: docker.io/docker:17.07.0
pull_policy: IfNotPresent
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
prometheus:
node_selector_key: openstack-control-plane
node_selector_value: enabled
job:
node_selector_key: openstack-control-plane
node_selector_value: enabled
test:
node_selector_key: openstack-control-plane
node_selector_value: enabled
pod:
env:
prometheus: null
security_context:
api:
pod:
runAsUser: 65534
container:
prometheus_perms:
runAsUser: 0
readOnlyRootFilesystem: false
apache_proxy:
runAsUser: 0
readOnlyRootFilesystem: false
prometheus:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
test:
pod:
runAsUser: 65534
container:
prometheus_helm_tests:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
weight:
default: 10
mounts:
prometheus:
prometheus:
init_container: null
replicas:
prometheus: 1
lifecycle:
upgrades:
statefulsets:
pod_replacement_strategy: RollingUpdate
termination_grace_period:
prometheus:
timeout: 30
resources:
enabled: false
prometheus:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "500m"
jobs:
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
tests:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
probes:
prometheus:
prometheus:
readiness:
enabled: true
params:
initialDelaySeconds: 30
timeoutSeconds: 30
liveness:
enabled: false
params:
initialDelaySeconds: 120
timeoutSeconds: 30
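# NOTE: The probe params above are passed through to the HTK probe
# generation snippet, so any Kubernetes probe field can be supplied here.
# A minimal override sketch enabling the liveness probe (the delay and
# period values are illustrative):
# pod:
#   probes:
#     prometheus:
#       prometheus:
#         liveness:
#           enabled: true
#           params:
#             initialDelaySeconds: 180
#             periodSeconds: 60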
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
monitoring:
name: prometheus
namespace: null
auth:
admin:
username: admin
password: changeme
federate:
username: federate
password: changeme
hosts:
default: prom-metrics
public: prometheus
host_fqdn_override:
default: null
# NOTE(srwilkers): this chart supports TLS for FQDN-overridden public
# endpoints using the following format:
# public:
# host: null
# tls:
# crt: null
# key: null
path:
default: null
scheme:
default: 'http'
port:
api:
default: 9090
http:
default: 80
alertmanager:
name: prometheus-alertmanager
namespace: null
hosts:
default: alerts-engine
public: prometheus-alertmanager
discovery: prometheus-alertmanager-discovery
host_fqdn_override:
default: null
path:
default: null
scheme:
default: 'http'
port:
api:
default: 9093
public: 80
mesh:
default: 9094
ldap:
hosts:
default: ldap
auth:
admin:
bind: "cn=admin,dc=cluster,dc=local"
password: password
host_fqdn_override:
default: null
path:
default: "/ou=People,dc=cluster,dc=local"
scheme:
default: ldap
port:
ldap:
default: 389
dependencies:
dynamic:
common:
local_image_registry:
jobs:
- prometheus-image-repo-sync
services:
- endpoint: node
service: local_image_registry
static:
image_repo_sync:
services:
- endpoint: internal
service: local_image_registry
prometheus:
services: null
tests:
services:
- endpoint: internal
service: monitoring
monitoring:
prometheus:
enabled: true
prometheus:
scrape: true
network:
prometheus:
ingress:
public: true
classes:
namespace: "nginx"
cluster: "nginx-cluster"
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
nginx.ingress.kubernetes.io/affinity: cookie
nginx.ingress.kubernetes.io/session-cookie-name: kube-ingress-session-prometheus
nginx.ingress.kubernetes.io/session-cookie-hash: sha1
nginx.ingress.kubernetes.io/session-cookie-expires: "600"
nginx.ingress.kubernetes.io/session-cookie-max-age: "600"
node_port:
enabled: false
port: 30900
network_policy:
prometheus:
ingress:
- {}
egress:
- {}
secrets:
tls:
monitoring:
prometheus:
public: prometheus-tls-public
tls_configs:
# If client certificates are required to connect to metrics endpoints, they
# can be configured here. They will be mounted in the pod under /tls_configs
# and can be referenced in scrape configs.
# The filenames will be the key and subkey concatenated with a ".", e.g.:
# /tls_configs/kubernetes-etcd.ca.pem
# /tls_configs/kubernetes-etcd.crt.pem
# /tls_configs/kubernetes-etcd.key.pem
# From the following:
# kubernetes-etcd:
# ca.pem: |
# -----BEGIN CERTIFICATE-----
# -----END CERTIFICATE-----
# crt.pem: |
# -----BEGIN CERTIFICATE-----
# -----END CERTIFICATE-----
# key.pem: |
# -----BEGIN RSA PRIVATE KEY-----
# -----END RSA PRIVATE KEY-----
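# A scrape config can then reference the mounted files in its tls_config,
# e.g. (a sketch; the kubernetes-etcd key above is illustrative):
# tls_config:
#   ca_file: /tls_configs/kubernetes-etcd.ca.pem
#   cert_file: /tls_configs/kubernetes-etcd.crt.pem
#   key_file: /tls_configs/kubernetes-etcd.key.pem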
storage:
enabled: true
pvc:
name: prometheus-pvc
access_mode: ["ReadWriteOnce"]
requests:
storage: 5Gi
storage_class: general
manifests:
configmap_bin: true
configmap_etc: true
ingress: true
helm_tests: true
job_image_repo_sync: true
network_policy: true
secret_ingress_tls: true
secret_prometheus: true
service_ingress: true
service: true
statefulset_prometheus: true
conf:
httpd: |
ServerRoot "/usr/local/apache2"
Listen 80
LoadModule mpm_event_module modules/mod_mpm_event.so
LoadModule authn_file_module modules/mod_authn_file.so
LoadModule authn_core_module modules/mod_authn_core.so
LoadModule authz_host_module modules/mod_authz_host.so
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
LoadModule authz_user_module modules/mod_authz_user.so
LoadModule authz_core_module modules/mod_authz_core.so
LoadModule access_compat_module modules/mod_access_compat.so
LoadModule auth_basic_module modules/mod_auth_basic.so
LoadModule ldap_module modules/mod_ldap.so
LoadModule authnz_ldap_module modules/mod_authnz_ldap.so
LoadModule reqtimeout_module modules/mod_reqtimeout.so
LoadModule filter_module modules/mod_filter.so
LoadModule proxy_html_module modules/mod_proxy_html.so
LoadModule log_config_module modules/mod_log_config.so
LoadModule env_module modules/mod_env.so
LoadModule headers_module modules/mod_headers.so
LoadModule setenvif_module modules/mod_setenvif.so
LoadModule version_module modules/mod_version.so
LoadModule proxy_module modules/mod_proxy.so
LoadModule proxy_connect_module modules/mod_proxy_connect.so
LoadModule proxy_http_module modules/mod_proxy_http.so
LoadModule proxy_balancer_module modules/mod_proxy_balancer.so
LoadModule slotmem_shm_module modules/mod_slotmem_shm.so
LoadModule slotmem_plain_module modules/mod_slotmem_plain.so
LoadModule unixd_module modules/mod_unixd.so
LoadModule status_module modules/mod_status.so
LoadModule autoindex_module modules/mod_autoindex.so
<IfModule unixd_module>
User daemon
Group daemon
</IfModule>
<Directory />
AllowOverride none
Require all denied
</Directory>
<Files ".ht*">
Require all denied
</Files>
ErrorLog /dev/stderr
LogLevel warn
<IfModule log_config_module>
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
LogFormat "%{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" proxy
LogFormat "%h %l %u %t \"%r\" %>s %b" common
<IfModule logio_module>
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
</IfModule>
SetEnvIf X-Forwarded-For "^.*\..*\..*\..*" forwarded
CustomLog /dev/stdout common
CustomLog /dev/stdout combined
CustomLog /dev/stdout proxy env=forwarded
</IfModule>
<Directory "/usr/local/apache2/cgi-bin">
AllowOverride None
Options None
Require all granted
</Directory>
<IfModule headers_module>
RequestHeader unset Proxy early
</IfModule>
<IfModule proxy_html_module>
Include conf/extra/proxy-html.conf
</IfModule>
<VirtualHost *:80>
# Expose metrics to all users, as this is not sensitive information and
# circumvents the inability of Prometheus to interpolate environment vars
# in its configuration file
<Location /metrics>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
Satisfy Any
Allow from all
</Location>
# Expose the /federate endpoint to all users, as this is also not
# sensitive information and circumvents the inability of Prometheus to
# interpolate environment vars in its configuration file
<Location /federate>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/federate
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/federate
Satisfy Any
Allow from all
</Location>
# Restrict general (LDAP-authenticated) user access to the root and /graph
# endpoints: trusted users should only be able to query Prometheus for
# metrics, not view its targets, configuration, flags, or build info
<Location />
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file ldap
AuthUserFile /usr/local/apache2/conf/.htpasswd
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
Require valid-user
</Location>
<Location /graph>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file ldap
AuthUserFile /usr/local/apache2/conf/.htpasswd
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
Require valid-user
</Location>
# Restrict access to the /config (dashboard) and /api/v1/status/config (http) endpoints
# to the admin user
<Location /config>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
<Location /api/v1/status/config>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /flags (dashboard) and /api/v1/status/flags (http) endpoints
# to the admin user
<Location /flags>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
<Location /api/v1/status/flags>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /status (dashboard) endpoint to the admin user
<Location /status>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /rules (dashboard) endpoint to the admin user
<Location /rules>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /targets (dashboard) and /api/v1/targets (http) endpoints
# to the admin user
<Location /targets>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
<Location /api/v1/targets>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /api/v1/admin/tsdb/ endpoints (http) to the admin user.
# These endpoints are disabled by default, but are included here to ensure only
# an admin user has access to these endpoints when enabled
<Location /api/v1/admin/tsdb/>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
</VirtualHost>
prometheus:
# Consumed by a Prometheus helper function to generate the command line
# flags for configuring the Prometheus service
command_line_flags:
log.level: info
query.max_concurrency: 20
query.timeout: 2m
storage.tsdb.path: /var/lib/prometheus/data
storage.tsdb.retention: 7d
# NOTE(srwilkers): These settings default to false, but they are
# exposed here to allow enabling if desired. Please note the security
# impacts of enabling these flags. More information regarding the impacts
# can be found here: https://prometheus.io/docs/operating/security/
#
# If set to true, all administrative functionality is exposed via the http
# /api/*/admin/ path
web.enable_admin_api: false
# If set to true, allows for http reloads and shutdown of Prometheus
web.enable_lifecycle: false
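# Assuming the helper follows the usual underscore-to-dash flag convention,
# the defaults above would render roughly as (illustrative):
#   --log.level=info --query.max-concurrency=20 --query.timeout=2m
#   --storage.tsdb.path=/var/lib/prometheus/data --storage.tsdb.retention=7d
# with boolean flags such as --web.enable-admin-api emitted only when set
# to true.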
scrape_configs:
template: |
{{- $promHost := tuple "monitoring" "public" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}
{{- if not (empty .Values.conf.prometheus.rules)}}
rule_files:
{{- $rulesKeys := keys .Values.conf.prometheus.rules -}}
{{- range $rule := $rulesKeys }}
{{ printf "- /etc/config/rules/%s.rules" $rule }}
{{- end }}
{{- end }}
global:
scrape_interval: 60s
evaluation_interval: 60s
external_labels:
prometheus_host: {{$promHost}}
scrape_configs:
- job_name: kubelet
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
scrape_interval: 45s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels:
- __meta_kubernetes_node_name
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels:
- __meta_kubernetes_node_name
action: replace
target_label: kubernetes_io_hostname
# Scrape config for Kubelet cAdvisor.
#
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
# (those whose names begin with 'container_') have been removed from the
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
# retrieve those metrics.
#
# In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
# HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
# in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
# the --cadvisor-port=0 Kubelet flag).
#
# This job is not necessary and should be removed in Kubernetes 1.6 and
# earlier versions, or it will cause the metrics to be scraped twice.
- job_name: 'kubernetes-cadvisor'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels:
- __meta_kubernetes_node_name
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
metric_relabel_configs:
- source_labels:
- __name__
regex: 'container_network_tcp_usage_total'
action: drop
- source_labels:
- __name__
regex: 'container_tasks_state'
action: drop
- source_labels:
- __name__
regex: 'container_network_udp_usage_total'
action: drop
- source_labels:
- __name__
regex: 'container_memory_failures_total'
action: drop
- source_labels:
- __name__
regex: 'container_cpu_load_average_10s'
action: drop
- source_labels:
- __name__
regex: 'container_cpu_system_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_cpu_user_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_inodes_free'
action: drop
- source_labels:
- __name__
regex: 'container_fs_inodes_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_io_current'
action: drop
- source_labels:
- __name__
regex: 'container_fs_io_time_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_io_time_weighted_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_read_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_reads_merged_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_reads_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_sector_reads_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_sector_writes_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_write_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_writes_bytes_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_writes_merged_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_writes_total'
action: drop
- source_labels:
- __name__
regex: 'container_last_seen'
action: drop
- source_labels:
- __name__
regex: 'container_memory_cache'
action: drop
- source_labels:
- __name__
regex: 'container_memory_failcnt'
action: drop
- source_labels:
- __name__
regex: 'container_memory_max_usage_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_memory_rss'
action: drop
- source_labels:
- __name__
regex: 'container_memory_swap'
action: drop
- source_labels:
- __name__
regex: 'container_memory_usage_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_network_receive_errors_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_receive_packets_dropped_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_receive_packets_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_transmit_errors_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_transmit_packets_dropped_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_transmit_packets_total'
action: drop
- source_labels:
- __name__
regex: 'container_spec_cpu_period'
action: drop
- source_labels:
- __name__
regex: 'container_spec_cpu_shares'
action: drop
- source_labels:
- __name__
regex: 'container_spec_memory_limit_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_spec_memory_reservation_limit_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_spec_memory_swap_limit_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_start_time_seconds'
action: drop
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
- job_name: 'apiserver'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 45s
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
# insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# Keep only the default/kubernetes service endpoints for the https port. This
# adds a target for each API server for which Kubernetes adds an endpoint to
# the default/kubernetes service.
relabel_configs:
- source_labels:
- __meta_kubernetes_namespace
- __meta_kubernetes_service_name
- __meta_kubernetes_endpoint_port_name
action: keep
regex: default;kubernetes;https
metric_relabel_configs:
- source_labels:
- __name__
regex: 'apiserver_admission_controller_admission_latencies_seconds_bucket'
action: drop
- source_labels:
- __name__
regex: 'rest_client_request_latency_seconds_bucket'
action: drop
- source_labels:
- __name__
regex: 'apiserver_response_sizes_bucket'
action: drop
- source_labels:
- __name__
regex: 'apiserver_admission_step_admission_latencies_seconds_bucket'
action: drop
- source_labels:
- __name__
regex: 'apiserver_admission_controller_admission_latencies_seconds_count'
action: drop
- source_labels:
- __name__
regex: 'apiserver_admission_controller_admission_latencies_seconds_sum'
action: drop
- source_labels:
- __name__
regex: 'apiserver_request_latencies_summary'
action: drop
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
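# For example, a Service exposing metrics on a non-default port and path
# could be annotated along these lines (values are illustrative):
# metadata:
#   annotations:
#     prometheus.io/scrape: "true"
#     prometheus.io/port: "9102"
#     prometheus.io/path: "/stats/metrics"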
- job_name: 'openstack-exporter'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels:
- __meta_kubernetes_service_name
action: keep
regex: "openstack-metrics"
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scrape
action: keep
regex: true
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scheme
action: replace
target_label: __scheme__
regex: (https?)
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_path
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels:
- __address__
- __meta_kubernetes_service_annotation_prometheus_io_port
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels:
- __meta_kubernetes_namespace
action: replace
target_label: kubernetes_namespace
- source_labels:
- __meta_kubernetes_service_name
action: replace
target_label: instance
- source_labels:
- __meta_kubernetes_service_name
action: replace
target_label: kubernetes_name
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- job_name: 'node-exporter'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels:
- __meta_kubernetes_service_name
action: keep
regex: 'node-exporter'
- source_labels:
- __meta_kubernetes_pod_node_name
action: replace
target_label: hostname
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels:
- __meta_kubernetes_service_name
action: drop
regex: '(openstack-metrics|prom-metrics|ceph-mgr|node-exporter)'
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scrape
action: keep
regex: true
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scheme
action: replace
target_label: __scheme__
regex: (https?)
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_path
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels:
- __address__
- __meta_kubernetes_service_annotation_prometheus_io_port
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels:
- __meta_kubernetes_namespace
action: replace
target_label: kubernetes_namespace
- source_labels:
- __meta_kubernetes_service_name
action: replace
target_label: kubernetes_name
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
# Example scrape config for pods
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the
# pod's declared ports (default is a port-free target if none are declared).
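# For example, a pod template annotated as follows would be scraped on
# port 8080 (values are illustrative):
# metadata:
#   annotations:
#     prometheus.io/scrape: "true"
#     prometheus.io/port: "8080"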
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
- job_name: calico-etcd
kubernetes_sd_configs:
- role: service
scrape_interval: 20s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- action: keep
source_labels:
- __meta_kubernetes_service_name
regex: "calico-etcd"
- action: keep
source_labels:
- __meta_kubernetes_namespace
regex: kube-system
target_label: namespace
- source_labels:
- __meta_kubernetes_pod_name
target_label: pod
- source_labels:
- __meta_kubernetes_service_name
target_label: service
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- source_labels:
- __meta_kubernetes_service_label
target_label: job
regex: calico-etcd
replacement: ${1}
- target_label: endpoint
replacement: "calico-etcd"
- job_name: ceph-mgr
kubernetes_sd_configs:
- role: service
scrape_interval: 20s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- action: keep
source_labels:
- __meta_kubernetes_service_name
regex: "ceph-mgr"
- source_labels:
- __meta_kubernetes_service_port_name
action: drop
regex: 'ceph-mgr'
- action: keep
source_labels:
- __meta_kubernetes_namespace
regex: ceph
target_label: namespace
- source_labels:
- __meta_kubernetes_pod_name
target_label: pod
- source_labels:
- __meta_kubernetes_service_name
target_label: service
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- source_labels:
- __meta_kubernetes_service_label
target_label: job
regex: ceph-mgr
replacement: ${1}
- target_label: endpoint
replacement: "ceph-mgr"
alerting:
alertmanagers:
- kubernetes_sd_configs:
- role: pod
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_application]
regex: prometheus-alertmanager
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_name]
regex: alerts-api
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_name]
regex: peer-mesh
action: drop
rules: []
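# Keys added under rules are rendered into /etc/config/rules/<key>.rules by
# the rule_files section of the scrape_configs template above. A minimal
# sketch (group name and expression are illustrative):
# rules:
#   basic:
#     groups:
#       - name: basic.rules
#         rules:
#           - alert: InstanceDown
#             expr: up == 0
#             for: 5m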
...