From c2ae21fd97beb987991eb68c6c306c607e4d9d78 Mon Sep 17 00:00:00 2001 From: Piotr Parczewski Date: Thu, 8 Jul 2021 16:31:44 +0200 Subject: [PATCH] Reduce container metrics cardinality Adds support for passing extra runtime options to cAdvisor. By default new options disable exporting rarely useful metrics and labels by cAdvisor. This helps reducing the load on Prometheus and cAdvisor itself. Change-Id: I81f3845d6cd03a70a0c8569f8d0ea421027df083 --- ansible/roles/prometheus/defaults/main.yml | 2 ++ .../templates/prometheus-cadvisor.json.j2 | 2 +- etc/kolla/globals.yml | 4 ++++ ...cadvisor-cardinality-1213854b9fe0c828.yaml | 19 +++++++++++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/reduce-cadvisor-cardinality-1213854b9fe0c828.yaml diff --git a/ansible/roles/prometheus/defaults/main.yml b/ansible/roles/prometheus/defaults/main.yml index d1ad085299..03b11c3a51 100644 --- a/ansible/roles/prometheus/defaults/main.yml +++ b/ansible/roles/prometheus/defaults/main.yml @@ -257,3 +257,5 @@ prometheus_openstack_exporter_disabled_dns: "{{ '--disable-service.dns' if not e prometheus_openstack_exporter_disabled_object: "{{ '--disable-service.object-store' if not enable_swift | bool else '' }}" prometheus_openstack_exporter_disabled_lb: "{{ '--disable-service.load-balancer --disable-metric=neutron-loadbalancers --disable-metric=neutron-loadbalancers_not_active' if not enable_octavia | bool else '' }}" prometheus_openstack_exporter_disabled_items: "{{ [prometheus_openstack_exporter_disabled_volume, prometheus_openstack_exporter_disabled_dns, prometheus_openstack_exporter_disabled_object, prometheus_openstack_exporter_disabled_lb|trim]|join(' ')|trim }}" + +prometheus_cadvisor_cmdline_extras: "--docker_only --store_container_labels=false --disable_metrics=percpu,referenced_memory,cpu_topology,resctrl,udp,advtcp,sched,hugetlb,memory_numa,tcp,process" diff --git a/ansible/roles/prometheus/templates/prometheus-cadvisor.json.j2 b/ansible/roles/prometheus/templates/prometheus-cadvisor.json.j2 index 58070b943a..47240535d7 100644 --- a/ansible/roles/prometheus/templates/prometheus-cadvisor.json.j2 +++ b/ansible/roles/prometheus/templates/prometheus-cadvisor.json.j2 @@ -1,5 +1,5 @@ { - "command": "/opt/cadvisor --port={{ prometheus_cadvisor_port }} --log_dir=/var/log/kolla/prometheus", + "command": "/opt/cadvisor --port={{ prometheus_cadvisor_port }} --log_dir=/var/log/kolla/prometheus {{ prometheus_cadvisor_cmdline_extras }}", "config_files": [], "permissions": [ { diff --git a/etc/kolla/globals.yml b/etc/kolla/globals.yml index ed8fb12a4a..c01eeb943a 100644 --- a/etc/kolla/globals.yml +++ b/etc/kolla/globals.yml @@ -702,6 +702,10 @@ # List of extra parameters passed to prometheus. You can add as many to the list. #prometheus_cmdline_extras: +# List of extra parameters passed to cAdvisor. By default system cgroups +# and container labels are not exposed to reduce time series cardinality. +#prometheus_cadvisor_cmdline_extras: "--docker_only --store_container_labels=false --disable_metrics=percpu,referenced_memory,cpu_topology,resctrl,udp,advtcp,sched,hugetlb,memory_numa,tcp,process" + # Example of setting endpoints for prometheus ceph mgr exporter. # You should add all ceph mgr's in your external ceph deployment. #prometheus_ceph_mgr_exporter_endpoints: diff --git a/releasenotes/notes/reduce-cadvisor-cardinality-1213854b9fe0c828.yaml b/releasenotes/notes/reduce-cadvisor-cardinality-1213854b9fe0c828.yaml new file mode 100644 index 0000000000..a76a33a741 --- /dev/null +++ b/releasenotes/notes/reduce-cadvisor-cardinality-1213854b9fe0c828.yaml @@ -0,0 +1,19 @@ +--- +features: + - | + Adds support for passing extra runtime options to cAdvisor via + ``prometheus_cadvisor_cmdline_extras`` new variable. By default + system cgroups' metrics are disabled, plus container labels + don't get exposed to Prometheus. Expensive metrics that usually + should not be exported are also enforced to be disabled - consult + ``_ + for a list. These defaults create savings in resources usage by both + cAdvisor and Prometheus. +upgrade: + - | + cAdvisor has now reduced number of Prometheus metrics and labels + exported by default. This means that corresponding timeseries will + no longer be created. If existing setup relies on these, eg. for + visualisation or alerting, they could be explicitly enabled prior to + upgrading with the ``prometheus_cadvisor_cmdline_extras`` new variable. + Reference for the possible options: ``_.