From c6259158e3eff4aff9770b7044b0179a7de533aa Mon Sep 17 00:00:00 2001 From: Piotr Parczewski Date: Tue, 30 Mar 2021 16:45:47 +0200 Subject: [PATCH] Reduce container metrics cardinality Adds support for passing extra runtime options to cAdvisor. By default, the new options stop cAdvisor from exporting rarely useful metrics and labels. This helps reduce the load on Prometheus and cAdvisor itself. Change-Id: Id0144e8fa518e3236cb94ba2e3961fb455d36443 --- ansible/roles/prometheus/defaults/main.yml | 2 ++ .../templates/prometheus-cadvisor.json.j2 | 2 +- etc/kolla/globals.yml | 4 ++++ ...cadvisor-cardinality-1213854b9fe0c828.yaml | 19 +++++++++++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/reduce-cadvisor-cardinality-1213854b9fe0c828.yaml diff --git a/ansible/roles/prometheus/defaults/main.yml b/ansible/roles/prometheus/defaults/main.yml index b1c1174609..a27646a85e 100644 --- a/ansible/roles/prometheus/defaults/main.yml +++ b/ansible/roles/prometheus/defaults/main.yml @@ -257,3 +257,5 @@ prometheus_openstack_exporter_disabled_dns: "{{ '--disable-service.dns' if not e prometheus_openstack_exporter_disabled_object: "{{ '--disable-service.object-store' if not enable_swift | bool else '' }}" prometheus_openstack_exporter_disabled_lb: "{{ '--disable-service.load-balancer --disable-metric=neutron-loadbalancers --disable-metric=neutron-loadbalancers_not_active' if not enable_octavia | bool else '' }}" prometheus_openstack_exporter_disabled_items: "{{ [prometheus_openstack_exporter_disabled_volume, prometheus_openstack_exporter_disabled_dns, prometheus_openstack_exporter_disabled_object, prometheus_openstack_exporter_disabled_lb|trim]|join(' ')|trim }}" + +prometheus_cadvisor_cmdline_extras: "--docker_only --store_container_labels=false --disable_metrics=percpu,referenced_memory,cpu_topology,resctrl,udp,advtcp,sched,hugetlb,memory_numa,tcp,process" diff --git a/ansible/roles/prometheus/templates/prometheus-cadvisor.json.j2 
b/ansible/roles/prometheus/templates/prometheus-cadvisor.json.j2 index 58070b943a..47240535d7 100644 --- a/ansible/roles/prometheus/templates/prometheus-cadvisor.json.j2 +++ b/ansible/roles/prometheus/templates/prometheus-cadvisor.json.j2 @@ -1,5 +1,5 @@ { - "command": "/opt/cadvisor --port={{ prometheus_cadvisor_port }} --log_dir=/var/log/kolla/prometheus", + "command": "/opt/cadvisor --port={{ prometheus_cadvisor_port }} --log_dir=/var/log/kolla/prometheus {{ prometheus_cadvisor_cmdline_extras }}", "config_files": [], "permissions": [ { diff --git a/etc/kolla/globals.yml b/etc/kolla/globals.yml index 00abf88819..4194c6f490 100644 --- a/etc/kolla/globals.yml +++ b/etc/kolla/globals.yml @@ -696,6 +696,10 @@ # List of extra parameters passed to prometheus. You can add as many to the list. #prometheus_cmdline_extras: +# List of extra parameters passed to cAdvisor. By default system cgroups +# and container labels are not exposed to reduce time series cardinality. +#prometheus_cadvisor_cmdline_extras: "--docker_only --store_container_labels=false --disable_metrics=percpu,referenced_memory,cpu_topology,resctrl,udp,advtcp,sched,hugetlb,memory_numa,tcp,process" + # Example of setting endpoints for prometheus ceph mgr exporter. # You should add all ceph mgr's in your external ceph deployment. #prometheus_ceph_mgr_exporter_endpoints: diff --git a/releasenotes/notes/reduce-cadvisor-cardinality-1213854b9fe0c828.yaml b/releasenotes/notes/reduce-cadvisor-cardinality-1213854b9fe0c828.yaml new file mode 100644 index 0000000000..a76a33a741 --- /dev/null +++ b/releasenotes/notes/reduce-cadvisor-cardinality-1213854b9fe0c828.yaml @@ -0,0 +1,19 @@ +--- +features: + - | + Adds support for passing extra runtime options to cAdvisor via + ``prometheus_cadvisor_cmdline_extras`` new variable. By default + system cgroups' metrics are disabled, plus container labels + don't get exposed to Prometheus. 
Expensive metrics that usually + should not be exported are also disabled - consult + `<https://github.com/google/cadvisor/blob/master/docs/runtime_options.md#metrics>`_ + for a list. These defaults reduce resource usage of both + cAdvisor and Prometheus. +upgrade: + - | + cAdvisor now exports a reduced number of Prometheus metrics and labels + by default. This means that the corresponding time series will + no longer be created. If an existing setup relies on these, e.g. for + visualisation or alerting, they can be explicitly enabled prior to + upgrading with the new ``prometheus_cadvisor_cmdline_extras`` variable. + Reference for the possible options: `<https://github.com/google/cadvisor/blob/master/docs/runtime_options.md>`_.