From 2e4db10e9b040f76d9b2aa86ee3396ae0e195133 Mon Sep 17 00:00:00 2001
From: Steve Wilkerson
Date: Mon, 20 Aug 2018 13:29:40 -0500
Subject: [PATCH] Prometheus: Prune large unused time series metrics

This begins to drop metrics from Prometheus scrape configurations.
The metrics dropped are not currently used by any service that
interacts with Prometheus and are not used in any alerting rules by
default. Dropping these metrics reduces the resources consumed by
Prometheus, as it reduces the total number of time series ingested
and analyzed by Prometheus.

Change-Id: Ia09ddd482da0119167a19e7e4b092879b672c2ec
---
Reviewer notes (free-form text in this section is not part of the commit):
 * A second, identical drop rule for container_fs_reads_merged_total was
   removed from the cAdvisor job. The hunk line counts and the diffstat
   below were adjusted to match (194 -> 190 insertions).
 * NOTE(review): by symmetry with container_fs_writes_bytes_total, the
   removed duplicate may have been meant to be
   container_fs_reads_bytes_total. Confirm that metric is unused before
   adding a rule for it.
 * NOTE(review): leading whitespace inside the hunks was reconstructed;
   verify it against prometheus/values.yaml (or apply with
   --ignore-whitespace). The post-image blob hash on the index line
   predates the duplicate removal.

 prometheus/values.yaml | 190 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 190 insertions(+)

diff --git a/prometheus/values.yaml b/prometheus/values.yaml
index 4ce4115d3..a3e63f482 100644
--- a/prometheus/values.yaml
+++ b/prometheus/values.yaml
@@ -568,6 +568,167 @@ conf:
               regex: (.+)
               target_label: __metrics_path__
               replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
+          metric_relabel_configs:
+            - source_labels:
+                - __name__
+              regex: 'container_network_tcp_usage_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_tasks_state'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_network_udp_usage_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_memory_failures_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_cpu_load_average_10s'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_cpu_system_seconds_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_cpu_user_seconds_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_inodes_free'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_inodes_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_io_current'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_io_time_seconds_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_io_time_weighted_seconds_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_read_seconds_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_reads_merged_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_reads_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_sector_reads_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_sector_writes_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_write_seconds_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_writes_bytes_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_writes_merged_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_fs_writes_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_last_seen'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_memory_cache'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_memory_failcnt'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_memory_max_usage_bytes'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_memory_rss'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_memory_swap'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_memory_usage_bytes'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_network_receive_errors_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_network_receive_packets_dropped_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_network_receive_packets_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_network_transmit_errors_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_network_transmit_packets_dropped_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_network_transmit_packets_total'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_spec_cpu_period'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_spec_cpu_shares'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_spec_memory_limit_bytes'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_spec_memory_reservation_limit_bytes'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_spec_memory_swap_limit_bytes'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'container_start_time_seconds'
+              action: drop
         # Scrape config for API servers.
         #
         # Kubernetes exposes API servers as endpoints to the default/kubernetes
@@ -608,6 +769,35 @@ conf:
                 - __meta_kubernetes_endpoint_port_name
               action: keep
               regex: default;kubernetes;https
+          metric_relabel_configs:
+            - source_labels:
+                - __name__
+              regex: 'apiserver_admission_controller_admission_latencies_seconds_bucket'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'rest_client_request_latency_seconds_bucket'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'apiserver_response_sizes_bucket'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'apiserver_admission_step_admission_latencies_seconds_bucket'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'apiserver_admission_controller_admission_latencies_seconds_count'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'apiserver_admission_controller_admission_latencies_seconds_sum'
+              action: drop
+            - source_labels:
+                - __name__
+              regex: 'apiserver_request_latencies_summary'
+              action: drop
         # Scrape config for service endpoints.
         #
         # The relabeling allows the actual service scrape endpoint to be configured