Prometheus: Status Alerts Scalar/Vector Conversion

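This change converts the exporter-availability alert expressions, which
relied on absent() applied to an instant vector, to use the range
aggregation avg_over_time(up{job="..."}[5m]) == 0 instead. It also adds
a Fluentd exporter alert and splits the MariaDB exporter alert into one
alert per namespace (openstack and osh-infra).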

Change-Id: I4df757f961524bed23b6a6ad361779c1749ca2c5
Co-Authored-By: Meghan Heisler <mkheisler93@gmail.com>
This commit is contained in:
Steven Fitzpatrick 2019-11-08 11:20:29 -06:00
parent aa48b16896
commit d408bed90d
5 changed files with 36 additions and 16 deletions

View File

@ -3,11 +3,11 @@ conf:
rules: rules:
elasticsearch: elasticsearch:
groups: groups:
- name: elasticsearch.rules - name: elasticsearch.alerting_rules
rules: rules:
- alert: prom_exporter_elasticsearch_unavailable - alert: prom_exporter_elasticsearch_unavailable
expr: absent(elasticsearch_cluster_health_status) expr: avg_over_time(up{job="elasticsearch-exporter"}[5m]) == 0
for: 10m for: 5m
labels: labels:
severity: warning severity: warning
annotations: annotations:
@ -85,3 +85,15 @@ conf:
annotations: annotations:
description: 'There are only {{$value}} < 3 ElasticSearch data nodes running' description: 'There are only {{$value}} < 3 ElasticSearch data nodes running'
summary: 'ElasticSearch running on less than 3 data nodes' summary: 'ElasticSearch running on less than 3 data nodes'
fluentd:
groups:
- name: fluentd.alerting_rules
rules:
- alert: prom_exporter_fluentd_unavailable
expr: avg_over_time(up{job="fluentd-daemonset-exporter"}[5m]) == 0
for: 5m
labels:
severity: warning
annotations:
description: Fluentd exporter is not collecting metrics or is not available for past 10 minutes
title: Fluentd exporter is not collecting metrics or is not available

View File

@ -6,7 +6,7 @@ conf:
- name: calico.rules - name: calico.rules
rules: rules:
- alert: prom_exporter_calico_unavailable - alert: prom_exporter_calico_unavailable
expr: absent(felix_host) expr: avg_over_time(up{job="kubernetes-pods",application="calico"}[5m]) == 0
for: 10m for: 10m
labels: labels:
severity: warning severity: warning
@ -250,8 +250,8 @@ conf:
- name: kubernetes-object.rules - name: kubernetes-object.rules
rules: rules:
- alert: prom_exporter_kube_state_metrics_unavailable - alert: prom_exporter_kube_state_metrics_unavailable
expr: absent(kube_node_info) expr: avg_over_time(up{job="kube-state-metrics"}[5m]) == 0
for: 10m for: 5m
labels: labels:
severity: warning severity: warning
annotations: annotations:

View File

@ -6,8 +6,8 @@ conf:
- name: nodes.rules - name: nodes.rules
rules: rules:
- alert: prom_exporter_node_unavailable - alert: prom_exporter_node_unavailable
expr: absent(node_uname_info) expr: avg_over_time(up{job="node-exporter"}[5m]) == 0
for: 10m for: 5m
labels: labels:
severity: warning severity: warning
annotations: annotations:

View File

@ -5,13 +5,21 @@ conf:
groups: groups:
- name: mariadb.rules - name: mariadb.rules
rules: rules:
- alert: prom_exporter_mariadb_unavailable - alert: prom_exporter_mariadb_openstack_unavailable
expr: absent(mysql_up) expr: avg_over_time(up{job="mysql-exporter",kubernetes_namespace="openstack"}[5m]) == 0
for: 10m for: 5m
labels: labels:
severity: warning severity: warning
annotations: annotations:
description: MariaDB exporter is not collecting metrics or is not available for past 10 minutes description: MariaDB exporter in {{ $labels.kubernetes_namespace }} is not collecting metrics or is not available for past 10 minutes
title: MariaDB exporter is not collecting metrics or is not available
- alert: prom_exporter_mariadb_osh_infra_unavailable
expr: avg_over_time(up{job="mysql-exporter",kubernetes_namespace="osh-infra"}[5m]) == 0
for: 5m
labels:
severity: warning
annotations:
description: MariaDB exporter in {{ $labels.kubernetes_namespace }} is not collecting metrics or is not available for past 10 minutes
title: MariaDB exporter is not collecting metrics or is not available title: MariaDB exporter is not collecting metrics or is not available
- alert: mariadb_table_lock_wait_high - alert: mariadb_table_lock_wait_high
expr: 100 * mysql_global_status_table_locks_waited/(mysql_global_status_table_locks_waited + mysql_global_status_table_locks_immediate) > 30 expr: 100 * mysql_global_status_table_locks_waited/(mysql_global_status_table_locks_waited + mysql_global_status_table_locks_immediate) > 30
@ -48,8 +56,8 @@ conf:
- name: openstack.rules - name: openstack.rules
rules: rules:
- alert: prom_exporter_openstack_unavailable - alert: prom_exporter_openstack_unavailable
expr: absent(openstack_exporter_cache_refresh_duration_seconds) expr: avg_over_time(up{job="openstack-metrics"}[5m]) == 0
for: 10m for: 5m
labels: labels:
severity: warning severity: warning
annotations: annotations:

View File

@ -6,8 +6,8 @@ conf:
- name: postgresql.rules - name: postgresql.rules
rules: rules:
- alert: prom_exporter_postgresql_unavailable - alert: prom_exporter_postgresql_unavailable
expr: absent(pg_static) expr: avg_over_time(up{job="postgresql-exporter"}[5m]) == 0
for: 10m for: 5m
labels: labels:
severity: warning severity: warning
annotations: annotations: