diff --git a/prometheus/values_overrides/elasticsearch.yaml b/prometheus/values_overrides/elasticsearch.yaml index ca185a2e1..d009eba1e 100644 --- a/prometheus/values_overrides/elasticsearch.yaml +++ b/prometheus/values_overrides/elasticsearch.yaml @@ -3,11 +3,11 @@ conf: rules: elasticsearch: groups: - - name: elasticsearch.rules + - name: elasticsearch.alerting_rules rules: - alert: prom_exporter_elasticsearch_unavailable - expr: absent(elasticsearch_cluster_health_status) - for: 10m + expr: avg_over_time(up{job="elasticsearch-exporter"}[5m]) == 0 + for: 5m labels: severity: warning annotations: @@ -85,3 +85,15 @@ conf: annotations: description: 'There are only {{$value}} < 3 ElasticSearch data nodes running' summary: 'ElasticSearch running on less than 3 data nodes' + fluentd: + groups: + - name: fluentd.alerting_rules + rules: + - alert: prom_exporter_fluentd_unavailable + expr: avg_over_time(up{job="fluentd-daemonset-exporter"}[5m]) == 0 + for: 5m + labels: + severity: warning + annotations: + description: Fluentd exporter is not collecting metrics or is not available for past 10 minutes + title: Fluentd exporter is not collecting metrics or is not available diff --git a/prometheus/values_overrides/kubernetes.yaml b/prometheus/values_overrides/kubernetes.yaml index 638722a82..fb4b75325 100644 --- a/prometheus/values_overrides/kubernetes.yaml +++ b/prometheus/values_overrides/kubernetes.yaml @@ -6,7 +6,7 @@ conf: - name: calico.rules rules: - alert: prom_exporter_calico_unavailable - expr: absent(felix_host) + expr: avg_over_time(up{job="kubernetes-pods",application="calico"}[5m]) == 0 for: 10m labels: severity: warning @@ -250,8 +250,8 @@ conf: - name: kubernetes-object.rules rules: - alert: prom_exporter_kube_state_metrics_unavailable - expr: absent(kube_node_info) - for: 10m + expr: avg_over_time(up{job="kube-state-metrics"}[5m]) == 0 + for: 5m labels: severity: warning annotations: diff --git a/prometheus/values_overrides/nodes.yaml b/prometheus/values_overrides/nodes.yaml index dbde76075..81497bf66 100644 --- a/prometheus/values_overrides/nodes.yaml +++ b/prometheus/values_overrides/nodes.yaml @@ -6,8 +6,8 @@ conf: - name: nodes.rules rules: - alert: prom_exporter_node_unavailable - expr: absent(node_uname_info) - for: 10m + expr: avg_over_time(up{job="node-exporter"}[5m]) == 0 + for: 5m labels: severity: warning annotations: diff --git a/prometheus/values_overrides/openstack.yaml b/prometheus/values_overrides/openstack.yaml index 4c38a6a5d..da8e6702e 100644 --- a/prometheus/values_overrides/openstack.yaml +++ b/prometheus/values_overrides/openstack.yaml @@ -5,13 +5,21 @@ conf: groups: - name: mariadb.rules rules: - - alert: prom_exporter_mariadb_unavailable - expr: absent(mysql_up) - for: 10m + - alert: prom_exporter_mariadb_openstack_unavailable + expr: avg_over_time(up{job="mysql-exporter",kubernetes_namespace="openstack"}[5m]) == 0 + for: 5m labels: severity: warning annotations: - description: MariaDB exporter is not collecting metrics or is not available for past 10 minutes + description: MariaDB exporter in {{ $labels.kubernetes_namespace }} is not collecting metrics or is not available for past 10 minutes + title: MariaDB exporter is not collecting metrics or is not available + - alert: prom_exporter_mariadb_osh_infra_unavailable + expr: avg_over_time(up{job="mysql-exporter",kubernetes_namespace="osh-infra"}[5m]) == 0 + for: 5m + labels: + severity: warning + annotations: + description: MariaDB exporter in {{ $labels.kubernetes_namespace }} is not collecting metrics or is not available for past 10 minutes title: MariaDB exporter is not collecting metrics or is not available - alert: mariadb_table_lock_wait_high expr: 100 * mysql_global_status_table_locks_waited/(mysql_global_status_table_locks_waited + mysql_global_status_table_locks_immediate) > 30 @@ -48,8 +56,8 @@ conf: - name: openstack.rules rules: - alert: prom_exporter_openstack_unavailable - expr: absent(openstack_exporter_cache_refresh_duration_seconds) - for: 10m + expr: avg_over_time(up{job="openstack-metrics"}[5m]) == 0 + for: 5m labels: severity: warning annotations: diff --git a/prometheus/values_overrides/postgresql.yaml b/prometheus/values_overrides/postgresql.yaml index 9e83ee92a..22fe481e1 100644 --- a/prometheus/values_overrides/postgresql.yaml +++ b/prometheus/values_overrides/postgresql.yaml @@ -6,8 +6,8 @@ conf: - name: postgresql.rules rules: - alert: prom_exporter_postgresql_unavailable - expr: absent(pg_static) - for: 10m + expr: avg_over_time(up{job="postgresql-exporter"}[5m]) == 0 + for: 5m labels: severity: warning annotations: