openstack-helm-infra/prometheus/values_overrides/alertmanager.yaml
Steven Fitzpatrick fb7fc87d23 Prometheus: Render Rules as Templates
This change allows us to substitute values into our rules files.

Example:

- alert: my_region_is_down
  expr: up{region="{{ $my_region }}"} == 0
  
To support this change, rule annotations that used the expansion
{{ $labels.foo }} had to be surrounded with "{{` ... `}}" to render
correctly.

Change-Id: Ia7ac891de8261acca62105a3e2636bd747a5fbea
2020-08-10 18:16:35 +00:00

34 lines
1.6 KiB
YAML

---
conf:
prometheus:
rules:
alertmanager:
groups:
- name: alertmanager.rules
rules:
- alert: AlertmanagerConfigInconsistent
expr: count_values("config_hash", alertmanager_config_hash) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
for: 5m
labels:
severity: critical
annotations:
description: "{{`The configuration of the instances of the Alertmanager cluster {{$labels.service}} are out of sync.`}}"
summary: Alertmanager configurations are inconsistent
- alert: AlertmanagerDownOrMissing
expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1", "alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
for: 5m
labels:
severity: warning
annotations:
description: "{{`An unexpected number of Alertmanagers are scraped or Alertmanagers disappeared from discovery.`}}"
summary: Alertmanager down or not discovered
- alert: FailedReload
expr: alertmanager_config_last_reload_successful == 0
for: 10m
labels:
severity: warning
annotations:
description: "{{`Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod }}.`}}"
summary: Alertmanager configuration reload has failed
...