# openstack-helm-infra/prometheus-alertmanager/values.yaml

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Default values for alertmanager.
# This is a YAML-formatted file.
# Declare name/value pairs to be passed into your templates.
# name: value

---
# Container images referenced by this chart, and how they are pulled.
images:
  pull_policy: IfNotPresent
  # One entry per image; the key is the name the chart looks the image up by.
  tags:
    dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
    image_repo_sync: docker.io/docker:17.07.0
    prometheus-alertmanager: docker.io/prom/alertmanager:v0.20.0
    snmpnotifier: docker.io/maxwo/snmp-notifier:v1.0.0
  # Local registry handling is switched off by default.
  local_registry:
    active: false
    exclude: [dep_check, image_repo_sync]

# Node selector label (key/value pair) for each workload of the chart.
# All three workloads default to the same label.
labels:
  job:
    node_selector_value: enabled
    node_selector_key: openstack-control-plane
  alertmanager:
    node_selector_value: enabled
    node_selector_key: openstack-control-plane
  snmpnotifier:
    node_selector_value: enabled
    node_selector_key: openstack-control-plane

# Pod-level settings: security contexts, anti-affinity, extra mounts,
# replica counts, upgrade/termination behaviour and resource sizing.
pod:
  security_context:
    server:
      pod:
        runAsUser: 65534
      container:
        prometheus_alertmanager:
          readOnlyRootFilesystem: true
          allowPrivilegeEscalation: false
        # The only container here that is configured with UID 0.
        prometheus_alertmanager_perms:
          readOnlyRootFilesystem: true
          runAsUser: 0
  affinity:
    anti:
      topologyKey:
        default: kubernetes.io/hostname
      type:
        default: preferredDuringSchedulingIgnoredDuringExecution
      weight:
        default: 10
  # No additional mounts are defined by default (explicit nulls).
  mounts:
    alertmanager:
      alertmanager: null
      init_container: null
    snmpnotifier:
      snmpnotifier: null
      init_container: null
  replicas:
    alertmanager: 1
    snmpnotifier: 1
  lifecycle:
    termination_grace_period:
      alertmanager:
        timeout: 30
      snmpnotifier:
        timeout: 30
    upgrades:
      deployment:
        pod_replacement_strategy: RollingUpdate
      statefulsets:
        pod_replacement_strategy: RollingUpdate
  # Requests and limits per workload; "enabled" is false by default.
  resources:
    enabled: false
    alertmanager:
      requests:
        cpu: 500m
        memory: 128Mi
      limits:
        cpu: 2000m
        memory: 1024Mi
    snmpnotifier:
      requests:
        cpu: 500m
        memory: 128Mi
      limits:
        cpu: 2000m
        memory: 1024Mi
    jobs:
      image_repo_sync:
        requests:
          cpu: 100m
          memory: 128Mi
        limits:
          cpu: 2000m
          memory: 1024Mi

# Service endpoint definitions: names, hosts, schemes, ports and paths for
# the local image registry, alertmanager, the SNMP notifier and alerta.
endpoints:
  cluster_domain_suffix: cluster.local
  local_image_registry:
    name: docker-registry
    namespace: docker-registry
    hosts:
      default: localhost
      internal: docker-registry
      node: localhost
    host_fqdn_override:
      default: null
    port:
      registry:
        node: 5000
  alertmanager:
    name: prometheus-alertmanager
    namespace: null
    scheme:
      default: http
    hosts:
      default: alerts-engine
      public: prometheus-alertmanager
      discovery: prometheus-alertmanager-discovery
    host_fqdn_override:
      default: null
      # NOTE(srwilkers): this chart supports TLS for FQDN-overridden public
      # endpoints, using the following format:
      # public:
      #   host: null
      #   tls:
      #     crt: null
      #     key: null
    port:
      api:
        default: 9093
        public: 80
      mesh:
        default: 9094
    path:
      default: null
  snmpnotifier:
    name: snmpnotifier
    namespace: null
    scheme:
      default: http
    hosts:
      default: snmp-engine
    host_fqdn_override:
      default: null
    port:
      api:
        default: 9464
    path:
      default: /alerts
  alerta:
    name: alerta
    namespace: null
    scheme:
      default: http
    hosts:
      default: alerta
    host_fqdn_override:
      default: null
    port:
      api:
        default: 8080
    path:
      default: /api/webhooks/prometheus

# Jobs and services that the chart's workloads depend on.
dependencies:
  static:
    alertmanager:
      services: null
    image_repo_sync:
      services:
        - service: local_image_registry
          endpoint: internal
  dynamic:
    common:
      local_image_registry:
        jobs:
          - alertmanager-image-repo-sync
        services:
          - service: local_image_registry
            endpoint: node

# Ingress and node-port exposure for alertmanager and the SNMP notifier.
# Both node ports are disabled by default.
network:
  alertmanager:
    node_port:
      enabled: false
      port: 30903
    ingress:
      public: true
      annotations:
        nginx.ingress.kubernetes.io/rewrite-target: /
      classes:
        namespace: nginx
        cluster: nginx-cluster
  snmpnotifier:
    node_port:
      enabled: false
      port: 30464

# Secret names used by the chart; only the public TLS secret is listed.
secrets:
  tls:
    alertmanager:
      alertmanager: {public: alerts-tls-public}

# Persistent storage settings for alertmanager (enabled by default).
storage:
  alertmanager:
    enabled: true
    storage_class: general
    requests:
      storage: 5Gi
    pvc:
      access_mode:
        - ReadWriteOnce

# Boolean switches, one per manifest; network_policy is the only one that
# defaults to false.
manifests:
  # SNMP notifier manifests.
  snmpnotifier:
    deployment: true
    service: true
  # Alertmanager and shared manifests.
  clusterrolebinding: true
  configmap_bin: true
  configmap_etc: true
  ingress: true
  job_image_repo_sync: true
  network_policy: false
  secret_ingress_tls: true
  service: true
  service_discovery: true
  service_ingress: true
  statefulset: true

# Network policy rules for alertmanager: a single empty rule in each
# direction.
network_policy:
  alertmanager:
    egress: [{}]
    ingress: [{}]

conf:
  # Command-line flag values for the alertmanager and SNMP notifier
  # containers.
  command_flags:
    alertmanager:
      storage:
        path: /var/lib/alertmanager/data
      cluster:
        listen_address: "0.0.0.0:9094"
    snmpnotifier:
      # Fixed typo ("crititcal"): the default severity now matches an entry
      # of alert_severities below.
      alert_default_severity: critical
      alert_severities: "critical,warning,info,page"
      alert_severity_label: severity
      log_level: debug
      snmp_community: public
      # NOTE(review): the key is misspelled ("desination"). It is kept as-is
      # because the chart templates presumably look it up under this exact
      # name - confirm before renaming.
      snmp_desination: "127.0.0.1:162"
      # Quoted so that the dotted OID is always read as a string.
      snmp_trap_default_oid: "1.3.6.1.4.1.98789.0.1"
      snmp_trap_description_template: /etc/snmp_notifier/description-template.tpl
      snmp_version: V2c
  # Alertmanager configuration, stored as a literal block scalar (one string).
  # Everything below "alertmanager: |" is string content, so the "#" lines
  # inside it are part of the value rather than YAML comments.
  # NOTE(review): the receiver URLs contain Go-template expressions, so this
  # string is presumably rendered by the chart templates before being written
  # to the config file - confirm against the configmap template.
  # The SMTP, HipChat, basic-auth and PagerDuty credentials in it are
  # placeholder values and should be overridden for a real deployment.
  alertmanager: |
    global:
      # The smarthost and SMTP sender used for mail notifications.
      smtp_smarthost: 'localhost:25'
      smtp_from: 'alertmanager@example.org'
      smtp_auth_username: 'alertmanager'
      smtp_auth_password: 'password'
      # The auth token for Hipchat.
      hipchat_auth_token: '1234556789'
      # Alternative host for Hipchat.
      hipchat_api_url: 'https://hipchat.foobar.org/'
    # The directory from which notification templates are read.
    templates:
      - '/etc/alertmanager/template/*.tmpl'
    # The root route on which each incoming alert enters.
    route:
      # The labels by which incoming alerts are grouped together. For example,
      # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
      # be batched into a single group.
      group_by:
        - alertname
        - cluster
        - service
      # When a new group of alerts is created by an incoming alert, wait at
      # least 'group_wait' to send the initial notification.
      # This way ensures that you get multiple alerts for the same group that start
      # firing shortly after another are batched together on the first
      # notification.
      group_wait: 30s
      # When the first notification was sent, wait 'group_interval' to send a batch
      # of new alerts that started firing for that group.
      group_interval: 5m
      # If an alert has successfully been sent, wait 'repeat_interval' to
      # resend them.
      repeat_interval: 3h
      # A default receiver
      # receiver: team-X-mails
      receiver: snmp_notifier
      # All the above attributes are inherited by all child routes and can
      # overwritten on each.
      # The child route trees.
      routes:
        # This routes performs a regular expression match on alert
        # labels to catch alerts that are related to a list of
        # services.
        - receiver: "alerta"
          continue: true
        - receiver: "snmp_notifier"
          continue: true
        - match_re:
            service: ^(foo1|foo2|baz)$
          receiver: team-X-mails
          # The service has a sub-route for critical alerts, any alerts
          # that do not match, i.e. severity != critical, fall-back to the
          # parent node and are sent to 'team-X-mails'
          routes:
            - match:
                severity: critical
              receiver: team-X-pager
        - match:
            service: files
          receiver: team-Y-mails
          routes:
            - match:
                severity: critical
              receiver: team-Y-pager
        # This route handles all alerts coming from a database service. If there's
        # no team to handle it, it defaults to the DB team.
        - match:
            service: database
          receiver: team-DB-pager
          # Also group alerts by affected database.
          group_by:
            - alertname
            - cluster
            - database
          routes:
            - match:
                owner: team-X
              receiver: team-X-pager
            - match:
                owner: team-Y
              receiver: team-Y-pager
    # Inhibition rules allow to mute a set of alerts given that another alert is
    # firing.
    # We use this to mute any warning-level notifications if the same alert is
    # already critical.
    inhibit_rules:
      - source_match:
          severity: 'critical'
        target_match:
          severity: 'warning'
        # Apply inhibition if the alertname is the same.
        equal:
          - alertname
          - cluster
          - service
    receivers:
      - name: 'snmp_notifier'
        webhook_configs:
        - send_resolved: true
          #url: http://snmp-engine.osh-infra.svc.cluster.local:9464/alerts
          url: {{ tuple "snmpnotifier" "internal" "api" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" }}
      - name: 'alerta'
        webhook_configs:
        - send_resolved: true
          #url: 'http://alerta:8080/api/webhooks/prometheus'
          url: {{ tuple "alerta" "internal" "api" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" }}
          http_config:
            basic_auth:
              username: admin
              password: changeme
      - name: 'team-X-mails'
        email_configs:
          - to: 'team-X+alerts@example.org'
      - name: 'team-X-pager'
        email_configs:
          - to: 'team-X+alerts-critical@example.org'
        pagerduty_configs:
          - service_key: <team-X-key>
      - name: 'team-Y-mails'
        email_configs:
          - to: 'team-Y+alerts@example.org'
      - name: 'team-Y-pager'
        pagerduty_configs:
          - service_key: <team-Y-key>
      - name: 'team-DB-pager'
        pagerduty_configs:
          - service_key: <team-DB-key>
      - name: 'team-X-hipchat'
        hipchat_configs:
          - auth_token: <auth_token>
            room_id: 85
            message_format: html
            notify: false
  # No alert templates are defined by default.
  alert_templates: null
...