From 1596475db6249911bc61fcf218b66cf850b657fc Mon Sep 17 00:00:00 2001
From: Jorge Niedbalski
Date: Fri, 1 Jun 2018 17:03:58 -0400
Subject: [PATCH] [prometheus] Initial implementation of prometheus-alertmanager

This patch extends the prometheus role so that it can deploy
the prometheus-alertmanager[0] container.

The variable enable_prometheus_alertmanager decides if the container
should be deployed and enabled. If enabled, the following configuration
and actions are performed:

- The alerting section is added to the prometheus-server configuration,
  pointing at the prometheus-alertmanager host group as targets.
- HAProxy is configured to load-balance over the prometheus-alertmanager
  host group (external/internal). Both listeners require HTTP basic auth
  with prometheus_alertmanager_user / prometheus_alertmanager_password.
- The prometheus-alertmanager container is restarted when its config.json,
  its merged alertmanager configuration file or the container check
  reports a change.

Please note that a default (dummy) configuration is provided that allows
the service to start; the operator should extend it via a node custom config.

[0] https://github.com/openstack/kolla/tree/master/docker/prometheus/prometheus-alertmanager

Change-Id: I3a13342c67744a278cc8d52900a913c3ccc452ae
Closes-Bug: 1774725
Signed-off-by: Jorge Niedbalski
---
 ansible/group_vars/all.yml                    |  5 +++
 ansible/inventory/all-in-one                  |  3 ++
 ansible/inventory/multinode                   |  3 ++
 .../roles/haproxy/templates/haproxy.cfg.j2    | 32 +++++++++++++++++++
 ansible/roles/prometheus/defaults/main.yml    | 14 ++++++++
 ansible/roles/prometheus/handlers/main.yml    | 20 ++++++++++++++++++++
 ansible/roles/prometheus/tasks/config.yml     | 16 ++++++++++
 ansible/roles/prometheus/tasks/precheck.yml   | 16 ++++++++++
 .../templates/prometheus-alertmanager.json.j2 | 23 +++++++++++++
 .../templates/prometheus-alertmanager.yml.j2  | 11 +++++++
 .../prometheus/templates/prometheus.yml.j2    | 10 ++++++
 etc/kolla/globals.yml                         |  2 ++
 etc/kolla/passwords.yml                       |  1 +
 ...metheus-alertmanager-dd6d38da2357b917.yaml |  5 +++
 14 files changed, 161 insertions(+)
 create mode 100644 ansible/roles/prometheus/templates/prometheus-alertmanager.json.j2
 create mode 100644 ansible/roles/prometheus/templates/prometheus-alertmanager.yml.j2
 create mode 100644 releasenotes/notes/prometheus-alertmanager-dd6d38da2357b917.yaml

diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml
index bc4302e29a..8cd6b19c52 100644
--- a/ansible/group_vars/all.yml
+++ b/ansible/group_vars/all.yml
@@ -278,6 +278,9 @@ prometheus_memcached_exporter_port: "9150"
 # Default cadvisor port of 8080 already in use
 prometheus_cadvisor_port: "18080"
 
+# Prometheus alertmanager ports
+prometheus_alertmanager_port: "9093"
+prometheus_alertmanager_cluster_port: "9094"
 qdrouterd_port: "31459"
 
 rabbitmq_port: "5672"
@@ -834,3 +837,5 @@ enable_prometheus_mysqld_exporter: "{{ enable_mariadb | bool }}"
 enable_prometheus_node_exporter: "{{ enable_prometheus | bool }}"
 enable_prometheus_memcached_exporter: "{{ enable_memcached | bool }}"
 enable_prometheus_cadvisor: "{{ enable_prometheus | bool }}"
+enable_prometheus_alertmanager: "{{ enable_prometheus | bool }}"
+prometheus_alertmanager_user: "admin"
diff --git a/ansible/inventory/all-in-one b/ansible/inventory/all-in-one
index 845b5c03ad..ee1da6efe5 100644
--- a/ansible/inventory/all-in-one
+++ b/ansible/inventory/all-in-one
@@ -688,3 +688,6 @@ control
 compute
 network
 storage
+
+[prometheus-alertmanager:children]
+monitoring
diff --git a/ansible/inventory/multinode b/ansible/inventory/multinode
index 1466025b75..1caccd0709 100644
--- a/ansible/inventory/multinode
+++ b/ansible/inventory/multinode
@@ -707,3 +707,6 @@ control
 compute
 network
 storage
+
+[prometheus-alertmanager:children]
+monitoring
diff --git a/ansible/roles/haproxy/templates/haproxy.cfg.j2 b/ansible/roles/haproxy/templates/haproxy.cfg.j2
index 8ae2c546e4..fffe96fa89 100644
--- a/ansible/roles/haproxy/templates/haproxy.cfg.j2
+++ b/ansible/roles/haproxy/templates/haproxy.cfg.j2
@@ -1397,3 +1397,35 @@ listen opendaylight_websocket
 {% endfor %}
 
 {% endif %}
+
+{% if enable_prometheus_alertmanager | bool %}
+
+userlist prometheus-alertmanager-user
+  user {{ prometheus_alertmanager_user }} insecure-password {{ prometheus_alertmanager_password }}
+
+listen prometheus_alertmanager
+  bind {{ kolla_internal_vip_address }}:{{ prometheus_alertmanager_port }}
+  acl auth_acl http_auth(prometheus-alertmanager-user)
+  http-request auth realm basicauth unless auth_acl
+{% for http_option in haproxy_listen_http_extra %}
+  {{ http_option }}
+{% endfor %}
+{% for host in groups['prometheus-alertmanager'] %}
+  server {{ hostvars[host]['ansible_hostname'] }} {{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ prometheus_alertmanager_port }} check inter 2000 rise 2 fall 5
+{% endfor %}
+{% if haproxy_enable_external_vip | bool %}
+
+listen prometheus_alertmanager_external
+  bind {{ kolla_external_vip_address }}:{{ prometheus_alertmanager_port }} {{ tls_bind_info }}
+  http-request del-header X-Forwarded-Proto
+  http-request set-header X-Forwarded-Proto https if { ssl_fc }
+  acl auth_acl http_auth(prometheus-alertmanager-user)
+  http-request auth realm basicauth unless auth_acl
+{% for http_option in haproxy_listen_http_extra %}
+  {{ http_option }}
+{% endfor %}
+{% for host in groups['prometheus-alertmanager'] %}
+  server {{ hostvars[host]['ansible_hostname'] }} {{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ prometheus_alertmanager_port }} check inter 2000 rise 2 fall 5
+{% endfor %}
+{% endif %}
+{% endif %}
diff --git a/ansible/roles/prometheus/defaults/main.yml b/ansible/roles/prometheus/defaults/main.yml
index 2c6db50537..41179d66f5 100644
--- a/ansible/roles/prometheus/defaults/main.yml
+++ b/ansible/roles/prometheus/defaults/main.yml
@@ -66,6 +66,16 @@ prometheus_services:
       - "/sys:/sys:ro"
       - "/var/lib/docker/:/var/lib/docker:ro"
       - "/dev/disk/:/dev/disk:ro"
+  prometheus-alertmanager:
+    container_name: "prometheus_alertmanager"
+    group: "prometheus-alertmanager"
+    enabled: "{{ enable_prometheus_alertmanager | bool }}"
+    image: "{{ prometheus_alertmanager_image_full }}"
+    volumes:
+      - "{{ node_config_directory }}/prometheus-alertmanager/:{{ container_config_directory }}/:ro"
+      - "/etc/localtime:/etc/localtime:ro"
+      - "kolla_logs:/var/log/kolla/"
+      - "prometheus:/var/lib/prometheus"
 
 ####################
 # Database
@@ -101,3 +111,7 @@ prometheus_memcached_exporter_image_full: "{{ prometheus_memcached_exporter_imag
 prometheus_cadvisor_image: "{{ docker_registry ~ '/' if docker_registry else '' }}{{ docker_namespace }}/{{ kolla_base_distro }}-{{ kolla_install_type }}-prometheus-cadvisor"
 prometheus_cadvisor_tag: "{{ openstack_release }}"
 prometheus_cadvisor_image_full: "{{ prometheus_cadvisor_image }}:{{ prometheus_cadvisor_tag }}"
+
+prometheus_alertmanager_image: "{{ docker_registry ~ '/' if docker_registry else '' }}{{ docker_namespace }}/{{ kolla_base_distro }}-{{ kolla_install_type }}-prometheus-alertmanager"
+prometheus_alertmanager_tag: "{{ openstack_release }}"
+prometheus_alertmanager_image_full: "{{ prometheus_alertmanager_image }}:{{ prometheus_alertmanager_tag }}"
diff --git a/ansible/roles/prometheus/handlers/main.yml b/ansible/roles/prometheus/handlers/main.yml
index 3801e1a0d4..bcac309d5b 100644
--- a/ansible/roles/prometheus/handlers/main.yml
+++ b/ansible/roles/prometheus/handlers/main.yml
@@ -120,3 +120,23 @@
     - service.enabled | bool
     - config_json.changed | bool
       or prometheus_container.changed | bool
+
+- name: Restart prometheus-alertmanager container
+  vars:
+    service_name: "prometheus-alertmanager"
+    service: "{{ prometheus_services[service_name] }}"
+    config_json: "{{ prometheus_config_jsons.results|selectattr('item.key', 'equalto', service_name)|first }}"
+    prometheus_container: "{{ check_prometheus_containers.results|selectattr('item.key', 'equalto', service_name)|first }}"
+  kolla_docker:
+    action: "recreate_or_restart_container"
+    common_options: "{{ docker_common_options }}"
+    name: "{{ service.container_name }}"
+    image: "{{ service.image }}"
+    volumes: "{{ service.volumes }}"
+  when:
+    - kolla_action != "config"
+    - inventory_hostname in groups[service.group]
+    - service.enabled | bool
+    - config_json.changed | bool
+      or prometheus_alertmanager_confs.changed | bool
+      or prometheus_container.changed | bool
diff --git a/ansible/roles/prometheus/tasks/config.yml b/ansible/roles/prometheus/tasks/config.yml
index 67f77ded7d..a1b075b870 100644
--- a/ansible/roles/prometheus/tasks/config.yml
+++ b/ansible/roles/prometheus/tasks/config.yml
@@ -41,6 +41,22 @@
   notify:
     - Restart prometheus-server container
 
+- name: Copying over prometheus alertmanager config file
+  vars:
+    service: "{{ prometheus_services['prometheus-alertmanager']}}"
+  merge_yaml:
+    sources:
+      - "{{ node_custom_config }}/prometheus/{{ inventory_hostname }}/prometheus-alertmanager.yml"
+      - "{{ node_custom_config }}/prometheus/prometheus-alertmanager.yml"
+      - "{{ role_path }}/templates/prometheus-alertmanager.yml.j2"
+    dest: "{{ node_config_directory }}/prometheus-alertmanager/prometheus-alertmanager.yml"
+  register: prometheus_alertmanager_confs
+  when:
+    - inventory_hostname in groups[service.group]
+    - service.enabled | bool
+  notify:
+    - Restart prometheus-alertmanager container
+
 - name: Copying over my.cnf for mysqld_exporter
   vars:
     service: "{{ prometheus_services['prometheus-mysqld-exporter']}}"
diff --git a/ansible/roles/prometheus/tasks/precheck.yml b/ansible/roles/prometheus/tasks/precheck.yml
index 1a76cd9b66..abfd368e91 100644
--- a/ansible/roles/prometheus/tasks/precheck.yml
+++ b/ansible/roles/prometheus/tasks/precheck.yml
@@ -7,6 +7,7 @@
       - prometheus_haproxy_exporter
       - prometheus_mysqld_exporter
       - prometheus_cadvisor
+      - prometheus_alertmanager
   register: container_facts
 
 - name: Checking free port for Prometheus server
@@ -79,3 +80,18 @@
     - container_facts['prometheus_cadvisor'] is not defined
     - inventory_hostname in groups['prometheus-cadvisor']
     - enable_prometheus_cadvisor | bool
+
+- name: Checking free ports for Prometheus Alertmanager
+  wait_for:
+    host: "{{ hostvars[inventory_hostname]['ansible_' + api_interface]['ipv4']['address'] }}"
+    port: "{{ item }}"
+    connect_timeout: 1
+    timeout: 1
+    state: stopped
+  when:
+    - container_facts['prometheus_alertmanager'] is not defined
+    - inventory_hostname in groups['prometheus-alertmanager']
+    - enable_prometheus_alertmanager | bool
+  with_items:
+    - "{{ prometheus_alertmanager_port }}"
+    - "{{ prometheus_alertmanager_cluster_port }}"
diff --git a/ansible/roles/prometheus/templates/prometheus-alertmanager.json.j2 b/ansible/roles/prometheus/templates/prometheus-alertmanager.json.j2
new file mode 100644
index 0000000000..31edb4e05b
--- /dev/null
+++ b/ansible/roles/prometheus/templates/prometheus-alertmanager.json.j2
@@ -0,0 +1,23 @@
+{
+    "command": "/opt/prometheus_alertmanager/alertmanager --config.file=/etc/prometheus/alertmanager.yml --web.listen-address={{ api_interface_address }}:{{ prometheus_alertmanager_port }}{% if groups["prometheus-alertmanager"] | length > 1 %} --mesh.listen-address={{ api_interface_address }}:{{ prometheus_alertmanager_cluster_port }} {% for host in groups["prometheus-alertmanager"] %} --mesh.peer={{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ hostvars[host]['prometheus_alertmanager_cluster_port'] }}{% endfor %}{% endif %} --storage.path /var/lib/prometheus",
+    "config_files": [
+        {
+            "source": "{{ container_config_directory }}/prometheus-alertmanager.yml",
+            "dest": "/etc/prometheus/alertmanager.yml",
+            "owner": "prometheus",
+            "perm": "0600"
+        }
+    ],
+    "permissions": [
+        {
+            "path": "/var/lib/prometheus",
+            "owner": "prometheus:kolla",
+            "recurse": true
+        },
+        {
+            "path": "/var/log/kolla/prometheus",
+            "owner": "prometheus:kolla",
+            "recurse": true
+        }
+    ]
+}
diff --git a/ansible/roles/prometheus/templates/prometheus-alertmanager.yml.j2 b/ansible/roles/prometheus/templates/prometheus-alertmanager.yml.j2
new file mode 100644
index 0000000000..4918162978
--- /dev/null
+++ b/ansible/roles/prometheus/templates/prometheus-alertmanager.yml.j2
@@ -0,0 +1,11 @@
+global:
+  resolve_timeout: 5m
+  smtp_require_tls: true
+route:
+  receiver: default-receiver
+  group_wait: 10s
+  group_interval: 5m
+  repeat_interval: 3h
+receivers:
+  - name: default-receiver
+templates: []
diff --git a/ansible/roles/prometheus/templates/prometheus.yml.j2 b/ansible/roles/prometheus/templates/prometheus.yml.j2
index 77300676b5..9e43ee5374 100644
--- a/ansible/roles/prometheus/templates/prometheus.yml.j2
+++ b/ansible/roles/prometheus/templates/prometheus.yml.j2
@@ -57,3 +57,13 @@ scrape_configs:
         - '{{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ hostvars[host]['prometheus_cadvisor_port'] }}'
 {% endfor %}
 {% endif %}
+
+{% if enable_prometheus_alertmanager | bool %}
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets:
+{% for host in groups["prometheus-alertmanager"] %}
+            - '{{ hostvars[host]['ansible_' + hostvars[host]['api_interface']]['ipv4']['address'] }}:{{ hostvars[host]['prometheus_alertmanager_port'] }}'
+{% endfor %}
+{% endif %}
diff --git a/etc/kolla/globals.yml b/etc/kolla/globals.yml
index 2e9200a2b4..9614196545 100644
--- a/etc/kolla/globals.yml
+++ b/etc/kolla/globals.yml
@@ -467,3 +467,5 @@ tempest_floating_network_name:
 #enable_prometheus_mysqld_exporter: "{{ enable_mariadb | bool }}"
 #enable_prometheus_node_exporter: "{{ enable_prometheus | bool }}"
 #enable_prometheus_cadvisor: "{{ enable_prometheus | bool }}"
+#enable_prometheus_memcached_exporter: "{{ enable_memcached | bool }}"
+#enable_prometheus_alertmanager: "{{ enable_prometheus | bool }}"
diff --git a/etc/kolla/passwords.yml b/etc/kolla/passwords.yml
index 8ded2388a9..7da7f26057 100644
--- a/etc/kolla/passwords.yml
+++ b/etc/kolla/passwords.yml
@@ -258,3 +258,4 @@ xenserver_password:
 # Prometheus options
 ####################
 prometheus_mysql_exporter_database_password:
+prometheus_alertmanager_password:
diff --git a/releasenotes/notes/prometheus-alertmanager-dd6d38da2357b917.yaml b/releasenotes/notes/prometheus-alertmanager-dd6d38da2357b917.yaml
new file mode 100644
index 0000000000..9b578d556f
--- /dev/null
+++ b/releasenotes/notes/prometheus-alertmanager-dd6d38da2357b917.yaml
@@ -0,0 +1,5 @@
+---
+features:
+  - |
+    Deploy prometheus-alertmanager (https://prometheus.io/docs/alerting/alertmanager/)
+    as part of the prometheus monitoring stack.