From cb35bd16164ab7d9e2df0fdc27c3d8f28d44a0d0 Mon Sep 17 00:00:00 2001 From: Steve Wilkerson Date: Fri, 28 Jun 2019 11:13:32 -0500 Subject: [PATCH] Nagios: Add init container for generating hosts This updates the Nagios chart to include an init container for generating the host and host group definitions Nagios requires to function. The benefit is that Nagios does not need to constantly attempt to update its host and host group definitions, which currently triggers a restart of the Nagios service even in cases where the host file hasn't changed. With the introduction of an init container for handling this, we can also remove the service check definition and command definition for executing the plugin at periodic intervals. Depends-On: https://review.opendev.org/668197 Change-Id: Id1d63d8c99850b960eb352361d7796162bd6be2f Signed-off-by: Steve Wilkerson --- nagios/templates/deployment.yaml | 16 ++++++++++++++++ nagios/values.yaml | 15 +-------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/nagios/templates/deployment.yaml b/nagios/templates/deployment.yaml index 1bb2b24c1..ec160f084 100644 --- a/nagios/templates/deployment.yaml +++ b/nagios/templates/deployment.yaml @@ -96,6 +96,18 @@ spec: {{- end }} initContainers: {{ tuple $envAll "nagios" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} + - name: define-nagios-hosts +{{ tuple $envAll "nagios" | include "helm-toolkit.snippets.image" | indent 10 }} +{{ tuple $envAll $envAll.Values.pod.resources.nagios | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} + command: + - /usr/lib/nagios/plugins/define-nagios-hosts.py + - --object_file_loc + - /opt/nagios/etc/conf.d/nagios-hosts.cfg + volumeMounts: + - name: pod-tmp + mountPath: /tmp + - name: nagios-confd + mountPath: /opt/nagios/etc/conf.d containers: - name: apache-proxy {{ tuple $envAll "apache_proxy" | include "helm-toolkit.snippets.image" | indent 10 }} @@ -181,6 +193,8 @@ spec: 
volumeMounts: - name: pod-tmp mountPath: /tmp + - name: nagios-confd + mountPath: /opt/nagios/etc/conf.d - name: nagios-etc mountPath: /opt/nagios/etc/nagios.cfg subPath: nagios.cfg @@ -210,6 +224,8 @@ spec: emptyDir: {} - name: pod-var-log emptyDir: {} + - name: nagios-confd + emptyDir: {} - name: nagios-etc secret: secretName: nagios-etc diff --git a/nagios/values.yaml b/nagios/values.yaml index 79ef4882f..6350fcafb 100644 --- a/nagios/values.yaml +++ b/nagios/values.yaml @@ -586,11 +586,6 @@ conf: command_name check_ceph_health } - define command { - command_line $USER1$/define-nagios-hosts.py --object_file_loc /opt/nagios/etc/objects/prometheus_discovery_objects.cfg - command_name check_prometheus_hosts - } - define command { command_line $USER1$/query_elasticsearch.py $USER9$ '$ARG1$' '$ARG2$' '$ARG3$' '$ARG4$' '$ARG5$' --simple_query '$ARG6$' --simple_query_fields '$ARG7$' --match '$ARG8$' --range '$ARG9$' command_name check_es_query @@ -1178,14 +1173,6 @@ conf: use generic-service } - define service { - check_command check_prometheus_hosts - check_interval 900 - hostgroup_name prometheus-hosts - service_description Prometheus_hosts-update - use notifying_service - } - define service { check_command check_prom_alert!pg_replication_fallen_behind!CRITICAL- Postgres Replication lag is over 2 minutes!OK- postgresql replication lag is nominal. hostgroup_name prometheus-hosts @@ -1428,7 +1415,7 @@ conf: cfg_file=/opt/nagios/etc/objects/contacts.cfg cfg_file=/opt/nagios/etc/objects/timeperiods.cfg cfg_file=/opt/nagios/etc/objects/templates.cfg - cfg_file=/opt/nagios/etc/objects/prometheus_discovery_objects.cfg + cfg_file=/opt/nagios/etc/conf.d/nagios-hosts.cfg check_external_commands=1 check_for_orphaned_hosts=1