From 046742c9c6962706f43be7aba796200092d18b13 Mon Sep 17 00:00:00 2001 From: Steve Wilkerson Date: Wed, 16 Jan 2019 19:39:43 -0600 Subject: [PATCH] Nagios: Update logging, add readiness probe This updates the Nagios chart configuration to not use syslog for logging, removes the logging of notifications, and drastically increases the number of concurrent checks executed. This also removes the hostPath for Nagios logs, as it seems to add no value over what's already reported to the console. Finally, as Nagios's log file has the potential to grow very rapidly while the service has no means to disable logging to disk, this adds a readiness probe that both checks whether Nagios's endpoint is being served and clears out the log file by redirecting the no-op command's output to the nagios log file. Change-Id: I81151c48ef4e0b7877f595c271f55b8fd479e8c1 --- nagios/templates/bin/_nagios-readiness.sh.tpl | 27 +++++++++++++++++++ nagios/templates/configmap-bin.yaml | 2 ++ nagios/templates/deployment.yaml | 27 ++++++++----------- nagios/values.yaml | 5 ++-- 4 files changed, 43 insertions(+), 18 deletions(-) create mode 100644 nagios/templates/bin/_nagios-readiness.sh.tpl diff --git a/nagios/templates/bin/_nagios-readiness.sh.tpl b/nagios/templates/bin/_nagios-readiness.sh.tpl new file mode 100644 index 000000000..9e50f36d9 --- /dev/null +++ b/nagios/templates/bin/_nagios-readiness.sh.tpl @@ -0,0 +1,27 @@ +#!/bin/bash + +{{/* +Copyright 2019 The Openstack-Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/}} + +# NOTE(sw5822): Truncate the Nagios log file by redirecting the (empty) output +# of the no-op command to it, since Nagios doesn't support logging to /dev/null +: > /opt/nagios/var/log/nagios.log + +# Check whether Nagios endpoint is reachable; fail the probe unless the reply is a numeric 2xx/3xx status +reply=$(curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8000/nagios) +if ! [ "$reply" -ge 200 ] || ! [ "$reply" -lt 400 ]; then + exit 1 +fi diff --git a/nagios/templates/configmap-bin.yaml b/nagios/templates/configmap-bin.yaml index db1ea00fe..759ed32fe 100644 --- a/nagios/templates/configmap-bin.yaml +++ b/nagios/templates/configmap-bin.yaml @@ -24,6 +24,8 @@ metadata: data: apache.sh: | {{ tuple "bin/_apache.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + nagios-readiness.sh: | +{{ tuple "bin/_nagios-readiness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} image-repo-sync.sh: |+ {{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }} {{- end }} diff --git a/nagios/templates/deployment.yaml b/nagios/templates/deployment.yaml index 5e32f0e6d..6c16264a9 100644 --- a/nagios/templates/deployment.yaml +++ b/nagios/templates/deployment.yaml @@ -92,19 +92,6 @@ spec: {{- end }} initContainers: {{ tuple $envAll "nagios" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} - - name: nagios-log-ownership -{{ tuple $envAll "nagios" | include "helm-toolkit.snippets.image" | indent 10 }} - securityContext: - runAsUser: 0 - command: - - chown - - -R - - nagios:root - - /opt/nagios/var/log - volumeMounts: - - name: pod-var-log - mountPath: /opt/nagios/var/log - readOnly: false containers: - name: apache-proxy {{ tuple $envAll "apache_proxy" | include "helm-toolkit.snippets.image" | indent 10 }} @@ -146,6 +133,12 @@ spec: ports: - name: nagios containerPort: {{ tuple "nagios" "internal" "nagios" . 
| include "helm-toolkit.endpoints.endpoint_port_lookup" }} + readinessProbe: + exec: + command: + - /tmp/nagios-readiness.sh + initialDelaySeconds: 60 + periodSeconds: 30 env: - name: SNMP_NOTIF_PRIMARY_TARGET_WITH_PORT value: {{ $envAll.Values.conf.nagios.notification.snmp.primary_target }} @@ -190,6 +183,10 @@ spec: mountPath: /opt/nagios/etc/nagios_objects.cfg subPath: nagios_objects.cfg readOnly: true + - name: nagios-bin + mountPath: /tmp/nagios-readiness.sh + subPath: nagios-readiness.sh + readOnly: true {{- if not (empty .Values.conf.nagios.query_es_clauses) }} - name: nagios-etc mountPath: /opt/nagios/etc/objects/query_es_clauses.json @@ -198,11 +195,9 @@ spec: {{- end }} - name: pod-var-log mountPath: /opt/nagios/var/log - readOnly: false volumes: - name: pod-var-log - hostPath: - path: /opt/nagios/var/log + emptyDir: {} - name: nagios-etc secret: secretName: nagios-etc diff --git a/nagios/values.yaml b/nagios/values.yaml index 178708297..e6daf7609 100644 --- a/nagios/values.yaml +++ b/nagios/values.yaml @@ -1075,7 +1075,8 @@ conf: event_broker_options: -1 log_rotation_method: d log_archive_path: /opt/nagios/var/log/archives - use_syslog: 1 + use_syslog: 0 + log_notifications: 0 log_service_retries: 1 log_host_retries: 1 log_event_handlers: 1 @@ -1088,7 +1089,7 @@ conf: service_interleave_factor: s host_inter_check_delay_method: s max_host_check_spread: 30 - max_concurrent_checks: 60 + max_concurrent_checks: 300 check_result_reaper_frequency: 10 max_check_result_reaper_time: 30 check_result_path: /opt/nagios/var/spool/checkresults