Nagios: Update logging, add readiness probe

This updates the Nagios chart configuration to not use syslog for
logging, removes the logging of notifications, and drastically
increases the number of concurrent checks executed.

This also removes the hostPath for Nagios logs, as it seems to add
no value over what's already reported to the console.  Finally, as
Nagios's log file has the potential to grow very rapidly while the
service has no means to disable logging to disk, this adds a
readiness probe that both checks whether Nagios's endpoint is
being served and clears out the log file by redirecting the
no-op command's (empty) output to the Nagios log file.

Change-Id: I81151c48ef4e0b7877f595c271f55b8fd479e8c1
This commit is contained in:
Steve Wilkerson 2019-01-16 19:39:43 -06:00
parent 379d918a20
commit 046742c9c6
4 changed files with 43 additions and 18 deletions

View File

@ -0,0 +1,27 @@
#!/bin/bash
{{/*
Copyright 2019 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
# Readiness probe for the Nagios container. It does two things on every run:
#   1. truncates the Nagios log file so it cannot grow without bound, and
#   2. reports ready only if the local Nagios endpoint answers with 2xx/3xx.

# NOTE(sw5822): Redirect the (empty) output of the no-op command ':' to the
# Nagios log file to truncate it, since Nagios doesn't support logging to
# /dev/null
: > /opt/nagios/var/log/nagios.log

# Check whether Nagios endpoint is reachable. curl prints only the HTTP status
# code; it prints 000 when no response was received at all.
reply=$(curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8000/nagios)

# Anything other than a three-digit 2xx or 3xx status (including 000, an empty
# string, or other unexpected output) marks the container as not ready. A
# pattern match is used instead of integer comparisons so that malformed
# output fails the probe rather than raising a test(1) error that would be
# silently treated as success.
case "$reply" in
  [23][0-9][0-9]) exit 0 ;;
  *) exit 1 ;;
esac

View File

@ -24,6 +24,8 @@ metadata:
data:
apache.sh: |
{{ tuple "bin/_apache.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
nagios-readiness.sh: |
{{ tuple "bin/_nagios-readiness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
image-repo-sync.sh: |+
{{- include "helm-toolkit.scripts.image_repo_sync" . | indent 4 }}
{{- end }}

View File

@ -92,19 +92,6 @@ spec:
{{- end }}
initContainers:
{{ tuple $envAll "nagios" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
- name: nagios-log-ownership
{{ tuple $envAll "nagios" | include "helm-toolkit.snippets.image" | indent 10 }}
securityContext:
runAsUser: 0
command:
- chown
- -R
- nagios:root
- /opt/nagios/var/log
volumeMounts:
- name: pod-var-log
mountPath: /opt/nagios/var/log
readOnly: false
containers:
- name: apache-proxy
{{ tuple $envAll "apache_proxy" | include "helm-toolkit.snippets.image" | indent 10 }}
@ -146,6 +133,12 @@ spec:
ports:
- name: nagios
containerPort: {{ tuple "nagios" "internal" "nagios" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
# Exec-based readiness probe: runs the nagios-readiness.sh script that is
# mounted into the container at /tmp/nagios-readiness.sh. The first probe is
# delayed by 60 seconds and it is then repeated every 30 seconds.
readinessProbe:
exec:
command:
- /tmp/nagios-readiness.sh
initialDelaySeconds: 60
periodSeconds: 30
env:
- name: SNMP_NOTIF_PRIMARY_TARGET_WITH_PORT
value: {{ $envAll.Values.conf.nagios.notification.snmp.primary_target }}
@ -190,6 +183,10 @@ spec:
mountPath: /opt/nagios/etc/nagios_objects.cfg
subPath: nagios_objects.cfg
readOnly: true
- name: nagios-bin
mountPath: /tmp/nagios-readiness.sh
subPath: nagios-readiness.sh
readOnly: true
{{- if not (empty .Values.conf.nagios.query_es_clauses) }}
- name: nagios-etc
mountPath: /opt/nagios/etc/objects/query_es_clauses.json
@ -198,11 +195,9 @@ spec:
{{- end }}
- name: pod-var-log
mountPath: /opt/nagios/var/log
readOnly: false
volumes:
- name: pod-var-log
hostPath:
path: /opt/nagios/var/log
emptyDir: {}
- name: nagios-etc
secret:
secretName: nagios-etc

View File

@ -1075,7 +1075,8 @@ conf:
event_broker_options: -1
log_rotation_method: d
log_archive_path: /opt/nagios/var/log/archives
use_syslog: 1
use_syslog: 0
log_notifications: 0
log_service_retries: 1
log_host_retries: 1
log_event_handlers: 1
@ -1088,7 +1089,7 @@ conf:
service_interleave_factor: s
host_inter_check_delay_method: s
max_host_check_spread: 30
max_concurrent_checks: 60
max_concurrent_checks: 300
check_result_reaper_frequency: 10
max_check_result_reaper_time: 30
check_result_path: /opt/nagios/var/spool/checkresults