Steve Wilkerson 046742c9c6 Nagios: Update logging, add readiness probe
This updates the Nagios chart configuration to not use syslog for
logging, removes the logging of notifications, and drastically
increases the number of concurrent checks executed.

This also removes the hostPath for Nagios logs, as it seems to add
no value over what's already reported to the console.  Finally, as
Nagios's log file has the potential to grow very rapidly while the
service has no means to disable logging to disk, this adds a
readiness probe that both checks whether Nagios's endpoint is
being served and clears out the log file by redirecting the
no-op command's output to the Nagios log file.

Change-Id: I81151c48ef4e0b7877f595c271f55b8fd479e8c1
2019-01-17 11:12:16 -06:00

210 lines
7.7 KiB
YAML

{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- /* Everything from here to the final end is rendered only when manifests.deployment is set. */}}
{{- if .Values.manifests.deployment }}
{{- $envAll := . }}
{{- /* Service account name; the ClusterRole and ClusterRoleBinding in this file use the same name. */}}
{{- $serviceAccountName := "nagios" }}
{{- /* Name of the secret referenced by the secretKeyRef env entries of the containers in this file. */}}
{{- $nagiosUserSecret := $envAll.Values.secrets.nagios.admin }}
{{ tuple $envAll "nagios" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
---
# Cluster-wide read permissions for the nagios service account.
# rbac.authorization.k8s.io/v1beta1 was removed in Kubernetes 1.22; v1 is the
# stable replacement and accepts the same schema.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ $serviceAccountName }}
rules:
  # Read-only access (get/list/watch) to core-group objects.
  # NOTE(review): presumably used by the Nagios checks to discover and query
  # cluster objects - confirm against the check plugins.
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  # ConfigMaps may only be fetched individually (no list/watch).
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
---
# Binds the ClusterRole of the same name to the nagios service account in the
# release namespace.
# rbac.authorization.k8s.io/v1beta1 was removed in Kubernetes 1.22; v1 is the
# stable replacement and accepts the same schema.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ $serviceAccountName }}
subjects:
  - kind: ServiceAccount
    name: {{ $serviceAccountName }}
    namespace: {{ .Release.Namespace }}
roleRef:
  kind: ClusterRole
  name: {{ $serviceAccountName }}
  apiGroup: rbac.authorization.k8s.io
---
# Nagios Deployment: one pod running an apache-proxy container and the nagios
# container side by side.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nagios
  annotations:
    {{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
  labels:
{{ tuple $envAll "nagios" "monitoring" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
spec:
  replicas: {{ .Values.pod.replicas.nagios }}
  selector:
    matchLabels:
{{ tuple $envAll "nagios" "monitoring" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 6 }}
{{ tuple $envAll | include "helm-toolkit.snippets.kubernetes_upgrades_deployment" | indent 2 }}
  template:
    metadata:
      labels:
{{ tuple $envAll "nagios" "monitoring" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
      annotations:
        # Hashes computed from the chart's configmap templates.
        # NOTE(review): presumably there so a config change alters the pod
        # template and triggers a rollout - confirm against helm-toolkit.
        configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
        configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
    spec:
      serviceAccountName: {{ $serviceAccountName }}
      nodeSelector:
        {{ .Values.labels.nagios.node_selector_key }}: {{ .Values.labels.nagios.node_selector_value | quote }}
      terminationGracePeriodSeconds: {{ .Values.pod.lifecycle.termination_grace_period.nagios.timeout | default "30" }}
      # Use shareProcessNamespace on Kubernetes 1.10 and newer, hostPID on
      # older clusters. The version is compared as a semantic version: the
      # Major/Minor fields are strings, and a string comparison ranks "9"
      # above "10" (and may see values such as "10+").
      {{- if semverCompare ">=1.10-0" .Capabilities.KubeVersion.GitVersion }}
      shareProcessNamespace: true
      {{- else }}
      hostPID: true
      {{- end }}
      initContainers:
{{ tuple $envAll "nagios" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
      containers:
        # Apache container listening on the "http" port of the nagios endpoint.
        - name: apache-proxy
{{ tuple $envAll "apache_proxy" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.apache_proxy | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
          command:
            - /tmp/apache.sh
            - start
          ports:
            - name: http
              containerPort: {{ tuple "nagios" "internal" "http" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
          # Ready once the http port accepts TCP connections.
          readinessProbe:
            tcpSocket:
              port: {{ tuple "nagios" "internal" "http" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
            initialDelaySeconds: 20
            periodSeconds: 10
          env:
            - name: NAGIOSADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: {{ $nagiosUserSecret }}
                  key: NAGIOSADMIN_USER
            - name: NAGIOSADMIN_PASS
              valueFrom:
                secretKeyRef:
                  name: {{ $nagiosUserSecret }}
                  key: NAGIOSADMIN_PASS
          volumeMounts:
            - name: nagios-bin
              mountPath: /tmp/apache.sh
              subPath: apache.sh
              readOnly: true
            - name: nagios-etc
              mountPath: /usr/local/apache2/conf/httpd.conf
              subPath: httpd.conf
              readOnly: true
        # Nagios core container.
        - name: nagios
{{ tuple $envAll "nagios" | include "helm-toolkit.snippets.image" | indent 10 }}
{{ tuple $envAll $envAll.Values.pod.resources.nagios | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}
          ports:
            - name: nagios
              containerPort: {{ tuple "nagios" "internal" "nagios" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
          # Runs the readiness script mounted from the nagios-bin ConfigMap.
          # Per the commit message, the script checks that the Nagios endpoint
          # is being served and also clears out the Nagios log file.
          readinessProbe:
            exec:
              command:
                - /tmp/nagios-readiness.sh
            initialDelaySeconds: 60
            periodSeconds: 30
          env:
            # Notification targets come straight from values; they are quoted
            # so that a number-like or boolean-like value still renders as the
            # string an env var requires.
            - name: SNMP_NOTIF_PRIMARY_TARGET_WITH_PORT
              value: {{ $envAll.Values.conf.nagios.notification.snmp.primary_target | quote }}
            - name: SNMP_NOTIF_SECONDARY_TARGET_WITH_PORT
              value: {{ $envAll.Values.conf.nagios.notification.snmp.secondary_target | quote }}
            - name: REST_NOTIF_PRIMARY_TARGET_URL
              value: {{ $envAll.Values.conf.nagios.notification.http.primary_target | quote }}
            - name: REST_NOTIF_SECONDARY_TARGET_URL
              value: {{ $envAll.Values.conf.nagios.notification.http.secondary_target | quote }}
            - name: CEPH_MGR_SERVICE
              value: {{ tuple "ceph_mgr" "internal" "metrics" $envAll | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" }}/metrics
            - name: PROMETHEUS_SERVICE
              valueFrom:
                secretKeyRef:
                  name: {{ $nagiosUserSecret }}
                  key: PROMETHEUS_SERVICE
            - name: ELASTICSEARCH_SERVICE
              valueFrom:
                secretKeyRef:
                  name: {{ $nagiosUserSecret }}
                  key: ELASTICSEARCH_SERVICE
            - name: NAGIOSADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: {{ $nagiosUserSecret }}
                  key: NAGIOSADMIN_USER
            - name: NAGIOSADMIN_PASS
              valueFrom:
                secretKeyRef:
                  name: {{ $nagiosUserSecret }}
                  key: NAGIOSADMIN_PASS
          volumeMounts:
            - name: nagios-etc
              mountPath: /opt/nagios/etc/nagios.cfg
              subPath: nagios.cfg
              readOnly: true
            - name: nagios-etc
              mountPath: /opt/nagios/etc/cgi.cfg
              subPath: cgi.cfg
              readOnly: true
            - name: nagios-etc
              mountPath: /opt/nagios/etc/nagios_objects.cfg
              subPath: nagios_objects.cfg
              readOnly: true
            - name: nagios-bin
              mountPath: /tmp/nagios-readiness.sh
              subPath: nagios-readiness.sh
              readOnly: true
            # Mounted only when conf.nagios.query_es_clauses is non-empty.
            {{- if not (empty .Values.conf.nagios.query_es_clauses) }}
            - name: nagios-etc
              mountPath: /opt/nagios/etc/objects/query_es_clauses.json
              subPath: query_es_clauses.json
              readOnly: true
            {{- end }}
            - name: pod-var-log
              mountPath: /opt/nagios/var/log
      volumes:
        # The log directory is an emptyDir, so logs last only as long as the pod.
        - name: pod-var-log
          emptyDir: {}
        - name: nagios-etc
          secret:
            secretName: nagios-etc
            # Octal mode bits: read-only for everyone.
            defaultMode: 0444
        - name: nagios-bin
          configMap:
            name: nagios-bin
            # Octal mode bits: read and execute for everyone.
            defaultMode: 0555
{{- end }}