From 07ceecd8d7c357304289ed8a3de4688a99c504fa Mon Sep 17 00:00:00 2001 From: Stephen Taylor Date: Tue, 29 Jun 2021 07:58:23 -0600 Subject: [PATCH] Export crash dumps when Ceph daemons crash This change configures Ceph daemon pods so that /var/lib/ceph/crash maps to a hostPath location that persists when the pod restarts. This will allow for post-mortem examination of crash dumps to attempt to understand why daemons have crashed. Change-Id: I53277848f79a405b0809e0e3f19d90bbb80f3df8 --- ceph-client/Chart.yaml | 2 +- ceph-client/templates/bin/_init-dirs.sh.tpl | 2 +- ceph-client/templates/deployment-mds.yaml | 10 ++++++++++ ceph-client/templates/deployment-mgr.yaml | 10 ++++++++++ ceph-mon/Chart.yaml | 2 +- ceph-mon/templates/bin/_init-dirs.sh.tpl | 2 +- ceph-mon/templates/daemonset-mon.yaml | 10 ++++++++++ ceph-osd/Chart.yaml | 2 +- ceph-osd/templates/bin/_init-dirs.sh.tpl | 2 +- ceph-osd/templates/daemonset-osd.yaml | 13 +++++++++++++ ceph-rgw/Chart.yaml | 2 +- ceph-rgw/templates/bin/_init-dirs.sh.tpl | 2 +- ceph-rgw/templates/deployment-rgw.yaml | 10 ++++++++++ releasenotes/notes/ceph-client.yaml | 1 + releasenotes/notes/ceph-mon.yaml | 1 + releasenotes/notes/ceph-osd.yaml | 1 + releasenotes/notes/ceph-rgw.yaml | 1 + 17 files changed, 65 insertions(+), 8 deletions(-) diff --git a/ceph-client/Chart.yaml b/ceph-client/Chart.yaml index acc4b5b58..17b33bb8d 100644 --- a/ceph-client/Chart.yaml +++ b/ceph-client/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph Client name: ceph-client -version: 0.1.19 +version: 0.1.20 home: https://github.com/ceph/ceph-client ... diff --git a/ceph-client/templates/bin/_init-dirs.sh.tpl b/ceph-client/templates/bin/_init-dirs.sh.tpl index b349500ed..a6b59075b 100644 --- a/ceph-client/templates/bin/_init-dirs.sh.tpl +++ b/ceph-client/templates/bin/_init-dirs.sh.tpl @@ -27,7 +27,7 @@ for keyring in ${OSD_BOOTSTRAP_KEYRING} ${MDS_BOOTSTRAP_KEYRING}; do done # Let's create the ceph directories -for DIRECTORY in mds tmp mgr; do +for DIRECTORY in mds tmp mgr crash; do mkdir -p "/var/lib/ceph/${DIRECTORY}" done diff --git a/ceph-client/templates/deployment-mds.yaml b/ceph-client/templates/deployment-mds.yaml index 84838b55a..2640c1c3d 100644 --- a/ceph-client/templates/deployment-mds.yaml +++ b/ceph-client/templates/deployment-mds.yaml @@ -74,6 +74,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false containers: - name: ceph-mds {{ tuple $envAll "ceph_mds" | include "helm-toolkit.snippets.image" | indent 10 }} @@ -136,6 +139,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false volumes: - name: pod-tmp emptyDir: {} @@ -154,6 +160,10 @@ spec: defaultMode: 0555 - name: pod-var-lib-ceph emptyDir: {} + - name: pod-var-lib-ceph-crash + hostPath: + path: /var/lib/openstack-helm/ceph/crash + type: DirectoryOrCreate - name: ceph-client-admin-keyring secret: secretName: {{ .Values.secrets.keyrings.admin }} diff --git a/ceph-client/templates/deployment-mgr.yaml b/ceph-client/templates/deployment-mgr.yaml index d7adccf1b..e53fe29e4 100644 --- a/ceph-client/templates/deployment-mgr.yaml +++ b/ceph-client/templates/deployment-mgr.yaml @@ -77,6 +77,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false containers: - name: ceph-mgr {{ tuple $envAll "ceph_mgr" | include "helm-toolkit.snippets.image" | indent 10 }} @@ -166,6 +169,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false - name: ceph-client-bin mountPath: /tmp/utils-checkPGs.py subPath: utils-checkPGs.py @@ -192,6 +198,10 @@ spec: defaultMode: 0444 - name: pod-var-lib-ceph emptyDir: {} + - name: pod-var-lib-ceph-crash + hostPath: + path: /var/lib/openstack-helm/ceph/crash + type: DirectoryOrCreate - name: ceph-client-admin-keyring secret: secretName: {{ .Values.secrets.keyrings.admin }} diff --git a/ceph-mon/Chart.yaml b/ceph-mon/Chart.yaml index 700cd901d..bc2af46c5 100644 --- a/ceph-mon/Chart.yaml +++ b/ceph-mon/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph Mon name: ceph-mon -version: 0.1.9 +version: 0.1.10 home: https://github.com/ceph/ceph ... diff --git a/ceph-mon/templates/bin/_init-dirs.sh.tpl b/ceph-mon/templates/bin/_init-dirs.sh.tpl index 81bb58681..482a307cc 100644 --- a/ceph-mon/templates/bin/_init-dirs.sh.tpl +++ b/ceph-mon/templates/bin/_init-dirs.sh.tpl @@ -27,7 +27,7 @@ for keyring in ${OSD_BOOTSTRAP_KEYRING} ${MDS_BOOTSTRAP_KEYRING} ; do done # Let's create the ceph directories -for DIRECTORY in mon osd mds radosgw tmp mgr; do +for DIRECTORY in mon osd mds radosgw tmp mgr crash; do mkdir -p "/var/lib/ceph/${DIRECTORY}" done diff --git a/ceph-mon/templates/daemonset-mon.yaml b/ceph-mon/templates/daemonset-mon.yaml index 9b9cac250..90043913f 100644 --- a/ceph-mon/templates/daemonset-mon.yaml +++ b/ceph-mon/templates/daemonset-mon.yaml @@ -99,6 +99,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false - name: ceph-log-ownership {{ tuple $envAll "ceph_mon" | include "helm-toolkit.snippets.image" | indent 10 }} {{ dict "envAll" $envAll "application" "mon" "container" "ceph_log_ownership" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }} @@ -228,6 +231,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false - name: pod-var-log mountPath: /var/log/ceph readOnly: false @@ -252,6 +258,10 @@ spec: - name: pod-var-lib-ceph hostPath: path: {{ .Values.conf.storage.mon.directory }} + - name: pod-var-lib-ceph-crash + hostPath: + path: /var/lib/openstack-helm/ceph/crash + type: DirectoryOrCreate - name: ceph-client-admin-keyring secret: secretName: {{ .Values.secrets.keyrings.admin }} diff --git a/ceph-osd/Chart.yaml b/ceph-osd/Chart.yaml index 0e9cb0985..dbf096fd5 100644 --- a/ceph-osd/Chart.yaml +++ b/ceph-osd/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph OSD name: ceph-osd -version: 0.1.24 +version: 0.1.25 home: https://github.com/ceph/ceph ... diff --git a/ceph-osd/templates/bin/_init-dirs.sh.tpl b/ceph-osd/templates/bin/_init-dirs.sh.tpl index c3618ff01..03f8c3965 100644 --- a/ceph-osd/templates/bin/_init-dirs.sh.tpl +++ b/ceph-osd/templates/bin/_init-dirs.sh.tpl @@ -21,7 +21,7 @@ export LC_ALL=C mkdir -p "$(dirname "${OSD_BOOTSTRAP_KEYRING}")" # Let's create the ceph directories -for DIRECTORY in osd tmp; do +for DIRECTORY in osd tmp crash; do mkdir -p "/var/lib/ceph/${DIRECTORY}" done diff --git a/ceph-osd/templates/daemonset-osd.yaml b/ceph-osd/templates/daemonset-osd.yaml index 23a7fa9c8..6dbab0dd1 100644 --- a/ceph-osd/templates/daemonset-osd.yaml +++ b/ceph-osd/templates/daemonset-osd.yaml @@ -130,6 +130,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false - name: pod-var-lib-ceph-tmp mountPath: /var/lib/ceph/tmp readOnly: false @@ -251,6 +254,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false - name: pod-var-lib-ceph-tmp mountPath: /var/lib/ceph/tmp readOnly: false @@ -411,6 +417,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false - name: pod-var-lib-ceph-tmp mountPath: /var/lib/ceph/tmp readOnly: false @@ -455,6 +464,10 @@ spec: emptyDir: {} - name: pod-var-lib-ceph emptyDir: {} + - name: pod-var-lib-ceph-crash + hostPath: + path: /var/lib/openstack-helm/ceph/crash + type: DirectoryOrCreate - name: pod-var-lib-ceph-tmp hostPath: path: /var/lib/openstack-helm/ceph/var-tmp diff --git a/ceph-rgw/Chart.yaml b/ceph-rgw/Chart.yaml index 15b2c2807..422e9979c 100644 --- a/ceph-rgw/Chart.yaml +++ b/ceph-rgw/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph RadosGW name: ceph-rgw -version: 0.1.11 +version: 0.1.12 home: https://github.com/ceph/ceph ... diff --git a/ceph-rgw/templates/bin/_init-dirs.sh.tpl b/ceph-rgw/templates/bin/_init-dirs.sh.tpl index 9ab21097c..8f727bcf4 100644 --- a/ceph-rgw/templates/bin/_init-dirs.sh.tpl +++ b/ceph-rgw/templates/bin/_init-dirs.sh.tpl @@ -25,7 +25,7 @@ for keyring in ${RGW_BOOTSTRAP_KEYRING}; do done # Let's create the ceph directories -for DIRECTORY in radosgw tmp; do +for DIRECTORY in radosgw tmp crash; do mkdir -p "/var/lib/ceph/${DIRECTORY}" done diff --git a/ceph-rgw/templates/deployment-rgw.yaml b/ceph-rgw/templates/deployment-rgw.yaml index 0849cc78d..9428abd10 100644 --- a/ceph-rgw/templates/deployment-rgw.yaml +++ b/ceph-rgw/templates/deployment-rgw.yaml @@ -92,6 +92,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false - name: ceph-rgw-init {{ tuple $envAll "ceph_rgw" | include "helm-toolkit.snippets.image" | indent 10 }} {{ tuple $envAll $envAll.Values.pod.resources.rgw | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} @@ -182,6 +185,9 @@ spec: - name: pod-var-lib-ceph mountPath: /var/lib/ceph readOnly: false + - name: pod-var-lib-ceph-crash + mountPath: /var/lib/ceph/crash + readOnly: false {{- dict "enabled" .Values.manifests.certificates "name" $tls_secret "path" "/etc/tls" | include "helm-toolkit.snippets.tls_volume_mount" | indent 12 }} volumes: - name: pod-tmp @@ -201,6 +207,10 @@ spec: defaultMode: 0444 - name: pod-var-lib-ceph emptyDir: {} + - name: pod-var-lib-ceph-crash + hostPath: + path: /var/lib/openstack-helm/ceph/crash + type: DirectoryOrCreate - name: ceph-bootstrap-rgw-keyring secret: secretName: {{ .Values.secrets.keyrings.rgw }} diff --git a/releasenotes/notes/ceph-client.yaml b/releasenotes/notes/ceph-client.yaml index 1b0dff829..cadfa78f5 100644 --- a/releasenotes/notes/ceph-client.yaml +++ b/releasenotes/notes/ceph-client.yaml @@ -20,4 +20,5 @@ ceph-client: - 0.1.17 Add pool rename support for Ceph pools - 0.1.18 Add pool delete support for Ceph pools - 0.1.19 Use full image ref for docker official images + - 0.1.20 Export crash dumps when Ceph daemons crash ... diff --git a/releasenotes/notes/ceph-mon.yaml b/releasenotes/notes/ceph-mon.yaml index 8d87d905a..20f7e91ab 100644 --- a/releasenotes/notes/ceph-mon.yaml +++ b/releasenotes/notes/ceph-mon.yaml @@ -10,4 +10,5 @@ ceph-mon: - 0.1.7 remove deprecated svc annotation tolerate-unready-endpoints - 0.1.8 Use full image ref for docker official images - 0.1.9 Remove unnecessary parameters for ceph-mon + - 0.1.10 Export crash dumps when Ceph daemons crash ... diff --git a/releasenotes/notes/ceph-osd.yaml b/releasenotes/notes/ceph-osd.yaml index d617a4001..277ee0bbb 100644 --- a/releasenotes/notes/ceph-osd.yaml +++ b/releasenotes/notes/ceph-osd.yaml @@ -25,4 +25,5 @@ ceph-osd: - 0.1.22 Refactor Ceph OSD Init Scripts - Second PS - 0.1.23 Use full image ref for docker official images - 0.1.24 Ceph OSD Init Improvements + - 0.1.25 Export crash dumps when Ceph daemons crash ... diff --git a/releasenotes/notes/ceph-rgw.yaml b/releasenotes/notes/ceph-rgw.yaml index cb8d6d231..bee54ca70 100644 --- a/releasenotes/notes/ceph-rgw.yaml +++ b/releasenotes/notes/ceph-rgw.yaml @@ -12,4 +12,5 @@ ceph-rgw: - 0.1.9 Use full image ref for docker official images - 0.1.10 Fix a bug in placement target deletion for new targets - 0.1.11 Change s3 auth order to use local before external + - 0.1.12 Export crash dumps when Ceph daemons crash ...