From b1005b23b4e9117fcc27562e94e58a2c60a99d4d Mon Sep 17 00:00:00 2001
From: Renis Makadia
Date: Wed, 5 Sep 2018 05:24:32 +0000
Subject: [PATCH] Helm tests for Ceph-OSD and Ceph-Client charts

Change-Id: If4a846f0593b8679558662205a8560aa3cbb18ae
---
 ceph-client/templates/bin/_helm-tests.sh.tpl | 179 +++++++++++++++++++
 ceph-client/templates/configmap-bin.yaml     |   2 +
 ceph-client/templates/pod-helm-tests.yaml    |  85 +++++++++
 ceph-client/values.yaml                      |  13 ++
 ceph-osd/templates/bin/_helm-tests.sh.tpl    |  40 +++++
 ceph-osd/templates/configmap-bin.yaml        |   2 +
 ceph-osd/templates/pod-helm-tests.yaml       |  70 ++++++++
 ceph-osd/values.yaml                         |  16 +-
 tools/deployment/multinode/030-ceph.sh       |   2 +
 9 files changed, 408 insertions(+), 1 deletion(-)
 create mode 100755 ceph-client/templates/bin/_helm-tests.sh.tpl
 create mode 100644 ceph-client/templates/pod-helm-tests.yaml
 create mode 100644 ceph-osd/templates/bin/_helm-tests.sh.tpl
 create mode 100644 ceph-osd/templates/pod-helm-tests.yaml

diff --git a/ceph-client/templates/bin/_helm-tests.sh.tpl b/ceph-client/templates/bin/_helm-tests.sh.tpl
new file mode 100755
index 000000000..5b852b332
--- /dev/null
+++ b/ceph-client/templates/bin/_helm-tests.sh.tpl
@@ -0,0 +1,179 @@
+#!/bin/bash
+
+{{/*
+Copyright 2017 The Openstack-Helm Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+function check_cluster_status() {
+  echo "#### Start: Checking Ceph cluster status ####"
+  ceph_status_output=$(ceph -s -f json | jq -r '.health')
+  ceph_health_status=$(echo $ceph_status_output | jq -r '.status')
+
+  if [ "x${ceph_health_status}" == "xHEALTH_OK" ]; then
+    echo "Ceph status is HEALTH_OK"
+  else
+    echo "Ceph cluster status is NOT HEALTH_OK."
+    exit 1
+  fi
+}
+
+function check_osd_count() {
+  echo "#### Start: Checking OSD count ####"
+  osd_stat_output=$(ceph osd stat -f json-pretty)
+
+  num_osd=$(echo $osd_stat_output | jq .num_osds)
+  num_in_osds=$(echo $osd_stat_output | jq .num_in_osds)
+  num_up_osds=$(echo $osd_stat_output | jq .num_up_osds)
+
+  if [ "x${EXPECTED_OSDS}" == "x${num_osd}" ] && [ "x${EXPECTED_OSDS}" == "x${num_in_osds}" ] && [ "x${EXPECTED_OSDS}" == "x${num_up_osds}" ]; then
+    echo "All OSDs (${EXPECTED_OSDS}) are in UP and IN status"
+  else
+    echo "All expected OSDs (${EXPECTED_OSDS}) are NOT in UP and IN status. Cluster shows OSD count=${num_osd}, UP=${num_up_osds}, IN=${num_in_osds}"
+    exit 1
+  fi
+}
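+
+# NOTE: the jq filters above assume the flat JSON layout that Luminous-era
+# "ceph osd stat" prints, e.g. (illustrative values only):
+#   {"num_osds": 3, "num_up_osds": 3, "num_in_osds": 3, ...}
+# If a Ceph release nests these fields (for example under .osdmap), the
+# filters would need to be adjusted accordingly.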
+
+function pool_validation() {
+  echo "#### Start: Checking Ceph pools ####"
+  pool_dump=$(ceph osd pool ls detail -f json-pretty)
+  osd_crush_rule_dump=$(ceph osd crush rule dump -f json-pretty)
+
+  expectedCrushRuleId=""
+  nrules=$(echo ${osd_crush_rule_dump} | jq length)
+  c=$((nrules-1))
+  for n in $(seq 0 ${c})
+  do
+    name=$(echo ${osd_crush_rule_dump} | jq -r .[${n}].rule_name)
+    if [ "x${EXPECTED_CRUSHRULE}" == "x${name}" ]; then
+      expectedCrushRuleId=$(echo ${osd_crush_rule_dump} | jq .[${n}].rule_id)
+      echo "Checking against rule: id: ${expectedCrushRuleId}, name: ${name}"
+    fi
+  done
+
+  # ${RBD} is injected by the test pod: one variable per pool spec, named
+  # after the pool (upper-cased), holding that pool's expected replication.
+  echo "Checking cluster for size:${RBD}, min_size:${EXPECTED_POOLMINSIZE}, crush_rule:${EXPECTED_CRUSHRULE}, crush_rule_id:${expectedCrushRuleId}"
+
+  npools=$(echo ${pool_dump} | jq length)
+  i=$((npools-1))
+  for n in $(seq 0 ${i})
+  do
+    size=$(echo ${pool_dump} | jq -r ".[${n}][\"size\"]")
+    min_size=$(echo ${pool_dump} | jq -r ".[${n}][\"min_size\"]")
+    pg_num=$(echo ${pool_dump} | jq -r ".[${n}][\"pg_num\"]")
+    pg_placement_num=$(echo ${pool_dump} | jq -r ".[${n}][\"pg_placement_num\"]")
+    crush_rule=$(echo ${pool_dump} | jq -r ".[${n}][\"crush_rule\"]")
+    name=$(echo ${pool_dump} | jq -r ".[${n}][\"pool_name\"]")
+
+    if [ "x${size}" != "x${RBD}" ] || [ "x${min_size}" != "x${EXPECTED_POOLMINSIZE}" ] \
+      || [ "x${pg_num}" != "x${pg_placement_num}" ] || [ "x${crush_rule}" != "x${expectedCrushRuleId}" ]; then
+      echo "Pool ${name} has incorrect parameters! Size=${size}, Min_Size=${min_size}, PG=${pg_num}, PGP=${pg_placement_num}, Rule=${crush_rule}"
+      exit 1
+    else
+      echo "Pool ${name} is configured properly. Size=${size}, Min_Size=${min_size}, PG=${pg_num}, PGP=${pg_placement_num}, Rule=${crush_rule}"
+    fi
+  done
+}
+
+function pool_failuredomain_validation() {
+  echo "#### Start: Checking pools are configured with the expected failure domain ####"
+  osd_pool_ls_details=$(ceph osd pool ls detail -f json-pretty)
+  osd_crush_rule_dump=$(ceph osd crush rule dump -f json-pretty)
+
+  expectedCrushRuleId=""
+  nrules=$(echo ${osd_crush_rule_dump} | jq length)
+  c=$((nrules-1))
+  for n in $(seq 0 ${c})
+  do
+    name=$(echo ${osd_crush_rule_dump} | jq -r .[${n}].rule_name)
+
+    if [ "x${EXPECTED_CRUSHRULE}" == "x${name}" ]; then
+      expectedCrushRuleId=$(echo ${osd_crush_rule_dump} | jq .[${n}].rule_id)
+      echo "Checking against rule: id: ${expectedCrushRuleId}, name: ${name}"
+    fi
+  done
+
+  echo "Checking OSD pools are configured with CRUSH rule name:${EXPECTED_CRUSHRULE}, id:${expectedCrushRuleId}"
+
+  npools=$(echo ${osd_pool_ls_details} | jq length)
+  i=$((npools-1))
+  for p in $(seq 0 ${i})
+  do
+    pool_crush_rule_id=$(echo $osd_pool_ls_details | jq -r ".[${p}][\"crush_rule\"]")
+    pool_name=$(echo $osd_pool_ls_details | jq -r ".[${p}][\"pool_name\"]")
+    if [ "x${pool_crush_rule_id}" == "x${expectedCrushRuleId}" ]; then
+      echo "--> Info: Pool ${pool_name} is configured with the expected rule ${pool_crush_rule_id}"
+    else
+      echo "--> Error: Pool ${pool_name} is configured with rule ${pool_crush_rule_id}, expected ${expectedCrushRuleId}"
+      exit 1
+    fi
+  done
+}
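+
+# NOTE: pg_validation below assumes "ceph pg stat -f json-pretty" reports a
+# num_pg_by_state array, e.g. (illustrative):
+#   {"num_pg_by_state": [{"name": "active+clean", "num": 192}], "num_pgs": 192}
+# The check passes only if every configured PG is counted under active+clean.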
+
+function pg_validation() {
+  echo "#### Start: Checking placement groups active+clean ####"
+  pg_stat_output=$(ceph pg stat -f json-pretty)
+  num_pgs=$(echo ${pg_stat_output} | jq -r .num_pgs)
+  npgstates=$(echo ${pg_stat_output} | jq -r .num_pg_by_state | jq length)
+  i=$((npgstates-1))
+  for n in $(seq 0 ${i})
+  do
+    pg_state=$(echo ${pg_stat_output} | jq -r .num_pg_by_state[${n}].name)
+    if [ "xactive+clean" == "x${pg_state}" ]; then
+      active_clean_pg_num=$(echo ${pg_stat_output} | jq -r .num_pg_by_state[${n}].num)
+      if [ ${num_pgs} -eq ${active_clean_pg_num} ]; then
+        echo "Success: All configured PGs (${num_pgs}) are in active+clean status"
+      else
+        echo "Error: NOT all configured PGs (${num_pgs}) are in active+clean status"
+        exit 1
+      fi
+    fi
+  done
+}
+
+function mgr_validation() {
+  echo "#### Start: MGR validation ####"
+  mgr_dump=$(ceph mgr dump -f json-pretty)
+  echo "Checking for ${MGR_COUNT} MGRs"
+
+  retcode=0
+  mgr_avl=$(echo ${mgr_dump} | jq -r '.["available"]')
+
+  if [ "x${mgr_avl}" == "xtrue" ]; then
+    mgr_active=$(echo ${mgr_dump} | jq -r '.["active_name"]')
+
+    # Now check that we have at least one valid standby
+    mgr_stdby_count=$(echo ${mgr_dump} | jq -r '.["standbys"]' | jq length)
+    if [ ${mgr_stdby_count} -ge 1 ]
+    then
+      echo "Active manager ${mgr_active} is up and running. ${mgr_stdby_count} standby managers available"
+    else
+      echo "No standby manager available"
+      retcode=1
+    fi
+  else
+    echo "Manager is not active"
+    retcode=1
+  fi
+
+  if [ "x${retcode}" == "x1" ]
+  then
+    exit 1
+  fi
+}
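+
+# Each check exits non-zero on failure, so the first failed validation marks
+# the whole test pod (and therefore the "helm test" run) as failed.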
+
+check_cluster_status
+check_osd_count
+mgr_validation
+pg_validation
+pool_validation
+pool_failuredomain_validation
diff --git a/ceph-client/templates/configmap-bin.yaml b/ceph-client/templates/configmap-bin.yaml
index d4f31c0a8..d4aef1a3b 100644
--- a/ceph-client/templates/configmap-bin.yaml
+++ b/ceph-client/templates/configmap-bin.yaml
@@ -48,4 +48,6 @@ data:
   mgr-check.sh: |
 {{ tuple "bin/mgr/_check.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
+  helm-tests.sh: |
+{{ tuple "bin/_helm-tests.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
 {{- end }}
diff --git a/ceph-client/templates/pod-helm-tests.yaml b/ceph-client/templates/pod-helm-tests.yaml
new file mode 100644
index 000000000..c706d548e
--- /dev/null
+++ b/ceph-client/templates/pod-helm-tests.yaml
@@ -0,0 +1,85 @@
+{{/*
+Copyright 2017 The Openstack-Helm Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+{{- if .Values.manifests.pod_test }}
+{{- $envAll := . }}
+{{- $serviceAccountName := "ceph-client-test" }}
+{{ tuple $envAll "tests" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: {{ $serviceAccountName }}
+  labels:
+{{ tuple $envAll "ceph-client" "test" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
+  annotations:
+    "helm.sh/hook": test-success
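+    # NOTE: resources carrying a helm.sh/hook test annotation are not created
+    # at install time; this pod only runs when "helm test <release>" is
+    # invoked, and must exit 0 for the test to succeed.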
+spec:
+  restartPolicy: Never
+  serviceAccountName: {{ $serviceAccountName }}
+  initContainers:
+{{ tuple $envAll "tests" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 4 }}
+  containers:
+    - name: ceph-cluster-helm-test
+{{ tuple $envAll "ceph_config_helper" | include "helm-toolkit.snippets.image" | indent 6 }}
+{{ tuple $envAll $envAll.Values.pod.resources.jobs.tests | include "helm-toolkit.snippets.kubernetes_resources" | indent 6 }}
+      env:
+        - name: CEPH_DEPLOYMENT_NAMESPACE
+          value: {{ .Release.Namespace }}
+        - name: EXPECTED_OSDS
+          value: {{ .Values.conf.pool.target.osd | quote }}
+        - name: EXPECTED_CRUSHRULE
+          value: {{ .Values.conf.pool.default.crush_rule | default "replicated_rule" | quote }}
+        - name: EXPECTED_POOLMINSIZE
+          value: "2"
+        - name: MGR_COUNT
+          value: {{ .Values.pod.replicas.mgr | default "1" | quote }}
+        - name: SPECS
+          value: {{ include "helm-toolkit.utils.joinListWithComma" .Values.conf.pool.spec }}
+        {{- range $pool := .Values.conf.pool.spec -}}
+        {{- with $pool }}
+        - name: {{ .name | upper | replace "." "_" }}
+          value: {{ .replication | quote }}
+        {{- end }}
+        {{- end }}
+      command:
+        - /tmp/helm-tests.sh
+      volumeMounts:
+        - name: ceph-client-bin
+          mountPath: /tmp/helm-tests.sh
+          subPath: helm-tests.sh
+          readOnly: true
+        - name: ceph-client-admin-keyring
+          mountPath: /etc/ceph/ceph.client.admin.keyring
+          subPath: ceph.client.admin.keyring
+          readOnly: true
+        - name: ceph-client-etc
+          mountPath: /etc/ceph/ceph.conf
+          subPath: ceph.conf
+          readOnly: true
+  volumes:
+    - name: ceph-client-bin
+      configMap:
+        name: ceph-client-bin
+        defaultMode: 0555
+    - name: ceph-client-admin-keyring
+      secret:
+        secretName: {{ .Values.secrets.keyrings.admin }}
+    - name: ceph-client-etc
+      configMap:
+        name: ceph-client-etc
+        defaultMode: 0444
+{{- end }}
diff --git a/ceph-client/values.yaml b/ceph-client/values.yaml
index d80634e76..ea54f9c15 100644
--- a/ceph-client/values.yaml
+++ b/ceph-client/values.yaml
@@ -94,6 +94,13 @@ pod:
         limits:
           memory: "1024Mi"
          cpu: "2000m"
+      tests:
+        requests:
+          memory: "10Mi"
+          cpu: "250m"
+        limits:
+          memory: "50Mi"
+          cpu: "500m"
 
 secrets:
   keyrings:
@@ -325,6 +332,10 @@ dependencies:
     services:
       - endpoint: internal
         service: local_image_registry
+  tests:
+    services:
+      - endpoint: internal
+        service: ceph_mon
 
 bootstrap:
   enabled: false
@@ -415,6 +426,7 @@ monitoring:
 
 manifests:
   configmap_bin: true
+  configmap_test_bin: true
   configmap_etc: true
   deployment_mds: true
   deployment_mgr: true
@@ -423,3 +435,4 @@ manifests:
   job_image_repo_sync: true
   job_rbd_pool: true
   service_mgr: true
+  pod_test: true
diff --git a/ceph-osd/templates/bin/_helm-tests.sh.tpl b/ceph-osd/templates/bin/_helm-tests.sh.tpl
new file mode 100644
index 000000000..a36f88ac1
--- /dev/null
+++ b/ceph-osd/templates/bin/_helm-tests.sh.tpl
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+{{/*
+Copyright 2017 The Openstack-Helm Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+# Check OSD status
+function check_osd_status() {
+  echo "--Start: Checking OSD status--"
+  ceph_osd_stat_output=$(ceph osd stat -f json)
+  #
+  # Extract each value needed to check correct deployment of the OSDs
+  #
+  num_osds=$(echo $ceph_osd_stat_output | jq '.num_osds')
+  up_osds=$(echo $ceph_osd_stat_output | jq '.num_up_osds')
+  in_osds=$(echo $ceph_osd_stat_output | jq '.num_in_osds')
+  #
+  # In a correctly deployed cluster, the number of UP and IN OSDs must equal
+  # the total number of OSDs.
+  #
+  if [ "x${num_osds}" == "x${up_osds}" ] && [ "x${num_osds}" == "x${in_osds}" ]; then
+    echo "Success: Total OSDs=${num_osds} Up=${up_osds} In=${in_osds}"
+  else
+    echo "Failure: Total OSDs=${num_osds} Up=${up_osds} In=${in_osds}"
+    exit 1
+  fi
+}
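+
+# NOTE: unlike the ceph-client test, no EXPECTED_OSDS count is injected here;
+# this check only requires that every OSD known to the cluster is up and in.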
+
+check_osd_status
diff --git a/ceph-osd/templates/configmap-bin.yaml b/ceph-osd/templates/configmap-bin.yaml
index c2f44e9a5..a9f5ce115 100644
--- a/ceph-osd/templates/configmap-bin.yaml
+++ b/ceph-osd/templates/configmap-bin.yaml
@@ -40,4 +40,6 @@ data:
 {{ tuple "bin/osd/_stop.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
   init-dirs.sh: |
 {{ tuple "bin/_init-dirs.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
+  helm-tests.sh: |
+{{ tuple "bin/_helm-tests.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
 {{- end }}
diff --git a/ceph-osd/templates/pod-helm-tests.yaml b/ceph-osd/templates/pod-helm-tests.yaml
new file mode 100644
index 000000000..7d4272e5b
--- /dev/null
+++ b/ceph-osd/templates/pod-helm-tests.yaml
@@ -0,0 +1,70 @@
+{{/*
+Copyright 2017 The Openstack-Helm Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+
+{{- if .Values.manifests.helm_tests }}
+{{- $envAll := . }}
+
+{{- $serviceAccountName := "ceph-osd-test" }}
+{{ tuple $envAll "tests" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: {{ $serviceAccountName }}
+  labels:
+{{ tuple $envAll "ceph-osd" "test" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
+  annotations:
+    "helm.sh/hook": test-success
+spec:
+  restartPolicy: Never
+  serviceAccountName: {{ $serviceAccountName }}
+  initContainers:
+{{ tuple $envAll "tests" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 4 }}
+  containers:
+    - name: ceph-cluster-helm-test
+{{ tuple $envAll "ceph_config_helper" | include "helm-toolkit.snippets.image" | indent 6 }}
+{{ tuple $envAll $envAll.Values.pod.resources.tests | include "helm-toolkit.snippets.kubernetes_resources" | indent 6 }}
+      env:
+        - name: CEPH_DEPLOYMENT_NAMESPACE
+          value: {{ .Release.Namespace }}
+      command:
+        - /tmp/helm-tests.sh
+      volumeMounts:
+        - name: ceph-osd-bin
+          mountPath: /tmp/helm-tests.sh
+          subPath: helm-tests.sh
+          readOnly: true
+        - name: ceph-client-admin-keyring
+          mountPath: /etc/ceph/ceph.client.admin.keyring
+          subPath: ceph.client.admin.keyring
+          readOnly: true
+        - name: ceph-osd-etc
+          mountPath: /etc/ceph/ceph.conf
+          subPath: ceph.conf
+          readOnly: true
+  volumes:
+    - name: ceph-osd-bin
+      configMap:
+        name: ceph-osd-bin
+        defaultMode: 0555
+    - name: ceph-client-admin-keyring
+      secret:
+        secretName: {{ .Values.secrets.keyrings.admin }}
+    - name: ceph-osd-etc
+      configMap:
+        name: ceph-osd-etc
+        defaultMode: 0444
+{{- end }}
diff --git a/ceph-osd/values.yaml b/ceph-osd/values.yaml
index 009659313..3c7e41291 100644
--- a/ceph-osd/values.yaml
+++ b/ceph-osd/values.yaml
@@ -22,6 +22,7 @@ images:
   tags:
     ceph_osd: 'docker.io/ceph/daemon:tag-build-master-luminous-ubuntu-16.04'
     ceph_bootstrap: 'docker.io/ceph/daemon:tag-build-master-luminous-ubuntu-16.04'
+    ceph_config_helper: 'docker.io/port/ceph-config-helper:v1.10.3'
     dep_check: 'quay.io/stackanetes/kubernetes-entrypoint:v0.3.1'
     image_repo_sync: docker.io/docker:17.07.0
     local_registry:
@@ -55,6 +56,13 @@ pod:
       limits:
         memory: "1024Mi"
         cpu: "1000m"
+    tests:
+      requests:
+        memory: "10Mi"
+        cpu: "250m"
+      limits:
+        memory: "50Mi"
+        cpu: "500m"
     jobs:
       image_repo_sync:
         requests:
@@ -67,7 +75,7 @@ pod:
 secrets:
   keyrings:
     osd: ceph-bootstrap-osd-keyring
-
+    admin: ceph-client-admin-keyring
 network:
   public: 192.168.0.0/16
   cluster: 192.168.0.0/16
@@ -180,6 +188,10 @@ dependencies:
     services:
       - endpoint: internal
         service: local_image_registry
+  tests:
+    services:
+      - endpoint: internal
+        service: ceph_mon
 
 bootstrap:
   enabled: false
@@ -222,5 +234,7 @@ endpoints:
 manifests:
   configmap_bin: true
   configmap_etc: true
+  configmap_test_bin: true
   daemonset_osd: true
   job_image_repo_sync: true
+  helm_tests: true
diff --git a/tools/deployment/multinode/030-ceph.sh b/tools/deployment/multinode/030-ceph.sh
index 94629829c..ee003710d 100755
--- a/tools/deployment/multinode/030-ceph.sh
+++ b/tools/deployment/multinode/030-ceph.sh
@@ -97,3 +97,5 @@ for CHART in ceph-mon ceph-osd ceph-client ceph-provisioners; do
     --no-headers | awk '{ print $1; exit }')
   kubectl exec -n ceph ${MON_POD} -- ceph -s
 done
+helm test ceph-osd --timeout 900
+helm test ceph-client --timeout 900
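+# NOTE: Helm v2 interprets --timeout in seconds; 900 gives the test pods
+# several minutes to schedule and run on a fresh multinode deployment.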