From badfff4d19c7c1786e75e92c2a511b1e966e4d04 Mon Sep 17 00:00:00 2001 From: "Kabanov, Dmitrii" Date: Wed, 12 Aug 2020 17:22:07 -0700 Subject: [PATCH] [ceph-osd] enhancement to the "post-apply" job The PS adds changes that count the available OSDs and compare the number of "ready" OSDs with the total number of OSDs. The check passes if the percentage of "ready" OSDs meets or exceeds the required value ("required_percent_of_osds"). Otherwise, the check fails (including the case when one or more pods in the namespace are not ready after the timeout). Change-Id: I3cf6dbc6393b62423ee5929167f03b8fc7bbac68 --- ceph-osd/templates/bin/_post-apply.sh.tpl | 16 ++++++++++++++++ ceph-osd/templates/job-post-apply.yaml | 2 ++ 2 files changed, 18 insertions(+) diff --git a/ceph-osd/templates/bin/_post-apply.sh.tpl b/ceph-osd/templates/bin/_post-apply.sh.tpl index f4cf44f7b..fb798cc71 100644 --- a/ceph-osd/templates/bin/_post-apply.sh.tpl +++ b/ceph-osd/templates/bin/_post-apply.sh.tpl @@ -40,6 +40,13 @@ function wait_for_pods() { phase: .status.phase}" select="select((.status) or (.phase==\"Succeeded\") | not)" query=".items | map( ${fields} | ${select}) | .[]" + # Selecting containers with "ceph-osd-default" name and + # counting them based on "ready" field. + count_pods=".items | map(.status.containerStatuses | .[] | \ + select(.name==\"ceph-osd-default\")) | \ + group_by(.ready) | map({(.[0].ready | tostring): length}) | .[]" + min_osds="add | if .true >= (.false + .true)*${REQUIRED_PERCENT_OF_OSDS}/100 \ + then \"pass\" else \"fail\" end" while true; do unhealthy_pods=$(kubectl get pods --namespace="${1}" -o json | jq -c "${query}") if [[ -z "${unhealthy_pods}" ]]; then @@ -50,6 +57,15 @@ function wait_for_pods() { if [ $(date -u +%s) -gt $end ] ; then echo -e "Containers failed to start after $timeout seconds\n" kubectl get pods --namespace "${1}" -o wide + # Leaving while loop if minimum amount of OSDs are ready. 
+ # It allows to proceed even if some OSDs are not ready + # or in "CrashLoopBackOff" state + state=$(kubectl get pods --namespace="${1}" -l component=osd -o json | jq "${count_pods}") + osd_state=$(jq -s "${min_osds}" <<< "${state}") + non_osd_state=$(kubectl get pods --namespace="${1}" -l component!=osd -o json | jq -c "${query}") + if [[ -z "${non_osd_state}" && "${osd_state}" == "pass" ]]; then + break + fi exit 1 fi done diff --git a/ceph-osd/templates/job-post-apply.yaml b/ceph-osd/templates/job-post-apply.yaml index 4134dee05..924354a46 100644 --- a/ceph-osd/templates/job-post-apply.yaml +++ b/ceph-osd/templates/job-post-apply.yaml @@ -98,6 +98,8 @@ spec: value: {{ .Release.Namespace }} - name: RELEASE_GROUP_NAME value: {{ .Release.Name }} + - name: REQUIRED_PERCENT_OF_OSDS + value: {{ .Values.conf.ceph.target.required_percent_of_osds | ceil | quote }} command: - /tmp/post-apply.sh volumeMounts: