From 4557f6fbe8d8138264706ad3f43ac716d3091902 Mon Sep 17 00:00:00 2001 From: "Kabanov, Dmitrii" Date: Thu, 13 Aug 2020 22:45:01 -0700 Subject: [PATCH] [ceph] Update queries to filter pgs correctly The PS updates queries in wait_for_pgs function in ceph-client and ceph-osd charts. It allows the status of PGs to be checked more accurately. The output of the "ceph pg ls" command may contain many PG statuses, like "active+clean", "active+undersized+degraded", "active+recovering", "peering", etc. But along with these statuses there may also be ones such as "stale+active+clean". To avoid the wrong interpretation of the status of the PGs the filter was changed from "startswith(active+)" to "contains(active)". The PS also adds a delay after the restart of the pods to the post-apply job. This reduces the number of useless queries to Kubernetes. Change-Id: I0eff2ce036ad543bf2554bd586c2a2d3e91c052b --- ceph-client/templates/bin/pool/_init.sh.tpl | 2 +- ceph-osd/templates/bin/_post-apply.sh.tpl | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ceph-client/templates/bin/pool/_init.sh.tpl b/ceph-client/templates/bin/pool/_init.sh.tpl index aed81bf72..fd7b82d53 100644 --- a/ceph-client/templates/bin/pool/_init.sh.tpl +++ b/ceph-client/templates/bin/pool/_init.sh.tpl @@ -33,7 +33,7 @@ function wait_for_pgs () { echo "#### Start: Checking pgs ####" pgs_ready=0 - query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | startswith("active+") | not)' + query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | contains("active") | not)' if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) 
-eq 0 ]]; then query=".pg_stats | ${query}" diff --git a/ceph-osd/templates/bin/_post-apply.sh.tpl b/ceph-osd/templates/bin/_post-apply.sh.tpl index 03a21f18a..f4cf44f7b 100644 --- a/ceph-osd/templates/bin/_post-apply.sh.tpl +++ b/ceph-osd/templates/bin/_post-apply.sh.tpl @@ -83,7 +83,7 @@ function wait_for_pgs () { echo "#### Start: Checking pgs ####" pgs_ready=0 - query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | startswith("active+") | not)' + query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | contains("active") | not)' if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then query=".pg_stats | ${query}" @@ -136,6 +136,9 @@ function restart_by_rack() { fi done echo "waiting for the pods under rack $rack from restart" + # The pods will not be ready in first 60 seconds. Thus we can reduce + # amount of queries to kubernetes. + sleep 60 wait_for_pods $CEPH_NAMESPACE echo "waiting for inactive pgs after osds restarted from rack $rack" wait_for_pgs