[ceph] Update queries to filter pgs correctly

This PS updates the queries in the wait_for_pgs function in the ceph-client and ceph-osd charts so that the status of PGs is checked more accurately. The output of the "ceph pg ls" command may contain many PG statuses, such as "active+clean", "active+undersized+degraded", "active+recovering", "peering", etc. Along with these there may also be statuses such as "stale+active+clean", which do not start with "active+". To avoid misinterpreting the status of such PGs, the filter was changed from "startswith(active+)" to "contains(active)". This PS also adds a delay to the post-apply job after the pods are restarted, which reduces the number of useless queries to Kubernetes. Change-Id: I0eff2ce036ad543bf2554bd586c2a2d3e91c052b
2020-08-13 22:45:01 -07:00 · 2020-08-13 22:45:01 -07:00 · 4557f6fbe8
commit 4557f6fbe8
parent adf9fc7fc4
2 changed files with 5 additions and 2 deletions
--- a/ceph-client/templates/bin/pool/_init.sh.tpl
+++ b/ceph-client/templates/bin/pool/_init.sh.tpl
@ -33,7 +33,7 @@ function wait_for_pgs () {
  echo "#### Start: Checking pgs ####"

  pgs_ready=0
-  query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | startswith("active+") | not)'
+  query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | contains("active") | not)'

  if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
    query=".pg_stats | ${query}"
--- a/ceph-osd/templates/bin/_post-apply.sh.tpl
+++ b/ceph-osd/templates/bin/_post-apply.sh.tpl
@ -83,7 +83,7 @@ function wait_for_pgs () {
  echo "#### Start: Checking pgs ####"

  pgs_ready=0
-  query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | startswith("active+") | not)'
+  query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | contains("active") | not)'

  if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
    query=".pg_stats | ${query}"
@ -136,6 +136,9 @@ function restart_by_rack() {
       fi
     done
     echo "waiting for the pods under rack $rack from restart"
+     # The pods will not be ready in first 60 seconds. Thus we can reduce
+     # amount of queries to kubernetes.
+     sleep 60
     wait_for_pods $CEPH_NAMESPACE
     echo "waiting for inactive pgs after osds restarted from rack $rack"
     wait_for_pgs