From 4557f6fbe8d8138264706ad3f43ac716d3091902 Mon Sep 17 00:00:00 2001
From: "Kabanov, Dmitrii" <dmitrii.kabanov@att.com>
Date: Thu, 13 Aug 2020 22:45:01 -0700
Subject: [PATCH] [ceph] Update queries to filter pgs correctly

This PS updates the queries in the wait_for_pgs function in the
ceph-client and ceph-osd charts so that the status of PGs is checked
more accurately.
The output of the "ceph pg ls" command may contain many PG states,
such as "active+clean", "active+undersized+degraded",
"active+recovering", "peering", etc. Alongside these there may also be
states such as "stale+active+clean", which do not start with "active+".
To avoid misinterpreting the state of such PGs, the filter was changed
from "startswith(active+)" to "contains(active)".
This PS also adds a delay to the post-apply job after the pods are
restarted, which reduces the number of useless queries to Kubernetes.

Change-Id: I0eff2ce036ad543bf2554bd586c2a2d3e91c052b
---
 ceph-client/templates/bin/pool/_init.sh.tpl | 2 +-
 ceph-osd/templates/bin/_post-apply.sh.tpl   | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ceph-client/templates/bin/pool/_init.sh.tpl b/ceph-client/templates/bin/pool/_init.sh.tpl
index aed81bf72..fd7b82d53 100644
--- a/ceph-client/templates/bin/pool/_init.sh.tpl
+++ b/ceph-client/templates/bin/pool/_init.sh.tpl
@@ -33,7 +33,7 @@ function wait_for_pgs () {
   echo "#### Start: Checking pgs ####"
 
   pgs_ready=0
-  query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | startswith("active+") | not)'
+  query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | contains("active") | not)'
 
   if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
     query=".pg_stats | ${query}"
diff --git a/ceph-osd/templates/bin/_post-apply.sh.tpl b/ceph-osd/templates/bin/_post-apply.sh.tpl
index 03a21f18a..f4cf44f7b 100644
--- a/ceph-osd/templates/bin/_post-apply.sh.tpl
+++ b/ceph-osd/templates/bin/_post-apply.sh.tpl
@@ -83,7 +83,7 @@ function wait_for_pgs () {
   echo "#### Start: Checking pgs ####"
 
   pgs_ready=0
-  query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | startswith("active+") | not)'
+  query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | contains("active") | not)'
 
   if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
     query=".pg_stats | ${query}"
@@ -136,6 +136,9 @@ function restart_by_rack() {
        fi
      done
      echo "waiting for the pods under rack $rack from restart"
+     # The pods will not be ready in the first 60 seconds, so sleeping
+     # here reduces the number of queries sent to kubernetes.
+     sleep 60
      wait_for_pods $CEPH_NAMESPACE
      echo "waiting for inactive pgs after osds restarted from rack $rack"
      wait_for_pgs