From 84f1557566da2d6a28164bceddd37fef6b1d6c03 Mon Sep 17 00:00:00 2001
From: Stephen Taylor
Date: Tue, 28 Jul 2020 15:56:28 +0000
Subject: [PATCH] [ceph-client] Fix a helm test issue and disable PG autoscaler

Currently the Ceph helm tests pass even when the deployed Ceph cluster is
unhealthy. This change expands the cluster status testing logic so that the
test passes only when all PGs are active and fails if any PGs remain inactive
after peering has had time to complete.

The PG autoscaler is currently causing deployments to produce unhealthy Ceph
clusters, so this change also disables it. It should be re-enabled once that
issue is resolved.

Change-Id: Iea1ff5006fc00e4570cf67c6af5ef6746a538058
---
 ceph-client/templates/bin/_helm-tests.sh.tpl | 33 +++++++++++++++++++-
 ceph-client/templates/bin/pool/_init.sh.tpl  | 18 +++++++++--
 ceph-client/values.yaml                      |  2 +-
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/ceph-client/templates/bin/_helm-tests.sh.tpl b/ceph-client/templates/bin/_helm-tests.sh.tpl
index eaf89ab13..64b4e4cf0 100755
--- a/ceph-client/templates/bin/_helm-tests.sh.tpl
+++ b/ceph-client/templates/bin/_helm-tests.sh.tpl
@@ -24,7 +24,37 @@ function check_cluster_status() {
   if [ "x${ceph_health_status}" == "xHEALTH_OK" ]; then
     echo "Ceph status is HEALTH_OK"
   else
-    echo "Ceph cluster status is NOT HEALTH_OK."
+    echo "Ceph cluster status is not HEALTH_OK, checking PG states"
+    retries=0
+    # If all PGs are active, pass
+    # This grep is just as robust as jq and, unlike jq, is Ceph-version agnostic
+    while [[ $(ceph pg ls -f json-pretty | grep '"state":' | grep -v "active") ]] && [[ $retries -lt 60 ]]; do
+      # If all inactive PGs are peering, wait for peering to complete
+      # Run 'ceph pg ls' again before failing in case PG states have changed
+      if [[ $(ceph pg ls -f json-pretty | grep '"state":' | grep -v -e "active" -e "peering") ]]; then
+        # If inactive PGs aren't peering, fail
+        echo "Failure, found inactive PGs that aren't peering"
+        exit 1
+      fi
+      sleep 3
+      ((retries=retries+1))
+    done
+    # If peering PGs haven't gone active after retries have expired, fail
+    if [[ $retries -ge 60 ]]; then
+      echo "PGs appear to be stuck peering"
+      exit 1
+    fi
+  fi
+}
+
+function check_recovery_flags() {
+  echo "### Start: Checking for flags that will prevent recovery"
+
+  # Ensure there are no flags set that will prevent recovery of degraded PGs
+  if [[ $(ceph osd stat | grep "norecover\|nobackfill\|norebalance") ]]; then
+    ceph osd stat
+    echo "Flags are set that prevent recovery of degraded PGs"
+    exit 1
   fi
 }
 
@@ -257,3 +287,4 @@ pool_validation
 pool_failuredomain_validation
 check_failure_domain_count_per_pool
 check_cluster_status
+check_recovery_flags
diff --git a/ceph-client/templates/bin/pool/_init.sh.tpl b/ceph-client/templates/bin/pool/_init.sh.tpl
index 3f906eec8..8d81d6602 100644
--- a/ceph-client/templates/bin/pool/_init.sh.tpl
+++ b/ceph-client/templates/bin/pool/_init.sh.tpl
@@ -46,6 +46,17 @@ function wait_for_inactive_pgs () {
   fi
 }
 
+function check_recovery_flags () {
+  echo "### Start: Checking for flags that will prevent recovery"
+
+  # Ensure there are no flags set that will prevent recovery of degraded PGs
+  if [[ $(ceph osd stat | grep "norecover\|nobackfill\|norebalance") ]]; then
+    ceph osd stat
+    echo "Flags are set that prevent recovery of degraded PGs"
+    exit 1
+  fi
+}
+
 function check_osd_count() {
   echo "#### Start: Checking OSD count ####"
   noup_flag=$(ceph osd stat | awk '/noup/ {print $2}')
@@ -119,10 +130,12 @@ function reweight_osds () {
   done
 }
 
-function enable_autoscaling () {
+function enable_or_disable_autoscaling () {
"${ENABLE_AUTOSCALER}" == "true" ]]; then ceph mgr module enable pg_autoscaler ceph config set global osd_pool_default_pg_autoscale_mode on + else + ceph mgr module disable pg_autoscaler fi } @@ -232,7 +245,7 @@ reweight_osds cluster_capacity=0 if [[ -z "$(ceph osd versions | grep ceph\ version | grep -v nautilus)" ]]; then cluster_capacity=$(ceph --cluster "${CLUSTER}" df | grep "TOTAL" | awk '{print $2 substr($3, 1, 1)}' | numfmt --from=iec) - enable_autoscaling + enable_or_disable_autoscaling else cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec) fi @@ -253,3 +266,4 @@ ceph --cluster "${CLUSTER}" osd crush tunables {{ .Values.conf.pool.crush.tunabl {{- end }} wait_for_inactive_pgs +check_recovery_flags diff --git a/ceph-client/values.yaml b/ceph-client/values.yaml index a94df4d8f..f8ab98b24 100644 --- a/ceph-client/values.yaml +++ b/ceph-client/values.yaml @@ -254,7 +254,7 @@ conf: features: mds: true mgr: true - pg_autoscaler: true + pg_autoscaler: false cluster_flags: # List of flags to set or unset separated by spaces set: ""