From 76fb2562c60e829109d176800f021d64de5c5529 Mon Sep 17 00:00:00 2001
From: Stephen Taylor
Date: Mon, 4 Apr 2022 13:35:49 -0600
Subject: [PATCH] [ceph-osd] Allow for unconditional OSD restart

This change allows OSDs to be restarted unconditionally by the
ceph-osd chart. This can be useful in upgrade scenarios where
ceph-osd pods are unhealthy during the upgrade.

Change-Id: I6de98db2b4eb1d76411e1dbffa65c263de3aecee
---
 ceph-osd/Chart.yaml                       |  2 +-
 ceph-osd/templates/bin/_post-apply.sh.tpl | 54 +++++++++++++----------
 ceph-osd/templates/job-post-apply.yaml    |  2 +
 ceph-osd/values.yaml                      |  5 +++
 releasenotes/notes/ceph-osd.yaml          |  1 +
 5 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/ceph-osd/Chart.yaml b/ceph-osd/Chart.yaml
index e50427f68..be0c75bc7 100644
--- a/ceph-osd/Chart.yaml
+++ b/ceph-osd/Chart.yaml
@@ -15,6 +15,6 @@ apiVersion: v1
 appVersion: v1.0.0
 description: OpenStack-Helm Ceph OSD
 name: ceph-osd
-version: 0.1.38
+version: 0.1.39
 home: https://github.com/ceph/ceph
 ...
diff --git a/ceph-osd/templates/bin/_post-apply.sh.tpl b/ceph-osd/templates/bin/_post-apply.sh.tpl
index c8a50202d..74229676c 100644
--- a/ceph-osd/templates/bin/_post-apply.sh.tpl
+++ b/ceph-osd/templates/bin/_post-apply.sh.tpl
@@ -188,31 +188,37 @@ done
 
 echo "Latest revision of the helm chart(s) is : $max_release"
 
-if [[ $max_release -gt 1 ]]; then
-  if [[ $require_upgrade -gt 0 ]]; then
-    if [[ "$DISRUPTIVE_OSD_RESTART" == "true" ]]; then
-      echo "restarting all osds simultaneously"
-      kubectl -n $CEPH_NAMESPACE delete pod -l component=osd
-      sleep 60
-      echo "waiting for pgs to become active and for degraded objects to recover"
-      wait_for_pgs
-      wait_for_degraded_objects
-      ceph -s
-    else
-      echo "waiting for inactive pgs and degraded objects before upgrade"
-      wait_for_pgs
-      wait_for_degraded_and_misplaced_objects
-      ceph -s
-      ceph osd "set" noout
-      echo "lets restart the osds rack by rack"
-      restart_by_rack
-      ceph osd "unset" noout
+# If flags are set that will prevent recovery, don't restart OSDs
+ceph -s | grep "noup\|noin\|nobackfill\|norebalance\|norecover" > /dev/null
+if [[ $? -ne 0 ]]; then
+  if [[ "$UNCONDITIONAL_OSD_RESTART" == "true" ]] || [[ $max_release -gt 1 ]]; then
+    if [[ "$UNCONDITIONAL_OSD_RESTART" == "true" ]] || [[ $require_upgrade -gt 0 ]]; then
+      if [[ "$DISRUPTIVE_OSD_RESTART" == "true" ]]; then
+        echo "restarting all osds simultaneously"
+        kubectl -n $CEPH_NAMESPACE delete pod -l component=osd
+        sleep 60
+        echo "waiting for pgs to become active and for degraded objects to recover"
+        wait_for_pgs
+        wait_for_degraded_objects
+        ceph -s
+      else
+        echo "waiting for inactive pgs and degraded objects before upgrade"
+        wait_for_pgs
+        wait_for_degraded_and_misplaced_objects
+        ceph -s
+        ceph osd "set" noout
+        echo "lets restart the osds rack by rack"
+        restart_by_rack
+        ceph osd "unset" noout
+      fi
     fi
-  fi
-  #lets check all the ceph-osd daemonsets
-  echo "checking DS"
-  check_ds
+    #lets check all the ceph-osd daemonsets
+    echo "checking DS"
+    check_ds
+  else
+    echo "No revisions found for upgrade"
+  fi
 else
-  echo "No revisions found for upgrade"
+  echo "Skipping OSD restarts because flags are set that would prevent recovery"
 fi
 
diff --git a/ceph-osd/templates/job-post-apply.yaml b/ceph-osd/templates/job-post-apply.yaml
index 6e9a34707..393769d95 100644
--- a/ceph-osd/templates/job-post-apply.yaml
+++ b/ceph-osd/templates/job-post-apply.yaml
@@ -104,6 +104,8 @@ spec:
               value: {{ .Values.conf.ceph.target.required_percent_of_osds | ceil | quote }}
             - name: DISRUPTIVE_OSD_RESTART
               value: {{ .Values.conf.storage.disruptive_osd_restart | quote }}
+            - name: UNCONDITIONAL_OSD_RESTART
+              value: {{ .Values.conf.storage.unconditional_osd_restart | quote }}
           command:
             - /tmp/post-apply.sh
           volumeMounts:
diff --git a/ceph-osd/values.yaml b/ceph-osd/values.yaml
index 09c41e985..ad87e2a15 100644
--- a/ceph-osd/values.yaml
+++ b/ceph-osd/values.yaml
@@ -293,6 +293,11 @@ conf:
   # OSD restarts more quickly with disruption.
   disruptive_osd_restart: "false"
 
+  # The post-apply job will try to determine if OSDs need to be restarted and
+  # only restart them if necessary. Set this value to "true" to restart OSDs
+  # unconditionally.
+  unconditional_osd_restart: "false"
+
   # NOTE(portdirect): for heterogeneous clusters the overrides section can be used to define
   # OSD pods that will be deployed upon specifc nodes.
   # overrides:
diff --git a/releasenotes/notes/ceph-osd.yaml b/releasenotes/notes/ceph-osd.yaml
index dd319eafb..a66f6e597 100644
--- a/releasenotes/notes/ceph-osd.yaml
+++ b/releasenotes/notes/ceph-osd.yaml
@@ -39,4 +39,5 @@ ceph-osd:
     - 0.1.36 Add OSD device location pre-check
     - 0.1.37 Add a disruptive OSD restart to the post-apply job
     - 0.1.38 Skip pod wait in post-apply job when disruptive
+    - 0.1.39 Allow for unconditional OSD restart
 ...
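
For operators picking up this change, a minimal usage sketch follows. It
assumes a Helm release named "ceph-osd" in a "ceph" namespace deployed from a
local chart checkout; the release name, namespace, and chart path are
illustrative, not part of the patch.

    # Opt in to unconditional OSD restarts on the next post-apply run.
    helm upgrade ceph-osd ./ceph-osd \
      --namespace ceph \
      --reuse-values \
      --set conf.storage.unconditional_osd_restart="true"

Even with the override set, the post-apply script above still skips restarts
while recovery-blocking cluster flags are present. The same guard can be run
by hand before upgrading, and any stale flags cleared:

    # Mirrors the script's guard: any output here means restarts would be skipped.
    ceph -s | grep "noup\|noin\|nobackfill\|norebalance\|norecover"

    # Clear a flag that is no longer needed, for example:
    ceph osd unset norebalance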