Merge "[ceph-osd] Allow for unconditional OSD restart"

Zuul 2022-04-06 17:15:14 +00:00 committed by Gerrit Code Review
commit 3e74872360
5 changed files with 39 additions and 25 deletions

ceph-osd/Chart.yaml

@@ -15,6 +15,6 @@ apiVersion: v1
 appVersion: v1.0.0
 description: OpenStack-Helm Ceph OSD
 name: ceph-osd
-version: 0.1.38
+version: 0.1.39
 home: https://github.com/ceph/ceph
 ...

ceph-osd/templates/bin/_post-apply.sh.tpl

@@ -188,31 +188,37 @@ done
 echo "Latest revision of the helm chart(s) is : $max_release"
-if [[ $max_release -gt 1 ]]; then
-  if [[ $require_upgrade -gt 0 ]]; then
-    if [[ "$DISRUPTIVE_OSD_RESTART" == "true" ]]; then
-      echo "restarting all osds simultaneously"
-      kubectl -n $CEPH_NAMESPACE delete pod -l component=osd
-      sleep 60
-      echo "waiting for pgs to become active and for degraded objects to recover"
-      wait_for_pgs
-      wait_for_degraded_objects
-      ceph -s
-    else
-      echo "waiting for inactive pgs and degraded objects before upgrade"
-      wait_for_pgs
-      wait_for_degraded_and_misplaced_objects
-      ceph -s
-      ceph osd "set" noout
-      echo "lets restart the osds rack by rack"
-      restart_by_rack
-      ceph osd "unset" noout
+# If flags are set that will prevent recovery, don't restart OSDs
+ceph -s | grep "noup\|noin\|nobackfill\|norebalance\|norecover" > /dev/null
+if [[ $? -ne 0 ]]; then
+  if [[ "$UNCONDITIONAL_OSD_RESTART" == "true" ]] || [[ $max_release -gt 1 ]]; then
+    if [[ "$UNCONDITIONAL_OSD_RESTART" == "true" ]] || [[ $require_upgrade -gt 0 ]]; then
+      if [[ "$DISRUPTIVE_OSD_RESTART" == "true" ]]; then
+        echo "restarting all osds simultaneously"
+        kubectl -n $CEPH_NAMESPACE delete pod -l component=osd
+        sleep 60
+        echo "waiting for pgs to become active and for degraded objects to recover"
+        wait_for_pgs
+        wait_for_degraded_objects
+        ceph -s
+      else
+        echo "waiting for inactive pgs and degraded objects before upgrade"
+        wait_for_pgs
+        wait_for_degraded_and_misplaced_objects
+        ceph -s
+        ceph osd "set" noout
+        echo "lets restart the osds rack by rack"
+        restart_by_rack
+        ceph osd "unset" noout
+      fi
     fi
-  fi
-  #lets check all the ceph-osd daemonsets
-  echo "checking DS"
-  check_ds
+    #lets check all the ceph-osd daemonsets
+    echo "checking DS"
+    check_ds
+  else
+    echo "No revisions found for upgrade"
+  fi
 else
-  echo "No revisions found for upgrade"
+  echo "Skipping OSD restarts because flags are set that would prevent recovery"
 fi
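The effect of the new guard is that OSDs are only restarted when none of the noup, noin, nobackfill, norebalance, or norecover flags are set, since a restart performed under those flags would leave the cluster unable to recover afterwards. As a minimal sketch (the grep pattern is taken from the script above; the flag name in the unset example is only illustrative), an operator can reproduce the job's check and clear an offending flag by hand:

# Same check the post-apply job performs; a match (exit status 0)
# means a recovery-blocking flag is set and restarts will be skipped.
ceph -s | grep "noup\|noin\|nobackfill\|norebalance\|norecover"

# Clear an offending flag, e.g. norebalance, before re-running the job.
ceph osd unset norebalance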

ceph-osd/templates/job-post-apply.yaml

@@ -104,6 +104,8 @@ spec:
             value: {{ .Values.conf.ceph.target.required_percent_of_osds | ceil | quote }}
           - name: DISRUPTIVE_OSD_RESTART
             value: {{ .Values.conf.storage.disruptive_osd_restart | quote }}
+          - name: UNCONDITIONAL_OSD_RESTART
+            value: {{ .Values.conf.storage.unconditional_osd_restart | quote }}
           command:
             - /tmp/post-apply.sh
           volumeMounts:
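The two added lines simply surface the new values.yaml knob as an environment variable inside the post-apply job, mirroring the existing DISRUPTIVE_OSD_RESTART wiring. A quick way to confirm the variable is rendered, sketched under the assumption of a local chart checkout and Helm 3 (the release name and chart path are placeholders):

helm template ceph-osd ./ceph-osd \
  --set conf.storage.unconditional_osd_restart=true \
  | grep -A1 "name: UNCONDITIONAL_OSD_RESTART"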

ceph-osd/values.yaml

@@ -293,6 +293,11 @@ conf:
     # OSD restarts more quickly with disruption.
     disruptive_osd_restart: "false"
+    # The post-apply job will try to determine if OSDs need to be restarted and
+    # only restart them if necessary. Set this value to "true" to restart OSDs
+    # unconditionally.
+    unconditional_osd_restart: "false"
   # NOTE(portdirect): for heterogeneous clusters the overrides section can be used to define
   # OSD pods that will be deployed upon specific nodes.
   # overrides:
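Because unconditional_osd_restart defaults to "false", existing deployments keep the old conditional behavior unless they opt in. A hedged example of opting in at upgrade time (the release name, chart path, and namespace here are assumptions, not part of this change):

helm upgrade ceph-osd ./ceph-osd --namespace ceph \
  --set conf.storage.unconditional_osd_restart=true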

releasenotes/notes/ceph-osd.yaml

@@ -39,4 +39,5 @@ ceph-osd:
   - 0.1.36 Add OSD device location pre-check
   - 0.1.37 Add a disruptive OSD restart to the post-apply job
   - 0.1.38 Skip pod wait in post-apply job when disruptive
+  - 0.1.39 Allow for unconditional OSD restart
 ...