[ceph-osd] Add a disruptive OSD restart to the post-apply job
Currently the ceph-osd post-apply job always restarts OSDs without disruption. This requires waiting for a healthy cluster state in between failure domain restarts, which isn't possible in some upgrade scenarios. In those scenarios where disruption is acceptable and a simultaneous restart of all OSDs is required, the disruptive_osd_restart value now provides this option. Change-Id: I64bfc30382e86c22b0f577d85fceef0d5c106d94
This commit is contained in:
parent
95a754a2c4
commit
2fa26b2821
@ -15,6 +15,6 @@ apiVersion: v1
|
|||||||
appVersion: v1.0.0
|
appVersion: v1.0.0
|
||||||
description: OpenStack-Helm Ceph OSD
|
description: OpenStack-Helm Ceph OSD
|
||||||
name: ceph-osd
|
name: ceph-osd
|
||||||
version: 0.1.36
|
version: 0.1.37
|
||||||
home: https://github.com/ceph/ceph
|
home: https://github.com/ceph/ceph
|
||||||
...
|
...
|
||||||
|
@ -188,6 +188,15 @@ echo "Latest revision of the helm chart(s) is : $max_release"
|
|||||||
|
|
||||||
if [[ $max_release -gt 1 ]]; then
|
if [[ $max_release -gt 1 ]]; then
|
||||||
if [[ $require_upgrade -gt 0 ]]; then
|
if [[ $require_upgrade -gt 0 ]]; then
|
||||||
|
if [[ "$DISRUPTIVE_OSD_RESTART" == "true" ]]; then
|
||||||
|
echo "restarting all osds simultaneously"
|
||||||
|
kubectl -n $CEPH_NAMESPACE delete pod -l component=osd
|
||||||
|
sleep 60
|
||||||
|
echo "waiting for pgs to become active and for degraded objects to recover"
|
||||||
|
wait_for_pgs
|
||||||
|
wait_for_degraded_objects
|
||||||
|
ceph -s
|
||||||
|
else
|
||||||
echo "waiting for inactive pgs and degraded objects before upgrade"
|
echo "waiting for inactive pgs and degraded objects before upgrade"
|
||||||
wait_for_pgs
|
wait_for_pgs
|
||||||
wait_for_degraded_and_misplaced_objects
|
wait_for_degraded_and_misplaced_objects
|
||||||
@ -197,6 +206,7 @@ if [[ $max_release -gt 1 ]]; then
|
|||||||
restart_by_rack
|
restart_by_rack
|
||||||
ceph osd "unset" noout
|
ceph osd "unset" noout
|
||||||
fi
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
#lets check all the ceph-osd daemonsets
|
#lets check all the ceph-osd daemonsets
|
||||||
echo "checking DS"
|
echo "checking DS"
|
||||||
|
@ -102,6 +102,8 @@ spec:
|
|||||||
value: {{ .Release.Name }}
|
value: {{ .Release.Name }}
|
||||||
- name: REQUIRED_PERCENT_OF_OSDS
|
- name: REQUIRED_PERCENT_OF_OSDS
|
||||||
value: {{ .Values.conf.ceph.target.required_percent_of_osds | ceil | quote }}
|
value: {{ .Values.conf.ceph.target.required_percent_of_osds | ceil | quote }}
|
||||||
|
- name: DISRUPTIVE_OSD_RESTART
|
||||||
|
value: {{ .Values.conf.storage.disruptive_osd_restart | quote }}
|
||||||
command:
|
command:
|
||||||
- /tmp/post-apply.sh
|
- /tmp/post-apply.sh
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
@ -288,6 +288,11 @@ conf:
|
|||||||
# type: directory
|
# type: directory
|
||||||
# location: /var/lib/openstack-helm/ceph/osd/journal-one
|
# location: /var/lib/openstack-helm/ceph/osd/journal-one
|
||||||
|
|
||||||
|
# The post-apply job will restart OSDs without disruption by default. Set
|
||||||
|
# this value to "true" to restart all OSDs at once. This will accomplish
|
||||||
|
# OSD restarts more quickly with disruption.
|
||||||
|
disruptive_osd_restart: "false"
|
||||||
|
|
||||||
# NOTE(portdirect): for heterogeneous clusters the overrides section can be used to define
|
# NOTE(portdirect): for heterogeneous clusters the overrides section can be used to define
|
||||||
# OSD pods that will be deployed upon specifc nodes.
|
# OSD pods that will be deployed upon specifc nodes.
|
||||||
# overrides:
|
# overrides:
|
||||||
|
@ -37,4 +37,5 @@ ceph-osd:
|
|||||||
- 0.1.34 Remove wait for misplaced objects during OSD restarts
|
- 0.1.34 Remove wait for misplaced objects during OSD restarts
|
||||||
- 0.1.35 Consolidate mon_endpoints discovery
|
- 0.1.35 Consolidate mon_endpoints discovery
|
||||||
- 0.1.36 Add OSD device location pre-check
|
- 0.1.36 Add OSD device location pre-check
|
||||||
|
- 0.1.37 Add a disruptive OSD restart to the post-apply job
|
||||||
...
|
...
|
||||||
|
Loading…
Reference in New Issue
Block a user