[ceph-osd] Add a check for misplaced objects to the post-apply job
OSD failures during an update can cause degraded and misplaced objects. The post-apply job restarts OSDs in failure-domain batches so that the restarts complete efficiently. There is already a wait for degraded objects to ensure that OSDs are not restarted on degraded PGs, but misplaced objects could mean that multiple replicas of an object exist in the same failure domain. The job should therefore also wait for misplaced objects to recover before restarting OSDs, in order to avoid potential disruption under these failure conditions.

Change-Id: I39606e388a9a1d3a4e9c547de56aac4fc5606ea2
This commit is contained in:
parent
3205c8b778
commit
e37d1fc2ab
@ -15,6 +15,6 @@ apiVersion: v1
|
||||
appVersion: v1.0.0
|
||||
description: OpenStack-Helm Ceph OSD
|
||||
name: ceph-osd
|
||||
version: 0.1.11
|
||||
version: 0.1.12
|
||||
home: https://github.com/ceph/ceph
|
||||
...
|
||||
|
@ -115,11 +115,11 @@ function wait_for_pgs () {
|
||||
done
|
||||
}
|
||||
|
||||
function wait_for_degraded_objects () {
|
||||
echo "#### Start: Checking for degraded objects ####"
|
||||
function wait_for_degraded_and_misplaced_objects () {
|
||||
echo "#### Start: Checking for degraded and misplaced objects ####"
|
||||
|
||||
# Loop until no degraded or misplaced objects remain
|
||||
while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep degraded`" ]]
|
||||
while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep 'degraded\|misplaced'`" ]]
|
||||
do
|
||||
sleep 3
|
||||
ceph -s
|
||||
@ -150,7 +150,7 @@ function restart_by_rack() {
|
||||
sleep 60
|
||||
# Degraded objects won't recover with noout set unless pods come back and
|
||||
# PGs become healthy, so simply wait for 0 degraded and misplaced objects
|
||||
wait_for_degraded_objects
|
||||
wait_for_degraded_and_misplaced_objects
|
||||
ceph -s
|
||||
done
|
||||
}
|
||||
@ -179,7 +179,7 @@ if [[ $max_release -gt 1 ]]; then
|
||||
if [[ $require_upgrade -gt 0 ]]; then
|
||||
echo "waiting for inactive pgs and degraded obejcts before upgrade"
|
||||
wait_for_pgs
|
||||
wait_for_degraded_objects
|
||||
wait_for_degraded_and_misplaced_objects
|
||||
ceph -s
|
||||
ceph osd "set" noout
|
||||
echo "lets restart the osds rack by rack"
|
||||
|
Loading…
Reference in New Issue
Block a user