Fix AIO-DX Uncontrolled Swact ceph-mon failure
This change fixes the scenario in which, after an uncontrolled swact caused by killing one of the critical processes twice, the ceph-mon service fails to start on the new active controller, which in turn triggers another swact. A flag file was added to signal that ceph-mon has shut down completely. After an uncontrolled swact, the system checks whether the flag exists and, if so, starts the ceph-mon service on the new active controller. Test Plan: PASS: System host-swact. PASS: Ceph recovery after rebooting the active controller. PASS: Ceph recovery after an uncontrolled swact caused by killing a critical process twice. PASS: Ceph recovery after a management network outage of a few minutes, even when rebooting controllers. PASS: Ceph recovery after dead office recovery (DOR). PASS: Successful upgrade from stx 7.0 to 8.0 in a duplex lab. Closes-bug: 2017133 Signed-off-by: Pedro Vinícius Silva da Cruz <pedro.silvadacruz@windriver.com> Change-Id: I6784ec76afa3e62ee14e8ca8f3d6c0212a9f6f3e
This commit is contained in:
parent
52c051e134
commit
09e29800cb
@ -49,6 +49,8 @@ if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; the
|
|||||||
CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_0"
|
CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_0"
|
||||||
CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_1"
|
CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_1"
|
||||||
CEPH_LAST_ACTIVE_CONTROLLER_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_${HOSTNAME/-/_}"
|
CEPH_LAST_ACTIVE_CONTROLLER_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_${HOSTNAME/-/_}"
|
||||||
|
|
||||||
|
CEPH_MON_SHUTDOWN_COMPLETE="${CEPH_MON_LIB_PATH}/.ceph_mon_shutdown_complete"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
BINDIR=/usr/bin
|
BINDIR=/usr/bin
|
||||||
@ -181,7 +183,13 @@ can_start_ceph_mon ()
|
|||||||
else
|
else
|
||||||
local CEPH_OTHER_ACTIVE_CONTROLLER_FLAG="${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
|
local CEPH_OTHER_ACTIVE_CONTROLLER_FLAG="${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -f "${CEPH_OTHER_ACTIVE_CONTROLLER_FLAG}" ]; then
|
if [ -f "${CEPH_OTHER_ACTIVE_CONTROLLER_FLAG}" ]; then
|
||||||
|
|
||||||
|
if [ -f "${CEPH_MON_SHUTDOWN_COMPLETE}" ]; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
|
||||||
# Verify drbd-cephmon status
|
# Verify drbd-cephmon status
|
||||||
for times in {9..0}; do
|
for times in {9..0}; do
|
||||||
is_drbd_cephmon_in_sync
|
is_drbd_cephmon_in_sync
|
||||||
@ -284,6 +292,8 @@ start ()
|
|||||||
# Remove old flags
|
# Remove old flags
|
||||||
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
|
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
|
||||||
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}"
|
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}"
|
||||||
|
rm -f "${CEPH_MON_SHUTDOWN_COMPLETE}"
|
||||||
|
|
||||||
# Create new flag
|
# Create new flag
|
||||||
touch "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}"
|
touch "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}"
|
||||||
fi
|
fi
|
||||||
@ -292,9 +302,15 @@ start ()
|
|||||||
|
|
||||||
stop ()
|
stop ()
|
||||||
{
|
{
|
||||||
|
local service="$1"
|
||||||
|
|
||||||
wlog "-" INFO "Ceph STOP $1 command received."
|
wlog "-" INFO "Ceph STOP $1 command received."
|
||||||
with_service_lock "$1" ${CEPH_SCRIPT} stop $1
|
with_service_lock "$1" ${CEPH_SCRIPT} stop $1
|
||||||
wlog "-" INFO "Ceph STOP $1 command finished."
|
wlog "-" INFO "Ceph STOP $1 command finished."
|
||||||
|
|
||||||
|
if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
|
||||||
|
touch "${CEPH_MON_SHUTDOWN_COMPLETE}"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
restart ()
|
restart ()
|
||||||
@ -304,7 +320,8 @@ restart ()
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
wlog "-" INFO "Ceph RESTART $1 command received."
|
wlog "-" INFO "Ceph RESTART $1 command received."
|
||||||
with_service_lock "$1" ${CEPH_SCRIPT} restart $1
|
stop "$1"
|
||||||
|
start "$1"
|
||||||
wlog "-" INFO "Ceph RESTART $1 command finished."
|
wlog "-" INFO "Ceph RESTART $1 command finished."
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user