From 089ab7c1a8490384de4c5e6e7aa313763840dfbf Mon Sep 17 00:00:00 2001 From: Felipe Sanches Zanoni Date: Thu, 22 Aug 2024 16:00:51 -0300 Subject: [PATCH] Do not stop Ceph fixed monitor when disabling Controller The fixed Ceph monitor for AIO-DX should never be disabled, even if the controller is locked. This way, the Ceph cluster will have higher availability. This also blocks upgrade rollback when the from-release has only the floating monitor. When the active controller is on the from-release and the standby controller is on the to-release, Ceph becomes unresponsive when the standby controller gets locked and mon.controller-X stops. This change was required by commit: https://review.opendev.org/c/starlingx/update/+/926971 Test-Plan: PASS: AIO-DX: Lock the standby controller and verify that Ceph has a three-monitor quorum and that one host is down, with HEALTH_WARN status and the Degraded data redundancy alarm. Story: 2011122 Task: 50899 Signed-off-by: Felipe Sanches Zanoni Change-Id: Ie9c9ad800649f1aca65e837acc2e867c575554e6 --- ceph/ceph/files/ceph.sh | 47 ++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/ceph/ceph/files/ceph.sh b/ceph/ceph/files/ceph.sh index d9d1806e8..d427f40f3 100644 --- a/ceph/ceph/files/ceph.sh +++ b/ceph/ceph/files/ceph.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2023 Wind River Systems, Inc. +# Copyright (c) 2023-2024 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 @@ -37,19 +37,42 @@ start () stop () { - if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" == "simplex" ]]; then + if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "simplex" ]; then + # AIO-SX logecho "Ceph services will continue to run on node" - exit 0 + RC=0 + elif [ "$system_type" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then + # AIO-DX and AIO-DX+ + # Will stop OSDs and MDS processes only. + # mon.controller will be already stopped on standby controllers. 
+ # mon.${hostname} must be running. + logecho "Ceph services will be stopped, except local ceph monitor" + + if [ -f ${CEPH_FILE} ]; then + rm -f ${CEPH_FILE} + fi + + ${INITDIR}/ceph-init-wrapper stop osd >> ${LOGFILE} 2>&1 + local rc_osd=$? + logecho "rc_osd=${rc_osd}" + + ${INITDIR}/ceph-init-wrapper stop mds >> ${LOGFILE} 2>&1 + local rc_mds=$? + logecho "rc_mds=${rc_mds}" + + RC=0 + [ ${rc_osd} -ne 0 ] || [ ${rc_mds} -ne 0 ] && RC=1 + else + # Standard and Standard Dedicated Storage + logecho "Stopping ceph services..." + + if [ -f ${CEPH_FILE} ]; then + rm -f ${CEPH_FILE} + fi + + ${INITDIR}/ceph-init-wrapper stop >> ${LOGFILE} 2>&1 + RC=$? fi - - logecho "Stopping ceph services..." - - if [ -f ${CEPH_FILE} ]; then - rm -f ${CEPH_FILE} - fi - - ${INITDIR}/ceph-init-wrapper stop >> ${LOGFILE} 2>&1 - RC=$? } # If system is an AIO the mtcClient will run this script twice