diff --git a/ceph/ceph/files/ceph-init-wrapper.sh b/ceph/ceph/files/ceph-init-wrapper.sh index c772598c0..427eb693a 100755 --- a/ceph/ceph/files/ceph-init-wrapper.sh +++ b/ceph/ceph/files/ceph-init-wrapper.sh @@ -305,11 +305,6 @@ has_daemon_running () start () { - if [ ! -f ${CEPH_FILE} ]; then - # Ceph is not running on this node, return success - exit 0 - fi - local service="$1" # Evaluate the parameter because of local monitor (controller.${HOSTNAME}) eval service="${service}" @@ -598,6 +593,14 @@ start=$(date +%s%N) log INFO "action:${args[0]}:start-at:${start: 0:-6} ms" case "${args[0]}" in start) + if [ ! -f ${CEPH_FILE} ]; then + # Ceph is not running on this node, return success + exit 0 + fi + + start ${args[1]} + ;; + forcestart) start ${args[1]} ;; stop) diff --git a/ceph/ceph/files/ceph.sh b/ceph/ceph/files/ceph.sh index d427f40f3..ae3796abd 100644 --- a/ceph/ceph/files/ceph.sh +++ b/ceph/ceph/files/ceph.sh @@ -6,41 +6,96 @@ INITDIR=/etc/init.d LOGFILE=/var/log/ceph/ceph-init.log -CEPH_FILE=/var/run/.ceph_started +CEPH_STARTED_FLAG=/var/run/.ceph_started +CEPH_CONFIGURED_FLAG=/etc/platform/.node_ceph_configured +CEPH_INIT="${INITDIR}/ceph-init-wrapper" -# Get our nodetype +# Get system/node configuration . /etc/platform/platform.conf -# Exit immediately if ceph not configured (i.e. no mon in the config file) -if ! grep -q "mon\." /etc/ceph/ceph.conf -then - exit 0 -fi - logecho () { - echo $1 - date >> ${LOGFILE} - echo $1 >> ${LOGFILE} + local head="$(date "+%Y-%m-%d %H:%M:%S.%3N")" + echo "$head ${BASHPID}: $@" >> ${LOGFILE} + echo "$@" } +# Exit immediately if ceph not configured +if [ ! -f "${CEPH_CONFIGURED_FLAG}" ]; then + logecho "Ceph is not configured in this node. Exiting." + exit 0 +fi + +# If system is an AIO the mtcClient will run this script twice +# from 2 locations on controllers. +# If this is a AIO DX+ it will also be called on compute nodes +# and it should be avoided since there is nothing to do. +# So exit the script if it is called from /etc//services.d/worker +if [[ "$system_type" == "All-in-one" ]]; then + dir_path=$(dirname "$(realpath $0)") + if [[ "$dir_path" == "/etc/services.d/worker" ]]; then + logecho "Calling from '${dir_path}' and this is ${system_type^}. Exiting." + exit 0 + fi +fi + start () { - # Defer ceph initialization to avoid race conditions. Let SM and Pmon to start the - # processes in the appropriate time. - # Set the flag to let ceph start later. - logecho "Setting flag to enable ceph processes to start." - if [ ! -f ${CEPH_FILE} ]; then - touch ${CEPH_FILE} + # Start Ceph processes according to the system_type and system_mode. + # Set the flag CEPH_STARTED_FLAG to let ceph to be monitored by Pmon and SM. + # The forcestart action is used to bypass the CEPH_STARTED_FLAG flag check + # that is created only after all processes are running to prevent + # Pmon and SM detecting process failure when monitoring the processes + + logecho "Starting Ceph processes on ${system_type^} ${system_mode^}" + + if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "simplex" ]; then + ${CEPH_INIT} forcestart mon + local rc_mon=$? + logecho "RC mon: ${rc_mon}" + + ${CEPH_INIT} forcestart mds + local rc_mds=$? + logecho "RC mds: ${rc_mds}" + + ${CEPH_INIT} forcestart osd + local rc_osd=$? + logecho "RC osd: ${rc_osd}" + fi + + if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then + ${CEPH_INIT} forcestart mon.${HOSTNAME} + local rc_mon=$? + logecho "RC mon.${HOSTNAME}: ${rc_mon}" + + ${CEPH_INIT} forcestart mds + local rc_mds=$? + logecho "RC mds: ${rc_mds}" + fi + + if [ "${system_type}" == "Standard" ]; then + ${CEPH_INIT} forcestart + local rc_all=$? + logecho "RC all: ${rc_all}" + + ${CEPH_INIT} forcestart mds + local rc_mds=$? + logecho "RC mds: ${rc_mds}" + fi + + logecho "Setting flag to enable ceph processes monitoring" + if [ ! -f ${CEPH_STARTED_FLAG} ]; then + touch ${CEPH_STARTED_FLAG} fi } stop () { + logecho "Stopping Ceph processes on ${system_type^} ${system_mode^}" if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "simplex" ]; then - # AIO-SX - logecho "Ceph services will continue to run on node" - RC=0 + # AIO-SX do not stop services + logecho "Ceph services will continue to run" + elif [ "$system_type" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then # AIO-DX and AIO-DX+ # Will stop OSDs and MDS processes only. @@ -48,46 +103,30 @@ stop () # mon.${hostname} must be running. logecho "Ceph services will be stopped, except local ceph monitor" - if [ -f ${CEPH_FILE} ]; then - rm -f ${CEPH_FILE} + if [ -f ${CEPH_STARTED_FLAG} ]; then + rm -f ${CEPH_STARTED_FLAG} fi - ${INITDIR}/ceph-init-wrapper stop osd >> ${LOGFILE} 2>&1 + ${CEPH_INIT} stop osd >> ${LOGFILE} 2>&1 local rc_osd=$? - logecho "rc_osd=${rc_osd}" + logecho "RC osd: ${rc_osd}" - ${INITDIR}/ceph-init-wrapper stop mds >> ${LOGFILE} 2>&1 + ${CEPH_INIT} stop mds >> ${LOGFILE} 2>&1 local rc_mds=$? - logecho "rc_mds=${rc_mds}" + logecho "RC mds: ${rc_mds}" - RC=0 - [ ${rc_osd} -ne 0 ] || [ ${rc_mds} -ne 0 ] && RC=1 else # Standard and Standard Dedicated Storage logecho "Stopping ceph services..." - if [ -f ${CEPH_FILE} ]; then - rm -f ${CEPH_FILE} + if [ -f ${CEPH_STARTED_FLAG} ]; then + rm -f ${CEPH_STARTED_FLAG} fi - ${INITDIR}/ceph-init-wrapper stop >> ${LOGFILE} 2>&1 - RC=$? + ${CEPH_INIT} stop >> ${LOGFILE} 2>&1 fi } -# If system is an AIO the mtcClient will run this script twice -# from 2 locations and this generates some errors. -# So we have to exit the script if is called -# from /etc/services.d/worker in order to be executed once -if [[ "$system_type" == "All-in-one" ]]; then - dir_path=$(dirname "$(realpath $0)") - if [[ "$dir_path" == "/etc/services.d/worker" ]]; then - exit 0 - fi -fi - -RC=0 - case "$1" in start) start @@ -101,5 +140,4 @@ case "$1" in ;; esac -logecho "RC was: $RC" -exit $RC +exit 0