From d5a84a1dbc8bbd929c463cf4d2b9bbea07001a00 Mon Sep 17 00:00:00 2001 From: Hediberto C Silva Date: Tue, 2 Apr 2024 16:15:54 -0300 Subject: [PATCH] Adds support for 3 monitors on AIO-DX Adding two fixed Ceph monitors for controllers when deploying an AIO-DX to improve HA. Includes: - Creates a new shell script to manage the fixed monitors - Creates a new patch to include the mon_data parameter in the mon.pp puppet manifest. - Creates the ceph-storage-network script that is used by the storage-networking SM service to stop Ceph services in case of a network outage. - Alters the script used by the ceph-mds pmon file to ceph-init-wrapper. - Adjusts the ceph-init-wrapper to accept commands from pmon service. - Adjusts the ceph-init-wrapper to accept the forcestop command. - When stopping Ceph services using ceph-init-wrapper, it is checked whether the pid exists before trying. - When stopping ceph-mon service using ceph-init-wrapper, the ceph-mds is stopped right before to force a re-peering. - When starting ceph-mon service using ceph-init-wrapper, the ceph-mds is stopped right before to force a re-peering. - When starting ceph-mds, it is checked whether the ceph-mon is operational. - The forcestop command uses a TERM signal first before attempting a KILL signal after 5 seconds. Test Plan: PASS: Fresh install AIO-DX and check 3 Ceph monitors are running. PASS: Fresh install all other setups and check if Ceph is working as expected. PASS: Reboot the standby controller and check if Ceph is still running. PASS: Reboot the active controller. Ceph will stop responding, but it will recover after both controllers are running. PASS: Verify Ceph is working after a DOR test with PODs writing to the cephfs and rbd pools. 
PASS: Verify Ceph is resilient to switch reboots Story: 2011122 Task: 50129 Change-Id: I18d7ab9da3303265da34bc13c8be4baa23c2a7be Signed-off-by: Hediberto C Silva Signed-off-by: Felipe Sanches Zanoni --- .../debian/deb_folder/ceph-base.ceph.init | 56 ++++-- ceph/ceph/debian/deb_folder/ceph-base.install | 2 + ceph/ceph/debian/deb_folder/rules | 6 +- ceph/ceph/files/ceph-fixed-mon.conf.pmon | 26 +++ ceph/ceph/files/ceph-init-wrapper.sh | 161 +++++++++++++----- ceph/ceph/files/ceph-mds.conf.pmon | 2 +- ceph/ceph/files/ceph-storage-network.sh | 143 ++++++++++++++++ .../patches/0017-Add-mon_data-parameter.patch | 110 ++++++++++++ .../puppet-ceph-2.4.1/debian/patches/series | 1 + 9 files changed, 447 insertions(+), 60 deletions(-) create mode 100644 ceph/ceph/files/ceph-fixed-mon.conf.pmon create mode 100755 ceph/ceph/files/ceph-storage-network.sh create mode 100644 config/puppet-modules/openstack/puppet-ceph-2.4.1/debian/patches/0017-Add-mon_data-parameter.patch diff --git a/ceph/ceph/debian/deb_folder/ceph-base.ceph.init b/ceph/ceph/debian/deb_folder/ceph-base.ceph.init index 80bf58d19..b5d260408 100755 --- a/ceph/ceph/debian/deb_folder/ceph-base.ceph.init +++ b/ceph/ceph/debian/deb_folder/ceph-base.ceph.init @@ -623,7 +623,7 @@ stop_daemon() { if [ \$timeout -lt 0 ]; then break fi - timeout-=1 + timeout=\$((timeout-1)) fi cmd=\"kill $signal \$pid\" printf \"\$cmd...\" @@ -769,7 +769,7 @@ fi # When this is a AIO-DX pmon is monitoring ceph-mds process. # If ceph-mon is not running, ceph-mds will hang when starting. 
# Check if we are trying to bring up ceph-mds and ceph-mon is not ready yet -if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then +if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then if [ "${command}" = "start" -o "${command}" = "onestart" ]; then what_out= what_mds= @@ -873,6 +873,12 @@ for name in $what; do # conf file cmd="$cmd -c $conf" + # StarlingX: + # If this is AIO-DX, check if service is the fixed Ceph monitor and set the parameter --mon-data + if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" != "simplex" ] && [ "$type" == "mon" ] && [ "$id" == ${HOSTNAME} ]; then + cmd="$cmd --mon-data /var/lib/ceph/data/ceph-${HOSTNAME}" + fi + if echo $name | grep -q ^osd; then get_conf osd_data "/var/lib/ceph/osd/$cluster-$id" "osd data" get_conf fs_path "$osd_data" "fs path" # mount point defaults so osd data @@ -928,13 +934,15 @@ for name in $what; do [ -n "$TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES" ] && tcmalloc="TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=$TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES" - # StarlingX: start processes in scope under slice system-ceph.slice - # so that ceph processes do not start under this script's callers cgroup - if [ "$type" = "osd" ]; then - cmd="systemd-run --scope --unit=ceph-${type}-${id} --slice=system-ceph $cmd" - else - cmd="systemd-run --scope --unit=ceph-${type} --slice=system-ceph $cmd" - fi + # StarlingX: start processes in scope under slice system-ceph.slice + # so that ceph processes do not start under this script's callers cgroup + if [ "$type" = "osd" ]; then + cmd="systemd-run --scope --unit=ceph-${type}-${id} --slice=system-ceph $cmd" + elif [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" != "simplex" ] && [ "$type" == "mon" ] && [ "$id" == ${HOSTNAME} ]; then + cmd="systemd-run --scope --unit=ceph-${type}-${id} --slice=system-ceph $cmd" + else + cmd="systemd-run --scope --unit=ceph-${type} --slice=system-ceph $cmd" + fi # StarlingX: not running 
as ceph user/group cmd="$files $tcmalloc $wrap $cmd --cluster $cluster $runmode" @@ -988,9 +996,21 @@ for name in $what; do fi fi + echo Starting Ceph $name on $host... + + if [ $type = "mds" ]; then + echo "Waiting for ceph-mon to respond before starting ceph-mds..." + execute_ceph_cmd CMD_OUTPUT $name "ceph fsid" + if [ $? -ne 0 ]; then + log $name "INFO" "Cannot start $name process. Ceph-mon is not working..." + echo "Cannot start ceph-mds. Ceph-mon is not working..." + EXIT_STATUS=$errcode + continue + fi + fi + save_proc_startup_ok $name - echo Starting Ceph $name on $host... if [ ! -d $run_dir ]; then # assume /var/run exists install -d -m0770 -o ceph -g ceph /var/run/ceph @@ -1067,10 +1087,10 @@ EOF # first try to gracefully close process, this should be fast if # its threads still respond to the TERM signal log $name "DEBUG" ">>> Sending term signal" - stop_daemon $name ceph-$type $pid_file TERM "" 5 + stop_daemon $name ceph-$type $pid_file -SIGTERM "" 5 log $name "DEBUG" ">>> Sending kill signal" # then just kill it - stop_daemon $name ceph-$type $pid_file KILL + stop_daemon $name ceph-$type $pid_file -SIGKILL fi [ -n "$pidfile" ] && rm -f $pidfile @@ -1182,7 +1202,17 @@ EOF get_conf pre_forcestop "" "pre forcestop command" get_conf post_forcestop "" "post forcestop command" [ -n "$pre_forcestop" ] && do_cmd "$pre_forcestop" - stop_daemon $name ceph-$type $pid_file -9 + + # first try to gracefully close process, this should be fast if + # its threads still respond to the TERM signal + wlog $name "DEBUG" ">>> Sending term signal" + stop_daemon $name ceph-$type $pid_file -SIGTERM "" 5 + wlog $name "DEBUG" ">>> Sending kill signal" + # then just kill it + stop_daemon $name ceph-$type $pid_file -SIGKILL + + [ -n "$pidfile" ] && rm -f $pidfile + [ -n "$asok" ] && rm -f $asok [ -n "$post_forcestop" ] && do_cmd "$post_forcestop" [ -n "$lockfile" ] && [ "$?" 
-eq 0 ] && rm -f $lockfile ;; diff --git a/ceph/ceph/debian/deb_folder/ceph-base.install b/ceph/ceph/debian/deb_folder/ceph-base.install index 39525e835..7f2622258 100644 --- a/ceph/ceph/debian/deb_folder/ceph-base.install +++ b/ceph/ceph/debian/deb_folder/ceph-base.install @@ -25,8 +25,10 @@ usr/bin/ceph-detect-init etc/init.d/ceph etc/init.d/mgr-restful-plugin etc/init.d/ceph-init-wrapper +etc/init.d/ceph-storage-network etc/ceph/ceph.conf.pmon etc/ceph/ceph-mds.conf.pmon +etc/ceph/ceph-fixed-mon.conf.pmon etc/ceph/ceph.conf etc/services.d/* usr/sbin/ceph-preshutdown.sh diff --git a/ceph/ceph/debian/deb_folder/rules b/ceph/ceph/debian/deb_folder/rules index def4c4abb..87dd07418 100755 --- a/ceph/ceph/debian/deb_folder/rules +++ b/ceph/ceph/debian/deb_folder/rules @@ -14,7 +14,8 @@ SOURCE8 := ceph.service SOURCE9 := mgr-restful-plugin.service SOURCE10 := ceph-preshutdown.sh SOURCE11 := stx-containerd-ceph-override.conf - +SOURCE12 := ceph-storage-network.sh +SOURCE13 := ceph-fixed-mon.conf.pmon # Paths export DESTDIR = $(CURDIR)/debian/tmp @@ -198,6 +199,8 @@ override_dh_auto_install: install -D -m 644 ${SOURCE9} $(DESTDIR)/${UNITDIR}/mgr-restful-plugin.service install -D -m 700 ${SOURCE10} $(DESTDIR)/${SBINDIR}/ceph-preshutdown.sh install -D -m 644 ${SOURCE11} $(DESTDIR)/${UNITDIR}/containerd.service.d/stx-containerd-ceph-override.conf + install -D -m 750 ${SOURCE12} $(DESTDIR)/${INITDIR}/ceph-storage-network + install -D -m 750 ${SOURCE13} $(DESTDIR)/${SYSCONFDIR}/ceph/ install -m 750 src/init-radosgw $(DESTDIR)/${INITDIR}/ceph-radosgw sed -i '/### END INIT INFO/a SYSTEMCTL_SKIP_REDIRECT=1' $(DESTDIR)/${INITDIR}/ceph-radosgw install -m 750 src/init-rbdmap $(DESTDIR)/${INITDIR}/rbdmap @@ -280,6 +283,7 @@ override_dh_fixperms: -Xceph.conf.pmon \ -Xceph-mds.conf.pmon \ -Xceph-init-wrapper \ + -Xceph-storage-network \ -Xceph.conf \ -Xceph-manage-journal \ -Xceph.service \ diff --git a/ceph/ceph/files/ceph-fixed-mon.conf.pmon 
b/ceph/ceph/files/ceph-fixed-mon.conf.pmon new file mode 100644 index 000000000..c8630bb57 --- /dev/null +++ b/ceph/ceph/files/ceph-fixed-mon.conf.pmon @@ -0,0 +1,26 @@ +[process] +process = ceph-fixed-mon +script = /etc/init.d/ceph-init-wrapper + +style = lsb +severity = major ; minor, major, critical +restarts = 5 ; restart retries before error assertion +interval = 30 ; number of seconds to wait between restarts + +mode = status ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. request / response messaging + ; status : determine process health with executing "status" command + ; "start" is used to start the process(es) again + ; ignore : do not monitor or stop monitoring + +; Status and Active Monitoring Options + +period = 30 ; monitor period in seconds +timeout = 120 ; for active mode, messaging timeout period in seconds, must be shorter than period + ; for status mode, max amount of time for a command to execute + +; Status Monitoring Options +start_arg = start mon.${HOSTNAME} ; start argument for the script +status_arg = status mon.${HOSTNAME} ; status argument for the script +status_failure_text = /tmp/ceph_status_failure.txt ; text to be added to alarms or logs, this is optional diff --git a/ceph/ceph/files/ceph-init-wrapper.sh b/ceph/ceph/files/ceph-init-wrapper.sh index f8eb302b1..d1ad73b83 100755 --- a/ceph/ceph/files/ceph-init-wrapper.sh +++ b/ceph/ceph/files/ceph-init-wrapper.sh @@ -84,6 +84,11 @@ args=("$@") if [ ! -z $ARGS ]; then IFS=";" read -r -a new_args <<< "$ARGS" args+=("${new_args[@]}") +else + # Since PMON uses a unique string to pass arguments, + # it must support splitting the string into the args array. + # Eg.: /etc/init.d/ceph-init-wrapper "start mds". 
+ IFS=" " read -r -a args <<< "$@" fi # Log Management @@ -108,6 +113,23 @@ log () { return 0 } +# Identify the ceph network interface from /etc/platform/platform.conf file +# The network interface will be set to the 'ceph_network_interface' variable +# Return 0 if found the variable, and 1 if not. +identify_ceph_network_interface() { + if [ "${ceph_network}" == "mgmt" ]; then + ceph_network_interface="${management_interface}" + return 0 + fi + + if [ "${ceph_network}" == "cluster-host" ]; then + ceph_network_interface="${cluster_host_interface}" + return 0 + fi + + return 1 +} + # Verify if drbd-cephmon role is primary, checking the output of 'drbdadm role' # Return 0 on success and 1 if drbd-cephmon is not primary is_drbd_cephmon_primary () @@ -143,11 +165,11 @@ is_drbd_cephmon_mounted () has_all_network_no_carrier() { ip link show "${oam_interface}" | grep NO-CARRIER - oam_carrier=$? + local oam_carrier=$? ip link show "${cluster_host_interface}" | grep NO-CARRIER - cluster_host_carrier=$? + local cluster_host_carrier=$? ip link show "${management_interface}" | grep NO-CARRIER - mgmt_carrier=$? + local mgmt_carrier=$? # Check if all networks have no carrier, meaning the other host is down if [ "${oam_carrier}" -eq 0 ] && [ "${cluster_host_carrier}" -eq 0 ] && [ "${mgmt_carrier}" -eq 0 ]; then @@ -157,17 +179,23 @@ has_all_network_no_carrier() return 1 } -# Check mgmt network carrier signal -has_mgmt_network_carrier() +# Check Ceph network carrier signal +has_ceph_network_carrier() { - # Checks the carrier (cable connected) for management interface - # If no-carrier message is detected, then the interface has no physical link - ip link show "${management_interface}" | grep NO-CARRIER + # Checks the carrier (cable connected) for Ceph network interface + # If no-carrier is detected, then the interface has no physical link + local interface=""; if identify_ceph_network_interface; then interface="${ceph_network_interface}"; else eval interface=\$${ceph_network}_interface; fi # map ceph_network (mgmt/cluster-host) to its platform.conf interface; otherwise fall back to <ceph_network>_interface + if [ -z "${interface}" ]; then + log ERROR "Cannot detect Ceph network. 
Skipping network carrier detection" + return 0 + fi + + ip link show "${interface}" | grep NO-CARRIER if [ $? -eq 0 ]; then - log INFO "Management Interface '${management_interface}' has NO-CARRIER, cannot start ceph-mon" + log INFO "Ceph network '${interface}' has NO-CARRIER, cannot start ceph-mon" return 1 fi - log "-" DEBUG "Management Interface '${management_interface}' is working" + log DEBUG "Ceph network '${interface}' is working" return 0 } @@ -256,6 +284,25 @@ with_service_lock () RC=$? } +has_daemon_running () +{ + local service="$1" + if [ ${#service} -eq 3 ]; then + # Check based on service type + local count_pid_files=$(ls -1 /var/run/ceph/${service}.*.pid 2>/dev/null | wc -l) + if [ ${count_pid_files} -gt 0 ]; then + return 0 + fi + else + # Check based on service name + if [ -f /var/run/ceph/${service}.pid ]; then + return 0 + fi + fi + + return 1 +} + start () { if [ ! -f ${CEPH_FILE} ]; then @@ -264,21 +311,25 @@ start () fi local service="$1" + # Evaluate the parameter because of local monitor (controller.${HOSTNAME}) + eval service="${service}" + + log INFO "Ceph START ${service} command received" # For AIO-DX, ceph services have special treatment if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then - # For ceph mon, check if drbd-cephmon is ready - if [ "${service}" == "mon" ]; then + # For ceph mon.controller (floating monitor), check if drbd-cephmon is ready + if [ "${service}" == "mon.controller" ]; then can_start_ceph_mon if [ $? -ne 0 ]; then - log INFO "Ceph Monitor is not ready to start because drbd-cephmon is not ready and mounted" + log INFO "Ceph Monitor cannot start because drbd-cephmon is not ready and mounted." exit 1 fi fi - # Check mgmt network state - has_mgmt_network_carrier + # Check Ceph network state + has_ceph_network_carrier if [ $? 
-ne 0 ]; then # If this is a AIO-DX Direct, check if all other network interfaces are down if [ "${system_mode}" == "duplex-direct" ]; then @@ -286,31 +337,43 @@ start () if [ $? -eq 0 ]; then log INFO "All network interfaces are not functional, considering the other host is down. Let Ceph start." else - # Else AIO-DX Direct mgmt network is NOT functional - log INFO "Management Interface is not functional, defer starting Ceph processes until recovered" + # Else AIO-DX Direct Ceph network is NOT functional + log INFO "Ceph network interface is not functional, defer starting Ceph processes until recovered" exit 1 fi else - # Else AIO-DX mgmt network is NOT functional - log INFO "Management Interface is not functional, defer starting Ceph processes until recovered" + # Else AIO-DX Ceph network is NOT functional + log INFO "Ceph network interface is not functional, defer starting Ceph processes until recovered" exit 1 fi fi fi # Start the service - log INFO "Ceph START ${service} command received" with_service_lock "${service}" ${CEPH_SCRIPT} start ${service} log INFO "Ceph START ${service} command finished." } stop () { + local cmd="stop" local service="$1" + # Evaluate the parameter because of local monitor (controller.${HOSTNAME}) + eval service="${service}" + [ "$2" == "force" ] && cmd="forcestop" - log INFO "Ceph STOP $1 command received." - with_service_lock "$1" ${CEPH_SCRIPT} stop $1 - log INFO "Ceph STOP $1 command finished." + log INFO "Ceph ${cmd^^} ${service} command received." + + if [ ! -z "${service}" ]; then + has_daemon_running ${service} + if [ $? -ne 0 ]; then + log INFO "Ceph ${service} daemon is already stopped. No action is required." + exit 0 + fi + fi + + with_service_lock "${service}" ${CEPH_SCRIPT} ${cmd} ${service} + log INFO "Ceph ${cmd^^} ${service} command finished." 
} restart () @@ -386,6 +449,8 @@ log_and_kill_hung_procs () status () { local target="$1" # no shift here + # Evaluate the parameter because of local monitor (controller.${HOSTNAME}) + eval target="$target" [ -z "${target}" ] && target="mon osd" if [ ! -f ${CEPH_FILE} ]; then @@ -393,29 +458,31 @@ status () exit 0 fi - if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then - has_mgmt_network_carrier + log INFO "status ${target}"; + + if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$target" == "osd" ]]; then + has_ceph_network_carrier if [ $? -eq 0 ]; then # Network is functional, continue - log DEBUG "Management Interface active." + log DEBUG "Ceph network interface is active." else if [ "${system_mode}" == "duplex-direct" ]; then has_all_network_no_carrier if [ $? -ne 0 ]; then # Network is NOT functional, prevent split brain corruptions - log INFO "Management Interface inactive. Stopping OSDs to force a re-peering once the network has recovered" - stop "$1" + log INFO "Ceph network interface is inactive. Stopping OSDs to force a re-peering once the network has recovered" + stop "$target" exit 0 fi else # Network is NOT functional, prevent split brain corruptions - log INFO "Management Interface inactive. Stopping OSDs to force a re-peering once the network has recovered" - stop "$1" + log INFO "Ceph network interface is inactive. Stopping OSDs to force a re-peering once the network has recovered" + stop "$target" exit 0 fi fi - timeout $CEPH_STATUS_TIMEOUT ceph -s + timeout $CEPH_STATUS_TIMEOUT ceph -s 2>&1 1>/dev/null if [ "$?" -ne 0 ]; then # Ceph cluster is not accessible. Don't panic, controller swact # may be in progress. 
@@ -447,21 +514,22 @@ status () flock --shared ${LOCK_CEPH_OSD_STATUS_FD} fi - result=`log INFO "status $1"; ${CEPH_SCRIPT} status $1 {LOCK_CEPH_MON_STATUS_FD}>&- {LOCK_CEPH_OSD_STATUS_FD}>&-` + result=`${CEPH_SCRIPT} status $target {LOCK_CEPH_MON_STATUS_FD}>&- {LOCK_CEPH_OSD_STATUS_FD}>&-` RC=$? if [ "$RC" -ne 0 ]; then erred_procs=`echo "$result" | sort | uniq | awk ' /not running|dead|failed/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'` hung_procs=`echo "$result" | sort | uniq | awk ' /hung/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'` blocked_ops_procs=`echo "$result" | sort | uniq | awk ' /blocked ops/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'` stuck_peering_procs=`echo "$result" | sort | uniq | awk ' /stuck peering/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'` invalid=0 host=`hostname` if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then # On 2 node configuration we have a floating monitor + host_fixed="$host" host="controller" fi for i in $(echo $erred_procs $hung_procs); do - if [[ "$i" =~ osd.?[0-9]?[0-9]|mon.$host ]]; then + if [[ "$i" =~ osd.?[0-9]?[0-9]|mon.$host|mon.$host_fixed|mds.${HOSTNAME} ]]; then continue else invalid=1 @@ -485,12 +553,12 @@ status () done echo "$text" | tr -d '\n' > $CEPH_STATUS_FAILURE_TEXT_FILE else - echo "$host: '${CEPH_SCRIPT} status $1' result contains invalid process names: $erred_procs" + echo "$host: '${CEPH_SCRIPT} status $target' result 
contains invalid process names: $erred_procs" echo "Undetermined osd or monitor id" > $CEPH_STATUS_FAILURE_TEXT_FILE fi fi - if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then + if [[ $RC == 0 ]] && [[ "$target" == "mon.controller" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then # SM needs exit code != 0 from 'status mon' argument of the init script on # standby controller otherwise it thinks that the monitor is running and # tries to stop it. @@ -504,20 +572,20 @@ status () if [ "$?" -ne 0 ]; then exit 3 else - has_mgmt_network_carrier + has_ceph_network_carrier if [ $? -ne 0 ]; then if [ "${system_mode}" == "duplex-direct" ]; then has_all_network_no_carrier if [ $? -ne 0 ]; then # Network is NOT functional, prevent split brain corruptions - log INFO "Management Interface inactive. Stopping ceph-mon to prevent localized operation" - stop "$1" + log INFO "Ceph network interface is inactive. Stopping ceph-mon to prevent localized operation" + stop "$target" exit 0 fi else # Network is NOT functional, prevent split brain corruptions - log INFO "Management Interface inactive. Stopping ceph-mon to prevent localized operation" - stop "$1" + log INFO "Ceph network interface is inactive. 
Stopping ceph-mon to prevent localized operation" + stop "$target" exit 0 fi fi @@ -535,6 +603,9 @@ case "${args[0]}" in stop) stop ${args[1]} ;; + forcestop) + stop ${args[1]} force + ;; restart) restart ${args[1]} ;; @@ -542,7 +613,7 @@ case "${args[0]}" in status ${args[1]} ;; *) - echo "Usage: $0 {start|stop|restart|status} [{mon|osd|osd.|mon.}]" + echo "Usage: $0 {start|stop|forcestop|restart|status} [{mon|osd|osd.|mon.}]" exit 1 ;; esac diff --git a/ceph/ceph/files/ceph-mds.conf.pmon b/ceph/ceph/files/ceph-mds.conf.pmon index f02b22b42..8d7911243 100644 --- a/ceph/ceph/files/ceph-mds.conf.pmon +++ b/ceph/ceph/files/ceph-mds.conf.pmon @@ -1,6 +1,6 @@ [process] process = ceph-mds -script = /etc/init.d/ceph +script = /etc/init.d/ceph-init-wrapper style = lsb severity = major ; minor, major, critical diff --git a/ceph/ceph/files/ceph-storage-network.sh b/ceph/ceph/files/ceph-storage-network.sh new file mode 100755 index 000000000..5ef8d4c03 --- /dev/null +++ b/ceph/ceph/files/ceph-storage-network.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This script monitors the Ceph network for carrier on an AIO-DX system. +# To prevent data corruption, when there is no carrier from the Ceph network, +# the floating monitor, the osds and the mds processes will be stopped. 
+ +source /etc/platform/platform.conf + +CEPH_FILE="/var/run/.ceph_started" +CEPH_SCRIPT="/etc/init.d/ceph-init-wrapper" + +source /usr/lib/ceph/ceph_common.sh +LOG_PATH=/var/log/ceph +LOG_FILE=$LOG_PATH/ceph-process-states.log +LOG_LEVEL=NORMAL # DEBUG + +# Log Management +# Adding PID and PPID informations +log () { + local name="" + local log_level="$1" + # Checking if the first parameter is not a log level + if grep -q -v ${log_level} <<< "INFO DEBUG WARN ERROR"; then + name=" ($1)"; + log_level="$2" + shift + fi + + shift + + local message="$@" + # prefix = [] + local prefix="${BASHPID} $(cat /proc/${PPID}/comm)[${PPID}]${name}" + # yyyy-MM-dd HH:mm:ss.SSSSSS /etc/init.d/ceph-storage-network : + wlog "${prefix}" "${log_level}" "${message}" + return 0 +} + +identify_ceph_network_interface() { + if [ "${ceph_network}" == "mgmt" ]; then + ceph_network_interface="${management_interface}" + return 0 + fi + + if [ "${ceph_network}" == "cluster-host" ]; then + ceph_network_interface="${cluster_host_interface}" + return 0 + fi + + return 1 +} + +RETVAL=0 + +################################################################################ +# Stop Ceph Services +################################################################################ + +stop() +{ + # This script should run only in AIO-DX called by sm. Double check it. 
+ if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" != "simplex" ]; then + services="osd mds mon.controller" + else + services="osd mds mon" + fi + + # sequentially stopping ceph-osd, ceph-mds, then ceph-mon + for service in $services; do + ${CEPH_SCRIPT} forcestop ${service} + done + + return +} + +################################################################################ +# Status Action +################################################################################ + +has_ceph_network_carrier() +{ + # Checks the carrier (cable connected) for Ceph network interface + # If no-carrier is detected, then the interface has no physical link + local interface=""; if identify_ceph_network_interface; then interface="${ceph_network_interface}"; else eval interface=\$${ceph_network}_interface; fi # map ceph_network (mgmt/cluster-host) to its platform.conf interface; otherwise fall back to <ceph_network>_interface + if [ -z "${interface}" ]; then + log ERROR "Cannot detect Ceph network. Skipping network carrier detection" + return 0 + fi + + ip link show "${interface}" | grep NO-CARRIER + if [ $? -eq 0 ]; then + log INFO "Ceph network '${interface}' has NO-CARRIER, cannot start ceph-mon" + return 1 + fi + return 0 +} + +status() +{ + if [ ! -f ${CEPH_FILE} ]; then + # Ceph is not running on this node, return success + return + fi + + has_ceph_network_carrier + if [ $? 
-ne 0 ]; then + # communication failure detected + # stopping ceph services to avoid data corruption + stop + RETVAL=1 + fi + + return +} + +################################################################################ + +# Main Entry + +################################################################################ + +case "$1" in + start) + status + ;; + stop) + RETVAL=0 + ;; + status) + status + ;; + *) + echo "usage: $0 { start | stop | status }" + exit 1 + ;; +esac + +exit $RETVAL + diff --git a/config/puppet-modules/openstack/puppet-ceph-2.4.1/debian/patches/0017-Add-mon_data-parameter.patch b/config/puppet-modules/openstack/puppet-ceph-2.4.1/debian/patches/0017-Add-mon_data-parameter.patch new file mode 100644 index 000000000..1c08a4a1b --- /dev/null +++ b/config/puppet-modules/openstack/puppet-ceph-2.4.1/debian/patches/0017-Add-mon_data-parameter.patch @@ -0,0 +1,110 @@ +From e6f5d2cd267564ee97f53447ba1419d1ace641a1 Mon Sep 17 00:00:00 2001 +From: Hediberto C Silva +Date: Tue, 19 Mar 2024 17:17:10 -0300 +Subject: [PATCH] Add mon_data parameter + +If supplied, the mon_data parameter defines where the ceph-mon data +will be located. 
+ +Signed-off-by: Hediberto C Silva +--- + manifests/mon.pp | 33 ++++++++++++++++++++++++++------- + 1 file changed, 26 insertions(+), 7 deletions(-) + +diff --git a/manifests/mon.pp b/manifests/mon.pp +index 6d1294e..4615d3c 100644 +--- a/manifests/mon.pp ++++ b/manifests/mon.pp +@@ -60,6 +60,7 @@ + define ceph::mon ( + $ensure = present, + $mon_enable = true, ++ $mon_data = '', + $public_addr = undef, + $cluster = undef, + $authentication_type = 'cephx', +@@ -139,7 +140,10 @@ chmod 0444 ${keyring_path} + ", + unless => "/bin/true # comment to satisfy puppet syntax requirements + set -ex +-mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) || exit 1 ++mon_data=\"${mon_data}\" ++if [ -z \${mon_data} ]; then ++ mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) || exit 1 ++fi + # if ceph-mon fails then the mon is probably not configured yet + test -e \$mon_data/done + ", +@@ -178,7 +182,10 @@ test -e /etc/ceph/${cluster_name}.client.admin.keyring", + -> exec { $ceph_mkfs: + command => "/bin/true # comment to satisfy puppet syntax requirements + set -ex +-mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) ++mon_data=\"${mon_data}\" ++if [ -z \${mon_data} ]; then ++ mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) ++fi + if [ ! -d \$mon_data ] ; then + mkdir -p \$mon_data + if getent passwd ceph >/dev/null 2>&1; then +@@ -188,7 +195,8 @@ if [ ! -d \$mon_data ] ; then + --mkfs \ + --id ${id} \ + --keyring ${keyring_path} \ +- ${fsid_option} ; then ++ ${fsid_option} \ ++ --mon-data \$mon_data ; then + touch \$mon_data/done \$mon_data/${init} \$mon_data/keyring + chown -h ceph:ceph \$mon_data/done \$mon_data/${init} \$mon_data/keyring + else +@@ -199,7 +207,8 @@ if [ ! 
-d \$mon_data ] ; then + --mkfs \ + --id ${id} \ + --keyring ${keyring_path} \ +- ${fsid_option} ; then ++ ${fsid_option} \ ++ --mon-data \$mon_data ; then + touch \$mon_data/done \$mon_data/${init} \$mon_data/keyring + else + rm -fr \$mon_data +@@ -209,7 +218,10 @@ fi + ", + unless => "/bin/true # comment to satisfy puppet syntax requirements + set -ex +-mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) ++mon_data=\"${mon_data}\" ++if [ -z \${mon_data} ]; then ++ mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) ++fi + test -d \$mon_data + ", + logoutput => true, +@@ -244,13 +256,19 @@ test ! -e ${keyring_path} + -> exec { "remove-mon-${id}": + command => "/bin/true # comment to satisfy puppet syntax requirements + set -ex +-mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) ++mon_data=\"${mon_data}\" ++if [ -z \${mon_data} ]; then ++ mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) ++fi + rm -fr \$mon_data + ", + unless => "/bin/true # comment to satisfy puppet syntax requirements + set -ex + which ceph-mon || exit 0 # if ceph-mon is not available we already uninstalled ceph and there is nothing to do +-mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) ++mon_data=\"${mon_data}\" ++if [ -z \${mon_data} ]; then ++ mon_data=\$(ceph-mon ${cluster_option} --id ${id} --show-config-value mon_data) ++fi + test ! -d \$mon_data + ", + logoutput => true, +@@ -263,3 +281,4 @@ test ! 
-d \$mon_data + fail('Ensure on MON must be either present or absent') + } + } ++ +-- +2.34.1 diff --git a/config/puppet-modules/openstack/puppet-ceph-2.4.1/debian/patches/series b/config/puppet-modules/openstack/puppet-ceph-2.4.1/debian/patches/series index 3c1f46140..0acf8ea30 100644 --- a/config/puppet-modules/openstack/puppet-ceph-2.4.1/debian/patches/series +++ b/config/puppet-modules/openstack/puppet-ceph-2.4.1/debian/patches/series @@ -13,3 +13,4 @@ 0014-Adjust-puppet-ceph-dependency-requirements.patch 0015-Fix-the-unless-condition-of-ceph-osd-prepare.patch 0016-Add-ms_bind_ipv4-option-to-ceph-paremeters.patch +0017-Add-mon_data-parameter.patch