diff --git a/ceph-osd/Chart.yaml b/ceph-osd/Chart.yaml index 97a3258a2..648c5c54f 100644 --- a/ceph-osd/Chart.yaml +++ b/ceph-osd/Chart.yaml @@ -15,6 +15,6 @@ apiVersion: v1 appVersion: v1.0.0 description: OpenStack-Helm Ceph OSD name: ceph-osd -version: 0.1.13 +version: 0.1.14 home: https://github.com/ceph/ceph ... diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl index 7ccb8e1fe..7bf7b7570 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl @@ -150,3 +150,6 @@ exec /usr/bin/ceph-osd \ --setuser ceph \ --setgroup disk & echo $! > /run/ceph-osd.pid wait + +# Clean up resources held by the common script +common_cleanup diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl index a3110ac56..de008b6a2 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl @@ -111,3 +111,6 @@ exec /usr/bin/ceph-osd \ --setuser ceph \ --setgroup disk & echo $! > /run/ceph-osd.pid wait + +# Clean up resources held by the common script +common_cleanup diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl index 030e95091..be5a5f33c 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl @@ -15,6 +15,9 @@ limitations under the License. */}} set -ex +shopt -s expand_aliases +export lock_fd='' +export ALREADY_LOCKED=0 export PS4='+${BASH_SOURCE:+$(basename ${BASH_SOURCE}):${LINENO}:}${FUNCNAME:+${FUNCNAME}():} ' : "${CRUSH_LOCATION:=root=default host=${HOSTNAME}}" @@ -25,6 +28,85 @@ export PS4='+${BASH_SOURCE:+$(basename ${BASH_SOURCE}):${LINENO}:}${FUNCNAME:+${ : "${OSD_JOURNAL_SIZE:=$(awk '/^osd_journal_size/{print $3}' ${CEPH_CONF}.template)}" : "${OSD_WEIGHT:=1.0}" +# Obtain a global lock on /var/lib/ceph/tmp/init-osd.lock +function lock() { + # Open a file descriptor for the lock file if there isn't one already + if [[ -z "${lock_fd}" ]]; then + exec {lock_fd}>/var/lib/ceph/tmp/init-osd.lock || exit 1 + fi + flock -w 600 "${lock_fd}" &> /dev/null + ALREADY_LOCKED=1 +} + +# Release the global lock on /var/lib/ceph/tmp/init-osd.lock +function unlock() { + flock -u "${lock_fd}" &> /dev/null + ALREADY_LOCKED=0 +} + +# "Destructor" for common.sh, must be called by scripts that source this one +function common_cleanup() { + # Close the file descriptor for the lock file + if [[ ! 
-z "${lock_fd}" ]]; then + if [[ ${ALREADY_LOCKED} -ne 0 ]]; then + unlock + fi + eval "exec ${lock_fd}>&-" + fi +} + +# Run a command within the global synchronization lock +function locked() { + local LOCK_SCOPE=0 + + # Allow locks to be re-entrant to avoid deadlocks + if [[ ${ALREADY_LOCKED} -eq 0 ]]; then + lock + LOCK_SCOPE=1 + fi + + # Execute the synchronized command + "$@" + + # Only unlock if the lock was obtained in this scope + if [[ ${LOCK_SCOPE} -ne 0 ]]; then + unlock + fi +} + +# Alias commands that interact with disks so they are always synchronized +alias dmsetup='locked dmsetup' +alias pvs='locked pvs' +alias vgs='locked vgs' +alias lvs='locked lvs' +alias pvdisplay='locked pvdisplay' +alias vgdisplay='locked vgdisplay' +alias lvdisplay='locked lvdisplay' +alias pvcreate='locked pvcreate' +alias vgcreate='locked vgcreate' +alias lvcreate='locked lvcreate' +alias pvremove='locked pvremove' +alias vgremove='locked vgremove' +alias lvremove='locked lvremove' +alias pvrename='locked pvrename' +alias vgrename='locked vgrename' +alias lvrename='locked lvrename' +alias pvchange='locked pvchange' +alias vgchange='locked vgchange' +alias lvchange='locked lvchange' +alias pvscan='locked pvscan' +alias vgscan='locked vgscan' +alias lvscan='locked lvscan' +alias lvm_scan='locked lvm_scan' +alias partprobe='locked partprobe' +alias ceph-volume='locked ceph-volume' +alias disk_zap='locked disk_zap' +alias zap_extra_partitions='locked zap_extra_partitions' +alias udev_settle='locked udev_settle' +alias wipefs='locked wipefs' +alias sgdisk='locked sgdisk' +alias dd='locked dd' + eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))') eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))') eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))') @@ -74,19 +156,6 @@ function ceph_cmd_retry() { done } -function locked() { - exec {lock_fd}>/var/lib/ceph/tmp/init-osd.lock || exit 1 - flock -w 600 --verbose "${lock_fd}" &> /dev/null - "$@" - flock -u "${lock_fd}" &> /dev/null -} -function global_locked() { - exec {global_lock_fd}>/var/lib/ceph/tmp/init-osd-global.lock || exit 1 - flock -w 600 --verbose "${global_lock_fd}" &> /dev/null - "$@" - flock -u "${global_lock_fd}" &> /dev/null -} - function crush_create_or_move { local crush_location=${1} ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \ @@ -242,13 +311,13 @@ function disk_zap { dmsetup remove ${dm_device} fi done - local logical_volumes=$(locked lvdisplay | grep "LV Path" | grep "$device_filter" | awk '/ceph/{print $3}' | tr '\n' ' ') + local logical_volumes=$(lvdisplay | grep "LV Path" | grep "$device_filter" | awk '/ceph/{print $3}' | tr '\n' ' ') for logical_volume in ${logical_volumes}; do if [[ ! 
-z ${logical_volume} ]]; then - locked lvremove -y ${logical_volume} + lvremove -y ${logical_volume} fi done - local volume_group=$(locked pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph") + local volume_group=$(pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph") if [[ ${volume_group} ]]; then vgremove -y ${volume_group} pvremove -y ${device} @@ -274,7 +343,7 @@ function udev_settle { osd_devices="${OSD_DEVICE}" udevadm settle --timeout=600 partprobe "${OSD_DEVICE}" - locked lvm_scan + lvm_scan if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then if [ ! -z "$BLOCK_DB" ]; then osd_devices="${osd_devices}\|${BLOCK_DB}" @@ -282,9 +351,9 @@ function udev_settle { local block_db="$BLOCK_DB" local db_vg="$(echo $block_db | cut -d'/' -f1)" if [ ! -z "$db_vg" ]; then - block_db=$(locked pvdisplay -ddd -v | grep -B1 "$db_vg" | awk '/PV Name/{print $3}') + block_db=$(pvdisplay -ddd -v | grep -B1 "$db_vg" | awk '/PV Name/{print $3}') fi - locked partprobe "${block_db}" + partprobe "${block_db}" fi if [ ! -z "$BLOCK_WAL" ] && [ "$BLOCK_WAL" != "$BLOCK_DB" ]; then osd_devices="${osd_devices}\|${BLOCK_WAL}" @@ -292,9 +361,9 @@ function udev_settle { local block_wal="$BLOCK_WAL" local wal_vg="$(echo $block_wal | cut -d'/' -f1)" if [ ! -z "$wal_vg" ]; then - block_wal=$(locked pvdisplay -ddd -v | grep -B1 "$wal_vg" | awk '/PV Name/{print $3}') + block_wal=$(pvdisplay -ddd -v | grep -B1 "$wal_vg" | awk '/PV Name/{print $3}') fi - locked partprobe "${block_wal}" + partprobe "${block_wal}" fi else if [ "x$JOURNAL_TYPE" == "xblock-logical" ] && [ ! -z "$OSD_JOURNAL" ]; then @@ -302,7 +371,7 @@ function udev_settle { if [ ! -z "$OSD_JOURNAL" ]; then local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g') osd_devices="${osd_devices}\|${JDEV}" - locked partprobe "${JDEV}" + partprobe "${JDEV}" fi fi fi @@ -328,7 +397,7 @@ function udev_settle { function get_lv_from_device { device="$1" - locked pvdisplay -ddd -v -m ${device} | awk '/Logical volume/{print $3}' + pvdisplay -ddd -v -m ${device} | awk '/Logical volume/{print $3}' } # Helper function to get an lvm tag from a logical volume @@ -341,7 +410,7 @@ function get_lvm_tag_from_volume { echo else # Get and return the specified tag from the logical volume - locked lvs -o lv_tags ${logical_volume} | tr ',' '\n' | grep ${tag} | cut -d'=' -f2 + lvs -o lv_tags ${logical_volume} | tr ',' '\n' | grep ${tag} | cut -d'=' -f2 fi } @@ -361,7 +430,7 @@ function get_lv_size_from_device { device="$1" logical_volume="$(get_lv_from_device ${device})" - locked lvs ${logical_volume} -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1 + lvs ${logical_volume} -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' 
-f1 } # Helper function to get the crush weight for an osd device @@ -435,12 +504,12 @@ function get_lvm_path_from_device { select="$1" options="--noheadings -o lv_dm_path" - locked pvs ${options} -S "${select}" | tr -d ' ' + pvs ${options} -S "${select}" | tr -d ' ' } function get_vg_name_from_device { device="$1" - pv_uuid=$(locked pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') + pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') if [[ "${pv_uuid}" ]]; then echo "ceph-vg-${pv_uuid}" @@ -450,7 +519,7 @@ function get_vg_name_from_device { function get_lv_name_from_device { device="$1" device_type="$2" - pv_uuid=$(locked pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') + pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}') if [[ "${pv_uuid}" ]]; then echo "ceph-${device_type}-${pv_uuid}" diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl index 91f60ce0b..7daac65a7 100644 --- a/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl +++ b/ceph-osd/templates/bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl @@ -38,36 +38,42 @@ else export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION}) fi +# Set up aliases for functions that require disk synchronization +alias rename_vg='locked rename_vg' +alias rename_lvs='locked rename_lvs' +alias update_lv_tags='locked update_lv_tags' +alias prep_device='locked prep_device' + # Renames a single VG if necessary function rename_vg { local physical_disk=$1 - local old_vg_name=$(locked pvdisplay -ddd -v ${physical_disk} | awk '/VG Name/{print $3}') + local old_vg_name=$(pvdisplay -ddd -v ${physical_disk} | awk '/VG Name/{print $3}') local vg_name=$(get_vg_name_from_device ${physical_disk}) if [[ "${old_vg_name}" ]] && [[ "${vg_name}" != "${old_vg_name}" ]]; then - locked vgrename ${old_vg_name} ${vg_name} + vgrename ${old_vg_name} ${vg_name} fi } # Renames all LVs associated with an OSD as necesasry function rename_lvs { local data_disk=$1 - local vg_name=$(locked pvdisplay -ddd -v ${data_disk} | awk '/VG Name/{print $3}') + local vg_name=$(pvdisplay -ddd -v ${data_disk} | awk '/VG Name/{print $3}') if [[ "${vg_name}" ]]; then # Rename the OSD volume if necessary - local old_lv_name=$(locked lvdisplay ${vg_name} | awk '/LV Name/{print $3}') + local old_lv_name=$(lvdisplay ${vg_name} | awk '/LV Name/{print $3}') local lv_name=$(get_lv_name_from_device ${data_disk} lv) if [[ "${old_lv_name}" ]] && [[ "${lv_name}" != "${old_lv_name}" ]]; then - locked lvrename ${vg_name} ${old_lv_name} ${lv_name} + lvrename ${vg_name} ${old_lv_name} ${lv_name} fi # Rename the OSD's block.db volume if necessary, referenced by UUID local lv_tag=$(get_lvm_tag_from_device ${data_disk} ceph.db_uuid) if [[ "${lv_tag}" ]]; then - local lv_device=$(locked lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}') + local lv_device=$(lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}') if [[ "${lv_device}" ]]; then local db_vg=$(echo ${lv_device} | awk -F "/" '{print $3}') @@ -75,7 +81,7 @@ function rename_lvs { local db_name=$(get_lv_name_from_device ${data_disk} db) if [[ "${old_lv_name}" ]] && [[ "${db_name}" != "${old_lv_name}" ]]; then - locked lvrename ${db_vg} ${old_lv_name} ${db_name} + lvrename ${db_vg} ${old_lv_name} ${db_name} fi fi fi @@ -84,7 +90,7 @@ function rename_lvs { lv_tag=$(get_lvm_tag_from_device ${data_disk} ceph.wal_uuid) if [[ "${lv_tag}" ]]; then - local lv_device=$(locked lvdisplay | grep 
-B4 "${lv_tag}" | awk '/LV Path/{print $3}') + local lv_device=$(lvdisplay | grep -B4 "${lv_tag}" | awk '/LV Path/{print $3}') if [[ "${lv_device}" ]]; then local wal_vg=$(echo ${lv_device} | awk -F "/" '{print $3}') @@ -92,7 +98,7 @@ function rename_lvs { local wal_name=$(get_lv_name_from_device ${data_disk} wal) if [[ "${old_lv_name}" ]] && [[ "${wal_name}" != "${old_lv_name}" ]]; then - locked lvrename ${wal_vg} ${old_lv_name} ${wal_name} + lvrename ${wal_vg} ${old_lv_name} ${wal_name} fi fi fi @@ -104,10 +110,10 @@ function rename_lvs { # renaming should be completed prior to calling this function update_lv_tags { local data_disk=$1 - local pv_uuid=$(locked pvdisplay -ddd -v ${data_disk} | awk '/PV UUID/{print $3}') + local pv_uuid=$(pvdisplay -ddd -v ${data_disk} | awk '/PV UUID/{print $3}') if [[ "${pv_uuid}" ]]; then - local volumes="$(locked lvs --no-headings | grep -e "${pv_uuid}")" + local volumes="$(lvs --no-headings | grep -e "${pv_uuid}")" local block_device db_device wal_device vg_name local old_block_device old_db_device old_wal_device @@ -131,21 +137,21 @@ function update_lv_tags { while read lv vg other_stuff; do if [[ "${block_device}" ]]; then if [[ "${old_block_device}" ]]; then - locked lvchange --deltag "ceph.block_device=${old_block_device}" /dev/${vg}/${lv} + lvchange --deltag "ceph.block_device=${old_block_device}" /dev/${vg}/${lv} fi - locked lvchange --addtag "ceph.block_device=${block_device}" /dev/${vg}/${lv} + lvchange --addtag "ceph.block_device=${block_device}" /dev/${vg}/${lv} fi if [[ "${db_device}" ]]; then if [[ "${old_db_device}" ]]; then - locked lvchange --deltag "ceph.db_device=${old_db_device}" /dev/${vg}/${lv} + lvchange --deltag "ceph.db_device=${old_db_device}" /dev/${vg}/${lv} fi - locked lvchange --addtag "ceph.db_device=${db_device}" /dev/${vg}/${lv} + lvchange --addtag "ceph.db_device=${db_device}" /dev/${vg}/${lv} fi if [[ "${wal_device}" ]]; then if [[ "${old_wal_device}" ]]; then - locked lvchange --deltag "ceph.wal_device=${old_wal_device}" /dev/${vg}/${lv} + lvchange --deltag "ceph.wal_device=${old_wal_device}" /dev/${vg}/${lv} fi - locked lvchange --addtag "ceph.wal_device=${wal_device}" /dev/${vg}/${lv} + lvchange --addtag "ceph.wal_device=${wal_device}" /dev/${vg}/${lv} fi done <<< ${volumes} fi @@ -188,7 +194,7 @@ function prep_device { udev_settle vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE}) lv_name=$(get_lv_name_from_device ${data_disk} ${device_type}) - VG=$(locked vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]') + VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]') if [[ $VG ]]; then DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}") CEPH_LVM_PREPARE=1 @@ -207,13 +213,13 @@ function prep_device { CEPH_LVM_PREPARE=1 fi random_uuid=$(uuidgen) - locked vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}" + vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}" VG=$(get_vg_name_from_device ${BLOCK_DEVICE}) - locked vgrename "ceph-vg-${random_uuid}" "${VG}" + vgrename "ceph-vg-${random_uuid}" "${VG}" fi - logical_volume=$(locked lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]') + logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]') if [[ $logical_volume != "${lv_name}" ]]; then - locked lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}" + lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}" fi if [[ "${device_type}" == "db" ]]; then BLOCK_DB="${VG}/${lv_name}" @@ -399,7 +405,7 @@ function 
osd_disk_prepare { OSD_VG=${vg_name} fi lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv) - if [[ ! "$(locked lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then + if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG} fi OSD_LV=${OSD_VG}/${lv_name} @@ -416,15 +422,15 @@ function osd_disk_prepare { block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2 "-" $3}') fi if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then - global_locked prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}" - global_locked prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}" + prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}" + prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}" elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then - global_locked prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}" + prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}" elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then - global_locked prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}" + prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}" fi else - if locked pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then + if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then CEPH_LVM_PREPARE=0 fi fi @@ -451,7 +457,7 @@ function osd_disk_prepare { fi if [[ CEPH_LVM_PREPARE -eq 1 ]]; then - locked ceph-volume lvm -v prepare ${CLI_OPTS} + ceph-volume lvm -v prepare ${CLI_OPTS} udev_settle fi } @@ -502,3 +508,6 @@ function osd_journal_prepare { if ! [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then osd_disk_prepare fi + +# Clean up resources held by the common script +common_cleanup
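
A minimal standalone sketch of the re-entrant flock pattern that _common.sh.tpl introduces above (assumptions: bash 4.1+ and util-linux flock are available; the lock-file path and the sample pvs alias here are illustrative for the sketch, not taken verbatim from the chart):

#!/bin/bash
# Sketch of the re-entrant flock pattern used by the common script (illustrative).
set -e
shopt -s expand_aliases            # aliases are off by default in non-interactive shells

LOCK_FILE="/tmp/init-osd.lock"     # illustrative; the chart locks /var/lib/ceph/tmp/init-osd.lock
lock_fd=''
ALREADY_LOCKED=0

function lock() {
  # Open a descriptor for the lock file only once, then take the lock
  if [[ -z "${lock_fd}" ]]; then
    exec {lock_fd}>"${LOCK_FILE}"
  fi
  flock -w 600 "${lock_fd}"
  ALREADY_LOCKED=1
}

function unlock() {
  flock -u "${lock_fd}"
  ALREADY_LOCKED=0
}

function locked() {
  # Re-entrant wrapper: only the outermost caller takes and releases the lock
  local lock_scope=0
  if [[ ${ALREADY_LOCKED} -eq 0 ]]; then
    lock
    lock_scope=1
  fi
  "$@"
  if [[ ${lock_scope} -ne 0 ]]; then
    unlock
  fi
}

# Aliases only affect commands parsed after this point in the script
alias pvs='locked pvs'

function common_cleanup() {
  # Release the lock if still held, then close the descriptor
  if [[ -n "${lock_fd}" ]]; then
    if [[ ${ALREADY_LOCKED} -ne 0 ]]; then
      unlock
    fi
    eval "exec ${lock_fd}>&-"
  fi
}

locked echo "ran under the lock"   # nested 'locked' calls inside this command would not deadlock
common_cleanup

Because disk-touching commands such as lvdisplay and pvdisplay are now aliased to their 'locked' forms and may run inside functions (disk_zap, udev_settle, prep_device) that are themselves wrapped with 'locked', the ALREADY_LOCKED guard lets nested calls proceed on the already-held lock instead of blocking on the same file, which is why the earlier non-reentrant locked/global_locked pair is removed and the explicit 'locked'/'global_locked' prefixes disappear from the call sites.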