[ceph-osd] dmsetup remove logical devices using correct device names

Found another issue in disk_zap() where a needed update was missed when
https://review.opendev.org/c/openstack/openstack-helm-infra/+/745166
changed the logical volume naming convention.

The above patch set renamed the volumes that followed the old convention,
so the volume lookup logic in disk_zap() will never be correct as written
and must be updated.

Also added logic to clean up orphaned DB/WAL volumes when they are
encountered, and removed some cases in which a data disk was marked as
in use even though it wasn't set up correctly.

Change-Id: I8deeecfdb69df1f855f287caab8385ee3d6869e0
This commit is contained in:
Stephen Taylor 2020-12-17 14:41:00 -07:00
parent f08d30df6b
commit 4c097b0300
3 changed files with 62 additions and 38 deletions

View File

@ -15,6 +15,6 @@ apiVersion: v1
appVersion: v1.0.0 appVersion: v1.0.0
description: OpenStack-Helm Ceph OSD description: OpenStack-Helm Ceph OSD
name: ceph-osd name: ceph-osd
version: 0.1.15 version: 0.1.16
home: https://github.com/ceph/ceph home: https://github.com/ceph/ceph
... ...

View File

@ -57,6 +57,9 @@ function common_cleanup() {
# Run a command within the global synchronization lock # Run a command within the global synchronization lock
function locked() { function locked() {
# Don't log every command inside locked() to keep logs cleaner
{ set +x; } 2>/dev/null
local LOCK_SCOPE=0 local LOCK_SCOPE=0
# Allow locks to be re-entrant to avoid deadlocks # Allow locks to be re-entrant to avoid deadlocks
@ -66,12 +69,17 @@ function locked() {
fi fi
# Execute the synchronized command # Execute the synchronized command
set -x
"$@" "$@"
{ set +x; } 2>/dev/null
# Only unlock if the lock was obtained in this scope # Only unlock if the lock was obtained in this scope
if [[ ${LOCK_SCOPE} -ne 0 ]]; then if [[ ${LOCK_SCOPE} -ne 0 ]]; then
unlock unlock
fi fi
# Re-enable command logging
set -x
} }
# Alias commands that interact with disks so they are always synchronized # Alias commands that interact with disks so they are always synchronized
@ -304,21 +312,15 @@ function zap_extra_partitions {
function disk_zap { function disk_zap {
# Run all the commands that ceph-disk zap uses to clear a disk # Run all the commands that ceph-disk zap uses to clear a disk
local device=${1} local device=${1}
local device_filter=$(basename "${device}") local dm_devices=$(get_dm_devices_from_osd_device "${device}" | xargs)
local lv_name=$(get_lv_name_from_device "${device}" lv)
local dm_devices=$(get_lvm_path_from_device "pv_name=~${device_filter},lv_name=~ceph")
for dm_device in ${dm_devices}; do for dm_device in ${dm_devices}; do
if [[ ! -z ${dm_device} ]] && [[ ! -z $(dmsetup ls | grep ${dm_device}) ]]; then if [[ "$(dmsetup ls | grep ${dm_device})" ]]; then
dmsetup remove ${dm_device} dmsetup remove ${dm_device}
fi fi
done done
if [[ ! -z "${lv_name}" ]]; then local logical_volumes=$(get_lv_paths_from_osd_device "${device}" | xargs)
local logical_volumes=$(lvdisplay | grep "LV Path" | grep "${lv_name}" | awk '/ceph/{print $3}' | tr '\n' ' ') if [[ "${logical_volumes}" ]]; then
for logical_volume in ${logical_volumes}; do lvremove -y ${logical_volumes}
if [[ ! -z ${logical_volume} ]]; then
lvremove -y ${logical_volume}
fi
done
fi fi
local volume_group=$(pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph") local volume_group=$(pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph")
if [[ ${volume_group} ]]; then if [[ ${volume_group} ]]; then
@ -503,11 +505,24 @@ function get_block_uuid_from_device {
get_lvm_tag_from_device ${device} ceph.block_uuid get_lvm_tag_from_device ${device} ceph.block_uuid
} }
function get_lvm_path_from_device { function get_dm_devices_from_osd_device {
select="$1" device="$1"
pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
options="--noheadings -o lv_dm_path" # Return the list of dm devices that belong to the osd
pvs ${options} -S "${select}" | tr -d ' ' if [[ "${pv_uuid}" ]]; then
dmsetup ls | grep "$(echo "${pv_uuid}" | sed 's/-/--/g')" | awk '{print $1}'
fi
}
function get_lv_paths_from_osd_device {
device="$1"
pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
# Return the list of lvs that belong to the osd
if [[ "${pv_uuid}" ]]; then
lvdisplay | grep "LV Path" | grep "${pv_uuid}" | awk '{print $3}'
fi
} }
function get_vg_name_from_device { function get_vg_name_from_device {

View File

@ -195,28 +195,41 @@ function prep_device {
vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE}) vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE})
lv_name=$(get_lv_name_from_device ${data_disk} ${device_type}) lv_name=$(get_lv_name_from_device ${data_disk} ${device_type})
VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]') VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
if [[ $VG ]]; then if [[ "${VG}" ]]; then
DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}") DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}")
CEPH_LVM_PREPARE=1 CEPH_LVM_PREPARE=1
if [ -n "${OSD_ID}" ]; then if [[ -n "${DEVICE_OSD_ID}" ]] && [[ -n "${OSD_ID}" ]]; then
if [ "${DEVICE_OSD_ID}" == "${OSD_ID}" ]; then if [[ "${DEVICE_OSD_ID}" == "${OSD_ID}" ]]; then
CEPH_LVM_PREPARE=0 CEPH_LVM_PREPARE=0
else else
disk_zap "${OSD_DEVICE}" disk_zap "${OSD_DEVICE}"
fi fi
fi fi
logical_volumes="$(lvs --noheadings -o lv_name ${VG} | xargs)"
for volume in ${logical_volumes}; do
data_volume=$(echo ${volume} | sed -E -e 's/db|wal/lv/g')
if [[ -z $(lvs --noheadings -o lv_name -S "lv_name=${data_volume}") ]]; then
# DB or WAL volume without a corresponding data volume, remove it
lvremove -y /dev/${VG}/${volume}
fi
done
else else
logical_devices=$(get_lvm_path_from_device "pv_name=~${BLOCK_DEVICE},lv_name=~${lv_name}") if [[ "${vg_name}" ]]; then
if [[ -n "$logical_devices" ]]; then logical_devices=$(get_dm_devices_from_osd_device "${data_disk}")
dmsetup remove $logical_devices device_filter=$(echo "${vg_name}" | sed 's/-/--/g')
disk_zap "${OSD_DEVICE}" logical_devices=$(echo "${logical_devices}" | grep "${device_filter}" | xargs)
CEPH_LVM_PREPARE=1 if [[ "$logical_devices" ]]; then
dmsetup remove $logical_devices
disk_zap "${OSD_DEVICE}"
CEPH_LVM_PREPARE=1
fi
fi fi
random_uuid=$(uuidgen) random_uuid=$(uuidgen)
vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}" vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}"
VG=$(get_vg_name_from_device ${BLOCK_DEVICE}) VG=$(get_vg_name_from_device ${BLOCK_DEVICE})
vgrename "ceph-vg-${random_uuid}" "${VG}" vgrename "ceph-vg-${random_uuid}" "${VG}"
fi fi
udev_settle
logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]') logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]')
if [[ $logical_volume != "${lv_name}" ]]; then if [[ $logical_volume != "${lv_name}" ]]; then
lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}" lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}"
@ -295,20 +308,16 @@ function osd_disk_prepare {
elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}') DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
CEPH_DISK_USED=1 CEPH_DISK_USED=1
elif [[ $(lsblk ${OSD_DEVICE}|grep -i ceph) ]]; then
CEPH_DISK_USED=1
else else
dm_lv_name="$(get_lv_name_from_device ${OSD_DEVICE} lv | sed 's/-/--/g')" if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
if [[ ! -z "${dm_lv_name}" ]] && [[ ! -z "$(dmsetup ls | grep ${dm_lv_name})" ]]; then if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
CEPH_DISK_USED=1 echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled"
fi disk_zap ${OSD_DEVICE}
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]] && [ ${CEPH_DISK_USED} -ne 1 ]; then else
echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled" echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled."
disk_zap ${OSD_DEVICE} echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk."
else exit 1
echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled." fi
echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk."
exit 1
fi fi
fi fi
fi fi
@ -456,7 +465,7 @@ function osd_disk_prepare {
CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}" CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
fi fi
if [[ CEPH_LVM_PREPARE -eq 1 ]]; then if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
ceph-volume lvm -v prepare ${CLI_OPTS} ceph-volume lvm -v prepare ${CLI_OPTS}
udev_settle udev_settle
fi fi