[ceph-osd] dmsetup remove logical devices using correct device names

Found another issue in disk_zap() where a needed update was missed when
https://review.opendev.org/c/openstack/openstack-helm-infra/+/745166
changed the logical volume naming convention.

The above patch set renamed existing volumes that followed the old
convention, so disk_zap()'s old name-matching logic can never find the
correct devices and must be updated.
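
For context, a minimal sketch of why the old matching breaks (the UUID
here is made up): device-mapper escapes hyphens in VG/LV names by
doubling them, and the new convention embeds the PV UUID in those names,
so a grep against dmsetup ls only matches the escaped form:

    # Hypothetical PV UUID; real values come from pvdisplay
    pv_uuid="Zn0aV1-x2Yw-3cQd"
    # Double the hyphens the way device-mapper does for VG/LV names
    escaped=$(echo "${pv_uuid}" | sed 's/-/--/g')   # -> Zn0aV1--x2Yw--3cQd
    # Only the escaped form matches dm device names
    dmsetup ls | grep "${escaped}"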

Also added logic to clean up orphaned DB/WAL volumes when they are
encountered, and removed some cases where a data disk was marked as in
use even though it wasn't set up correctly.
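
The prep_device hunk below implements the orphan check; in sketch form,
it maps each DB/WAL volume name to the data-volume name it should pair
with (the names differ only in the lv/db/wal component) and removes the
volume when no such data volume exists:

    for volume in $(lvs --noheadings -o lv_name "${VG}" | xargs); do
      data_volume=$(echo "${volume}" | sed -E -e 's/db|wal/lv/g')
      if [[ -z $(lvs --noheadings -o lv_name -S "lv_name=${data_volume}") ]]; then
        # DB/WAL volume with no matching data volume is orphaned
        lvremove -y "/dev/${VG}/${volume}"
      fi
    done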

Change-Id: I8deeecfdb69df1f855f287caab8385ee3d6869e0
commit 4c097b0300
parent f08d30df6b
Author: Stephen Taylor
Date:   2020-12-17 14:41:00 -07:00
3 changed files with 62 additions and 38 deletions


@@ -15,6 +15,6 @@ apiVersion: v1
 appVersion: v1.0.0
 description: OpenStack-Helm Ceph OSD
 name: ceph-osd
-version: 0.1.15
+version: 0.1.16
 home: https://github.com/ceph/ceph
 ...


@@ -57,6 +57,9 @@ function common_cleanup() {
 # Run a command within the global synchronization lock
 function locked() {
+  # Don't log every command inside locked() to keep logs cleaner
+  { set +x; } 2>/dev/null
   local LOCK_SCOPE=0
   # Allow locks to be re-entrant to avoid deadlocks
@@ -66,12 +69,17 @@ function locked() {
   fi
   # Execute the synchronized command
+  set -x
   "$@"
+  { set +x; } 2>/dev/null
   # Only unlock if the lock was obtained in this scope
   if [[ ${LOCK_SCOPE} -ne 0 ]]; then
     unlock
   fi
+  # Re-enable command logging
+  set -x
 }
 # Alias commands that interact with disks so they are always synchronized
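
The { set +x; } 2>/dev/null lines above use a standard bash idiom:
running set +x inside a group command with stderr redirected suppresses
the "+ set +x" trace line that the command itself would otherwise emit.
A standalone illustration:

    set -x
    echo "traced"            # xtrace logs: + echo traced
    { set +x; } 2>/dev/null  # turns tracing off without logging itself
    echo "not traced"        # no trace line is emitted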
@@ -304,21 +312,15 @@ function zap_extra_partitions {
 function disk_zap {
   # Run all the commands that ceph-disk zap uses to clear a disk
   local device=${1}
-  local device_filter=$(basename "${device}")
-  local lv_name=$(get_lv_name_from_device "${device}" lv)
-  local dm_devices=$(get_lvm_path_from_device "pv_name=~${device_filter},lv_name=~ceph")
+  local dm_devices=$(get_dm_devices_from_osd_device "${device}" | xargs)
   for dm_device in ${dm_devices}; do
-    if [[ ! -z ${dm_device} ]] && [[ ! -z $(dmsetup ls | grep ${dm_device}) ]]; then
+    if [[ "$(dmsetup ls | grep ${dm_device})" ]]; then
       dmsetup remove ${dm_device}
     fi
   done
-  if [[ ! -z "${lv_name}" ]]; then
-    local logical_volumes=$(lvdisplay | grep "LV Path" | grep "${lv_name}" | awk '/ceph/{print $3}' | tr '\n' ' ')
-    for logical_volume in ${logical_volumes}; do
-      if [[ ! -z ${logical_volume} ]]; then
-        lvremove -y ${logical_volume}
-      fi
-    done
+  local logical_volumes=$(get_lv_paths_from_osd_device "${device}" | xargs)
+  if [[ "${logical_volumes}" ]]; then
+    lvremove -y ${logical_volumes}
   fi
   local volume_group=$(pvdisplay -ddd -v ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph")
   if [[ ${volume_group} ]]; then
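
Since lvremove accepts several LV paths in a single invocation, the
rewrite can hand it the whole xargs-flattened list at once instead of
looping over the volumes and testing each entry for emptiness, e.g.
(paths hypothetical):

    lvremove -y /dev/ceph-vg-x/ceph-lv-x /dev/ceph-db-wal-vg-x/ceph-db-x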
@@ -503,11 +505,24 @@ function get_block_uuid_from_device {
   get_lvm_tag_from_device ${device} ceph.block_uuid
 }
-function get_lvm_path_from_device {
-  select="$1"
-  options="--noheadings -o lv_dm_path"
-  pvs ${options} -S "${select}" | tr -d ' '
+function get_dm_devices_from_osd_device {
+  device="$1"
+  pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
+  # Return the list of dm devices that belong to the osd
+  if [[ "${pv_uuid}" ]]; then
+    dmsetup ls | grep "$(echo "${pv_uuid}" | sed 's/-/--/g')" | awk '{print $1}'
+  fi
 }
+
+function get_lv_paths_from_osd_device {
+  device="$1"
+  pv_uuid=$(pvdisplay -ddd -v ${device} | awk '/PV UUID/{print $3}')
+  # Return the list of lvs that belong to the osd
+  if [[ "${pv_uuid}" ]]; then
+    lvdisplay | grep "LV Path" | grep "${pv_uuid}" | awk '{print $3}'
+  fi
+}
 function get_vg_name_from_device {
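
A sketch of how the new helpers chain together for a hypothetical OSD
disk; the UUID and names here are made up, but the flow matches the
functions above:

    pvdisplay -ddd -v /dev/sdb | awk '/PV UUID/{print $3}'
    # -> Zn0aV1-x2Yw-3cQd (the PV UUID embedded in the OSD's VG/LV names)
    get_dm_devices_from_osd_device /dev/sdb
    # -> dm names such as ceph--vg--Zn0aV1--x2Yw--3cQd-ceph--lv--Zn0aV1--x2Yw--3cQd
    get_lv_paths_from_osd_device /dev/sdb
    # -> LV paths such as /dev/ceph-vg-Zn0aV1-x2Yw-3cQd/ceph-lv-Zn0aV1-x2Yw-3cQd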


@@ -195,28 +195,41 @@ function prep_device {
   vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE})
   lv_name=$(get_lv_name_from_device ${data_disk} ${device_type})
   VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
-  if [[ $VG ]]; then
+  if [[ "${VG}" ]]; then
     DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}")
     CEPH_LVM_PREPARE=1
-    if [ -n "${OSD_ID}" ]; then
-      if [ "${DEVICE_OSD_ID}" == "${OSD_ID}" ]; then
+    if [[ -n "${DEVICE_OSD_ID}" ]] && [[ -n "${OSD_ID}" ]]; then
+      if [[ "${DEVICE_OSD_ID}" == "${OSD_ID}" ]]; then
         CEPH_LVM_PREPARE=0
       else
         disk_zap "${OSD_DEVICE}"
       fi
     fi
+    logical_volumes="$(lvs --noheadings -o lv_name ${VG} | xargs)"
+    for volume in ${logical_volumes}; do
+      data_volume=$(echo ${volume} | sed -E -e 's/db|wal/lv/g')
+      if [[ -z $(lvs --noheadings -o lv_name -S "lv_name=${data_volume}") ]]; then
+        # DB or WAL volume without a corresponding data volume, remove it
+        lvremove -y /dev/${VG}/${volume}
+      fi
+    done
   else
-    logical_devices=$(get_lvm_path_from_device "pv_name=~${BLOCK_DEVICE},lv_name=~${lv_name}")
-    if [[ -n "$logical_devices" ]]; then
-      dmsetup remove $logical_devices
-      disk_zap "${OSD_DEVICE}"
-      CEPH_LVM_PREPARE=1
-    fi
+    if [[ "${vg_name}" ]]; then
+      logical_devices=$(get_dm_devices_from_osd_device "${data_disk}")
+      device_filter=$(echo "${vg_name}" | sed 's/-/--/g')
+      logical_devices=$(echo "${logical_devices}" | grep "${device_filter}" | xargs)
+      if [[ "$logical_devices" ]]; then
+        dmsetup remove $logical_devices
+        disk_zap "${OSD_DEVICE}"
+        CEPH_LVM_PREPARE=1
+      fi
+    fi
     random_uuid=$(uuidgen)
     vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}"
     VG=$(get_vg_name_from_device ${BLOCK_DEVICE})
     vgrename "ceph-vg-${random_uuid}" "${VG}"
   fi
   udev_settle
   logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]')
   if [[ $logical_volume != "${lv_name}" ]]; then
     lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}"
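
Note the ordering in the else branch above: the VG has to be created
under a temporary random name first, because the deterministic name is
derived from the PV's UUID, which is only assigned once vgcreate has
initialized the device as a PV; get_vg_name_from_device can then compute
the final name for vgrename. In sketch form (device hypothetical):

    random_uuid=$(uuidgen)
    vgcreate "ceph-vg-${random_uuid}" /dev/sdb  # initializes the PV, assigning its UUID
    VG=$(get_vg_name_from_device /dev/sdb)      # name derived from the new PV UUID
    vgrename "ceph-vg-${random_uuid}" "${VG}"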
@@ -295,20 +308,16 @@ function osd_disk_prepare {
   elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
     DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
     CEPH_DISK_USED=1
-  elif [[ $(lsblk ${OSD_DEVICE}|grep -i ceph) ]]; then
-    CEPH_DISK_USED=1
   else
-    dm_lv_name="$(get_lv_name_from_device ${OSD_DEVICE} lv | sed 's/-/--/g')"
-    if [[ ! -z "${dm_lv_name}" ]] && [[ ! -z "$(dmsetup ls | grep ${dm_lv_name})" ]]; then
-      CEPH_DISK_USED=1
-    fi
-    if [[ ${OSD_FORCE_REPAIR} -eq 1 ]] && [ ${CEPH_DISK_USED} -ne 1 ]; then
-      echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled"
-      disk_zap ${OSD_DEVICE}
-    else
-      echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled."
-      echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk."
-      exit 1
+    if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
+      if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
+        echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled"
+        disk_zap ${OSD_DEVICE}
+      else
+        echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled."
+        echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk."
+        exit 1
+      fi
     fi
   fi
 fi
@@ -456,7 +465,7 @@ function osd_disk_prepare {
     CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
   fi
-  if [[ CEPH_LVM_PREPARE -eq 1 ]]; then
+  if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
    ceph-volume lvm -v prepare ${CLI_OPTS}
    udev_settle
   fi
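
The last hunk fixes a latent style bug: inside [[ ... ]], the -eq
operator evaluates its operands arithmetically, so the bare word
CEPH_LVM_PREPARE happened to be dereferenced as a variable name and the
old test worked by accident; ${CEPH_LVM_PREPARE} makes the expansion
explicit. A quick demonstration:

    CEPH_LVM_PREPARE=1
    [[ CEPH_LVM_PREPARE -eq 1 ]] && echo "old form matches (arithmetic context)"
    [[ ${CEPH_LVM_PREPARE} -eq 1 ]] && echo "new form matches explicitly"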