[ceph-osd] Allow logical disk labels to change for Ceph OSD disks
This change allows Ceph OSDs to respond to logical disk changes and continue to function instead of failing to initialize after such a change. For example, /dev/sdd is deployed as an OSD disk and then subsequently becomes /dev/sde due to a hardware-related event. This change allows the OSD to adapt and run as /dev/sde. Change-Id: I6c22088b8d884f9dd300d026415fb126af4b41d4
This commit is contained in:
parent
cb1bd3c6d7
commit
b4d485b8ad
@ -232,7 +232,7 @@ function disk_zap {
|
||||
local device_filter=$(basename "${device}")
|
||||
local dm_devices=$(get_lvm_path_from_device "pv_name=~${device_filter},lv_name=~ceph")
|
||||
for dm_device in ${dm_devices}; do
|
||||
if [[ ! -z ${dm_device} ]]; then
|
||||
if [[ ! -z ${dm_device} ]] && [[ ! -z $(dmsetup ls | grep ${dm_device}) ]]; then
|
||||
dmsetup remove ${dm_device}
|
||||
fi
|
||||
done
|
||||
@ -244,8 +244,8 @@ function disk_zap {
|
||||
done
|
||||
local volume_group=$(pvdisplay ${device} | grep "VG Name" | awk '/ceph/{print $3}' | grep "ceph")
|
||||
if [[ ${volume_group} ]]; then
|
||||
vgremove ${volume_group}
|
||||
pvremove ${device}
|
||||
vgremove -y ${volume_group}
|
||||
pvremove -y ${device}
|
||||
ceph-volume lvm zap ${device} --destroy
|
||||
fi
|
||||
wipefs --all ${device}
|
||||
@ -257,6 +257,9 @@ function disk_zap {
|
||||
function udev_settle {
|
||||
osd_devices="${OSD_DEVICE}"
|
||||
partprobe "${OSD_DEVICE}"
|
||||
locked pvscan --cache
|
||||
locked vgscan --cache
|
||||
locked lvscan --cache
|
||||
if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
|
||||
if [ ! -z "$BLOCK_DB" ]; then
|
||||
osd_devices="${osd_devices}\|${BLOCK_DB}"
|
||||
@ -407,6 +410,12 @@ function get_osd_wal_device_from_device {
|
||||
get_lvm_tag_from_device ${device} ceph.wal_device
|
||||
}
|
||||
|
||||
# Prints the ceph.block_uuid LVM tag recorded for a device.
# Arguments: $1 - device to query
# Outputs:   whatever get_lvm_tag_from_device prints for ceph.block_uuid
function get_block_uuid_from_device {
  # Keep the argument local so a caller's own "device" variable is not clobbered
  local device="$1"

  get_lvm_tag_from_device "${device}" ceph.block_uuid
}
|
||||
|
||||
function get_lvm_path_from_device {
|
||||
select="$1"
|
||||
|
||||
@ -414,6 +423,25 @@ function get_lvm_path_from_device {
|
||||
pvs ${options} -S "${select}" | tr -d ' '
|
||||
}
|
||||
|
||||
# Derives the volume group name for a device from the device's PV UUID.
# Arguments: $1 - device to inspect with pvdisplay
# Outputs:   "ceph-vg-<PV UUID>" on stdout; nothing when pvdisplay reports no
#            PV UUID for the device
function get_vg_name_from_device {
  # Locals keep this helper from overwriting same-named variables in callers
  local device="$1"
  local pv_uuid

  # Quoting keeps an empty argument from turning into a bare "pvdisplay" call
  pv_uuid=$(pvdisplay "${device}" | awk '/PV UUID/{print $3}')

  if [[ -n "${pv_uuid}" ]]; then
    echo "ceph-vg-${pv_uuid}"
  fi
}
|
||||
|
||||
# Derives a logical volume name for a device from the device's PV UUID.
# Arguments: $1 - device to inspect with pvdisplay
#            $2 - device type placed in the name (callers pass lv, db or wal)
# Outputs:   "ceph-<type>-<PV UUID>" on stdout; nothing when pvdisplay reports
#            no PV UUID for the device
function get_lv_name_from_device {
  # Locals keep this helper from overwriting same-named variables in callers
  local device="$1"
  local device_type="$2"
  local pv_uuid

  # Quoting keeps an empty argument from turning into a bare "pvdisplay" call
  pv_uuid=$(pvdisplay "${device}" | awk '/PV UUID/{print $3}')

  if [[ -n "${pv_uuid}" ]]; then
    echo "ceph-${device_type}-${pv_uuid}"
  fi
}
|
||||
|
||||
function set_device_class {
|
||||
if [ ! -z "$DEVICE_CLASS" ]; then
|
||||
if [ "x$DEVICE_CLASS" != "x$(get_device_class)" ]; then
|
||||
|
@ -38,15 +38,158 @@ else
|
||||
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
|
||||
fi
|
||||
|
||||
# Renames a single VG if necessary
# Arguments: $1 - physical disk whose volume group should be checked
# The current name is read from pvdisplay and the expected name comes from
# get_vg_name_from_device. Nothing happens when the disk has no VG, when no
# expected name can be derived, or when both names already match.
function rename_vg {
  local physical_disk=$1
  local old_vg_name=$(locked pvdisplay "${physical_disk}" | awk '/VG Name/{print $3}')
  local vg_name=$(get_vg_name_from_device "${physical_disk}")

  # An empty target name would make vgrename fail on a missing argument
  if [[ -n "${old_vg_name}" ]] && [[ -n "${vg_name}" ]] && [[ "${vg_name}" != "${old_vg_name}" ]]; then
    locked vgrename "${old_vg_name}" "${vg_name}"
  fi
}
|
||||
|
||||
# Renames one auxiliary LV (block.db or WAL) of an OSD if necessary
# Arguments: $1 - OSD data disk
#            $2 - LVM tag on the data disk holding the auxiliary LV's UUID
#            $3 - device type used to build the expected LV name (db or wal)
function _rename_lv_by_uuid_tag {
  local data_disk=$1
  local uuid_tag=$2
  local device_type=$3
  local lv_uuid=$(get_lvm_tag_from_device "${data_disk}" "${uuid_tag}")

  [[ -n "${lv_uuid}" ]] || return 0

  # The "LV Path" line sits within the four lines above "LV UUID" in lvdisplay
  # output; the UUID is matched as a fixed string rather than a regex
  local lv_path=$(lvdisplay | grep -B4 -F -e "${lv_uuid}" | awk '/LV Path/{print $3}')

  [[ -n "${lv_path}" ]] || return 0

  # Split /dev/<vg>/<lv> into its VG and LV components
  local leading dev_dir lv_vg old_name extra
  IFS=/ read -r leading dev_dir lv_vg old_name extra <<< "${lv_path}"
  local new_name=$(get_lv_name_from_device "${data_disk}" "${device_type}")

  if [[ -n "${old_name}" ]] && [[ -n "${new_name}" ]] && [[ "${new_name}" != "${old_name}" ]]; then
    locked lvrename "${lv_vg}" "${old_name}" "${new_name}"
  fi
}

# Renames all LVs associated with an OSD as necessary
# Arguments: $1 - OSD data disk
# The data LV is found through the disk's VG. The block.db and WAL LVs are
# found through the ceph.db_uuid and ceph.wal_uuid tags, since they may be in
# other VGs. Nothing is done when the data disk has no VG.
function rename_lvs {
  local data_disk=$1
  local vg_name=$(locked pvdisplay "${data_disk}" | awk '/VG Name/{print $3}')

  [[ -n "${vg_name}" ]] || return 0

  # Rename the OSD volume if necessary
  local old_lv_name=$(locked lvdisplay "${vg_name}" | awk '/LV Name/{print $3}')
  local lv_name=$(get_lv_name_from_device "${data_disk}" lv)

  if [[ -n "${old_lv_name}" ]] && [[ -n "${lv_name}" ]] && [[ "${lv_name}" != "${old_lv_name}" ]]; then
    locked lvrename "${vg_name}" "${old_lv_name}" "${lv_name}"
  fi

  # Rename the OSD's block.db volume if necessary, referenced by UUID
  _rename_lv_by_uuid_tag "${data_disk}" ceph.db_uuid db

  # Rename the OSD's WAL volume if necessary, referenced by UUID
  _rename_lv_by_uuid_tag "${data_disk}" ceph.wal_uuid wal
}
|
||||
|
||||
# Replaces one "key=value" LVM tag on a volume
# Arguments: $1 - volume path, $2 - tag key, $3 - old value (may be empty),
#            $4 - new value (nothing is done when empty)
function _replace_lv_tag {
  local volume=$1
  local tag_key=$2
  local old_value=$3
  local new_value=$4

  [[ -n "${new_value}" ]] || return 0

  if [[ -n "${old_value}" ]]; then
    locked lvchange --deltag "${tag_key}=${old_value}" "${volume}"
  fi
  locked lvchange --addtag "${tag_key}=${new_value}" "${volume}"
}

# Fixes up the tags that reference block, db, and wal logical_volumes
# NOTE: This updates tags based on current VG and LV names, so any necessary
# renaming should be completed prior to calling this
# Arguments: $1 - OSD data disk
# Volumes are selected by looking for the data disk's PV UUID in "lvs" output.
# Nothing is done when pvdisplay reports no PV UUID for the disk.
function update_lv_tags {
  local data_disk=$1
  local pv_uuid=$(pvdisplay "${data_disk}" | awk '/PV UUID/{print $3}')

  [[ -n "${pv_uuid}" ]] || return 0

  local volumes="$(lvs --no-headings | grep -F -e "${pv_uuid}")"
  local block_device db_device wal_device
  local old_block_device old_db_device old_wal_device
  local lv vg other_stuff

  # The expected names do not change between rows, so compute them once
  local block_lv_name=$(get_lv_name_from_device "${data_disk}" lv)
  local db_lv_name=$(get_lv_name_from_device "${data_disk}" db)
  local wal_lv_name=$(get_lv_name_from_device "${data_disk}" wal)

  # Build OSD device paths from current VG and LV names
  # (the here-string is quoted so every row stays on its own line)
  while read -r lv vg other_stuff; do
    [[ -n "${lv}" && -n "${vg}" ]] || continue
    if [[ "${lv}" == "${block_lv_name}" ]]; then
      block_device="/dev/${vg}/${lv}"
    fi
    if [[ "${lv}" == "${db_lv_name}" ]]; then
      db_device="/dev/${vg}/${lv}"
    fi
    if [[ "${lv}" == "${wal_lv_name}" ]]; then
      wal_device="/dev/${vg}/${lv}"
    fi
  done <<< "${volumes}"

  # The old tag values are read from the block volume. This happens only after
  # the scan above so it works even if a db/wal row precedes the block row.
  if [[ -n "${block_device}" ]]; then
    old_block_device=$(get_lvm_tag_from_volume "${block_device}" ceph.block_device)
    if [[ -n "${db_device}" ]]; then
      old_db_device=$(get_lvm_tag_from_volume "${block_device}" ceph.db_device)
    fi
    if [[ -n "${wal_device}" ]]; then
      old_wal_device=$(get_lvm_tag_from_volume "${block_device}" ceph.wal_device)
    fi
  fi

  # Set new tags on all of the volumes using paths built above
  while read -r lv vg other_stuff; do
    [[ -n "${lv}" && -n "${vg}" ]] || continue
    _replace_lv_tag "/dev/${vg}/${lv}" ceph.block_device "${old_block_device}" "${block_device}"
    _replace_lv_tag "/dev/${vg}/${lv}" ceph.db_device "${old_db_device}" "${db_device}"
    _replace_lv_tag "/dev/${vg}/${lv}" ceph.wal_device "${old_wal_device}" "${wal_device}"
  done <<< "${volumes}"
}
|
||||
|
||||
# Settle LVM changes before inspecting volumes
udev_settle

# Rename VGs first
# Each configured device is resolved to its canonical path with readlink -f
# before its VG is checked.
if [[ "${OSD_DEVICE}" ]]; then
  OSD_DEVICE=$(readlink -f "${OSD_DEVICE}")
  rename_vg "${OSD_DEVICE}"
fi

if [[ "${BLOCK_DB}" ]]; then
  BLOCK_DB=$(readlink -f "${BLOCK_DB}")
  rename_vg "${BLOCK_DB}"
fi

if [[ "${BLOCK_WAL}" ]]; then
  BLOCK_WAL=$(readlink -f "${BLOCK_WAL}")
  rename_vg "${BLOCK_WAL}"
fi

# Only touch LVs and tags when a data device is configured; without one the
# helpers below would query pvdisplay with no device argument.
if [[ "${OSD_DEVICE}" ]]; then
  # Rename LVs after VGs are correct
  rename_lvs "${OSD_DEVICE}"

  # Update tags (all VG and LV names should be correct before calling this)
  update_lv_tags "${OSD_DEVICE}"
fi

# Settle LVM changes again after any changes have been made
udev_settle
|
||||
|
||||
function prep_device {
|
||||
local BLOCK_DEVICE=$1
|
||||
local BLOCK_DEVICE_SIZE=$2
|
||||
local device_type=$3
|
||||
local device_string VG DEVICE_OSD_ID logical_devices logical_volume
|
||||
device_string=$(echo "${BLOCK_DEVICE#/}" | tr '/' '-')
|
||||
VG=$(vgs --noheadings -o vg_name -S "vg_name=ceph-db-wal-${device_string}" | tr -d '[:space:]')
|
||||
local data_disk=$4
|
||||
local vg_name lv_name VG DEVICE_OSD_ID logical_devices logical_volume
|
||||
vg_name=$(get_vg_name_from_device ${BLOCK_DEVICE})
|
||||
lv_name=$(get_lv_name_from_device ${data_disk} ${device_type})
|
||||
VG=$(vgs --noheadings -o vg_name -S "vg_name=${vg_name}" | tr -d '[:space:]')
|
||||
if [[ $VG ]]; then
|
||||
DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/ceph-db-wal-${device_string}/ceph-${device_type}-${osd_dev_string}")
|
||||
DEVICE_OSD_ID=$(get_osd_id_from_volume "/dev/${vg_name}/${lv_name}")
|
||||
CEPH_LVM_PREPARE=1
|
||||
if [ -n "${OSD_ID}" ]; then
|
||||
if [ "${DEVICE_OSD_ID}" == "${OSD_ID}" ]; then
|
||||
@ -62,22 +205,24 @@ function prep_device {
|
||||
disk_zap "${OSD_DEVICE}"
|
||||
CEPH_LVM_PREPARE=1
|
||||
fi
|
||||
VG=ceph-db-wal-${device_string}
|
||||
locked vgcreate "$VG" "${BLOCK_DEVICE}"
|
||||
random_uuid=$(uuidgen)
|
||||
locked vgcreate "ceph-vg-${random_uuid}" "${BLOCK_DEVICE}"
|
||||
VG=$(get_vg_name_from_device ${BLOCK_DEVICE})
|
||||
locked vgrename "ceph-vg-${random_uuid}" "${VG}"
|
||||
fi
|
||||
logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=ceph-${device_type}-${osd_dev_string}" | tr -d '[:space:]')
|
||||
if [[ $logical_volume != "ceph-${device_type}-${osd_dev_string}" ]]; then
|
||||
locked lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "ceph-${device_type}-${osd_dev_string}" "${VG}"
|
||||
logical_volume=$(lvs --noheadings -o lv_name -S "lv_name=${lv_name}" | tr -d '[:space:]')
|
||||
if [[ $logical_volume != "${lv_name}" ]]; then
|
||||
locked lvcreate -L "${BLOCK_DEVICE_SIZE}" -n "${lv_name}" "${VG}"
|
||||
fi
|
||||
if [[ "${device_type}" == "db" ]]; then
|
||||
BLOCK_DB="${VG}/ceph-${device_type}-${osd_dev_string}"
|
||||
BLOCK_DB="${VG}/${lv_name}"
|
||||
elif [[ "${device_type}" == "wal" ]]; then
|
||||
BLOCK_WAL="${VG}/ceph-${device_type}-${osd_dev_string}"
|
||||
BLOCK_WAL="${VG}/${lv_name}"
|
||||
fi
|
||||
}
|
||||
|
||||
function osd_disk_prepare {
|
||||
if [[ -z "${OSD_DEVICE}" ]];then
|
||||
if [[ -z "${OSD_DEVICE}" ]]; then
|
||||
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
|
||||
exit 1
|
||||
fi
|
||||
@ -96,7 +241,6 @@ function osd_disk_prepare {
|
||||
#search for some ceph metadata on the disk based on the status of the disk/lvm in filestore
|
||||
CEPH_DISK_USED=0
|
||||
CEPH_LVM_PREPARE=1
|
||||
osd_dev_string=$(echo ${OSD_DEVICE} | awk -F "/" '{print $2}{print $3}' | paste -s -d'-')
|
||||
osd_dev_split=$(basename "${OSD_DEVICE}")
|
||||
udev_settle
|
||||
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
|
||||
@ -233,28 +377,53 @@ function osd_disk_prepare {
|
||||
echo "Moving on, trying to prepare and activate the OSD LVM now."
|
||||
fi
|
||||
|
||||
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
|
||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
|
||||
ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
|
||||
elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
|
||||
udev_settle
|
||||
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
|
||||
if [[ "${vg_name}" ]]; then
|
||||
OSD_VG=${vg_name}
|
||||
else
|
||||
random_uuid=$(uuidgen)
|
||||
vgcreate ceph-vg-${random_uuid} ${OSD_DEVICE}
|
||||
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
|
||||
vgrename ceph-vg-${random_uuid} ${vg_name}
|
||||
OSD_VG=${vg_name}
|
||||
fi
|
||||
lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv)
|
||||
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
|
||||
lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG}
|
||||
fi
|
||||
OSD_LV=${OSD_VG}/${lv_name}
|
||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
|
||||
CEPH_LVM_PREPARE=1
|
||||
udev_settle
|
||||
fi
|
||||
|
||||
if [ "${OSD_BLUESTORE:-0}" -eq 1 ] && [ ${CEPH_DISK_USED} -eq 0 ] ; then
|
||||
if [[ ${BLOCK_DB} ]]; then
|
||||
block_db_string=$(echo ${BLOCK_DB} | awk -F "/" '{print $2}{print $3}' | paste -s -d'-')
|
||||
block_db_string=$(echo ${BLOCK_DB} | awk -F "/" '{print $2 "-" $3}')
|
||||
fi
|
||||
if [[ ${BLOCK_WAL} ]]; then
|
||||
block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2}{print $3}' | paste -s -d'-')
|
||||
block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2 "-" $3}')
|
||||
fi
|
||||
if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
|
||||
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db"
|
||||
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal"
|
||||
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
|
||||
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
|
||||
elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
|
||||
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal"
|
||||
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
|
||||
elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then
|
||||
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db"
|
||||
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
|
||||
fi
|
||||
if [ -z ${BLOCK_DB} ] && [ -z ${BLOCK_WAL} ]; then
|
||||
if pvdisplay ${OSD_DEVICE} | grep "VG Name" | awk '{print $3}' | grep "ceph"; then
|
||||
if pvdisplay ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
|
||||
CEPH_LVM_PREPARE=0
|
||||
fi
|
||||
fi
|
||||
else
|
||||
if pvdisplay ${OSD_DEVICE} | grep "VG Name" | awk '{print $3}' | grep "ceph"; then
|
||||
if pvdisplay ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
|
||||
CEPH_LVM_PREPARE=0
|
||||
fi
|
||||
fi
|
||||
@ -280,22 +449,7 @@ function osd_disk_prepare {
|
||||
CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
|
||||
fi
|
||||
|
||||
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
|
||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
|
||||
ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
|
||||
elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
|
||||
udev_settle
|
||||
if [[ $(vgdisplay | grep "VG Name" | awk '{print $3}' | grep "ceph-vg-${osd_dev_string}") ]]; then
|
||||
OSD_VG=$(vgdisplay | grep "VG Name" | awk '{print $3}' | grep "ceph-vg-${osd_dev_string}")
|
||||
else
|
||||
vgcreate ceph-vg-${osd_dev_string} ${OSD_DEVICE}
|
||||
OSD_VG=ceph-vg-${osd_dev_string}
|
||||
fi
|
||||
if [[ $(locked lvdisplay | grep "LV Name" | awk '{print $3}' | grep "ceph-lv-${osd_dev_string}") != "ceph-lv-${osd_dev_string}" ]]; then
|
||||
lvcreate --yes -l 100%FREE -n ceph-lv-${osd_dev_string} ${OSD_VG}
|
||||
fi
|
||||
OSD_LV=${OSD_VG}/ceph-lv-${osd_dev_string}
|
||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
|
||||
if [[ CEPH_LVM_PREPARE -eq 1 ]]; then
|
||||
locked ceph-volume lvm -v prepare ${CLI_OPTS}
|
||||
udev_settle
|
||||
fi
|
||||
|
Loading…
x
Reference in New Issue
Block a user