metal/bsp-files/kickstarts/pre_disk_setup_common.cfg

%pre --erroronfail

# Duplicate the current stdout onto a shell-allocated descriptor; its number
# is kept in $stdout so that subshells can use it to log output.
exec {stdout}>&1

# Load the common kickstart helper functions
. /tmp/ks-functions.sh

# Record the install media variables in the log
for media_var in ISO_DEV USB_DEV; do
    wlog "${media_var}='${!media_var}'."
done

# Find the first usable disk for the install without stomping on the USB
# device if it happens to come up first.

# Load the kernel boot arguments as positional parameters, then evaluate every
# key=value token so that it becomes a shell variable of the same name.
# NOTE(review): eval runs on raw /proc/cmdline tokens - this assumes the boot
# arguments are trusted.
set -- $(cat /proc/cmdline)
for I in $*; do
    case "$I" in
        *=*)
            eval $I 2>/dev/null
            ;;
    esac
done

# Devices not named in the boot arguments fall back to get_disk_dev
boot_device=${boot_device:-$(get_disk_dev)}
rootfs_device=${rootfs_device:-$(get_disk_dev)}

# Default backup partition size in MiB
persistent_size=${persistent_size:-30000}

# Remember the device names exactly as requested; they are used in the log
# and error messages below.
orig_rootfs_device=$rootfs_device
orig_boot_device=$boot_device

# Resolve the rootfs device through get_by_path and then get_disk
by_path_rootfs_device=$(get_by_path $rootfs_device)
rootfs_device=$(get_disk $by_path_rootfs_device)
wlog "Found rootfs $orig_rootfs_device on: $by_path_rootfs_device->$rootfs_device."

# Same resolution for the boot device
by_path_boot_device=$(get_by_path $boot_device)
boot_device=$(get_disk $by_path_boot_device)
wlog "Found boot $orig_boot_device on: $by_path_boot_device->$boot_device."

# Both resolved devices must exist, otherwise the install cannot proceed
if [ ! -e "$rootfs_device" ] || [ ! -e "$boot_device" ]; then
    # Touch this file to prevent Anaconda from dying an ungraceful death
    touch /tmp/part-include

    report_pre_failure_with_msg "ERROR: Specified installation ($orig_rootfs_device) or boot ($orig_boot_device) device is invalid."
fi

# Collect every block device of type "disk" in the system (solid state
# devices included).
# Note: /dev/* nodes are managed by the kernel devtmpfs, while the links in
# /dev/disk/by-path/ are managed by udev, which updates them asynchronously.
# The links are therefore resolved to their /dev node here and are not used
# while performing partition operations.
STOR_DEVS=""
wlog "Detected storage devices:"
for f in /dev/disk/by-path/*; do
    dev=$(readlink -f $f)
    # Anything lsblk does not report as TYPE="disk" is skipped
    exec_retry 2 0.5 "lsblk --nodeps --pairs $dev" | grep -q 'TYPE="disk"' || continue
    STOR_DEVS="$STOR_DEVS $dev"
    wlog "  ${f}->${dev}"
done

# On some systems udev creates multiple links to the same device. Drop the
# duplicates, otherwise a flock would be attempted on the same device more
# than once.
STOR_DEVS=$(echo "$STOR_DEVS" | xargs -n 1 | sort -u | xargs)
wlog "Unique storage devices: $STOR_DEVS."

[ -n "$STOR_DEVS" ] || report_pre_failure_with_msg "ERROR: No storage devices available."

# Lock all devices so that udev doesn't trigger a kernel partition table
# rescan that removes and recreates all /dev nodes for partitions on those
# devices. Since udev events are asynchronous this could lead to a case
# where /dev/ links for existing partitions are briefly missing.
# Missing /dev links leads to command execution failures.
#
# STOR_DEV_FDS starts with the logging descriptor ($stdout) and collects the
# descriptor opened on each storage device. The resulting list is what the
# exec_no_fds calls further down receive as their first argument.
STOR_DEV_FDS="$stdout"
for dev in $STOR_DEVS; do
    # {fd} makes the shell pick a free descriptor number and store it in $fd
    exec {fd}>$dev || report_pre_failure_with_msg "ERROR: Error creating file descriptor for $dev."
    # -n: do not wait for the lock; report a failure right away if it is taken
    flock -n "$fd" || report_pre_failure_with_msg "ERROR: Can't get a lock on fd $fd of device $dev."
    STOR_DEV_FDS="$STOR_DEV_FDS $fd"
done

# Log info about system state at beginning of partitioning operation
for dev in $STOR_DEVS; do
    wlog "Initial partition table for $dev is:"
    # Partition sizes are printed in MiB
    parted -s $dev unit mib print
done

# Refuse to install onto a USB drive: fail when udev reports ID_BUS=usb for
# either the rootfs device or the boot device.
if udevadm info --query=property --name=$rootfs_device | grep -q '^ID_BUS=usb' ||
        udevadm info --query=property --name=$boot_device | grep -q '^ID_BUS=usb'; then
    # Touch this file to prevent Anaconda from dying an ungraceful death
    touch /tmp/part-include

    report_pre_failure_with_msg "ERROR: Specified installation ($orig_rootfs_device) or boot ($orig_boot_device) device is a USB drive."
fi

# LVM commands in this section go through exec_no_fds, which receives the
# list of held descriptors as its first argument.
# NOTE(review): the trailing "5 0.5" arguments look like a retry count and
# interval - confirm against the exec_no_fds definition.

# Deactivate existing volume groups to avoid Anaconda issues with pre-existing groups
vgs=$(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name")
for vg in $vgs; do
    wlog "Disabling $vg."
    exec_no_fds "$STOR_DEV_FDS" "vgchange -an $vg" 5 0.5 ||
        report_pre_failure_with_msg "ERROR: Failed to disable $vg."
done

# Remove the volume groups that have physical volumes on the root disk
for vg in $(exec_no_fds "$STOR_DEV_FDS" "vgs --noheadings -o vg_name"); do
    # NOTE(review): this is a substring match on the rootfs device path, so a
    # PV on a device whose name merely starts with it also matches.
    if ! exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name" | grep -q "$rootfs_device"; then
        wlog "Found $vg with no PV on rootfs, ignoring."
        continue
    fi

    # Failing to remove the LVs is only logged; the PV removal below is forced
    wlog "Removing LVs on $vg."
    exec_no_fds "$STOR_DEV_FDS" "lvremove --force $vg" 5 0.5 || wlog "WARNING: Failed to remove lvs on $vg."

    pvs=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select \"vg_name=$vg\" --noheadings -o pv_name")
    wlog "VG $vg has PVs: $(echo $pvs), removing them."
    for pv in $pvs; do
        wlog "Removing PV $pv."
        exec_no_fds "$STOR_DEV_FDS" "pvremove --force --force --yes $pv" 5 0.5 ||
            report_pre_failure_with_msg "ERROR: Failed to remove PV."
    done

    # VG should no longer be present; nothing more to do if it is gone
    vg_check=$(exec_no_fds "$STOR_DEV_FDS" "vgs --select \"vg_name=$vg\" --noheadings -o vg_name")
    [ -n "$vg_check" ] || continue

    wlog "WARNING: VG $vg is still present after removing PVs! Removing it by force."
    exec_no_fds "$STOR_DEV_FDS" "vgremove --force $vg" 5 0.5 ||
        report_pre_failure_with_msg "ERROR: Failed to remove VG."
done

# Build the comma separated list of disks to wipe (WIPE_HDD) and, during an
# upgrade, the list of disks the installer is restricted to (ONLYUSE_HDD).
ONLYUSE_HDD=""
# Label of the type GUID line in "sgdisk -i" output
part_type_guid_str="Partition GUID code"
upgrade_reply=$(curl -sf http://pxecontroller:6385/v1/upgrade/$(hostname)/in_upgrade 2>/dev/null)
if [ "$upgrade_reply" = "true" ]; then
    # In an upgrade, only wipe the disk with the rootfs and boot partition
    wlog "In upgrade, wiping only $rootfs_device"
    WIPE_HDD=$rootfs_device
    ONLYUSE_HDD="$(basename $rootfs_device)"
    if [ "$rootfs_device" != "$boot_device" ]; then
        WIPE_HDD+=",$boot_device"
        ONLYUSE_HDD+=",$(basename $boot_device)"
    fi
else
    # Make a list of all the hard drives that are to be wiped
    WIPE_HDD=""
    # Ceph OSD and journal partitions are recognised by their partition type GUID
    CEPH_OSD_GUID="4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D"
    CEPH_JOURNAL_GUID="45B0969E-9B03-4F30-B4C6-B4B80CEFF106"

    # Ask the controller whether OSD data is to be wiped; any reply other
    # than "true" (including no reply at all) keeps the OSDs.
    case "$(curl -sf http://pxecontroller:6385/v1/ihosts/wipe_osds 2>/dev/null)" in
        true)
            wlog "Wipe OSD data."
            WIPE_CEPH_OSDS="true"
            ;;
        *)
            wlog "Skip Ceph OSD data wipe."
            WIPE_CEPH_OSDS="false"
            ;;
    esac

    for dev in $STOR_DEVS; do
        # Avoid wiping USB drives
        if udevadm info --query=property --name=$dev | grep -q '^ID_BUS=usb'; then
            continue
        fi

        # Avoid wiping ceph osds if sysinv tells us so
        if [ "$WIPE_CEPH_OSDS" = "false" ]; then
            wipe_dev="true"

            # A PV directly on the disk that belongs to a ceph* VG
            if pvs | grep -q "$dev *ceph"; then
                wlog "skip rook provisoned disk $dev"
                continue
            fi

            # Partition numbers are the first column of the rows that follow
            # the "Number" header line of the parted listing.
            part_numbers=( $(parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}') )
            # Scan the partitions for Ceph OSDs; a single hit is enough to
            # leave the whole disk alone.
            for part_number in "${part_numbers[@]}"; do
                sgdisk_part_info=$(sgdisk -i $part_number $dev)
                part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
                if [ "$part_type_guid" == $CEPH_OSD_GUID ]; then
                    wlog "OSD found on $dev, skipping wipe"
                    wipe_dev="false"
                    break
                fi

                # Matches both <dev><N> and <dev>p<N> partition node names
                if pvs | grep -q -e "${dev}${part_number} *ceph" -e "${dev}p${part_number} *ceph"; then
                    wlog "Rook OSD found on $dev$part_number, skip wipe"
                    wipe_dev="false"
                    break
                fi
            done

            [ "$wipe_dev" == "false" ] && continue
        fi

        # Add device to the wipe list, unless it is the ISO or USB install media
        devname=$(basename $dev)
        if [ -e $dev ] && [ "$ISO_DEV" != "../../$devname" ] && [ "$USB_DEV" != "../../$devname" ]; then
            WIPE_HDD=${WIPE_HDD:+$WIPE_HDD,}$dev
        fi
    done
    wlog "Not in upgrade, wiping disks: $WIPE_HDD"
fi


# Prefix that partition numbers are appended to for the rootfs disk.
# Disks with "nvme" in their name get an extra "p" after the disk name.
case $rootfs_device in
    *nvme*)
        ROOTFS_PART_PREFIX=${rootfs_device}p
        ;;
    *)
        ROOTFS_PART_PREFIX=$rootfs_device
        ;;
esac

# Set to 1 once an existing backup partition has been found and preserved
BACKUP_CREATED=0

# BA5EBA11-0000-1111-2222- is the GUID prefix used by STX; it is defined in
# sysinv constants.py. The 000000000001 suffix is taken by custom stx LVM
# partitions, so the persistent backup partition uses the next one
# (000000000002).
BACKUP_PART_LABEL="Platform Backup"
BACKUP_PART_GUID="BA5EBA11-0000-1111-2222-000000000002"

# Wipe every disk listed in WIPE_HDD (comma separated list).
#
# For each partition on the disk: zero the areas that hold GPT/LVM metadata,
# delete the partition and remove any stale /dev node. An existing ext4
# platform backup partition on the rootfs disk is preserved. Afterwards a
# fresh GPT label is written, unless that would destroy a preserved backup
# partition.
for dev in ${WIPE_HDD//,/ }
do
    # Clearing previous GPT tables or LVM data
    # Delete the first few bytes at the start and end of the partition. This is required with
    # GPT partitions, they save partition info at the start and the end of the block.
    # Do this for each partition on the disk, as well.
    part_numbers=( $(parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}') )
    for part_number in "${part_numbers[@]}"; do
        # Partition nodes on nvme disks are named <disk>p<N>, others <disk><N>
        part=$dev$part_number
        case $part in
            *"nvme"*)
                part=${dev}p${part_number}
            ;;
        esac
        sgdisk_part_info=$(sgdisk -i $part_number $dev)
        part_type_guid=$(echo "$sgdisk_part_info" | grep "$part_type_guid_str" | awk '{print $4;}')
        if [ "$dev" == "$rootfs_device" ]; then
            part_fstype=$(exec_retry 5 0.5 "blkid -s TYPE -o value $part")
            if [ "$part_type_guid" == "$BACKUP_PART_GUID" ] && [ "${part_fstype}" == "ext4" ]; then
                wlog "Skipping wipe backup partition $part"
                BACKUP_CREATED=1
                continue
            fi
        fi
        wlog "Wiping partition $part"
        # WIPE_CEPH_OSDS and CEPH_JOURNAL_GUID are only assigned on the
        # non-upgrade path. They are quoted and defaulted here so that during
        # an upgrade the test is well-formed and simply selects the regular
        # wipe below, instead of failing with a "[" syntax error.
        if [ "${WIPE_CEPH_OSDS:-false}" == "true" ] && [ "$part_type_guid" == "${CEPH_JOURNAL_GUID:-}" ]; then
            # Journal partitions require additional wiping. Based on the ceph-manage-journal.py
            # script in the integ repo (at the ceph/ceph/files/ceph-manage-journal.py location)
            # wiping 100MB of data at the beginning of the partition should be enough. We also
            # wipe 100MB at the end, just to be safe.
            # blockdev --getsz reports 512-byte sectors; dividing by 2048 converts to MiB.
            dd if=/dev/zero of=$part bs=1M count=100
            dd if=/dev/zero of=$part bs=1M count=100 seek=$(( $(blockdev --getsz $part) / (1024 * 2) - 100 ))
        else
            # 34 sectors at each end of the partition
            dd if=/dev/zero of=$part bs=512 count=34
            dd if=/dev/zero of=$part bs=512 count=34 seek=$(( $(blockdev --getsz $part) - 34 ))
        fi
        exec_retry 5 0.5 "parted -s $dev rm $part_number"
        # LP 1876374: On some nvme systems udev doesn't correctly remove the
        # links to the deleted partitions from /dev/nvme* causing them to be
        # seen as non block devices.
        exec_retry 5 0.3 "rm -f $part" # Delete remaining /dev node leftover
    done
    # A new label would destroy the preserved backup partition, so the rootfs
    # disk only gets one when no backup partition was kept.
    if [ $BACKUP_CREATED -eq 0 -o "$dev" != "$rootfs_device" ]; then
        wlog "Creating disk label for $dev"
        parted -s $dev mktable gpt
    fi

done

# cgts-vg PVs can survive the wipe above, for instance in an upgrade where
# not every disk is wiped. Look for them and clear their metadata areas.
# If other volume groups are ever created from kickstart, they need to be
# added to this search as well.
partitions=$(exec_no_fds "$STOR_DEV_FDS" "pvs --select 'vg_name=cgts-vg' -o pv_name --noheading" | grep -v '\[unknown\]')
for p in $partitions; do
    wlog "Pre-wiping $p from kickstart (cgts-vg present)"
    # First 34 sectors of the PV ...
    dd if=/dev/zero of=$p bs=512 count=34
    # ... and the last 34 (blockdev --getsz reports 512-byte sectors)
    tail_start=$(( $(blockdev --getsz $p) - 34 ))
    dd if=/dev/zero of=$p bs=512 count=34 seek=$tail_start
done

# Number of bytes in one GiB.
# Plain arithmetic expansion is used because "let" has no -i option: it
# evaluates "-i" as an expression on the shell variable i, and aborts without
# assigning gb if that variable holds text that is not valid arithmetic.
gb=$((1024*1024*1024))

# When set (upgrade case), restrict the installer to the listed disks by
# appending an ignoredisk directive to the generated partitioning include file.
if [ -n "$ONLYUSE_HDD" ]; then
    cat<<EOF>>/tmp/part-include
ignoredisk --only-use=$ONLYUSE_HDD
EOF
fi