Refactor Ceph OSD Init Scripts - First PS
This is the first of multiple updates to ceph-osd in which the OSD init code will be refactored for better sustainability. This patchset makes two changes: 1) Removes "ceph-disk" support, as ceph-disk has not been part of the ceph image since Nautilus. 2) Separates the initialization code for the bluestore, filestore, and directory backend configuration options. Change-Id: I116ce9cc8d3bac870adba8b84677ec652bbb0dd4
This commit is contained in:
parent
c6786de152
commit
aaa85e3fc5
@ -15,6 +15,6 @@ apiVersion: v1
|
||||
appVersion: v1.0.0
|
||||
description: OpenStack-Helm Ceph OSD
|
||||
name: ceph-osd
|
||||
version: 0.1.20
|
||||
version: 0.1.21
|
||||
home: https://github.com/ceph/ceph
|
||||
...
|
||||
|
@ -17,7 +17,7 @@ limitations under the License.
|
||||
set -ex
|
||||
export LC_ALL=C
|
||||
|
||||
source /tmp/osd-common-ceph-disk.sh
|
||||
source /tmp/osd-common-ceph-volume.sh
|
||||
|
||||
: "${JOURNAL_DIR:=/var/lib/ceph/journal}"
|
||||
|
||||
|
@ -1,131 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
source /tmp/osd-common-ceph-disk.sh
|
||||
|
||||
set -ex
|
||||
|
||||
: "${OSD_SOFT_FORCE_ZAP:=1}"
|
||||
: "${OSD_JOURNAL_DISK:=}"
|
||||
|
||||
if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
|
||||
export OSD_DEVICE="/var/lib/ceph/osd"
|
||||
else
|
||||
export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
|
||||
fi
|
||||
|
||||
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
|
||||
export OSD_JOURNAL="/var/lib/ceph/journal"
|
||||
else
|
||||
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
|
||||
fi
|
||||
|
||||
if [[ -z "${OSD_DEVICE}" ]];then
|
||||
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ ! -b "${OSD_DEVICE}" ]]; then
|
||||
echo "ERROR- The device pointed by OSD_DEVICE ${OSD_DEVICE} doesn't exist !"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CEPH_DISK_OPTIONS=""
|
||||
CEPH_OSD_OPTIONS=""
|
||||
DATA_UUID=$(blkid -o value -s PARTUUID ${OSD_DEVICE}*1)
|
||||
|
||||
udev_settle
|
||||
|
||||
DATA_PART=$(dev_part ${OSD_DEVICE} 1)
|
||||
MOUNTED_PART=${DATA_PART}
|
||||
|
||||
ceph-disk -v \
|
||||
--setuser ceph \
|
||||
--setgroup disk \
|
||||
activate ${CEPH_DISK_OPTIONS} \
|
||||
--no-start-daemon ${DATA_PART}
|
||||
|
||||
OSD_ID=$(grep "${MOUNTED_PART}" /proc/mounts | awk '{print $2}' | grep -oh '[0-9]*')
|
||||
|
||||
OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
|
||||
OSD_KEYRING="${OSD_PATH}/keyring"
|
||||
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
|
||||
OSD_WEIGHT=0
|
||||
# NOTE(supamatt): add or move the OSD's CRUSH location
|
||||
crush_location
|
||||
|
||||
if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
|
||||
if [ -n "${OSD_JOURNAL}" ]; then
|
||||
if [ -b "${OSD_JOURNAL}" ]; then
|
||||
OSD_JOURNAL_DISK="$(readlink -f ${OSD_PATH}/journal)"
|
||||
if [ -z "${OSD_JOURNAL_DISK}" ]; then
|
||||
echo "ERROR: Unable to find journal device ${OSD_JOURNAL_DISK}"
|
||||
exit 1
|
||||
else
|
||||
OSD_JOURNAL="${OSD_JOURNAL_DISK}"
|
||||
if [ -e "${OSD_PATH}/run_mkjournal" ]; then
|
||||
ceph-osd -i ${OSD_ID} --mkjournal
|
||||
rm -rf ${OSD_PATH}/run_mkjournal
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then
|
||||
OSD_JOURNAL="${OSD_JOURNAL}/journal.${OSD_ID}"
|
||||
touch ${OSD_JOURNAL}
|
||||
wait_for_file "${OSD_JOURNAL}"
|
||||
else
|
||||
if [ ! -b "${OSD_JOURNAL}" ]; then
|
||||
echo "ERROR: Unable to find journal device ${OSD_JOURNAL}"
|
||||
exit 1
|
||||
else
|
||||
chown ceph. "${OSD_JOURNAL}"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
wait_for_file "${OSD_JOURNAL}"
|
||||
chown ceph. "${OSD_JOURNAL}"
|
||||
fi
|
||||
fi
|
||||
|
||||
# NOTE(supamatt): Just in case permissions do not align up, we recursively set them correctly.
|
||||
if [ $(stat -c%U ${OSD_PATH}) != ceph ]; then
|
||||
chown -R ceph. ${OSD_PATH};
|
||||
fi
|
||||
|
||||
# NOTE(gagehugo): Writing the OSD_ID to tmp for logging
|
||||
echo "${OSD_ID}" > /tmp/osd-id
|
||||
|
||||
if [ "x${JOURNAL_TYPE}" == "xdirectory" ]; then
|
||||
chown -R ceph. /var/lib/ceph/journal
|
||||
ceph-osd \
|
||||
--cluster ceph \
|
||||
--osd-data ${OSD_PATH} \
|
||||
--osd-journal ${OSD_JOURNAL} \
|
||||
-f \
|
||||
-i ${OSD_ID} \
|
||||
--setuser ceph \
|
||||
--setgroup disk \
|
||||
--mkjournal
|
||||
fi
|
||||
|
||||
exec /usr/bin/ceph-osd \
|
||||
--cluster ${CLUSTER} \
|
||||
${CEPH_OSD_OPTIONS} \
|
||||
-f \
|
||||
-i ${OSD_ID} \
|
||||
--setuser ceph \
|
||||
--setgroup disk & echo $! > /run/ceph-osd.pid
|
||||
wait
|
@ -1,75 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
source /tmp/osd-common-ceph-disk.sh
|
||||
|
||||
set -ex
|
||||
|
||||
: "${OSD_SOFT_FORCE_ZAP:=1}"
|
||||
|
||||
export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
|
||||
|
||||
if [[ -z "${OSD_DEVICE}" ]];then
|
||||
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ ! -b "${OSD_DEVICE}" ]]; then
|
||||
echo "ERROR- The device pointed by OSD_DEVICE ${OSD_DEVICE} doesn't exist !"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CEPH_DISK_OPTIONS=""
|
||||
CEPH_OSD_OPTIONS=""
|
||||
DATA_UUID=$(blkid -o value -s PARTUUID ${OSD_DEVICE}*1)
|
||||
|
||||
udev_settle
|
||||
|
||||
DATA_PART=$(dev_part ${OSD_DEVICE} 1)
|
||||
MOUNTED_PART=${DATA_PART}
|
||||
|
||||
ceph-disk -v \
|
||||
--setuser ceph \
|
||||
--setgroup disk \
|
||||
activate ${CEPH_DISK_OPTIONS} \
|
||||
--no-start-daemon ${DATA_PART}
|
||||
|
||||
OSD_ID=$(grep "${MOUNTED_PART}" /proc/mounts | awk '{print $2}' | grep -oh '[0-9]*')
|
||||
|
||||
OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
|
||||
OSD_KEYRING="${OSD_PATH}/keyring"
|
||||
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
|
||||
OSD_WEIGHT=0
|
||||
# NOTE(supamatt): add or move the OSD's CRUSH location
|
||||
crush_location
|
||||
|
||||
|
||||
# NOTE(supamatt): Just in case permissions do not align up, we recursively set them correctly.
|
||||
if [ $(stat -c%U ${OSD_PATH}) != ceph ]; then
|
||||
chown -R ceph. ${OSD_PATH};
|
||||
fi
|
||||
|
||||
# NOTE(gagehugo): Writing the OSD_ID to tmp for logging
|
||||
echo "${OSD_ID}" > /tmp/osd-id
|
||||
|
||||
exec /usr/bin/ceph-osd \
|
||||
--cluster ${CLUSTER} \
|
||||
${CEPH_OSD_OPTIONS} \
|
||||
-f \
|
||||
-i ${OSD_ID} \
|
||||
--setuser ceph \
|
||||
--setgroup disk & echo $! > /run/ceph-osd.pid
|
||||
wait
|
@ -1,260 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
set -ex
|
||||
export PS4='+${BASH_SOURCE:+$(basename ${BASH_SOURCE}):${LINENO}:}${FUNCNAME:+${FUNCNAME}():} '
|
||||
|
||||
: "${CRUSH_LOCATION:=root=default host=${HOSTNAME}}"
|
||||
: "${OSD_PATH_BASE:=/var/lib/ceph/osd/${CLUSTER}}"
|
||||
: "${CEPH_CONF:="/etc/ceph/${CLUSTER}.conf"}"
|
||||
: "${OSD_BOOTSTRAP_KEYRING:=/var/lib/ceph/bootstrap-osd/${CLUSTER}.keyring}"
|
||||
: "${OSD_JOURNAL_UUID:=$(uuidgen)}"
|
||||
: "${OSD_JOURNAL_SIZE:=$(awk '/^osd_journal_size/{print $3}' ${CEPH_CONF}.template)}"
|
||||
: "${OSD_WEIGHT:=1.0}"
|
||||
|
||||
eval CRUSH_FAILURE_DOMAIN_TYPE=$(cat /etc/ceph/storage.json | python3 -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain"]))')
|
||||
eval CRUSH_FAILURE_DOMAIN_NAME=$(cat /etc/ceph/storage.json | python3 -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_name"]))')
|
||||
eval CRUSH_FAILURE_DOMAIN_BY_HOSTNAME=$(cat /etc/ceph/storage.json | python3 -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["failure_domain_by_hostname"]))')
|
||||
eval CRUSH_FAILURE_DOMAIN_FROM_HOSTNAME_MAP=$(cat /etc/ceph/storage.json | jq '.failure_domain_by_hostname_map."'$HOSTNAME'"')
|
||||
eval DEVICE_CLASS=$(cat /etc/ceph/storage.json | python3 -c 'import sys, json; data = json.load(sys.stdin); print(json.dumps(data["device_class"]))')
|
||||
|
||||
if [[ $(ceph -v | egrep "octopus|nautilus|mimic|luminous" > /dev/null 2>&1; echo $?) -ne 0 ]]; then
|
||||
echo "ERROR- need Luminous/Mimic/Nautilus/Octopus release"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "${HOSTNAME}" ]; then
|
||||
echo "HOSTNAME not set; This will prevent to add an OSD into the CRUSH map"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ ! -e ${CEPH_CONF}.template ]]; then
|
||||
echo "ERROR- ${CEPH_CONF}.template must exist; get it from your existing mon"
|
||||
exit 1
|
||||
else
|
||||
ENDPOINT=$(kubectl get endpoints ceph-mon-discovery -n ${NAMESPACE} -o json | awk -F'"' -v port=${MON_PORT} \
|
||||
-v version=v1 -v msgr_version=v2 \
|
||||
-v msgr2_port=${MON_PORT_V2} \
|
||||
'/"ip"/{print "["version":"$4":"port"/"0","msgr_version":"$4":"msgr2_port"/"0"]"}' | paste -sd',')
|
||||
if [[ "${ENDPOINT}" == "" ]]; then
|
||||
/bin/sh -c -e "cat ${CEPH_CONF}.template | tee ${CEPH_CONF}" || true
|
||||
else
|
||||
/bin/sh -c -e "cat ${CEPH_CONF}.template | sed 's#mon_host.*#mon_host = ${ENDPOINT}#g' | tee ${CEPH_CONF}" || true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Wait (up to 10 seconds) for a path to exist, regardless of its type.
# Arguments: $1 - path to wait for
# Outputs:   one "Waiting for <path> to show up" line on stdout per second
#            while the path is still missing
# Returns:   0 once the path exists; otherwise the exit status of `timeout`
function wait_for_file {
  # The path is handed to the inner shell as a positional parameter instead of
  # being spliced into the command string, so whitespace or shell
  # metacharacters in the path cannot break or alter the polling loop.
  timeout 10 bash -c 'while [ ! -e "${1}" ]; do echo "Waiting for ${1} to show up" && sleep 1 ; done' wait_for_file "${1}"
}
|
||||
|
||||
# Report whether a command can be resolved by the shell.
# Arguments: "$@" - command name(s) handed to `command -v`
# Outputs:   nothing (stdout and stderr are discarded)
# Returns:   the exit status of `command -v`
function is_available {
  # Quoted so that a name containing whitespace or glob characters is looked
  # up as-is instead of being split into several unrelated lookups.
  command -v "$@" &>/dev/null
}
|
||||
|
||||
# Run a ceph CLI command, retrying with a 10 second pause while it fails.
# At most 7 attempts are made (the first try plus 6 retries).
# Arguments: "$@" - arguments passed verbatim to `ceph`
# Outputs:   whatever `ceph` prints; a warning on stderr if every attempt fails
# Returns:   0 in all cases. The previous loop also finished with status 0
#            after exhausting its retries, so this is kept for compatibility.
function ceph_cmd_retry() {
  local attempts=0
  until "ceph" "$@"; do
    if [ "${attempts}" -ge 6 ]; then
      echo "WARNING: 'ceph $*' still failing after $((attempts + 1)) attempts, giving up" >&2
      break
    fi
    sleep 10
    # Plain arithmetic assignment on purpose: `((attempts++))` returns status 1
    # when the old value is 0, which makes a caller running under `set -e`
    # abort on the very first retry.
    attempts=$((attempts + 1))
  done
  return 0
}
|
||||
|
||||
# Create the OSD's CRUSH entry, or move it, to the given location.
# Arguments: $1 - CRUSH location as space-separated key=value pairs
# Globals:   CLUSTER, OSD_ID, OSD_KEYRING, OSD_WEIGHT (read)
function crush_create_or_move {
  local location_spec=${1}
  local -a osd_auth=(--cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}")

  # ${location_spec} is intentionally left unquoted: each key=value pair has
  # to reach ceph as its own argument.
  ceph_cmd_retry "${osd_auth[@]}" osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${location_spec}
}
|
||||
|
||||
# Place this OSD (and its host) beneath a custom failure-domain bucket.
# Arguments: $1 - failure domain type (CRUSH bucket type)
#            $2 - failure domain name (CRUSH bucket name)
# Globals:   CLUSTER, OSD_ID, OSD_KEYRING, HOSTNAME (read)
function crush_add_and_move {
  local domain_type=${1}
  local domain_name=${2}
  local -a osd_auth=(--cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}")

  crush_create_or_move "root=default ${domain_type}=${domain_name} host=${HOSTNAME}"

  # Ask the cluster where the OSD sits now and take the 4th double-quote
  # delimited field of the line(s) mentioning the failure domain type.
  local reported_domain=$(ceph_cmd_retry "${osd_auth[@]}" osd find ${OSD_ID} | grep "${domain_type}" | awk -F '"' '{print $4}')
  if [[ "${reported_domain}" == "${domain_name}" ]]; then
    return
  fi

  # NOTE(supamatt): Manually move the buckets for previously configured CRUSH configurations
  # as create-or-move may not appropriately move them.
  ceph_cmd_retry "${osd_auth[@]}" \
    osd crush add-bucket "${domain_name}" "${domain_type}" || true
  ceph_cmd_retry "${osd_auth[@]}" \
    osd crush move "${domain_name}" root=default || true
  ceph_cmd_retry "${osd_auth[@]}" \
    osd crush move "${HOSTNAME}" "${domain_type}=${domain_name}" || true
}
|
||||
|
||||
# Set the OSD's device class, then add or move it in the CRUSH map according
# to the configured failure-domain settings.
# Globals: CRUSH_FAILURE_DOMAIN_TYPE, CRUSH_FAILURE_DOMAIN_NAME,
#          CRUSH_FAILURE_DOMAIN_BY_HOSTNAME,
#          CRUSH_FAILURE_DOMAIN_FROM_HOSTNAME_MAP, CRUSH_LOCATION, HOSTNAME (read)
function crush_location {
  set_device_class

  # A "host" failure domain needs no extra bucket: use the plain location.
  if [[ "${CRUSH_FAILURE_DOMAIN_TYPE}" == "host" ]]; then
    crush_create_or_move "${CRUSH_LOCATION}"
    return
  fi

  # Precedence: explicit name, then a slice of the hostname, then the
  # per-hostname map; with none of them set fall back to the plain location.
  if [[ "${CRUSH_FAILURE_DOMAIN_NAME}" != "false" ]]; then
    crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_NAME}"
  elif [[ "${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}" != "false" ]]; then
    # The bucket name is "<type>_<characters of HOSTNAME selected by cut -c>".
    crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" \
      "$(echo ${CRUSH_FAILURE_DOMAIN_TYPE}_$(echo ${HOSTNAME} | cut -c ${CRUSH_FAILURE_DOMAIN_BY_HOSTNAME}))"
  elif [[ "${CRUSH_FAILURE_DOMAIN_FROM_HOSTNAME_MAP}" != "null" ]]; then
    crush_add_and_move "${CRUSH_FAILURE_DOMAIN_TYPE}" "${CRUSH_FAILURE_DOMAIN_FROM_HOSTNAME_MAP}"
  else
    # NOTE(supamatt): neither variables are defined then we fall back to default behavior
    crush_create_or_move "${CRUSH_LOCATION}"
  fi
}
|
||||
|
||||
# Calculate proper device names, given a device and partition number.
# Arguments: $1 - device path; may be a symlink (e.g. below /dev/disk/by-*)
#            $2 - partition number
# Outputs:   the partition path on stdout
# Returns:   exits 1, after a message on stderr, when the device is a symlink
#            and no sibling symlink sharing a name prefix with it resolves to
#            the wanted partition
function dev_part {
  local osd_device=${1}
  local osd_partition=${2}

  if [[ -L ${osd_device} ]]; then
    # This device is a symlink. Work out its actual device
    local actual_device=$(readlink -f "${osd_device}")
    local bn=$(basename "${osd_device}")
    local desired_partition
    # Kernel names ending in a digit take a "p" separator before the
    # partition number.
    if [[ "${actual_device:0-1:1}" == [0-9] ]]; then
      desired_partition="${actual_device}p${osd_partition}"
    else
      desired_partition="${actual_device}${osd_partition}"
    fi

    # Now search for a symlink in the directory of $osd_device
    # that has the correct desired partition, and the longest
    # shared prefix with the original symlink
    local symdir=$(dirname "${osd_device}")
    local link=""
    local pfxlen=0
    local option option_name shared
    for option in "${symdir}"/*; do
      # Skip (rather than stop at) dangling entries and the unmatched glob.
      [[ -e ${option} ]] || continue
      [[ $(readlink -f "${option}") == "${desired_partition}" ]] || continue
      # Compare basenames: ${option} carries the directory while ${bn} does not.
      option_name=${option##*/}
      shared=0
      while [[ ${shared} -lt ${#option_name} && ${shared} -lt ${#bn} \
        && "${option_name:shared:1}" == "${bn:shared:1}" ]]; do
        shared=$((shared + 1))
      done
      # Numeric comparison; `>` inside [[ ]] would compare as strings.
      if [[ ${shared} -gt ${pfxlen} ]]; then
        # ${option} already includes ${symdir}.
        link=${option}
        pfxlen=${shared}
      fi
    done
    if [[ ${pfxlen} -eq 0 ]]; then
      >&2 echo "Could not locate appropriate symlink for partition ${osd_partition} of ${osd_device}"
      exit 1
    fi
    echo "${link}"
  elif [[ "${osd_device:0-1:1}" == [0-9] ]]; then
    echo "${osd_device}p${osd_partition}"
  else
    echo "${osd_device}${osd_partition}"
  fi
}
|
||||
|
||||
# Examine a temporarily mounted OSD data directory and delete the journal,
# block.db and block.wal partitions that its symlinks point to.
# Arguments: $1 - mount point of the OSD data partition
# Outputs:   a warning on stderr for a link target that cannot be split into
#            disk and partition number
# Returns:   status of the last command run (0 when nothing had to be deleted)
function zap_extra_partitions {
  local mountpoint=${1}
  local -a disks=()
  local -a parts=()
  local link_name target idx

  # Discover journal, block.db, and block.wal partitions first before deleting anything
  # If the partitions are on the same disk, deleting one can affect discovery of the other(s)
  for link_name in journal block.db block.wal; do
    [ -L "${mountpoint}/${link_name}" ] || continue
    target=$(readlink -m "${mountpoint}/${link_name}")
    # Split only the trailing partition number off the device name:
    # "<disk ending in a digit>p<N>" (e.g. nvme0n1p2) or "<disk><N>" (e.g. sdb2).
    # Stripping every digit would mangle names such as nvme0n1p2.
    if [[ "${target}" =~ ^(.*[0-9])p([0-9]+)$ ]] || [[ "${target}" =~ ^(.*[^0-9])([0-9]+)$ ]]; then
      disks+=("${BASH_REMATCH[1]}")
      parts+=("${BASH_REMATCH[2]}")
    else
      echo "WARNING: cannot derive disk and partition number from ${target}, skipping" >&2
    fi
  done

  # Delete any discovered journal, block.db, and block.wal partitions
  for idx in "${!disks[@]}"; do
    sgdisk -d "${parts[idx]}" "${disks[idx]}"
    /sbin/udevadm settle --timeout=600
    /usr/bin/flock -s "${disks[idx]}" /sbin/partprobe "${disks[idx]}"
    /sbin/udevadm settle --timeout=600
  done
}
|
||||
|
||||
# Run all the commands that ceph-disk zap uses to clear a disk.
# Arguments: $1 - device to wipe
# Returns:   1 (without touching anything) when no device is given; otherwise
#            the status of the final sgdisk call
function disk_zap {
  local device=${1}
  # Refuse to run the destructive commands with an empty target.
  if [[ -z "${device}" ]]; then
    echo "ERROR- disk_zap requires a device path" >&2
    return 1
  fi
  wipefs --all "${device}"
  # Wipe the first 200MB boundary, as Bluestore redeployments will not work otherwise
  dd if=/dev/zero of="${device}" bs=1M count=200
  sgdisk --zap-all -- "${device}"
  sgdisk --clear --mbrtogpt -- "${device}"
}
|
||||
|
||||
# Re-read the partition tables of the OSD's devices, wait for udev to finish,
# and create any missing /dev/disk/by-partuuid symlinks by hand.
# Globals: OSD_DEVICE, OSD_BLUESTORE, BLOCK_DB, BLOCK_WAL, JOURNAL_TYPE (read)
#          OSD_JOURNAL (read; replaced by its resolved path when JOURNAL_TYPE
#          is "block-logical")
function udev_settle {
  partprobe "${OSD_DEVICE}"
  if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
    if [ -n "${BLOCK_DB}" ]; then
      partprobe "${BLOCK_DB}"
    fi
    # Avoid probing the same device twice when WAL and DB share it.
    if [ -n "${BLOCK_WAL}" ] && [ "${BLOCK_WAL}" != "${BLOCK_DB}" ]; then
      partprobe "${BLOCK_WAL}"
    fi
  else
    if [ "x$JOURNAL_TYPE" == "xblock-logical" ] && [ -n "${OSD_JOURNAL}" ]; then
      OSD_JOURNAL=$(readlink -f "${OSD_JOURNAL}")
      if [ -n "${OSD_JOURNAL}" ]; then
        # The journal's parent device is its path with all digits removed.
        local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
        partprobe "${JDEV}"
      fi
    fi
  fi
  # watch the udev event queue, and exit if all current events are handled
  udevadm settle --timeout=600

  # On occasion udev may not make the correct device symlinks for Ceph, just in case we make them manually
  mkdir -p /dev/disk/by-partuuid
  local dev diskdev partnum partuuid
  for dev in $(awk '!/rbd/{print $4}' /proc/partitions | grep "[0-9]"); do
    diskdev=${dev//[!a-z]/}
    partnum=${dev//[!0-9]/}
    partuuid=$(sgdisk -i "${partnum}" "/dev/${diskdev}" | awk '/Partition unique GUID/{print tolower($4)}') || true
    # Without a GUID the link target would be the by-partuuid directory itself
    # and `ln` would drop a stray "<dev>" link inside it.
    if [ -z "${partuuid}" ]; then
      continue
    fi
    # The link may already exist; that is not an error.
    ln -s "../../${dev}" "/dev/disk/by-partuuid/${partuuid}" || true
  done
}
|
||||
|
||||
# Make the OSD's CRUSH device class match ${DEVICE_CLASS}.
# Globals: DEVICE_CLASS, CLUSTER, OSD_ID, OSD_KEYRING (read)
function set_device_class {
  # No class requested: leave the OSD alone.
  [[ -n "${DEVICE_CLASS}" ]] || return 0
  # The class reported by the cluster already matches.
  [[ "${DEVICE_CLASS}" != "$(get_device_class)" ]] || return 0

  local -a osd_auth=(--cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}")
  # The current class is removed first, then the requested one is set.
  ceph_cmd_retry "${osd_auth[@]}" \
    osd crush rm-device-class "osd.${OSD_ID}"
  ceph_cmd_retry "${osd_auth[@]}" \
    osd crush set-device-class "${DEVICE_CLASS}" "osd.${OSD_ID}"
}
|
||||
|
||||
# Print the CRUSH device class the cluster reports for this OSD.
# Globals: CLUSTER, OSD_ID, OSD_KEYRING (read)
# Outputs: the class on stdout, whitespace-normalised by the unquoted echo
function get_device_class {
  local reported_class=$(ceph_cmd_retry --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
    osd crush get-device-class "osd.${OSD_ID}")
  echo ${reported_class}
}
|
@ -1,231 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
set -ex
|
||||
|
||||
source /tmp/osd-common-ceph-disk.sh
|
||||
|
||||
: "${OSD_FORCE_REPAIR:=1}"
|
||||
# We do not want to zap the journal disk, so this option is tracked separately.
|
||||
: "${JOURNAL_FORCE_ZAP:=0}"
|
||||
|
||||
if [ "x${STORAGE_TYPE%-*}" == "xbluestore" ]; then
|
||||
export OSD_BLUESTORE=1
|
||||
fi
|
||||
|
||||
if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
|
||||
export OSD_DEVICE="/var/lib/ceph/osd"
|
||||
else
|
||||
export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
|
||||
fi
|
||||
|
||||
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
|
||||
export OSD_JOURNAL="/var/lib/ceph/journal"
|
||||
else
|
||||
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
|
||||
fi
|
||||
|
||||
# Validate ${OSD_DEVICE}, zap it when OSD_FORCE_REPAIR allows and the on-disk
# state calls for it, then prepare it as an OSD with `ceph-disk prepare`.
# Returns early (leaving the disk untouched) when the device already looks
# like an OSD that should be activated as-is.
# Globals read:    OSD_DEVICE, OSD_BOOTSTRAP_KEYRING, OSD_FORCE_REPAIR,
#                  OSD_BLUESTORE, JOURNAL_TYPE, BLOCK_DB, BLOCK_WAL,
#                  OSD_JOURNAL_UUID, DEVICE_CLASS
# Globals written: CLI_OPTS, OSD_JOURNAL, OSD_JOURNAL_DISK
function osd_disk_prepare {
  local cephFSID osdFSID tmpmnt jdev osd_id

  if [[ -z "${OSD_DEVICE}" ]]; then
    echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
    exit 1
  fi

  if [[ ! -b "${OSD_DEVICE}" ]]; then
    echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
    exit 1
  fi

  if [ ! -e "${OSD_BOOTSTRAP_KEYRING}" ]; then
    echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
    exit 1
  fi
  # ${CLI_OPTS} is intentionally unquoted: it is a space-separated option list.
  timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring "${OSD_BOOTSTRAP_KEYRING}" health || exit 1

  # check device status first
  if ! parted --script "${OSD_DEVICE}" print > /dev/null 2>&1; then
    if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
      echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway"
      disk_zap "${OSD_DEVICE}"
    else
      echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird."
      echo "It would be too dangerous to destroy it without any notification."
      echo "Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk."
      exit 1
    fi
  fi

  # then search for some ceph metadata on the disk
  if [[ "$(parted --script "${OSD_DEVICE}" print | grep -E '^ 1.*ceph data')" ]]; then
    if [[ ${OSD_FORCE_REPAIR} -ne 1 ]]; then
      echo "INFO- It looks like ${OSD_DEVICE} is an OSD, set OSD_FORCE_REPAIR=1 to use this device anyway and zap its content"
      echo "You can also use the disk_zap scenario on the appropriate device to zap it"
      echo "Moving on, trying to activate the OSD now."
      return
    fi

    if [ ! -b "${OSD_DEVICE}1" ]; then
      echo "parted says ${OSD_DEVICE}1 should exist, but we do not see it."
      echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
      echo "Moving on, trying to activate the OSD now."
      return
    fi

    # A failed lookup is handled by the empty check below, not by `set -e`.
    cephFSID=$(ceph-conf --lookup fsid) || true
    if [ -z "${cephFSID}" ]; then
      echo "Unable to determine the FSID of the current cluster."
      echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
      echo "Moving on, trying to activate the OSD now."
      return
    fi

    tmpmnt=$(mktemp -d)
    mount "${OSD_DEVICE}1" "${tmpmnt}"

    if [ "${OSD_BLUESTORE:-0}" -ne 1 ] && [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
      # we only care about journals for filestore.
      if [ -f "${tmpmnt}/whoami" ]; then
        OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal")
        if [ ! -b "${OSD_JOURNAL_DISK}" ]; then
          OSD_JOURNAL=$(readlink -f "${OSD_JOURNAL}")
          jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
          # Equal strings mean the configured journal path has no partition
          # number. Quoted so empty values cannot break the test.
          if [ "${jdev}" == "${OSD_JOURNAL}" ]; then
            echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}."
            echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
            rm -rf "${tmpmnt}/ceph_fsid"
          else
            echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}."
            echo "Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will"
            echo "attempt to recreate the missing journal device partitions."
            osd_journal_create "${OSD_JOURNAL}"
            ln -sf "/dev/disk/by-partuuid/${OSD_JOURNAL_UUID}" "${tmpmnt}/journal"
            echo ${OSD_JOURNAL_UUID} | tee "${tmpmnt}/journal_uuid"
            chown ceph. "${OSD_JOURNAL}"
            # During OSD start we will format the journal and set the fsid
            touch "${tmpmnt}/run_mkjournal"
          fi
        fi
      else
        echo "It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata."
        echo "The device may contain inconsistent metadata or be corrupted."
        echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
        rm -rf "${tmpmnt}/ceph_fsid"
      fi
    fi

    if [ -f "${tmpmnt}/ceph_fsid" ]; then
      osdFSID=$(cat "${tmpmnt}/ceph_fsid")
      # Quoted so an empty ceph_fsid file is treated as "different cluster"
      # instead of making `[` fail with a syntax error.
      if [ "${osdFSID}" != "${cephFSID}" ]; then
        echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
        echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
        echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
        zap_extra_partitions "${tmpmnt}"
        umount "${tmpmnt}"
        disk_zap "${OSD_DEVICE}"
      else
        umount "${tmpmnt}"
        echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
        echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
        echo "Moving on, trying to activate the OSD now."
        return
      fi
    else
      echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
      echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
      zap_extra_partitions "${tmpmnt}"
      umount "${tmpmnt}"
      disk_zap "${OSD_DEVICE}"
    fi
  fi

  if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
    CLI_OPTS="${CLI_OPTS} --bluestore"

    if [ -n "${BLOCK_DB}" ]; then
      CLI_OPTS="${CLI_OPTS} --block.db ${BLOCK_DB}"
    fi

    if [ -n "${BLOCK_WAL}" ]; then
      CLI_OPTS="${CLI_OPTS} --block.wal ${BLOCK_WAL}"
    fi

    CLI_OPTS="${CLI_OPTS} ${OSD_DEVICE}"
  else
    # we only care about journals for filestore.
    osd_journal_prepare

    CLI_OPTS="${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE}"

    if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
      CLI_OPTS="${CLI_OPTS} --journal-file"
    else
      CLI_OPTS="${CLI_OPTS} ${OSD_JOURNAL}"
    fi
  fi

  udev_settle
  ceph-disk -v prepare ${CLI_OPTS}

  if [ -n "${DEVICE_CLASS}" ]; then
    # The glob must stay outside the quotes, otherwise cat looks for a file
    # literally named "*" and the OSD id is always empty.
    osd_id=$(cat /var/lib/ceph/osd/*/whoami 2>/dev/null) || true
    if [ -z "${osd_id}" ]; then
      echo "WARNING- no OSD id found under /var/lib/ceph/osd, not setting device class ${DEVICE_CLASS}" >&2
    else
      ceph osd crush rm-device-class osd."${osd_id}"
      ceph osd crush set-device-class "${DEVICE_CLASS}" osd."${osd_id}"
    fi
  fi
}
|
||||
|
||||
# Create a "ceph journal" GPT partition for the given journal path.
# Arguments: $1 - journal partition path; its digits are taken as the
#                 partition number, the remainder as the backing device
# Globals:   OSD_JOURNAL_SIZE, OSD_JOURNAL_UUID (read); OSD_JOURNAL (written)
# Returns:   exits 1 when the backing device is not a block device
function osd_journal_create {
  local osd_journal=${1}
  local osd_journal_partition=$(echo ${osd_journal} | sed 's/[^0-9]//g')
  local jdev=$(echo ${osd_journal} | sed 's/[0-9]//g')
  if [ -b "${jdev}" ]; then
    # --change-name is double-quoted so the partition number is expanded;
    # single quotes passed the literal text "${osd_journal_partition}" to sgdisk.
    sgdisk --new="${osd_journal_partition}:0:+${OSD_JOURNAL_SIZE}M" \
      --change-name="${osd_journal_partition}:ceph journal" \
      --partition-guid="${osd_journal_partition}:${OSD_JOURNAL_UUID}" \
      --typecode="${osd_journal_partition}:45b0969e-9b03-4f30-b4c6-b4b80ceff106" --mbrtogpt -- "${jdev}"
    OSD_JOURNAL=$(dev_part "${jdev}" "${osd_journal_partition}")
    udev_settle
  else
    echo "The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system."
    exit 1
  fi
}
|
||||
|
||||
# Resolve (and, if needed, create) the filestore journal, then flag the
# ceph-disk invocation as filestore.
# Globals: OSD_JOURNAL (read/written), OSD_JOURNAL_PARTITION (written),
#          JOURNAL_TYPE, OSD_DEVICE (read), CLI_OPTS (appended to)
function osd_journal_prepare {
  if [ -z "${OSD_JOURNAL}" ]; then
    if [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
      echo "No journal device specified. OSD and journal will share ${OSD_DEVICE}"
      echo "For better performance on HDD, consider moving your journal to a separate device"
    fi
  else
    if [ -b ${OSD_JOURNAL} ]; then
      OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
      # Digits of the resolved path form the partition number, the rest the device.
      OSD_JOURNAL_PARTITION=${OSD_JOURNAL//[^0-9]/}
      local journal_device=${OSD_JOURNAL//[0-9]/}
      if [ -z "${OSD_JOURNAL_PARTITION}" ]; then
        OSD_JOURNAL=$(dev_part ${journal_device} ${OSD_JOURNAL_PARTITION})
      fi
    elif [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
      # The journal path is not (yet) a block device: create the journal partition.
      OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
      osd_journal_create ${OSD_JOURNAL}
    fi
    chown ceph. ${OSD_JOURNAL}
  fi
  CLI_OPTS="${CLI_OPTS} --filestore"
}
|
||||
|
||||
# Directory-backed OSDs have no block device to prepare; every other storage
# type goes through osd_disk_prepare.
if [[ "${STORAGE_TYPE%-*}" != "directory" ]]; then
  osd_disk_prepare
fi
|
@ -0,0 +1,237 @@
|
||||
#!/bin/bash
|
||||
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
set -ex
|
||||
|
||||
# We do not want to zap the journal disk, so this option is tracked separately.
|
||||
: "${JOURNAL_FORCE_ZAP:=0}"
|
||||
|
||||
export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
|
||||
export OSD_BLUESTORE=0
|
||||
|
||||
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
|
||||
export OSD_JOURNAL="/var/lib/ceph/journal"
|
||||
else
|
||||
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
|
||||
fi
|
||||
|
||||
#######################################
# Inspect the filestore data device and, when needed, prepare it with
# ceph-volume.
# Globals read:
#   OSD_DEVICE, OSD_BOOTSTRAP_KEYRING, OSD_FORCE_REPAIR, OSD_JOURNAL,
#   OSD_JOURNAL_UUID, JOURNAL_TYPE, DEVICE_CLASS, CLI_OPTS
# Globals written:
#   CEPH_DISK_USED, CEPH_LVM_PREPARE, DISK_ZAPPED, OSD_ID, OSD_FSID,
#   CLUSTER_FSID, DM_NUM, DM_DEV, OSD_JOURNAL, OSD_JOURNAL_DISK, osdFSID,
#   vg_name, random_uuid, lv_name, OSD_VG, OSD_LV, CLI_OPTS
# Exits 1 when the device, the bootstrap keyring or the cluster health check
# is unusable, or when the device looks inconsistent and OSD_FORCE_REPAIR is
# not 1. Returns early (status 0) from the repair step when the cluster FSID
# is unknown or the detected data device node is missing.
#######################################
function osd_disk_prepare {
  if [[ -z "${OSD_DEVICE}" ]]; then
    echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
    exit 1
  fi

  if [[ ! -b "${OSD_DEVICE}" ]]; then
    echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
    exit 1
  fi

  if [ ! -e "${OSD_BOOTSTRAP_KEYRING}" ]; then
    echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
    exit 1
  fi
  # CLI_OPTS is a whitespace-separated option string; it is left unquoted so
  # that it splits into individual arguments.
  timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring "${OSD_BOOTSTRAP_KEYRING}" health || exit 1

  # Look for existing ceph metadata on the disk/LVM before deciding what to do.
  CEPH_DISK_USED=0
  CEPH_LVM_PREPARE=1
  udev_settle
  OSD_ID=$(get_osd_id_from_device "${OSD_DEVICE}")
  OSD_FSID=$(get_cluster_fsid_from_device "${OSD_DEVICE}")
  CLUSTER_FSID=$(ceph-conf --lookup fsid)
  DISK_ZAPPED=0

  if [[ -n "${OSD_ID}" ]]; then
    # An OSD id was found on the device: derive the device-mapper node from
    # the child name reported by lsblk. The inner substitution is left
    # unquoted exactly as before so grep keeps seeing the same arguments.
    DM_NUM=$(dmsetup ls | grep $(lsblk -J ${OSD_DEVICE} | jq -r '.blockdevices[].children[].name') | awk '{print $2}' | cut -d':' -f2 | cut -d')' -f1)
    DM_DEV="/dev/dm-${DM_NUM}"
  elif [[ $(sgdisk --print "${OSD_DEVICE}" | grep "F800") ]]; then
    # The partition table has an entry matching "F800"; its number (first
    # column of the sgdisk listing) appended to the device is the data device.
    DM_DEV=${OSD_DEVICE}$(sgdisk --print "${OSD_DEVICE}" | grep "F800" | awk '{print $1}')
    CEPH_DISK_USED=1
  else
    if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
      echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway"
      disk_zap "${OSD_DEVICE}"
      DISK_ZAPPED=1
    else
      echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird."
      echo "It would be too dangerous to destroy it without any notification."
      echo "Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk."
      exit 1
    fi
  fi

  if [ "${OSD_FORCE_REPAIR}" -eq 1 ] && [ -n "${DM_DEV}" ]; then
    if [ -b "${DM_DEV}" ]; then
      # A failed lookup must fall through to the "Unable to determine" branch
      # below rather than abort under `set -e`, hence the `|| true`.
      local cephFSID
      cephFSID=$(ceph-conf --lookup fsid) || true
      if [ -n "${cephFSID}" ]; then
        local tmpmnt
        tmpmnt=$(mktemp -d)
        mount "${DM_DEV}" "${tmpmnt}"
        if [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
          if [ -f "${tmpmnt}/whoami" ]; then
            OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal")
            if [ ! -b "${OSD_JOURNAL_DISK}" ]; then
              OSD_JOURNAL=$(readlink -f "${OSD_JOURNAL}")
              # jdev is OSD_JOURNAL with every digit removed; equality means
              # the configured journal path names no partition number.
              local jdev=${OSD_JOURNAL//[0-9]/}
              if [ "${jdev}" == "${OSD_JOURNAL}" ]; then
                echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}."
                echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
                # Removing ceph_fsid routes the FSID check below to its
                # "no FSID" branch, which zaps the device.
                rm -rf "${tmpmnt}/ceph_fsid"
              else
                echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}."
                echo "Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will"
                echo "attempt to recreate the missing journal device partitions."
                osd_journal_create "${OSD_JOURNAL}"
                ln -sf "/dev/disk/by-partuuid/${OSD_JOURNAL_UUID}" "${tmpmnt}/journal"
                echo "${OSD_JOURNAL_UUID}" | tee "${tmpmnt}/journal_uuid"
                chown ceph. "${OSD_JOURNAL}"
                # During OSD start we will format the journal and set the fsid
                touch "${tmpmnt}/run_mkjournal"
              fi
            fi
          else
            echo "It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata."
            echo "The device may contain inconsistent metadata or be corrupted."
            echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
            rm -rf "${tmpmnt}/ceph_fsid"
          fi
        fi
        if [ -f "${tmpmnt}/ceph_fsid" ]; then
          osdFSID=$(cat "${tmpmnt}/ceph_fsid")
          # NOTE(review): operands deliberately left unquoted. With an empty
          # ceph_fsid file this test errors out and control falls to the
          # "belonging to a this ceph cluster" branch (no zap); quoting would
          # turn that case into a zap. Confirm the intended behaviour before
          # changing it.
          if [ ${osdFSID} != ${cephFSID} ]; then
            echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
            echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
            echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
            zap_extra_partitions "${tmpmnt}"
            umount "${tmpmnt}"
            disk_zap "${OSD_DEVICE}"
          else
            umount "${tmpmnt}"
            echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
            echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
            echo "Moving on, trying to activate the OSD now."
          fi
        else
          echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
          echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
          zap_extra_partitions "${tmpmnt}"
          umount "${tmpmnt}"
          disk_zap "${OSD_DEVICE}"
        fi
      else
        echo "Unable to determine the FSID of the current cluster."
        echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
        echo "Moving on, trying to activate the OSD now."
        return
      fi
    else
      echo "parted says ${DM_DEV} should exist, but we do not see it."
      echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
      echo "Moving on, trying to activate the OSD now."
      return
    fi
  else
    echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM"
    echo "Moving on, trying to prepare and activate the OSD LVM now."
  fi

  if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
    udev_settle
    CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
    ceph-volume simple scan --force "${OSD_DEVICE}$(sgdisk --print "${OSD_DEVICE}" | grep "F800" | awk '{print $1}')"
  elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
    udev_settle
    vg_name=$(get_vg_name_from_device "${OSD_DEVICE}")
    if [[ "${vg_name}" ]]; then
      OSD_VG=${vg_name}
    else
      # No volume group reported yet: create one under a temporary name, then
      # rename it to whatever get_vg_name_from_device reports afterwards.
      random_uuid=$(uuidgen)
      vgcreate "ceph-vg-${random_uuid}" "${OSD_DEVICE}"
      vg_name=$(get_vg_name_from_device "${OSD_DEVICE}")
      vgrename "ceph-vg-${random_uuid}" "${vg_name}"
      OSD_VG=${vg_name}
    fi
    lv_name=$(get_lv_name_from_device "${OSD_DEVICE}" lv)
    # Create the logical volume only when lvdisplay does not list it yet.
    if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep "${lv_name}")" ]]; then
      lvcreate --yes -l 100%FREE -n "${lv_name}" "${OSD_VG}"
    fi
    OSD_LV=${OSD_VG}/${lv_name}
    CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
    CEPH_LVM_PREPARE=1
    udev_settle
  fi

  if [ "${CEPH_DISK_USED}" -eq 0 ]; then
    # A physical volume whose VG name contains "ceph" is treated as already
    # prepared, so the ceph-volume prepare step below is skipped.
    if pvdisplay -ddd -v "${OSD_DEVICE}" | awk '/VG Name/{print $3}' | grep "ceph"; then
      CEPH_LVM_PREPARE=0
    fi
  fi

  osd_journal_prepare
  CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}"
  udev_settle

  if [ -n "${DEVICE_CLASS}" ]; then
    CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
  fi

  if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
    # CLI_OPTS is intentionally unquoted (option string, see above).
    ceph-volume lvm -v prepare ${CLI_OPTS}
    udev_settle
  fi
}
|
||||
|
||||
#######################################
# Create a journal partition with sgdisk.
# Arguments:
#   $1 - journal partition path. All digits in the path form the partition
#        number; the path with every digit removed is the backing device.
# Globals read:
#   OSD_JOURNAL_SIZE - partition size, passed to sgdisk as "+<size>M"
#   OSD_JOURNAL_UUID - GUID assigned to the new partition
# Globals written:
#   OSD_JOURNAL      - set to the output of dev_part for the new partition
# Exits 1 when the backing device is not a block device.
#######################################
function osd_journal_create {
  local osd_journal=${1}
  local osd_journal_partition=${osd_journal//[^0-9]/}
  local jdev=${osd_journal//[0-9]/}
  if [[ -b "${jdev}" ]]; then
    # --change-name must be double-quoted: with single quotes sgdisk received
    # the literal text "${osd_journal_partition}" instead of the number.
    sgdisk --new="${osd_journal_partition}:0:+${OSD_JOURNAL_SIZE}M" \
      --change-name="${osd_journal_partition}:ceph journal" \
      --partition-guid="${osd_journal_partition}:${OSD_JOURNAL_UUID}" \
      --typecode="${osd_journal_partition}:45b0969e-9b03-4f30-b4c6-b4b80ceff106" \
      --mbrtogpt -- "${jdev}"
    OSD_JOURNAL=$(dev_part "${jdev}" "${osd_journal_partition}")
    udev_settle
  else
    echo "The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system."
    exit 1
  fi
}
|
||||
|
||||
#######################################
# Resolve (and if necessary create) the filestore journal, then append
# "--filestore" to CLI_OPTS.
# Globals read:
#   OSD_JOURNAL, JOURNAL_TYPE, OSD_DEVICE, CLI_OPTS
# Globals written:
#   OSD_JOURNAL, OSD_JOURNAL_PARTITION, CLI_OPTS
#######################################
function osd_journal_prepare {
  if [[ -n "${OSD_JOURNAL}" ]]; then
    if [[ -b "${OSD_JOURNAL}" ]]; then
      OSD_JOURNAL=$(readlink -f "${OSD_JOURNAL}")
      # Digits in the resolved path are taken as the partition number, the
      # remainder as the backing device.
      OSD_JOURNAL_PARTITION=${OSD_JOURNAL//[^0-9]/}
      local jdev=${OSD_JOURNAL//[0-9]/}
      if [[ -z "${OSD_JOURNAL_PARTITION}" ]]; then
        # NOTE(review): the partition number is empty in this branch, so
        # dev_part only receives a meaningful device argument - confirm this
        # is what dev_part expects for a bare journal device.
        OSD_JOURNAL=$(dev_part "${jdev}" "${OSD_JOURNAL_PARTITION}")
      fi
    elif [[ "${JOURNAL_TYPE}" != "directory" ]]; then
      # The journal path is not a block device yet and the journal is not
      # directory-backed: create the partition until the device node shows up.
      OSD_JOURNAL=$(readlink -f "${OSD_JOURNAL}")
      until [[ -b "${OSD_JOURNAL}" ]]; do
        osd_journal_create "${OSD_JOURNAL}"
      done
    fi
    chown ceph. "${OSD_JOURNAL}"
  elif [[ "${JOURNAL_TYPE}" != "directory" ]]; then
    echo "No journal device specified. OSD and journal will share ${OSD_DEVICE}"
    echo "For better performance on HDD, consider moving your journal to a separate device"
  fi
  CLI_OPTS="${CLI_OPTS} --filestore"
}
|
@ -0,0 +1,191 @@
|
||||
#!/bin/bash
|
||||
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
set -ex

# Resolve the data device to its canonical path. The command substitution is
# kept inside `export` on purpose: a failing readlink (e.g. an empty
# STORAGE_LOCATION) then leaves OSD_DEVICE empty instead of aborting the
# script under `set -e`.
export OSD_DEVICE=$(readlink -f "${STORAGE_LOCATION}")
export OSD_BLUESTORE=1
|
||||
|
||||
#######################################
# Inspect the bluestore data device and, when needed, prepare it (plus the
# optional block.db / block.wal devices) with ceph-volume.
# Globals read:
#   OSD_DEVICE, OSD_BOOTSTRAP_KEYRING, OSD_FORCE_REPAIR, BLOCK_DB,
#   BLOCK_DB_SIZE, BLOCK_WAL, BLOCK_WAL_SIZE, DEVICE_CLASS, CLI_OPTS
# Globals written:
#   CEPH_DISK_USED, CEPH_LVM_PREPARE, DISK_ZAPPED, OSD_ID, OSD_FSID,
#   CLUSTER_FSID, DM_DEV, osdFSID, vg_name, random_uuid, lv_name, OSD_VG,
#   OSD_LV, block_db_string, block_wal_string, CLI_OPTS
# Exits 1 when the device, the bootstrap keyring or the cluster health check
# is unusable. Returns early (status 0) from the repair step when the cluster
# FSID is unknown or the detected data device node is missing.
#######################################
function osd_disk_prepare {
  if [[ -z "${OSD_DEVICE}" ]]; then
    echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
    exit 1
  fi

  if [[ ! -b "${OSD_DEVICE}" ]]; then
    echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
    exit 1
  fi

  if [ ! -e "${OSD_BOOTSTRAP_KEYRING}" ]; then
    echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
    exit 1
  fi
  # CLI_OPTS is a whitespace-separated option string; it is left unquoted so
  # that it splits into individual arguments.
  timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring "${OSD_BOOTSTRAP_KEYRING}" health || exit 1

  # Look for existing ceph metadata on the disk/LVM before deciding what to do.
  CEPH_DISK_USED=0
  CEPH_LVM_PREPARE=1
  udev_settle
  OSD_ID=$(get_osd_id_from_device "${OSD_DEVICE}")
  OSD_FSID=$(get_cluster_fsid_from_device "${OSD_DEVICE}")
  CLUSTER_FSID=$(ceph-conf --lookup fsid)
  DISK_ZAPPED=0

  if [[ -n "${OSD_FSID}" ]]; then
    if [[ "${OSD_FSID}" == "${CLUSTER_FSID}" ]]; then
      if [[ -n "${OSD_ID}" ]]; then
        # The device carries this cluster's FSID; skip prepare only when the
        # cluster itself still lists the OSD id.
        if ceph --name client.bootstrap-osd --keyring "${OSD_BOOTSTRAP_KEYRING}" osd ls | grep -w "${OSD_ID}"; then
          echo "Running bluestore mode and ${OSD_DEVICE} already bootstrapped"
          CEPH_LVM_PREPARE=0
        elif [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
          echo "OSD initialized for this cluster, but OSD ID not found in the cluster, reinitializing"
        else
          echo "OSD initialized for this cluster, but OSD ID not found in the cluster"
        fi
      fi
    else
      echo "OSD initialized for a different cluster, zapping it"
      disk_zap "${OSD_DEVICE}"
      udev_settle
    fi
  elif [[ $(sgdisk --print "${OSD_DEVICE}" | grep "F800") ]]; then
    # The partition table has an entry matching "F800"; its number (first
    # column of the sgdisk listing) appended to the device is the data device.
    DM_DEV=${OSD_DEVICE}$(sgdisk --print "${OSD_DEVICE}" | grep "F800" | awk '{print $1}')
    CEPH_DISK_USED=1
  else
    # NOTE(review): CEPH_DISK_USED is still 0 whenever this branch is reached
    # (it is only set to 1 in the elif above), so the body below never runs.
    # Kept as-is; confirm the intended condition.
    if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
      if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
        echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled"
        disk_zap "${OSD_DEVICE}"
      else
        echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled."
        echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk."
        exit 1
      fi
    fi
  fi

  if [ "${OSD_FORCE_REPAIR}" -eq 1 ] && [ -n "${DM_DEV}" ]; then
    if [ -b "${DM_DEV}" ]; then
      # A failed lookup must fall through to the "Unable to determine" branch
      # below rather than abort under `set -e`, hence the `|| true`.
      local cephFSID
      cephFSID=$(ceph-conf --lookup fsid) || true
      if [ -n "${cephFSID}" ]; then
        local tmpmnt
        tmpmnt=$(mktemp -d)
        mount "${DM_DEV}" "${tmpmnt}"
        if [ -f "${tmpmnt}/ceph_fsid" ]; then
          osdFSID=$(cat "${tmpmnt}/ceph_fsid")
          # NOTE(review): operands deliberately left unquoted. With an empty
          # ceph_fsid file this test errors out and control falls to the
          # "belonging to a this ceph cluster" branch (no zap); quoting would
          # turn that case into a zap. Confirm the intended behaviour before
          # changing it.
          if [ ${osdFSID} != ${cephFSID} ]; then
            echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
            echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
            echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
            zap_extra_partitions "${tmpmnt}"
            umount "${tmpmnt}"
            disk_zap "${OSD_DEVICE}"
          else
            umount "${tmpmnt}"
            echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
            echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
            echo "Moving on, trying to activate the OSD now."
          fi
        else
          echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
          echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
          zap_extra_partitions "${tmpmnt}"
          umount "${tmpmnt}"
          disk_zap "${OSD_DEVICE}"
        fi
      else
        echo "Unable to determine the FSID of the current cluster."
        echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
        echo "Moving on, trying to activate the OSD now."
        return
      fi
    else
      echo "parted says ${DM_DEV} should exist, but we do not see it."
      echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
      echo "Moving on, trying to activate the OSD now."
      return
    fi
  else
    echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM"
    echo "Moving on, trying to prepare and activate the OSD LVM now."
  fi

  if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
    udev_settle
    CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
    ceph-volume simple scan --force "${OSD_DEVICE}$(sgdisk --print "${OSD_DEVICE}" | grep "F800" | awk '{print $1}')"
  elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
    udev_settle
    vg_name=$(get_vg_name_from_device "${OSD_DEVICE}")
    if [[ "${vg_name}" ]]; then
      OSD_VG=${vg_name}
    else
      # No volume group reported yet: create one under a temporary name, then
      # rename it to whatever get_vg_name_from_device reports afterwards.
      random_uuid=$(uuidgen)
      vgcreate "ceph-vg-${random_uuid}" "${OSD_DEVICE}"
      vg_name=$(get_vg_name_from_device "${OSD_DEVICE}")
      vgrename "ceph-vg-${random_uuid}" "${vg_name}"
      OSD_VG=${vg_name}
    fi
    lv_name=$(get_lv_name_from_device "${OSD_DEVICE}" lv)
    # Create the logical volume only when lvdisplay does not list it yet.
    if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep "${lv_name}")" ]]; then
      lvcreate --yes -l 100%FREE -n "${lv_name}" "${OSD_VG}"
    fi
    OSD_LV=${OSD_VG}/${lv_name}
    CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
    CEPH_LVM_PREPARE=1
    udev_settle
  fi

  if [ "${CEPH_DISK_USED}" -eq 0 ]; then
    # "<field 2>-<field 3>" of the "/"-separated path, e.g. /dev/sdc -> dev-sdc.
    # Not read again in this function.
    if [[ -n "${BLOCK_DB}" ]]; then
      block_db_string=$(echo ${BLOCK_DB} | awk -F "/" '{print $2 "-" $3}')
    fi
    if [[ -n "${BLOCK_WAL}" ]]; then
      block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2 "-" $3}')
    fi
    # Prepare whichever of the DB / WAL devices is configured, DB first.
    if [[ -n "${BLOCK_DB}" ]]; then
      prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
    fi
    if [[ -n "${BLOCK_WAL}" ]]; then
      prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
    fi
  else
    # A physical volume whose VG name contains "ceph" is treated as already
    # prepared, so the ceph-volume prepare step below is skipped.
    if pvdisplay -ddd -v "${OSD_DEVICE}" | awk '/VG Name/{print $3}' | grep "ceph"; then
      CEPH_LVM_PREPARE=0
    fi
  fi

  CLI_OPTS="${CLI_OPTS} --bluestore"

  if [ -n "${BLOCK_DB}" ]; then
    CLI_OPTS="${CLI_OPTS} --block.db ${BLOCK_DB}"
  fi

  if [ -n "${BLOCK_WAL}" ]; then
    CLI_OPTS="${CLI_OPTS} --block.wal ${BLOCK_WAL}"
  fi

  if [ -n "${DEVICE_CLASS}" ]; then
    CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
  fi

  if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
    # CLI_OPTS is intentionally unquoted (option string, see above).
    ceph-volume lvm -v prepare ${CLI_OPTS}
    udev_settle
  fi
}
|
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
{{/*
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
set -ex

# We do not want to zap the journal disk, so this option is tracked separately.
: "${JOURNAL_FORCE_ZAP:=0}"

# Directory-backed OSDs use fixed data and journal paths.
export OSD_DEVICE="/var/lib/ceph/osd"
export OSD_JOURNAL="/var/lib/ceph/journal"
|
@ -18,25 +18,9 @@ set -ex
|
||||
|
||||
source /tmp/osd-common-ceph-volume.sh
|
||||
|
||||
source /tmp/init-ceph-volume-helper-${STORAGE_TYPE}.sh
|
||||
|
||||
: "${OSD_FORCE_REPAIR:=0}"
|
||||
# We do not want to zap the journal disk, so this option is tracked separately.
|
||||
: "${JOURNAL_FORCE_ZAP:=0}"
|
||||
|
||||
if [ "x${STORAGE_TYPE%-*}" == "xbluestore" ]; then
|
||||
export OSD_BLUESTORE=1
|
||||
fi
|
||||
|
||||
if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
|
||||
export OSD_DEVICE="/var/lib/ceph/osd"
|
||||
else
|
||||
export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
|
||||
fi
|
||||
|
||||
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
|
||||
export OSD_JOURNAL="/var/lib/ceph/journal"
|
||||
else
|
||||
export OSD_JOURNAL=$(readlink -f ${JOURNAL_LOCATION})
|
||||
fi
|
||||
|
||||
# Set up aliases for functions that require disk synchronization
|
||||
alias rename_vg='locked rename_vg'
|
||||
@ -157,34 +141,6 @@ function update_lv_tags {
|
||||
fi
|
||||
}
|
||||
|
||||
# Settle LVM changes before inspecting volumes
|
||||
udev_settle
|
||||
|
||||
# Rename VGs first
|
||||
if [[ "${OSD_DEVICE}" ]]; then
|
||||
OSD_DEVICE=$(readlink -f ${OSD_DEVICE})
|
||||
rename_vg ${OSD_DEVICE}
|
||||
fi
|
||||
|
||||
if [[ "${BLOCK_DB}" ]]; then
|
||||
BLOCK_DB=$(readlink -f ${BLOCK_DB})
|
||||
rename_vg ${BLOCK_DB}
|
||||
fi
|
||||
|
||||
if [[ "${BLOCK_WAL}" ]]; then
|
||||
BLOCK_WAL=$(readlink -f ${BLOCK_WAL})
|
||||
rename_vg ${BLOCK_WAL}
|
||||
fi
|
||||
|
||||
# Rename LVs after VGs are correct
|
||||
rename_lvs ${OSD_DEVICE}
|
||||
|
||||
# Update tags (all VG and LV names should be correct before calling this)
|
||||
update_lv_tags ${OSD_DEVICE}
|
||||
|
||||
# Settle LVM changes again after any changes have been made
|
||||
udev_settle
|
||||
|
||||
function prep_device {
|
||||
local BLOCK_DEVICE=$1
|
||||
local BLOCK_DEVICE_SIZE=$2
|
||||
@ -242,281 +198,42 @@ function prep_device {
|
||||
udev_settle
|
||||
}
|
||||
|
||||
function osd_disk_prepare {
|
||||
if [[ -z "${OSD_DEVICE}" ]]; then
|
||||
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
|
||||
exit 1
|
||||
fi
|
||||
#######################################################################
|
||||
# Main program
|
||||
#######################################################################
|
||||
|
||||
if [[ ! -b "${OSD_DEVICE}" ]]; then
|
||||
echo "ERROR- The device pointed by OSD_DEVICE ($OSD_DEVICE) doesn't exist !"
|
||||
exit 1
|
||||
fi
|
||||
if [[ "${STORAGE_TYPE}" != "directory" ]]; then
|
||||
|
||||
if [ ! -e $OSD_BOOTSTRAP_KEYRING ]; then
|
||||
echo "ERROR- $OSD_BOOTSTRAP_KEYRING must exist. You can extract it from your current monitor by running 'ceph auth get client.bootstrap-osd -o $OSD_BOOTSTRAP_KEYRING'"
|
||||
exit 1
|
||||
fi
|
||||
timeout 10 ceph ${CLI_OPTS} --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING health || exit 1
|
||||
|
||||
#search for some ceph metadata on the disk based on the status of the disk/lvm in filestore
|
||||
CEPH_DISK_USED=0
|
||||
CEPH_LVM_PREPARE=1
|
||||
# Settle LVM changes before inspecting volumes
|
||||
udev_settle
|
||||
OSD_ID=$(get_osd_id_from_device ${OSD_DEVICE})
|
||||
OSD_FSID=$(get_cluster_fsid_from_device ${OSD_DEVICE})
|
||||
CLUSTER_FSID=$(ceph-conf --lookup fsid)
|
||||
DISK_ZAPPED=0
|
||||
|
||||
if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
|
||||
if [[ ! -z ${OSD_ID} ]]; then
|
||||
DM_NUM=$(dmsetup ls | grep $(lsblk -J ${OSD_DEVICE} | jq -r '.blockdevices[].children[].name') | awk '{print $2}' | cut -d':' -f2 | cut -d')' -f1)
|
||||
DM_DEV="/dev/dm-"${DM_NUM}
|
||||
elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
|
||||
DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
|
||||
CEPH_DISK_USED=1
|
||||
else
|
||||
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
|
||||
echo "It looks like ${OSD_DEVICE} isn't consistent, however OSD_FORCE_REPAIR is enabled so we are zapping the device anyway"
|
||||
disk_zap ${OSD_DEVICE}
|
||||
DISK_ZAPPED=1
|
||||
else
|
||||
echo "Regarding parted, device ${OSD_DEVICE} is inconsistent/broken/weird."
|
||||
echo "It would be too dangerous to destroy it without any notification."
|
||||
echo "Please set OSD_FORCE_REPAIR to '1' if you really want to zap this disk."
|
||||
exit 1
|
||||
# Rename VGs first
|
||||
if [[ "${OSD_DEVICE}" ]]; then
|
||||
OSD_DEVICE=$(readlink -f ${OSD_DEVICE})
|
||||
rename_vg ${OSD_DEVICE}
|
||||
fi
|
||||
|
||||
if [[ "${BLOCK_DB}" ]]; then
|
||||
BLOCK_DB=$(readlink -f ${BLOCK_DB})
|
||||
rename_vg ${BLOCK_DB}
|
||||
fi
|
||||
else
|
||||
if [[ ! -z "${OSD_FSID}" ]]; then
|
||||
if [[ "${OSD_FSID}" == "${CLUSTER_FSID}" ]]; then
|
||||
if [[ ! -z "${OSD_ID}" ]]; then
|
||||
if ceph --name client.bootstrap-osd --keyring $OSD_BOOTSTRAP_KEYRING osd ls |grep -w ${OSD_ID}; then
|
||||
echo "Running bluestore mode and ${OSD_DEVICE} already bootstrapped"
|
||||
CEPH_LVM_PREPARE=0
|
||||
elif [[ $OSD_FORCE_REPAIR -eq 1 ]]; then
|
||||
echo "OSD initialized for this cluster, but OSD ID not found in the cluster, reinitializing"
|
||||
else
|
||||
echo "OSD initialized for this cluster, but OSD ID not found in the cluster"
|
||||
|
||||
if [[ "${BLOCK_WAL}" ]]; then
|
||||
BLOCK_WAL=$(readlink -f ${BLOCK_WAL})
|
||||
rename_vg ${BLOCK_WAL}
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "OSD initialized for a different cluster, zapping it"
|
||||
disk_zap ${OSD_DEVICE}
|
||||
|
||||
# Rename LVs after VGs are correct
|
||||
rename_lvs ${OSD_DEVICE}
|
||||
|
||||
# Update tags (all VG and LV names should be correct before calling this)
|
||||
update_lv_tags ${OSD_DEVICE}
|
||||
|
||||
# Settle LVM changes again after any changes have been made
|
||||
udev_settle
|
||||
fi
|
||||
elif [[ $(sgdisk --print ${OSD_DEVICE} | grep "F800") ]]; then
|
||||
DM_DEV=${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
|
||||
CEPH_DISK_USED=1
|
||||
else
|
||||
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
|
||||
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
|
||||
echo "${OSD_DEVICE} isn't clean, zapping it because OSD_FORCE_REPAIR is enabled"
|
||||
disk_zap ${OSD_DEVICE}
|
||||
else
|
||||
echo "${OSD_DEVICE} isn't clean, but OSD_FORCE_REPAIR isn't enabled."
|
||||
echo "Please set OSD_FORCE_REPAIR to '1' if you want to zap this disk."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
if [ ${OSD_FORCE_REPAIR} -eq 1 ] && [ ! -z ${DM_DEV} ]; then
|
||||
if [ -b $DM_DEV ]; then
|
||||
local cephFSID=$(ceph-conf --lookup fsid)
|
||||
if [ ! -z "${cephFSID}" ]; then
|
||||
local tmpmnt=$(mktemp -d)
|
||||
mount ${DM_DEV} ${tmpmnt}
|
||||
if [ "${OSD_BLUESTORE:-0}" -ne 1 ] && [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
|
||||
# we only care about journals for filestore.
|
||||
if [ -f "${tmpmnt}/whoami" ]; then
|
||||
OSD_JOURNAL_DISK=$(readlink -f "${tmpmnt}/journal")
|
||||
local osd_id=$(cat "${tmpmnt}/whoami")
|
||||
if [ ! -b "${OSD_JOURNAL_DISK}" ]; then
|
||||
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
|
||||
local jdev=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
|
||||
if [ ${jdev} == ${OSD_JOURNAL} ]; then
|
||||
echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL}."
|
||||
echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
|
||||
rm -rf ${tmpmnt}/ceph_fsid
|
||||
else
|
||||
echo "It appears that ${OSD_DEVICE} is missing the journal at ${OSD_JOURNAL_DISK}."
|
||||
echo "Because OSD_FORCE_REPAIR is set and paritions are manually defined, we will"
|
||||
echo "attempt to recreate the missing journal device partitions."
|
||||
osd_journal_create ${OSD_JOURNAL}
|
||||
ln -sf /dev/disk/by-partuuid/${OSD_JOURNAL_UUID} ${tmpmnt}/journal
|
||||
echo ${OSD_JOURNAL_UUID} | tee ${tmpmnt}/journal_uuid
|
||||
chown ceph. ${OSD_JOURNAL}
|
||||
# During OSD start we will format the journal and set the fsid
|
||||
touch ${tmpmnt}/run_mkjournal
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "It looks like ${OSD_DEVICE} has a ceph data partition but is missing it's metadata."
|
||||
echo "The device may contain inconsistent metadata or be corrupted."
|
||||
echo "Because OSD_FORCE_REPAIR is set, we will wipe the metadata of the OSD and zap it."
|
||||
rm -rf ${tmpmnt}/ceph_fsid
|
||||
fi
|
||||
fi
|
||||
if [ -f "${tmpmnt}/ceph_fsid" ]; then
|
||||
osdFSID=$(cat "${tmpmnt}/ceph_fsid")
|
||||
if [ ${osdFSID} != ${cephFSID} ]; then
|
||||
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
|
||||
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
|
||||
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
|
||||
zap_extra_partitions ${tmpmnt}
|
||||
umount ${tmpmnt}
|
||||
disk_zap ${OSD_DEVICE}
|
||||
else
|
||||
umount ${tmpmnt}
|
||||
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
|
||||
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
|
||||
echo "Moving on, trying to activate the OSD now."
|
||||
fi
|
||||
else
|
||||
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
|
||||
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
|
||||
zap_extra_partitions ${tmpmnt}
|
||||
umount ${tmpmnt}
|
||||
disk_zap ${OSD_DEVICE}
|
||||
fi
|
||||
else
|
||||
echo "Unable to determine the FSID of the current cluster."
|
||||
echo "OSD_FORCE_REPAIR is set, but this OSD will not be zapped."
|
||||
echo "Moving on, trying to activate the OSD now."
|
||||
return
|
||||
fi
|
||||
else
|
||||
echo "parted says ${DM_DEV} should exist, but we do not see it."
|
||||
echo "We will ignore OSD_FORCE_REPAIR and try to use the device as-is"
|
||||
echo "Moving on, trying to activate the OSD now."
|
||||
return
|
||||
fi
|
||||
else
|
||||
echo "INFO- It looks like ${OSD_DEVICE} is an OSD LVM"
|
||||
echo "Moving on, trying to prepare and activate the OSD LVM now."
|
||||
fi
|
||||
|
||||
if [[ ${CEPH_DISK_USED} -eq 1 ]]; then
|
||||
udev_settle
|
||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE}"
|
||||
ceph-volume simple scan --force ${OSD_DEVICE}$(sgdisk --print ${OSD_DEVICE} | grep "F800" | awk '{print $1}')
|
||||
elif [[ ${CEPH_LVM_PREPARE} -eq 1 ]] || [[ ${DISK_ZAPPED} -eq 1 ]]; then
|
||||
udev_settle
|
||||
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
|
||||
if [[ "${vg_name}" ]]; then
|
||||
OSD_VG=${vg_name}
|
||||
else
|
||||
random_uuid=$(uuidgen)
|
||||
vgcreate ceph-vg-${random_uuid} ${OSD_DEVICE}
|
||||
vg_name=$(get_vg_name_from_device ${OSD_DEVICE})
|
||||
vgrename ceph-vg-${random_uuid} ${vg_name}
|
||||
OSD_VG=${vg_name}
|
||||
fi
|
||||
lv_name=$(get_lv_name_from_device ${OSD_DEVICE} lv)
|
||||
if [[ ! "$(lvdisplay | awk '/LV Name/{print $3}' | grep ${lv_name})" ]]; then
|
||||
lvcreate --yes -l 100%FREE -n ${lv_name} ${OSD_VG}
|
||||
fi
|
||||
OSD_LV=${OSD_VG}/${lv_name}
|
||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_LV}"
|
||||
CEPH_LVM_PREPARE=1
|
||||
udev_settle
|
||||
fi
|
||||
|
||||
if [ "${OSD_BLUESTORE:-0}" -eq 1 ] && [ ${CEPH_DISK_USED} -eq 0 ] ; then
|
||||
if [[ ${BLOCK_DB} ]]; then
|
||||
block_db_string=$(echo ${BLOCK_DB} | awk -F "/" '{print $2 "-" $3}')
|
||||
fi
|
||||
if [[ ${BLOCK_WAL} ]]; then
|
||||
block_wal_string=$(echo ${BLOCK_WAL} | awk -F "/" '{print $2 "-" $3}')
|
||||
fi
|
||||
if [[ ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
|
||||
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
|
||||
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
|
||||
elif [[ -z ${BLOCK_DB} && ${BLOCK_WAL} ]]; then
|
||||
prep_device "${BLOCK_WAL}" "${BLOCK_WAL_SIZE}" "wal" "${OSD_DEVICE}"
|
||||
elif [[ ${BLOCK_DB} && -z ${BLOCK_WAL} ]]; then
|
||||
prep_device "${BLOCK_DB}" "${BLOCK_DB_SIZE}" "db" "${OSD_DEVICE}"
|
||||
fi
|
||||
else
|
||||
if pvdisplay -ddd -v ${OSD_DEVICE} | awk '/VG Name/{print $3}' | grep "ceph"; then
|
||||
CEPH_LVM_PREPARE=0
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
|
||||
CLI_OPTS="${CLI_OPTS} --bluestore"
|
||||
|
||||
if [ ! -z "$BLOCK_DB" ]; then
|
||||
CLI_OPTS="${CLI_OPTS} --block.db ${BLOCK_DB}"
|
||||
fi
|
||||
|
||||
if [ ! -z "$BLOCK_WAL" ]; then
|
||||
CLI_OPTS="${CLI_OPTS} --block.wal ${BLOCK_WAL}"
|
||||
fi
|
||||
else
|
||||
# we only care about journals for filestore.
|
||||
osd_journal_prepare
|
||||
CLI_OPTS="${CLI_OPTS} --data ${OSD_DEVICE} --journal ${OSD_JOURNAL}"
|
||||
udev_settle
|
||||
fi
|
||||
|
||||
if [ ! -z "$DEVICE_CLASS" ]; then
|
||||
CLI_OPTS="${CLI_OPTS} --crush-device-class ${DEVICE_CLASS}"
|
||||
fi
|
||||
|
||||
if [[ ${CEPH_LVM_PREPARE} -eq 1 ]]; then
|
||||
ceph-volume lvm -v prepare ${CLI_OPTS}
|
||||
udev_settle
|
||||
fi
|
||||
}
|
||||
|
||||
#######################################
# Create a journal partition with sgdisk.
# Arguments:
#   $1 - journal partition path. All digits in the path form the partition
#        number; the path with every digit removed is the backing device.
# Globals read:
#   OSD_JOURNAL_SIZE - partition size, passed to sgdisk as "+<size>M"
#   OSD_JOURNAL_UUID - GUID assigned to the new partition
# Globals written:
#   OSD_JOURNAL      - set to the output of dev_part for the new partition
# Exits 1 when the backing device is not a block device.
#######################################
function osd_journal_create {
  local osd_journal=${1}
  local osd_journal_partition=${osd_journal//[^0-9]/}
  local jdev=${osd_journal//[0-9]/}
  if [[ -b "${jdev}" ]]; then
    # --change-name must be double-quoted: with single quotes sgdisk received
    # the literal text "${osd_journal_partition}" instead of the number.
    sgdisk --new="${osd_journal_partition}:0:+${OSD_JOURNAL_SIZE}M" \
      --change-name="${osd_journal_partition}:ceph journal" \
      --partition-guid="${osd_journal_partition}:${OSD_JOURNAL_UUID}" \
      --typecode="${osd_journal_partition}:45b0969e-9b03-4f30-b4c6-b4b80ceff106" \
      --mbrtogpt -- "${jdev}"
    OSD_JOURNAL=$(dev_part "${jdev}" "${osd_journal_partition}")
    udev_settle
  else
    echo "The backing device ${jdev} for ${OSD_JOURNAL} does not exist on this system."
    exit 1
  fi
}
|
||||
|
||||
#######################################
# Resolve (and if necessary create) the filestore journal, then append
# "--filestore" to CLI_OPTS.
# Globals read:
#   OSD_JOURNAL, JOURNAL_TYPE, OSD_DEVICE, CLI_OPTS
# Globals written:
#   OSD_JOURNAL, OSD_JOURNAL_PARTITION, CLI_OPTS
#######################################
function osd_journal_prepare {
  if [[ -n "${OSD_JOURNAL}" ]]; then
    if [[ -b "${OSD_JOURNAL}" ]]; then
      OSD_JOURNAL=$(readlink -f "${OSD_JOURNAL}")
      # Digits in the resolved path are taken as the partition number, the
      # remainder as the backing device.
      OSD_JOURNAL_PARTITION=${OSD_JOURNAL//[^0-9]/}
      local jdev=${OSD_JOURNAL//[0-9]/}
      if [[ -z "${OSD_JOURNAL_PARTITION}" ]]; then
        # NOTE(review): the partition number is empty in this branch, so
        # dev_part only receives a meaningful device argument - confirm this
        # is what dev_part expects for a bare journal device.
        OSD_JOURNAL=$(dev_part "${jdev}" "${OSD_JOURNAL_PARTITION}")
      fi
    elif [[ "${JOURNAL_TYPE}" != "directory" ]]; then
      # The journal path is not a block device yet and the journal is not
      # directory-backed: create the partition until the device node shows up.
      OSD_JOURNAL=$(readlink -f "${OSD_JOURNAL}")
      until [[ -b "${OSD_JOURNAL}" ]]; do
        osd_journal_create "${OSD_JOURNAL}"
      done
    fi
    chown ceph. "${OSD_JOURNAL}"
  elif [[ "${JOURNAL_TYPE}" != "directory" ]]; then
    echo "No journal device specified. OSD and journal will share ${OSD_DEVICE}"
    echo "For better performance on HDD, consider moving your journal to a separate device"
  fi
  CLI_OPTS="${CLI_OPTS} --filestore"
}
|
||||
|
||||
if ! [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
|
||||
osd_disk_prepare
|
||||
fi
|
||||
|
||||
# Clean up resources held by the common script
|
||||
common_cleanup
|
||||
# Clean up resources held by the common script
|
||||
common_cleanup
|
||||
fi
|
||||
|
@ -34,20 +34,18 @@ data:
|
||||
{{ tuple "bin/osd/_start.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
log-tail.sh: |
|
||||
{{ tuple "bin/osd/_log-tail.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-directory-ceph-disk.sh: |
|
||||
osd-directory-ceph-volume.sh: |
|
||||
{{ tuple "bin/osd/_directory.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-block-ceph-disk.sh: |
|
||||
{{ tuple "bin/osd/ceph-disk/_block.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-bluestore-ceph-disk.sh: |
|
||||
{{ tuple "bin/osd/ceph-disk/_bluestore.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-init-ceph-disk.sh: |
|
||||
{{ tuple "bin/osd/ceph-disk/_init-with-ceph-disk.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-common-ceph-disk.sh: |
|
||||
{{ tuple "bin/osd/ceph-disk/_common.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-block-ceph-volume.sh: |
|
||||
{{ tuple "bin/osd/ceph-volume/_block.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-bluestore-ceph-volume.sh: |
|
||||
{{ tuple "bin/osd/ceph-volume/_bluestore.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-init-ceph-volume-helper-bluestore.sh: |
|
||||
{{ tuple "bin/osd/ceph-volume/_init-ceph-volume-helper-bluestore.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-init-ceph-volume-helper-directory.sh: |
|
||||
{{ tuple "bin/osd/ceph-volume/_init-ceph-volume-helper-directory.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-init-ceph-volume-helper-block-logical.sh: |
|
||||
{{ tuple "bin/osd/ceph-volume/_init-ceph-volume-helper-block-logical.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-init-ceph-volume.sh: |
|
||||
{{ tuple "bin/osd/ceph-volume/_init-with-ceph-volume.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-common-ceph-volume.sh: |
|
||||
|
@ -214,17 +214,21 @@ spec:
|
||||
subPath: osd-init.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/init-ceph-disk.sh
|
||||
subPath: osd-init-ceph-disk.sh
|
||||
mountPath: /tmp/init-ceph-volume-helper-bluestore.sh
|
||||
subPath: osd-init-ceph-volume-helper-bluestore.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/init-ceph-volume-helper-directory.sh
|
||||
subPath: osd-init-ceph-volume-helper-directory.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/init-ceph-volume-helper-block-logical.sh
|
||||
subPath: osd-init-ceph-volume-helper-block-logical.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/init-ceph-volume.sh
|
||||
subPath: osd-init-ceph-volume.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-common-ceph-disk.sh
|
||||
subPath: osd-common-ceph-disk.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-common-ceph-volume.sh
|
||||
subPath: osd-common-ceph-volume.sh
|
||||
@ -358,21 +362,13 @@ spec:
|
||||
subPath: osd-start.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-directory-ceph-disk.sh
|
||||
subPath: osd-directory-ceph-disk.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-block-ceph-disk.sh
|
||||
subPath: osd-block-ceph-disk.sh
|
||||
mountPath: /tmp/osd-directory-ceph-volume.sh
|
||||
subPath: osd-directory-ceph-volume.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-block-ceph-volume.sh
|
||||
subPath: osd-block-ceph-volume.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-bluestore-ceph-disk.sh
|
||||
subPath: osd-bluestore-ceph-disk.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-bluestore-ceph-volume.sh
|
||||
subPath: osd-bluestore-ceph-volume.sh
|
||||
@ -389,10 +385,6 @@ spec:
|
||||
mountPath: /tmp/utils-checkDNS.sh
|
||||
subPath: utils-checkDNS.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-common-ceph-disk.sh
|
||||
subPath: osd-common-ceph-disk.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-common-ceph-volume.sh
|
||||
subPath: osd-common-ceph-volume.sh
|
||||
|
@ -41,10 +41,8 @@ labels:
|
||||
node_selector_key: ceph-osd
|
||||
node_selector_value: enabled
|
||||
|
||||
# We could deploy ceph cluster now with either ceph-volume or ceph-disk however
|
||||
# ceph-disk is deprecated from Nautilus.
|
||||
# Keeping ceph-disk as default since gate scripts are still directory backed
|
||||
# osds, need to change this after moving the gates to disk backed osd.
|
||||
# The default deploy tool is ceph-volume. Support for "ceph-disk" has been
|
||||
# removed: it was deprecated as of Nautilus and is no longer used.
|
||||
deploy:
|
||||
tool: "ceph-volume"
|
||||
# NOTE: set this to 1 if osd disk needs wiping in case of reusing from previous deployment
|
||||
|
@ -21,4 +21,5 @@ ceph-osd:
|
||||
- 0.1.18 Uplift from Nautilus to Octopus release
|
||||
- 0.1.19 Update rbac api version
|
||||
- 0.1.20 Update directory-based OSD deployment for image changes
|
||||
- 0.1.21 Refactor Ceph OSD Init Scripts - First PS
|
||||
...
|
||||
|
Loading…
x
Reference in New Issue
Block a user