[ceph-osd] BlueStore support for ceph-osd

This adds BlueStore support for the ceph-osd chart so that OSDs
may be deployed using BlueStore with optional --block.db and
--block.wal parameters.

Co-Authored-By: Chinasubbareddy Mallavarapu <chinasubba.reddy@att.com>

Change-Id: Ifbae8331b595c15c168ccd6e93b00ff054a607bc
This commit is contained in:
Taylor Stephen 2019-07-17 16:09:34 -06:00 committed by Chinasubbareddy Mallavarapu
parent 010fc1fc65
commit 3c55e7773b
7 changed files with 218 additions and 21 deletions

View File

@ -0,0 +1,74 @@
#!/bin/bash
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
# Activate an already-prepared BlueStore OSD on STORAGE_LOCATION and run
# ceph-osd in the foreground of this container.
#
# Environment read here: STORAGE_LOCATION, OSD_PATH_BASE, CLUSTER.
# Helpers called here: udev_settle, dev_part, crush_location. They are
# expected to be provided by the sourced osd-common.sh.
source /tmp/osd-common.sh

set -ex

: "${OSD_SOFT_FORCE_ZAP:=1}"

# Resolve the configured location to its canonical device node. A failed
# lookup leaves OSD_DEVICE empty so that the friendlier check below reports it
# instead of "set -e" aborting silently here.
OSD_DEVICE=$(readlink -f "${STORAGE_LOCATION}" || true)
export OSD_DEVICE

if [[ -z "${OSD_DEVICE}" ]]; then
  echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
  exit 1
fi

if [[ ! -b "${OSD_DEVICE}" ]]; then
  echo "ERROR- The device pointed by OSD_DEVICE ${OSD_DEVICE} doesn't exist !"
  exit 1
fi

# Both option strings are intentionally empty and intentionally expanded
# unquoted further down, so that they contribute no argument at all.
CEPH_DISK_OPTIONS=""
CEPH_OSD_OPTIONS=""

# The glob covers both "<dev>1" and "<dev>p1" partition naming. Under "set -e"
# a blkid failure (no first partition) aborts the script at this point.
DATA_UUID=$(blkid -o value -s PARTUUID "${OSD_DEVICE}"*1)

udev_settle

DATA_PART=$(dev_part "${OSD_DEVICE}" 1)
MOUNTED_PART=${DATA_PART}

# shellcheck disable=SC2086 -- CEPH_DISK_OPTIONS must word-split (see above)
ceph-disk -v \
  --setuser ceph \
  --setgroup disk \
  activate ${CEPH_DISK_OPTIONS} \
  --no-start-daemon "${DATA_PART}"

# Find where the data partition got mounted. The device column is compared
# exactly, so that e.g. /dev/sdb1 can never pick up the entry of /dev/sdb10.
OSD_MOUNT=$(awk -v dev="${MOUNTED_PART}" '$1 == dev { print $2; exit }' /proc/mounts)

# The OSD id is the numeric suffix after the last "-" of the mount point; any
# other digits in the path (for instance in a cluster name) are ignored.
OSD_ID=${OSD_MOUNT##*-}
if [[ ! "${OSD_ID}" =~ ^[0-9]+$ ]]; then
  echo "ERROR- Could not determine the OSD id of ${MOUNTED_PART} from /proc/mounts"
  exit 1
fi

OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
OSD_KEYRING="${OSD_PATH}/keyring"

# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
OSD_WEIGHT=0

# NOTE(supamatt): add or move the OSD's CRUSH location
# NOTE(review): crush_location presumably consumes the OSD_* variables set
# above -- confirm against osd-common.sh.
crush_location

# NOTE(supamatt): Just in case permissions do not align up, we recursively set them correctly.
if [[ "$(stat -c%U "${OSD_PATH}")" != "ceph" ]]; then
  # "ceph:" selects user ceph plus that user's login group; it replaces the
  # obsolescent "ceph." spelling of the same request.
  chown -R ceph: "${OSD_PATH}"
fi

# Run the daemon as a background job so its PID can be recorded, then block on
# that specific PID so the script exits with ceph-osd's own exit status (a
# bare "wait" would always report success).
# shellcheck disable=SC2086 -- CEPH_OSD_OPTIONS must word-split (see above)
exec /usr/bin/ceph-osd \
  --cluster "${CLUSTER}" \
  ${CEPH_OSD_OPTIONS} \
  -f \
  -i "${OSD_ID}" \
  --setuser ceph \
  --setgroup disk &
OSD_PID=$!
echo "${OSD_PID}" > /run/ceph-osd.pid
wait "${OSD_PID}"

View File

@ -142,6 +142,43 @@ function dev_part {
fi
}
function zap_extra_partitions {
# Examine temp mount and delete any block.db and block.wal partitions
mountpoint=${1}
journal_disk=""
journal_part=""
block_db_disk=""
block_db_part=""
block_wal_disk=""
block_wal_part=""
# Discover journal, block.db, and block.wal partitions first before deleting anything
# If the partitions are on the same disk, deleting one can affect discovery of the other(s)
if [ -L "${mountpoint}/journal" ]; then
journal_disk=$(readlink -m ${mountpoint}/journal | sed 's/[0-9]*//g')
journal_part=$(readlink -m ${mountpoint}/journal | sed 's/[^0-9]*//g')
fi
if [ -L "${mountpoint}/block.db" ]; then
block_db_disk=$(readlink -m ${mountpoint}/block.db | sed 's/[0-9]*//g')
block_db_part=$(readlink -m ${mountpoint}/block.db | sed 's/[^0-9]*//g')
fi
if [ -L "${mountpoint}/block.wal" ]; then
block_wal_disk=$(readlink -m ${mountpoint}/block.wal | sed 's/[0-9]*//g')
block_wal_part=$(readlink -m ${mountpoint}/block.wal | sed 's/[^0-9]*//g')
fi
# Delete any discovered journal, block.db, and block.wal partitions
if [ ! -z "${journal_disk}" ]; then
sgdisk -d ${journal_part} ${journal_disk}
fi
if [ ! -z "${block_db_disk}" ]; then
sgdisk -d ${block_db_part} ${block_db_disk}
fi
if [ ! -z "${block_wal_disk}" ]; then
sgdisk -d ${block_wal_part} ${block_wal_disk}
fi
}
function disk_zap {
# Run all the commands that ceph-disk zap uses to clear a disk
local device=${1}
@ -154,10 +191,21 @@ function disk_zap {
function udev_settle {
partprobe "${OSD_DEVICE}"
if [ "x$JOURNAL_TYPE" == "xblock-logical" ]; then
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
partprobe "${JDEV}"
if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
if [ ! -z "$BLOCK_DB" ]; then
partprobe "${BLOCK_DB}"
fi
if [ ! -z "$BLOCK_WAL" ] && [ "$BLOCK_WAL" != "$BLOCK_DB" ]; then
partprobe "${BLOCK_WAL}"
fi
else
if [ "x$JOURNAL_TYPE" == "xblock-logical" ] && [ ! -z "$OSD_JOURNAL" ]; then
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
if [ ! -z "$OSD_JOURNAL" ]; then
local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
partprobe "${JDEV}"
fi
fi
fi
# watch the udev event queue, and exit if all current events are handled
udevadm settle --timeout=600

View File

@ -24,6 +24,10 @@ source /tmp/osd-common.sh
# We do not want to zap the journal disk. This option is tracked separately.
: "${JOURNAL_FORCE_ZAP:=0}"
# Flag BlueStore deployments. STORAGE_TYPE is compared after stripping a
# trailing "-<suffix>" (if any), so "bluestore" as well as "bluestore-<x>"
# enable the flag. OSD_BLUESTORE is exported and later tested as
# "${OSD_BLUESTORE:-0}" by helpers such as udev_settle and osd_disk_prepare.
if [ "x${STORAGE_TYPE%-*}" == "xbluestore" ]; then
export OSD_BLUESTORE=1
fi
if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
export OSD_DEVICE="/var/lib/ceph/osd"
else
@ -71,7 +75,7 @@ function osd_disk_prepare {
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
if [ -b "${OSD_DEVICE}1" ]; then
local cephFSID=$(ceph-conf --lookup fsid)
if [ ! -z "${cephFSID}" ]; then
if [ ! -z "${cephFSID}" ]; then
local tmpmnt=$(mktemp -d)
mount ${OSD_DEVICE}1 ${tmpmnt}
if [ "${OSD_BLUESTORE:-0}" -ne 1 ] && [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
@ -107,22 +111,25 @@ function osd_disk_prepare {
fi
if [ -f "${tmpmnt}/ceph_fsid" ]; then
osdFSID=$(cat "${tmpmnt}/ceph_fsid")
umount ${tmpmnt}
if [ ${osdFSID} != ${cephFSID} ]; then
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
zap_extra_partitions ${tmpmnt}
umount ${tmpmnt}
disk_zap ${OSD_DEVICE}
else
umount ${tmpmnt}
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
echo "Moving on, trying to activate the OSD now."
return
fi
else
umount ${tmpmnt}
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
zap_extra_partitions ${tmpmnt}
umount ${tmpmnt}
disk_zap ${OSD_DEVICE}
fi
else
@ -145,22 +152,33 @@ function osd_disk_prepare {
fi
fi
if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
CLI_OPTS="${CLI_OPTS} --bluestore"
if [ ! -z "$BLOCK_DB" ]; then
CLI_OPTS="${CLI_OPTS} --block.db ${BLOCK_DB}"
fi
if [ ! -z "$BLOCK_WAL" ]; then
CLI_OPTS="${CLI_OPTS} --block.wal ${BLOCK_WAL}"
fi
CLI_OPTS="${CLI_OPTS} ${OSD_DEVICE}"
else
# we only care about journals for filestore.
osd_journal_prepare
else
OSD_JOURNAL=''
CLI_OPTS="${CLI_OPTS} --bluestore"
CLI_OPTS="${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE}"
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
CLI_OPTS="${CLI_OPTS} --journal-file"
else
CLI_OPTS="${CLI_OPTS} ${OSD_JOURNAL}"
fi
fi
udev_settle
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
ceph-disk -v prepare ${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE} --journal-file
else
ceph-disk -v prepare ${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE} ${OSD_JOURNAL}
fi
ceph-disk -v prepare ${CLI_OPTS}
}
function osd_journal_create {

View File

@ -36,6 +36,8 @@ data:
{{ tuple "bin/osd/_directory.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-block.sh: |
{{ tuple "bin/osd/_block.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-bluestore.sh: |
{{ tuple "bin/osd/_bluestore.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-init.sh: |
{{ tuple "bin/osd/_init.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-check.sh: |

View File

@ -280,6 +280,10 @@ spec:
mountPath: /tmp/osd-block.sh
subPath: osd-block.sh
readOnly: true
- name: ceph-osd-bin
mountPath: /tmp/osd-bluestore.sh
subPath: osd-bluestore.sh
readOnly: true
- name: ceph-osd-bin
mountPath: /tmp/osd-check.sh
subPath: osd-check.sh

View File

@ -303,6 +303,7 @@ limitations under the License.
{{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }}
{{ end }}
{{- if ne $v.data.type "bluestore" }}
{{ if eq $v.journal.type "directory" }}
{{ $journalDirVolume := dict "hostPath" (dict "path" $v.journal.location) "name" "journal" }}
{{ $newPodDataVols := append $context.Values.__tmpPodVols $journalDirVolume }}
@ -312,6 +313,11 @@ limitations under the License.
{{ $newPodDataVols := append $context.Values.__tmpPodVols $dataDirVolume }}
{{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }}
{{ end }}
{{ else }}
{{ $dataDirVolume := dict "emptyDir" dict "name" "journal" }}
{{ $newPodDataVols := append $context.Values.__tmpPodVols $dataDirVolume }}
{{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }}
{{- end }}
{{- if not $context.Values.__tmpYAML.spec }}{{- $_ := set $context.Values.__tmpYAML "spec" dict }}{{- end }}
{{- if not $context.Values.__tmpYAML.spec.template }}{{- $_ := set $context.Values.__tmpYAML.spec "template" dict }}{{- end }}
@ -330,9 +336,27 @@ limitations under the License.
{{- if empty $context.Values._tmpYAMLcontainer.env }}
{{- $_ := set $context.Values._tmpYAMLcontainer "env" ( list ) }}
{{- end }}
{{- $tmpcontainerEnv := omit $context.Values._tmpYAMLcontainer "env" }}
{{- if eq $v.data.type "bluestore" }}
{{- if and $v.block_db $v.block_wal}}
{{ $containerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{- else if $v.block_db }}
{{ $containerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{- else if $v.block_wal }}
{{ $containerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{ else }}
{{ $containerEnv := prepend (prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{- end }}
{{ else }}
{{ $containerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{- end }}
{{- $localInitContainerEnv := omit $context.Values._tmpYAMLcontainer "env" }}
{{- $_ := set $localInitContainerEnv "env" $containerEnv }}
{{- $_ := set $localInitContainerEnv "env" $tmpcontainerEnv.env }}
{{ $containerList := append $context.Values.__tmpYAMLcontainers $localInitContainerEnv }}
{{ $_ := set $context.Values "__tmpYAMLcontainers" $containerList }}
{{ end }}
@ -341,9 +365,27 @@ limitations under the License.
{{- $_ := set $context.Values "__tmpYAMLinitContainers" list }}
{{- range $podContainer := $context.Values.__daemonset_yaml.spec.template.spec.initContainers }}
{{- $_ := set $context.Values "_tmpYAMLinitContainer" $podContainer }}
{{ $initContainerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }}
{{- $tmpinitcontainerEnv := omit $context.Values._tmpYAMLinitContainer "env" }}
{{- if eq $v.data.type "bluestore" }}
{{- if and $v.block_db $v.block_wal}}
{{ $initcontainerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{- else if $v.block_db }}
{{ $initcontainerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{- else if $v.block_wal }}
{{ $initcontainerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{ else }}
{{ $initcontainerEnv := prepend (prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{- end }}
{{ else }}
{{ $initcontainerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{- end }}
{{- $localInitContainerEnv := omit $context.Values._tmpYAMLinitContainer "env" }}
{{- $_ := set $localInitContainerEnv "env" $initContainerEnv }}
{{- $_ := set $localInitContainerEnv "env" $tmpinitcontainerEnv.env }}
{{ $initContainerList := append $context.Values.__tmpYAMLinitContainers $localInitContainerEnv }}
{{ $_ := set $context.Values "__tmpYAMLinitContainers" $initContainerList }}
{{ end }}

View File

@ -204,6 +204,15 @@ conf:
journal:
type: directory
location: /var/lib/openstack-helm/ceph/osd/journal-one
# - data:
# type: bluestore
# location: /dev/sdb
# Separate block devices may be used for block.db and/or block.wal
# Without these values they will be co-located on the data volume
# block_db: /dev/sdc
# block_wal: /dev/sdc
# - data:
# type: block-logical
# location: /dev/sdd