[ceph-osd] BlueStore support for ceph-osd

This adds BlueStore support for the ceph-osd chart so that OSDs
may be deployed using BlueStore with optional --block.db and
--block.wal parameters.

Co-Authored-By: Chinasubbareddy Mallavarapu <chinasubba.reddy@att.com>

Change-Id: Ifbae8331b595c15c168ccd6e93b00ff054a607bc
This commit is contained in:
Taylor Stephen 2019-07-17 16:09:34 -06:00 committed by Chinasubbareddy Mallavarapu
parent 010fc1fc65
commit 3c55e7773b
7 changed files with 218 additions and 21 deletions

View File

@ -0,0 +1,74 @@
#!/bin/bash
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
# Activates an already-prepared BlueStore OSD on the block device named by
# STORAGE_LOCATION and then runs ceph-osd in the foreground for it.
#
# Reads:   STORAGE_LOCATION, CLUSTER, OSD_PATH_BASE
# Helpers: udev_settle, dev_part and crush_location are not defined here;
#          they are expected to come from /tmp/osd-common.sh (sourced below).
source /tmp/osd-common.sh

set -ex

: "${OSD_SOFT_FORCE_ZAP:=1}"

# Resolve the configured location to its canonical path. The assignment is
# kept on the "export" line on purpose: export always succeeds, so a failing
# readlink leaves OSD_DEVICE empty and we reach the explicit error below
# instead of being aborted by "set -e" without a message.
export OSD_DEVICE=$(readlink -f "${STORAGE_LOCATION}")

if [[ -z "${OSD_DEVICE}" ]]; then
  echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
  exit 1
fi

if [[ ! -b "${OSD_DEVICE}" ]]; then
  echo "ERROR- The device pointed by OSD_DEVICE ${OSD_DEVICE} doesn't exist !"
  exit 1
fi

# Both option strings are intentionally expanded unquoted further down so
# that an empty value contributes no argument at all.
CEPH_DISK_OPTIONS=""
CEPH_OSD_OPTIONS=""

# The trailing glob is deliberate (it covers names such as sdb1 and nvme0n1p1);
# only the variable part is quoted.
# NOTE(review): DATA_UUID is not read anywhere in this script - confirm
# whether a sourced helper depends on it before removing.
DATA_UUID=$(blkid -o value -s PARTUUID "${OSD_DEVICE}"*1)

udev_settle

DATA_PART=$(dev_part "${OSD_DEVICE}" 1)
MOUNTED_PART=${DATA_PART}

ceph-disk -v \
  --setuser ceph \
  --setgroup disk \
  activate ${CEPH_DISK_OPTIONS} \
  --no-start-daemon "${DATA_PART}"

# Find where the data partition got mounted and take the number at the end
# of that mount point as the OSD id. The device column is compared for exact
# equality so that /dev/sdb1 cannot also pick up /dev/sdb10, and only the
# trailing digits are used so digits elsewhere in the path are ignored.
OSD_ID=$(awk -v part="${MOUNTED_PART}" \
  '$1 == part && match($2, /[0-9]+$/) { print substr($2, RSTART); exit }' \
  /proc/mounts)

# awk exits 0 even when nothing matched, so fail explicitly here.
if [[ -z "${OSD_ID}" ]]; then
  echo "ERROR- Could not determine the OSD id for ${MOUNTED_PART} from /proc/mounts"
  exit 1
fi

OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
OSD_KEYRING="${OSD_PATH}/keyring"

# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
OSD_WEIGHT=0

# NOTE(supamatt): add or move the OSD's CRUSH location
crush_location

# NOTE(supamatt): Just in case permissions do not align up, we recursively set them correctly.
if [[ "$(stat -c%U "${OSD_PATH}")" != ceph ]]; then
  # "ceph:" = user ceph plus that user's login group (replaces the
  # deprecated "ceph." spelling of the same thing).
  chown -R ceph: "${OSD_PATH}"
fi

# Start the daemon in the background, record its PID, then block on it so
# this script stays alive for as long as ceph-osd runs.
exec /usr/bin/ceph-osd \
  --cluster "${CLUSTER}" \
  ${CEPH_OSD_OPTIONS} \
  -f \
  -i "${OSD_ID}" \
  --setuser ceph \
  --setgroup disk & echo $! > /run/ceph-osd.pid
wait

View File

@ -142,6 +142,43 @@ function dev_part {
fi fi
} }
function zap_extra_partitions {
  # Delete the journal, block.db and block.wal partitions that an OSD data
  # partition refers to.
  #
  # Arguments: $1 - directory where the OSD data partition is (temporarily)
  #                 mounted; its "journal", "block.db" and "block.wal"
  #                 entries are inspected and only symlinks are followed.
  # Effects:   runs "sgdisk -d <partition-number> <disk>" once for every
  #            symlink whose resolved target ends in a partition number.
  local mountpoint=${1}
  local link_name target idx
  local -a disks=()
  local -a parts=()
  # Disk names that themselves end in a digit (nvme0n1, mmcblk0, loop0) put
  # a "p" between the disk name and the partition number.
  local p_suffix_re='^(.*[0-9])p([0-9]+)$'
  # Everything else (sdb2, vdc1, ...) appends the number directly.
  local plain_re='^(.*[^0-9])([0-9]+)$'

  # Discover journal, block.db, and block.wal partitions first before deleting anything
  # If the partitions are on the same disk, deleting one can affect discovery of the other(s)
  for link_name in journal block.db block.wal; do
    [ -L "${mountpoint}/${link_name}" ] || continue
    target=$(readlink -m "${mountpoint}/${link_name}")
    if [[ "${target}" =~ ${p_suffix_re} ]] || [[ "${target}" =~ ${plain_re} ]]; then
      disks+=("${BASH_REMATCH[1]}")
      parts+=("${BASH_REMATCH[2]}")
    else
      # No trailing partition number: there is nothing sgdisk -d could delete.
      echo "WARNING- ${link_name} target ${target} is not a partition, skipping" >&2
    fi
  done

  # Delete any discovered journal, block.db, and block.wal partitions
  for idx in "${!disks[@]}"; do
    sgdisk -d "${parts[idx]}" "${disks[idx]}"
  done
}
function disk_zap { function disk_zap {
# Run all the commands that ceph-disk zap uses to clear a disk # Run all the commands that ceph-disk zap uses to clear a disk
local device=${1} local device=${1}
@ -154,10 +191,21 @@ function disk_zap {
function udev_settle { function udev_settle {
partprobe "${OSD_DEVICE}" partprobe "${OSD_DEVICE}"
if [ "x$JOURNAL_TYPE" == "xblock-logical" ]; then if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL}) if [ ! -z "$BLOCK_DB" ]; then
local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g') partprobe "${BLOCK_DB}"
partprobe "${JDEV}" fi
if [ ! -z "$BLOCK_WAL" ] && [ "$BLOCK_WAL" != "$BLOCK_DB" ]; then
partprobe "${BLOCK_WAL}"
fi
else
if [ "x$JOURNAL_TYPE" == "xblock-logical" ] && [ ! -z "$OSD_JOURNAL" ]; then
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
if [ ! -z "$OSD_JOURNAL" ]; then
local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
partprobe "${JDEV}"
fi
fi
fi fi
# watch the udev event queue, and exit if all current events are handled # watch the udev event queue, and exit if all current events are handled
udevadm settle --timeout=600 udevadm settle --timeout=600

View File

@ -24,6 +24,10 @@ source /tmp/osd-common.sh
# We do not want to zap journal disk. Tracking this option separately. # We do not want to zap journal disk. Tracking this option separately.
: "${JOURNAL_FORCE_ZAP:=0}" : "${JOURNAL_FORCE_ZAP:=0}"
if [ "x${STORAGE_TYPE%-*}" == "xbluestore" ]; then
export OSD_BLUESTORE=1
fi
if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
export OSD_DEVICE="/var/lib/ceph/osd" export OSD_DEVICE="/var/lib/ceph/osd"
else else
@ -71,7 +75,7 @@ function osd_disk_prepare {
if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then if [[ ${OSD_FORCE_REPAIR} -eq 1 ]]; then
if [ -b "${OSD_DEVICE}1" ]; then if [ -b "${OSD_DEVICE}1" ]; then
local cephFSID=$(ceph-conf --lookup fsid) local cephFSID=$(ceph-conf --lookup fsid)
if [ ! -z "${cephFSID}" ]; then if [ ! -z "${cephFSID}" ]; then
local tmpmnt=$(mktemp -d) local tmpmnt=$(mktemp -d)
mount ${OSD_DEVICE}1 ${tmpmnt} mount ${OSD_DEVICE}1 ${tmpmnt}
if [ "${OSD_BLUESTORE:-0}" -ne 1 ] && [ "x$JOURNAL_TYPE" != "xdirectory" ]; then if [ "${OSD_BLUESTORE:-0}" -ne 1 ] && [ "x$JOURNAL_TYPE" != "xdirectory" ]; then
@ -107,22 +111,25 @@ function osd_disk_prepare {
fi fi
if [ -f "${tmpmnt}/ceph_fsid" ]; then if [ -f "${tmpmnt}/ceph_fsid" ]; then
osdFSID=$(cat "${tmpmnt}/ceph_fsid") osdFSID=$(cat "${tmpmnt}/ceph_fsid")
umount ${tmpmnt}
if [ ${osdFSID} != ${cephFSID} ]; then if [ ${osdFSID} != ${cephFSID} ]; then
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster." echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}" echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
echo "Because OSD_FORCE_REPAIR was set, we will zap this device." echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
zap_extra_partitions ${tmpmnt}
umount ${tmpmnt}
disk_zap ${OSD_DEVICE} disk_zap ${OSD_DEVICE}
else else
umount ${tmpmnt}
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster." echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped." echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
echo "Moving on, trying to activate the OSD now." echo "Moving on, trying to activate the OSD now."
return return
fi fi
else else
umount ${tmpmnt}
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID." echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
echo "Because OSD_FORCE_REPAIR was set, we will zap this device." echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
zap_extra_partitions ${tmpmnt}
umount ${tmpmnt}
disk_zap ${OSD_DEVICE} disk_zap ${OSD_DEVICE}
fi fi
else else
@ -145,22 +152,33 @@ function osd_disk_prepare {
fi fi
fi fi
if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
CLI_OPTS="${CLI_OPTS} --bluestore"
if [ ! -z "$BLOCK_DB" ]; then
CLI_OPTS="${CLI_OPTS} --block.db ${BLOCK_DB}"
fi
if [ ! -z "$BLOCK_WAL" ]; then
CLI_OPTS="${CLI_OPTS} --block.wal ${BLOCK_WAL}"
fi
CLI_OPTS="${CLI_OPTS} ${OSD_DEVICE}"
else
# we only care about journals for filestore. # we only care about journals for filestore.
osd_journal_prepare osd_journal_prepare
else
OSD_JOURNAL='' CLI_OPTS="${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE}"
CLI_OPTS="${CLI_OPTS} --bluestore"
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
CLI_OPTS="${CLI_OPTS} --journal-file"
else
CLI_OPTS="${CLI_OPTS} ${OSD_JOURNAL}"
fi
fi fi
udev_settle udev_settle
ceph-disk -v prepare ${CLI_OPTS}
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
ceph-disk -v prepare ${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE} --journal-file
else
ceph-disk -v prepare ${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE} ${OSD_JOURNAL}
fi
} }
function osd_journal_create { function osd_journal_create {

View File

@ -36,6 +36,8 @@ data:
{{ tuple "bin/osd/_directory.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} {{ tuple "bin/osd/_directory.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-block.sh: | osd-block.sh: |
{{ tuple "bin/osd/_block.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} {{ tuple "bin/osd/_block.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-bluestore.sh: |
{{ tuple "bin/osd/_bluestore.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-init.sh: | osd-init.sh: |
{{ tuple "bin/osd/_init.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} {{ tuple "bin/osd/_init.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
osd-check.sh: | osd-check.sh: |

View File

@ -280,6 +280,10 @@ spec:
mountPath: /tmp/osd-block.sh mountPath: /tmp/osd-block.sh
subPath: osd-block.sh subPath: osd-block.sh
readOnly: true readOnly: true
- name: ceph-osd-bin
mountPath: /tmp/osd-bluestore.sh
subPath: osd-bluestore.sh
readOnly: true
- name: ceph-osd-bin - name: ceph-osd-bin
mountPath: /tmp/osd-check.sh mountPath: /tmp/osd-check.sh
subPath: osd-check.sh subPath: osd-check.sh

View File

@ -303,6 +303,7 @@ limitations under the License.
{{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }} {{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }}
{{ end }} {{ end }}
{{- if ne $v.data.type "bluestore" }}
{{ if eq $v.journal.type "directory" }} {{ if eq $v.journal.type "directory" }}
{{ $journalDirVolume := dict "hostPath" (dict "path" $v.journal.location) "name" "journal" }} {{ $journalDirVolume := dict "hostPath" (dict "path" $v.journal.location) "name" "journal" }}
{{ $newPodDataVols := append $context.Values.__tmpPodVols $journalDirVolume }} {{ $newPodDataVols := append $context.Values.__tmpPodVols $journalDirVolume }}
@ -312,6 +313,11 @@ limitations under the License.
{{ $newPodDataVols := append $context.Values.__tmpPodVols $dataDirVolume }} {{ $newPodDataVols := append $context.Values.__tmpPodVols $dataDirVolume }}
{{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }} {{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }}
{{ end }} {{ end }}
{{ else }}
{{ $dataDirVolume := dict "emptyDir" dict "name" "journal" }}
{{ $newPodDataVols := append $context.Values.__tmpPodVols $dataDirVolume }}
{{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }}
{{- end }}
{{- if not $context.Values.__tmpYAML.spec }}{{- $_ := set $context.Values.__tmpYAML "spec" dict }}{{- end }} {{- if not $context.Values.__tmpYAML.spec }}{{- $_ := set $context.Values.__tmpYAML "spec" dict }}{{- end }}
{{- if not $context.Values.__tmpYAML.spec.template }}{{- $_ := set $context.Values.__tmpYAML.spec "template" dict }}{{- end }} {{- if not $context.Values.__tmpYAML.spec.template }}{{- $_ := set $context.Values.__tmpYAML.spec "template" dict }}{{- end }}
@ -330,9 +336,27 @@ limitations under the License.
{{- if empty $context.Values._tmpYAMLcontainer.env }} {{- if empty $context.Values._tmpYAMLcontainer.env }}
{{- $_ := set $context.Values._tmpYAMLcontainer "env" ( list ) }} {{- $_ := set $context.Values._tmpYAMLcontainer "env" ( list ) }}
{{- end }} {{- end }}
{{- $tmpcontainerEnv := omit $context.Values._tmpYAMLcontainer "env" }}
{{- if eq $v.data.type "bluestore" }}
{{- if and $v.block_db $v.block_wal}}
{{ $containerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{- else if $v.block_db }}
{{ $containerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{- else if $v.block_wal }}
{{ $containerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{ else }}
{{ $containerEnv := prepend (prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{- end }}
{{ else }}
{{ $containerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }} {{ $containerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }}
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
{{- end }}
{{- $localInitContainerEnv := omit $context.Values._tmpYAMLcontainer "env" }} {{- $localInitContainerEnv := omit $context.Values._tmpYAMLcontainer "env" }}
{{- $_ := set $localInitContainerEnv "env" $containerEnv }} {{- $_ := set $localInitContainerEnv "env" $tmpcontainerEnv.env }}
{{ $containerList := append $context.Values.__tmpYAMLcontainers $localInitContainerEnv }} {{ $containerList := append $context.Values.__tmpYAMLcontainers $localInitContainerEnv }}
{{ $_ := set $context.Values "__tmpYAMLcontainers" $containerList }} {{ $_ := set $context.Values "__tmpYAMLcontainers" $containerList }}
{{ end }} {{ end }}
@ -341,9 +365,27 @@ limitations under the License.
{{- $_ := set $context.Values "__tmpYAMLinitContainers" list }} {{- $_ := set $context.Values "__tmpYAMLinitContainers" list }}
{{- range $podContainer := $context.Values.__daemonset_yaml.spec.template.spec.initContainers }} {{- range $podContainer := $context.Values.__daemonset_yaml.spec.template.spec.initContainers }}
{{- $_ := set $context.Values "_tmpYAMLinitContainer" $podContainer }} {{- $_ := set $context.Values "_tmpYAMLinitContainer" $podContainer }}
{{ $initContainerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }} {{- $tmpinitcontainerEnv := omit $context.Values._tmpYAMLinitContainer "env" }}
{{- if eq $v.data.type "bluestore" }}
{{- if and $v.block_db $v.block_wal}}
{{ $initcontainerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{- else if $v.block_db }}
{{ $initcontainerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{- else if $v.block_wal }}
{{ $initcontainerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{ else }}
{{ $initcontainerEnv := prepend (prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{- end }}
{{ else }}
{{ $initcontainerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }}
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
{{- end }}
{{- $localInitContainerEnv := omit $context.Values._tmpYAMLinitContainer "env" }} {{- $localInitContainerEnv := omit $context.Values._tmpYAMLinitContainer "env" }}
{{- $_ := set $localInitContainerEnv "env" $initContainerEnv }} {{- $_ := set $localInitContainerEnv "env" $tmpinitcontainerEnv.env }}
{{ $initContainerList := append $context.Values.__tmpYAMLinitContainers $localInitContainerEnv }} {{ $initContainerList := append $context.Values.__tmpYAMLinitContainers $localInitContainerEnv }}
{{ $_ := set $context.Values "__tmpYAMLinitContainers" $initContainerList }} {{ $_ := set $context.Values "__tmpYAMLinitContainers" $initContainerList }}
{{ end }} {{ end }}

View File

@ -204,6 +204,15 @@ conf:
journal: journal:
type: directory type: directory
location: /var/lib/openstack-helm/ceph/osd/journal-one location: /var/lib/openstack-helm/ceph/osd/journal-one
# - data:
# type: bluestore
# location: /dev/sdb
# Separate block devices may be used for block.db and/or block.wal
# Without these values they will be co-located on the data volume
# block_db: /dev/sdc
# block_wal: /dev/sdc
# - data: # - data:
# type: block-logical # type: block-logical
# location: /dev/sdd # location: /dev/sdd