[ceph-osd] BlueStore support for ceph-osd
This adds BlueStore support for the ceph-osd chart so that OSDs may be deployed using BlueStore with optional --block.db and --block.wal parameters. Co-Authored-By: Chinasubbareddy Mallavarapu <chinasubba.reddy@att.com> Change-Id: Ifbae8331b595c15c168ccd6e93b00ff054a607bc
This commit is contained in:
parent
010fc1fc65
commit
3c55e7773b
74
ceph-osd/templates/bin/osd/_bluestore.sh.tpl
Normal file
74
ceph-osd/templates/bin/osd/_bluestore.sh.tpl
Normal file
@ -0,0 +1,74 @@
|
||||
#!/bin/bash
|
||||
|
||||
{{/*
|
||||
Copyright 2017 The Openstack-Helm Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
source /tmp/osd-common.sh
|
||||
|
||||
set -ex
|
||||
|
||||
: "${OSD_SOFT_FORCE_ZAP:=1}"
|
||||
|
||||
export OSD_DEVICE=$(readlink -f ${STORAGE_LOCATION})
|
||||
|
||||
if [[ -z "${OSD_DEVICE}" ]];then
|
||||
echo "ERROR- You must provide a device to build your OSD ie: /dev/sdb"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ ! -b "${OSD_DEVICE}" ]]; then
|
||||
echo "ERROR- The device pointed by OSD_DEVICE ${OSD_DEVICE} doesn't exist !"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
CEPH_DISK_OPTIONS=""
|
||||
CEPH_OSD_OPTIONS=""
|
||||
DATA_UUID=$(blkid -o value -s PARTUUID ${OSD_DEVICE}*1)
|
||||
|
||||
udev_settle
|
||||
|
||||
DATA_PART=$(dev_part ${OSD_DEVICE} 1)
|
||||
MOUNTED_PART=${DATA_PART}
|
||||
|
||||
ceph-disk -v \
|
||||
--setuser ceph \
|
||||
--setgroup disk \
|
||||
activate ${CEPH_DISK_OPTIONS} \
|
||||
--no-start-daemon ${DATA_PART}
|
||||
|
||||
OSD_ID=$(grep "${MOUNTED_PART}" /proc/mounts | awk '{print $2}' | grep -oh '[0-9]*')
|
||||
|
||||
OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
|
||||
OSD_KEYRING="${OSD_PATH}/keyring"
|
||||
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
|
||||
OSD_WEIGHT=0
|
||||
# NOTE(supamatt): add or move the OSD's CRUSH location
|
||||
crush_location
|
||||
|
||||
|
||||
# NOTE(supamatt): Just in case permissions do not align up, we recursively set them correctly.
|
||||
if [ $(stat -c%U ${OSD_PATH}) != ceph ]; then
|
||||
chown -R ceph. ${OSD_PATH};
|
||||
fi
|
||||
|
||||
exec /usr/bin/ceph-osd \
|
||||
--cluster ${CLUSTER} \
|
||||
${CEPH_OSD_OPTIONS} \
|
||||
-f \
|
||||
-i ${OSD_ID} \
|
||||
--setuser ceph \
|
||||
--setgroup disk & echo $! > /run/ceph-osd.pid
|
||||
wait
|
@ -142,6 +142,43 @@ function dev_part {
|
||||
fi
|
||||
}
|
||||
|
||||
#######################################
# Delete the journal, block.db and block.wal partitions that an OSD data
# directory points at.
#
# Each of "journal", "block.db" and "block.wal" under the given mount point
# is examined; when it is a symlink, its resolved target is split into a
# parent disk and a partition number and that partition is removed with
# `sgdisk -d`.
#
# Arguments: $1 - mount point of the (temporarily mounted) OSD data partition
# Outputs:   a warning on stderr for a link whose target does not end in a
#            partition number (that link is skipped, nothing is deleted)
# Returns:   status of the last sgdisk call, or 0 when nothing was deleted
#######################################
function zap_extra_partitions {
  local mountpoint=${1}
  local link target disk part idx
  local -a disks=()
  local -a parts=()
  # <anything ending in a non-digit><trailing digits> -> disk prefix, partition
  local part_re='^(.*[^0-9])([0-9]+)$'
  # disk names that themselves end in a digit use a "p" separator
  local psep_re='^(.*[0-9])p$'

  # Discover journal, block.db, and block.wal partitions first before deleting anything
  # If the partitions are on the same disk, deleting one can affect discovery of the other(s)
  for link in journal block.db block.wal; do
    [ -L "${mountpoint}/${link}" ] || continue
    target=$(readlink -m "${mountpoint}/${link}")

    if [[ "${target}" =~ ${part_re} ]]; then
      disk=${BASH_REMATCH[1]}
      part=${BASH_REMATCH[2]}
      # Only the trailing digits are the partition number. Digits inside the
      # disk name (nvme0n1p2, mmcblk0p1) belong to the disk, and the "p"
      # separator is dropped only when it follows a digit, so /dev/sdp1
      # still yields /dev/sdp.
      if [[ "${disk}" =~ ${psep_re} ]]; then
        disk=${BASH_REMATCH[1]}
      fi
      disks+=("${disk}")
      parts+=("${part}")
    else
      echo "WARNING- ${mountpoint}/${link} -> ${target} is not a partition, skipping" >&2
    fi
  done

  # Delete any discovered journal, block.db, and block.wal partitions
  for idx in "${!disks[@]}"; do
    sgdisk -d "${parts[${idx}]}" "${disks[${idx}]}"
  done
}
|
||||
|
||||
function disk_zap {
|
||||
# Run all the commands that ceph-disk zap uses to clear a disk
|
||||
local device=${1}
|
||||
@ -154,11 +191,22 @@ function disk_zap {
|
||||
|
||||
function udev_settle {
|
||||
partprobe "${OSD_DEVICE}"
|
||||
if [ "x$JOURNAL_TYPE" == "xblock-logical" ]; then
|
||||
if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
|
||||
if [ ! -z "$BLOCK_DB" ]; then
|
||||
partprobe "${BLOCK_DB}"
|
||||
fi
|
||||
if [ ! -z "$BLOCK_WAL" ] && [ "$BLOCK_WAL" != "$BLOCK_DB" ]; then
|
||||
partprobe "${BLOCK_WAL}"
|
||||
fi
|
||||
else
|
||||
if [ "x$JOURNAL_TYPE" == "xblock-logical" ] && [ ! -z "$OSD_JOURNAL" ]; then
|
||||
OSD_JOURNAL=$(readlink -f ${OSD_JOURNAL})
|
||||
if [ ! -z "$OSD_JOURNAL" ]; then
|
||||
local JDEV=$(echo ${OSD_JOURNAL} | sed 's/[0-9]//g')
|
||||
partprobe "${JDEV}"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
# watch the udev event queue, and exit if all current events are handled
|
||||
udevadm settle --timeout=600
|
||||
|
||||
|
@ -24,6 +24,10 @@ source /tmp/osd-common.sh
|
||||
# We do not want to zap the journal disk. Tracking this option separately.
|
||||
: "${JOURNAL_FORCE_ZAP:=0}"
|
||||
|
||||
if [ "x${STORAGE_TYPE%-*}" == "xbluestore" ]; then
|
||||
export OSD_BLUESTORE=1
|
||||
fi
|
||||
|
||||
if [ "x${STORAGE_TYPE%-*}" == "xdirectory" ]; then
|
||||
export OSD_DEVICE="/var/lib/ceph/osd"
|
||||
else
|
||||
@ -107,22 +111,25 @@ function osd_disk_prepare {
|
||||
fi
|
||||
if [ -f "${tmpmnt}/ceph_fsid" ]; then
|
||||
osdFSID=$(cat "${tmpmnt}/ceph_fsid")
|
||||
umount ${tmpmnt}
|
||||
if [ ${osdFSID} != ${cephFSID} ]; then
|
||||
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a different (or old) ceph cluster."
|
||||
echo "The OSD FSID is ${osdFSID} while this cluster is ${cephFSID}"
|
||||
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
|
||||
zap_extra_partitions ${tmpmnt}
|
||||
umount ${tmpmnt}
|
||||
disk_zap ${OSD_DEVICE}
|
||||
else
|
||||
umount ${tmpmnt}
|
||||
echo "It looks like ${OSD_DEVICE} is an OSD belonging to a this ceph cluster."
|
||||
echo "OSD_FORCE_REPAIR is set, but will be ignored and the device will not be zapped."
|
||||
echo "Moving on, trying to activate the OSD now."
|
||||
return
|
||||
fi
|
||||
else
|
||||
umount ${tmpmnt}
|
||||
echo "It looks like ${OSD_DEVICE} has a ceph data partition but no FSID."
|
||||
echo "Because OSD_FORCE_REPAIR was set, we will zap this device."
|
||||
zap_extra_partitions ${tmpmnt}
|
||||
umount ${tmpmnt}
|
||||
disk_zap ${OSD_DEVICE}
|
||||
fi
|
||||
else
|
||||
@ -145,22 +152,33 @@ function osd_disk_prepare {
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
|
||||
if [ "${OSD_BLUESTORE:-0}" -eq 1 ]; then
|
||||
CLI_OPTS="${CLI_OPTS} --bluestore"
|
||||
|
||||
if [ ! -z "$BLOCK_DB" ]; then
|
||||
CLI_OPTS="${CLI_OPTS} --block.db ${BLOCK_DB}"
|
||||
fi
|
||||
|
||||
if [ ! -z "$BLOCK_WAL" ]; then
|
||||
CLI_OPTS="${CLI_OPTS} --block.wal ${BLOCK_WAL}"
|
||||
fi
|
||||
|
||||
CLI_OPTS="${CLI_OPTS} ${OSD_DEVICE}"
|
||||
else
|
||||
# we only care about journals for filestore.
|
||||
osd_journal_prepare
|
||||
|
||||
CLI_OPTS="${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE}"
|
||||
|
||||
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
|
||||
CLI_OPTS="${CLI_OPTS} --journal-file"
|
||||
else
|
||||
OSD_JOURNAL=''
|
||||
CLI_OPTS="${CLI_OPTS} --bluestore"
|
||||
CLI_OPTS="${CLI_OPTS} ${OSD_JOURNAL}"
|
||||
fi
|
||||
fi
|
||||
|
||||
udev_settle
|
||||
|
||||
if [ "x$JOURNAL_TYPE" == "xdirectory" ]; then
|
||||
ceph-disk -v prepare ${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE} --journal-file
|
||||
else
|
||||
ceph-disk -v prepare ${CLI_OPTS} --journal-uuid ${OSD_JOURNAL_UUID} ${OSD_DEVICE} ${OSD_JOURNAL}
|
||||
fi
|
||||
|
||||
ceph-disk -v prepare ${CLI_OPTS}
|
||||
}
|
||||
|
||||
function osd_journal_create {
|
||||
|
@ -36,6 +36,8 @@ data:
|
||||
{{ tuple "bin/osd/_directory.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-block.sh: |
|
||||
{{ tuple "bin/osd/_block.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-bluestore.sh: |
|
||||
{{ tuple "bin/osd/_bluestore.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-init.sh: |
|
||||
{{ tuple "bin/osd/_init.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
osd-check.sh: |
|
||||
|
@ -280,6 +280,10 @@ spec:
|
||||
mountPath: /tmp/osd-block.sh
|
||||
subPath: osd-block.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-bluestore.sh
|
||||
subPath: osd-bluestore.sh
|
||||
readOnly: true
|
||||
- name: ceph-osd-bin
|
||||
mountPath: /tmp/osd-check.sh
|
||||
subPath: osd-check.sh
|
||||
|
@ -303,6 +303,7 @@ limitations under the License.
|
||||
{{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }}
|
||||
{{ end }}
|
||||
|
||||
{{- if ne $v.data.type "bluestore" }}
|
||||
{{ if eq $v.journal.type "directory" }}
|
||||
{{ $journalDirVolume := dict "hostPath" (dict "path" $v.journal.location) "name" "journal" }}
|
||||
{{ $newPodDataVols := append $context.Values.__tmpPodVols $journalDirVolume }}
|
||||
@ -312,6 +313,11 @@ limitations under the License.
|
||||
{{ $newPodDataVols := append $context.Values.__tmpPodVols $dataDirVolume }}
|
||||
{{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }}
|
||||
{{ end }}
|
||||
{{ else }}
|
||||
{{ $dataDirVolume := dict "emptyDir" dict "name" "journal" }}
|
||||
{{ $newPodDataVols := append $context.Values.__tmpPodVols $dataDirVolume }}
|
||||
{{- $_ := set $context.Values "__tmpPodVols" $newPodDataVols }}
|
||||
{{- end }}
|
||||
|
||||
{{- if not $context.Values.__tmpYAML.spec }}{{- $_ := set $context.Values.__tmpYAML "spec" dict }}{{- end }}
|
||||
{{- if not $context.Values.__tmpYAML.spec.template }}{{- $_ := set $context.Values.__tmpYAML.spec "template" dict }}{{- end }}
|
||||
@ -330,9 +336,27 @@ limitations under the License.
|
||||
{{- if empty $context.Values._tmpYAMLcontainer.env }}
|
||||
{{- $_ := set $context.Values._tmpYAMLcontainer "env" ( list ) }}
|
||||
{{- end }}
|
||||
{{- $tmpcontainerEnv := omit $context.Values._tmpYAMLcontainer "env" }}
|
||||
{{- if eq $v.data.type "bluestore" }}
|
||||
{{- if and $v.block_db $v.block_wal}}
|
||||
{{ $containerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
|
||||
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
|
||||
{{- else if $v.block_db }}
|
||||
{{ $containerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db) }}
|
||||
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
|
||||
{{- else if $v.block_wal }}
|
||||
{{ $containerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
|
||||
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
|
||||
{{ else }}
|
||||
{{ $containerEnv := prepend (prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location) }}
|
||||
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
|
||||
{{- end }}
|
||||
{{ else }}
|
||||
{{ $containerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLcontainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }}
|
||||
{{- $_ := set $tmpcontainerEnv "env" $containerEnv }}
|
||||
{{- end }}
|
||||
{{- $localInitContainerEnv := omit $context.Values._tmpYAMLcontainer "env" }}
|
||||
{{- $_ := set $localInitContainerEnv "env" $containerEnv }}
|
||||
{{- $_ := set $localInitContainerEnv "env" $tmpcontainerEnv.env }}
|
||||
{{ $containerList := append $context.Values.__tmpYAMLcontainers $localInitContainerEnv }}
|
||||
{{ $_ := set $context.Values "__tmpYAMLcontainers" $containerList }}
|
||||
{{ end }}
|
||||
@ -341,9 +365,27 @@ limitations under the License.
|
||||
{{- $_ := set $context.Values "__tmpYAMLinitContainers" list }}
|
||||
{{- range $podContainer := $context.Values.__daemonset_yaml.spec.template.spec.initContainers }}
|
||||
{{- $_ := set $context.Values "_tmpYAMLinitContainer" $podContainer }}
|
||||
{{ $initContainerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }}
|
||||
{{- $tmpinitcontainerEnv := omit $context.Values._tmpYAMLinitContainer "env" }}
|
||||
{{- if eq $v.data.type "bluestore" }}
|
||||
{{- if and $v.block_db $v.block_wal}}
|
||||
{{ $initcontainerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
|
||||
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
|
||||
{{- else if $v.block_db }}
|
||||
{{ $initcontainerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_DB" "value" $v.block_db) }}
|
||||
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
|
||||
{{- else if $v.block_wal }}
|
||||
{{ $initcontainerEnv := prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "BLOCK_WAL" "value" $v.block_wal) }}
|
||||
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
|
||||
{{ else }}
|
||||
{{ $initcontainerEnv := prepend (prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location) }}
|
||||
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
|
||||
{{- end }}
|
||||
{{ else }}
|
||||
{{ $initcontainerEnv := prepend (prepend (prepend ( prepend (index $context.Values._tmpYAMLinitContainer "env") (dict "name" "STORAGE_TYPE" "value" $v.data.type)) (dict "name" "JOURNAL_TYPE" "value" $v.journal.type)) (dict "name" "STORAGE_LOCATION" "value" $v.data.location)) (dict "name" "JOURNAL_LOCATION" "value" $v.journal.location) }}
|
||||
{{- $_ := set $tmpinitcontainerEnv "env" $initcontainerEnv }}
|
||||
{{- end }}
|
||||
{{- $localInitContainerEnv := omit $context.Values._tmpYAMLinitContainer "env" }}
|
||||
{{- $_ := set $localInitContainerEnv "env" $initContainerEnv }}
|
||||
{{- $_ := set $localInitContainerEnv "env" $tmpinitcontainerEnv.env }}
|
||||
{{ $initContainerList := append $context.Values.__tmpYAMLinitContainers $localInitContainerEnv }}
|
||||
{{ $_ := set $context.Values "__tmpYAMLinitContainers" $initContainerList }}
|
||||
{{ end }}
|
||||
|
@ -204,6 +204,15 @@ conf:
|
||||
journal:
|
||||
type: directory
|
||||
location: /var/lib/openstack-helm/ceph/osd/journal-one
|
||||
|
||||
# - data:
|
||||
# type: bluestore
|
||||
# location: /dev/sdb
|
||||
# Separate block devices may be used for block.db and/or block.wal
|
||||
# Without these values they will be co-located on the data volume
|
||||
# block_db: /dev/sdc
|
||||
# block_wal: /dev/sdc
|
||||
|
||||
# - data:
|
||||
# type: block-logical
|
||||
# location: /dev/sdd
|
||||
|
Loading…
Reference in New Issue
Block a user