[ceph-osd, ceph-client] Weight OSDs as they are added

Currently OSDs are added by the ceph-osd chart with zero weight
and they get reweighted to proper weights in the ceph-client chart
after all OSDs have been deployed. This causes a problem when a
deployment is partially completed and additional OSDs are added
later. In this case the ceph-client chart has already run and the
new OSDs don't ever get weighted correctly. This change weights
OSDs properly as they are deployed instead. As noted in the
script, the noin flag may be set during the deployment, if
necessary, to prevent rebalancing as OSDs are added.

Added the ability to set and unset Ceph cluster flags in the
ceph-client chart.

Change-Id: Ic9a3d8d5625af49b093976a855dd66e5705d2c29
This commit is contained in:
Stephen Taylor 2020-06-03 16:03:59 +00:00 committed by chinasubbareddy mallavarapu
parent 26ee617d77
commit 59b825ae48
7 changed files with 51 additions and 16 deletions

View File

@ -67,13 +67,6 @@ create_crushrule {{ .name }} {{ .crush_rule }} {{ .failure_domain }} {{ .device_
{{- end }}
{{- end }}
# Reweight any OSDs that currently have a zero CRUSH weight so they can
# start receiving data. The target weight is derived from the OSD's size
# reported by 'ceph osd df' (the "kb" field), converted from KiB to TiB.
function reweight_osds () {
# NOTE(review): assumes column 3 of 'ceph osd df' is the CRUSH weight and
# column 1 the OSD id — confirm against the installed Ceph release's output
for OSD_ID in $(ceph --cluster "${CLUSTER}" osd df | awk '$3 == "0" {print $1}'); do
# Pull the "kb" value that follows the osd.<id> entry in the json-pretty
# output (assumed to appear within 7 lines of it) and divide by 1024^3
# to express the weight in TiB with two decimal places.
OSD_WEIGHT=$(ceph --cluster "${CLUSTER}" osd df --format json-pretty| grep -A7 "\bosd.${OSD_ID}\b" | awk '/"kb"/{ gsub(",",""); d= $2/1073741824 ; r = sprintf("%.2f", d); print r }');
ceph --cluster "${CLUSTER}" osd crush reweight osd.${OSD_ID} ${OSD_WEIGHT};
done
}
function enable_autoscaling () {
if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
ceph mgr module enable pg_autoscaler
@ -81,6 +74,22 @@ function enable_autoscaling () {
fi
}
# Set any Ceph cluster flags (e.g. noin, noout) listed in the
# space-separated CLUSTER_SET_FLAGS environment variable. No-op when the
# variable is empty or unset.
function set_cluster_flags () {
  if [[ -n "${CLUSTER_SET_FLAGS}" ]]; then
    # Word-splitting of the unquoted variable is intentional here: it turns
    # the space-separated list into individual flags.
    for flag in ${CLUSTER_SET_FLAGS}; do
      # Pass --cluster explicitly, consistent with every other ceph
      # invocation in this script.
      ceph --cluster "${CLUSTER}" osd set "${flag}"
    done
  fi
}
# Unset any Ceph cluster flags (e.g. noin, noout) listed in the
# space-separated CLUSTER_UNSET_FLAGS environment variable. No-op when the
# variable is empty or unset.
function unset_cluster_flags () {
  if [[ -n "${CLUSTER_UNSET_FLAGS}" ]]; then
    # Word-splitting of the unquoted variable is intentional here: it turns
    # the space-separated list into individual flags.
    for flag in ${CLUSTER_UNSET_FLAGS}; do
      # Pass --cluster explicitly, consistent with every other ceph
      # invocation in this script.
      ceph --cluster "${CLUSTER}" osd unset "${flag}"
    done
  fi
}
function create_pool () {
POOL_APPLICATION=$1
POOL_NAME=$2
@ -162,8 +171,6 @@ function manage_pool () {
ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
}
reweight_osds
{{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }}
{{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }}
{{ $targetQuota := .Values.conf.pool.target.quota | default 100 }}
@ -175,6 +182,8 @@ if [[ -z "$(ceph osd versions | grep ceph\ version | grep -v nautilus)" ]]; then
else
cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
fi
set_cluster_flags
unset_cluster_flags
{{- range $pool := .Values.conf.pool.spec -}}
{{- with $pool }}
{{- if .crush_rule }}

View File

@ -52,6 +52,10 @@ spec:
value: "ceph"
- name: ENABLE_AUTOSCALER
value: {{ .Values.conf.features.pg_autoscaler | quote }}
- name: CLUSTER_SET_FLAGS
value: {{ .Values.conf.features.cluster_flags.set | quote }}
- name: CLUSTER_UNSET_FLAGS
value: {{ .Values.conf.features.cluster_flags.unset | quote }}
command:
- /tmp/pool-init.sh
volumeMounts:

View File

@ -255,6 +255,10 @@ conf:
mds: true
mgr: true
pg_autoscaler: true
cluster_flags:
# List of flags to set or unset separated by spaces
set: ""
unset: ""
pool:
# NOTE(portdirect): this drives a simple approximation of
# https://ceph.com/pgcalc/, the `target.osd` key should be set to match the

View File

@ -64,9 +64,6 @@ if [[ -n "$(find /var/lib/ceph/osd -type d -empty ! -name "lost+found")" ]]; th
# init data directory
ceph-osd -i ${OSD_ID} --mkfs --osd-uuid ${UUID} --mkjournal --osd-journal ${OSD_JOURNAL} --setuser ceph --setgroup ceph
# add the osd to the crush map
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
OSD_WEIGHT=0
# NOTE(supamatt): add or move the OSD's CRUSH location
crush_location
fi

View File

@ -83,8 +83,8 @@ else
--no-systemd ${OSD_ID} ${OSD_FSID}
fi
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
OSD_WEIGHT=0
# NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
OSD_WEIGHT=$(get_osd_crush_weight_from_device ${OSD_DEVICE})
# NOTE(supamatt): add or move the OSD's CRUSH location
crush_location

View File

@ -89,8 +89,8 @@ else
fi
fi
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
OSD_WEIGHT=0
# NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
OSD_WEIGHT=$(get_osd_crush_weight_from_device ${OSD_DEVICE})
# NOTE(supamatt): add or move the OSD's CRUSH location
crush_location

View File

@ -289,6 +289,27 @@ function get_lvm_tag_from_device {
get_lvm_tag_from_volume ${logical_volume} ${tag}
}
# Helper function to get the size of a logical volume
# Resolves the LV backing the given device and prints its size in whole
# KiB (the fractional part reported by lvs is discarded).
function get_lv_size_from_device {
  local dev lv
  dev="$1"
  lv="$(get_lv_from_device "${dev}")"
  # xargs trims surrounding whitespace; cut drops the decimal fraction
  lvs "${lv}" -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1
}
# Helper function to get the crush weight for an osd device
# Computes the weight (in TiB, two decimals) from the LV size plus, when
# BLOCK_DB_SIZE is set, the size of the separate DB device.
function get_osd_crush_weight_from_device {
  device="$1"
  lv_size="$(get_lv_size_from_device ${device})" # KiB
  if [[ ! -z "${BLOCK_DB_SIZE}" ]]; then
    # Truncate the KiB value to an integer with printf("%d", ...): a plain
    # $1/1024 can print a fractional number (when the byte size is not a
    # multiple of 1024), which would break the bash integer arithmetic below.
    db_size=$(echo "${BLOCK_DB_SIZE}" | cut -d'B' -f1 | numfmt --from=iec | awk '{printf("%d", $1/1024)}') # KiB
    lv_size=$((lv_size+db_size)) # KiB
  fi
  echo ${lv_size} | awk '{printf("%.2f\n", $1/1073741824)}' # KiB to TiB
}
# Helper function to get a cluster FSID from a physical device
function get_cluster_fsid_from_device {
device="$1"