[ceph-osd, ceph-client] Weight OSDs as they are added

Currently OSDs are added by the ceph-osd chart with zero weight
and they get reweighted to proper weights in the ceph-client chart
after all OSDs have been deployed. This causes a problem when a
deployment is partially completed and additional OSDs are added
later. In this case the ceph-client chart has already run and the
new OSDs don't ever get weighted correctly. This change weights
OSDs properly as they are deployed instead. As noted in the
script, the noin flag may be set during deployment, if necessary,
to prevent rebalancing as OSDs are added.

Added the ability to set and unset Ceph cluster flags in the
ceph-client chart.

Change-Id: Iac50352c857d874f3956776c733d09e0034a0285
This commit is contained in:
Steve Taylor 2020-05-13 14:42:30 -06:00
parent 3691681d84
commit f59cb11932
6 changed files with 51 additions and 13 deletions

View File

@ -67,13 +67,6 @@ create_crushrule {{ .name }} {{ .crush_rule }} {{ .failure_domain }} {{ .device_
{{- end }} {{- end }}
{{- end }} {{- end }}
# Give every OSD that `ceph osd df` lists with a third column of "0" a CRUSH
# weight derived from its reported "kb" value.
# Globals: CLUSTER (read); OSD_ID and OSD_WEIGHT (written on each iteration).
function reweight_osds () {
  # First pass: collect the IDs (column 1) of the rows whose column 3 is "0"
  # (presumably the WEIGHT column -- confirm against `ceph osd df` output).
  for OSD_ID in $(ceph --cluster "${CLUSTER}" osd df \
                    | awk '$3 == "0" {print $1}'); do
    # Second pass: locate the osd.N entry in the JSON dump, look at the 7
    # lines following it for the "kb" field, strip commas, and divide by
    # 1073741824 (2^30), formatted with two decimals.
    OSD_WEIGHT=$(ceph --cluster "${CLUSTER}" osd df --format json-pretty \
                   | grep -A7 "\bosd.${OSD_ID}\b" \
                   | awk '/"kb"/{ gsub(",",""); d= $2/1073741824 ; r = sprintf("%.2f", d); print r }')
    ceph --cluster "${CLUSTER}" osd crush reweight osd.${OSD_ID} ${OSD_WEIGHT}
  done
}
function enable_autoscaling () { function enable_autoscaling () {
if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
ceph mgr module enable pg_autoscaler ceph mgr module enable pg_autoscaler
@ -81,6 +74,22 @@ function enable_autoscaling () {
fi fi
} }
# Set every Ceph OSD cluster flag listed in CLUSTER_SET_FLAGS.
# Globals:   CLUSTER_SET_FLAGS (read) - whitespace-separated flag names;
#            an empty or unset value makes this a no-op.
# Effects:   runs `ceph osd set <flag>` once per listed flag, in order.
function set_cluster_flags () {
  # Keep the loop variable out of the caller's scope.
  local flag
  # Word splitting of the list is intentional, so the expansion stays
  # unquoted; the :- default keeps an unset variable safe under `set -u`.
  for flag in ${CLUSTER_SET_FLAGS:-}; do
    ceph osd set "${flag}"
  done
}
# Clear every Ceph OSD cluster flag listed in CLUSTER_UNSET_FLAGS.
# Globals:   CLUSTER_UNSET_FLAGS (read) - whitespace-separated flag names;
#            an empty or unset value makes this a no-op.
# Effects:   runs `ceph osd unset <flag>` once per listed flag, in order.
function unset_cluster_flags () {
  # Keep the loop variable out of the caller's scope.
  local flag
  # Word splitting of the list is intentional, so the expansion stays
  # unquoted; the :- default keeps an unset variable safe under `set -u`.
  for flag in ${CLUSTER_UNSET_FLAGS:-}; do
    ceph osd unset "${flag}"
  done
}
function create_pool () { function create_pool () {
POOL_APPLICATION=$1 POOL_APPLICATION=$1
POOL_NAME=$2 POOL_NAME=$2
@ -162,8 +171,6 @@ function manage_pool () {
ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
} }
reweight_osds
{{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }} {{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }}
{{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }} {{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }}
{{ $targetQuota := .Values.conf.pool.target.quota | default 100 }} {{ $targetQuota := .Values.conf.pool.target.quota | default 100 }}
@ -175,6 +182,8 @@ if [[ -z "$(ceph osd versions | grep ceph\ version | grep -v nautilus)" ]]; then
else else
cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec) cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
fi fi
set_cluster_flags
unset_cluster_flags
{{- range $pool := .Values.conf.pool.spec -}} {{- range $pool := .Values.conf.pool.spec -}}
{{- with $pool }} {{- with $pool }}
{{- if .crush_rule }} {{- if .crush_rule }}

View File

@ -52,6 +52,10 @@ spec:
value: "ceph" value: "ceph"
- name: ENABLE_AUTOSCALER - name: ENABLE_AUTOSCALER
value: {{ .Values.conf.features.pg_autoscaler | quote }} value: {{ .Values.conf.features.pg_autoscaler | quote }}
- name: CLUSTER_SET_FLAGS
value: {{ .Values.conf.features.cluster_flags.set | quote }}
- name: CLUSTER_UNSET_FLAGS
value: {{ .Values.conf.features.cluster_flags.unset | quote }}
command: command:
- /tmp/pool-init.sh - /tmp/pool-init.sh
volumeMounts: volumeMounts:

View File

@ -254,6 +254,10 @@ conf:
mds: true mds: true
mgr: true mgr: true
pg_autoscaler: true pg_autoscaler: true
cluster_flags:
# List of flags to set or unset separated by spaces
set: ""
unset: ""
pool: pool:
#NOTE(portdirect): this drives a simple approximation of #NOTE(portdirect): this drives a simple approximation of
# https://ceph.com/pgcalc/, the `target.osd` key should be set to match the # https://ceph.com/pgcalc/, the `target.osd` key should be set to match the

View File

@ -83,8 +83,8 @@ else
--no-systemd ${OSD_ID} ${OSD_FSID} --no-systemd ${OSD_ID} ${OSD_FSID}
fi fi
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing # NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
OSD_WEIGHT=0 OSD_WEIGHT=$(get_osd_crush_weight_from_device ${OSD_DEVICE})
# NOTE(supamatt): add or move the OSD's CRUSH location # NOTE(supamatt): add or move the OSD's CRUSH location
crush_location crush_location

View File

@ -89,8 +89,8 @@ else
fi fi
fi fi
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing # NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
OSD_WEIGHT=0 OSD_WEIGHT=$(get_osd_crush_weight_from_device ${OSD_DEVICE})
# NOTE(supamatt): add or move the OSD's CRUSH location # NOTE(supamatt): add or move the OSD's CRUSH location
crush_location crush_location

View File

@ -276,6 +276,27 @@ function get_lvm_tag_from_device {
get_lvm_tag_from_volume ${logical_volume} ${tag} get_lvm_tag_from_volume ${logical_volume} ${tag}
} }
# Helper function to get the size of a logical volume
# Arguments: $1 - device whose logical volume is resolved with
#                 get_lv_from_device
# Outputs:   the LV size as reported by `lvs --units k --nosuffix`, with
#            everything from the first '.' onward removed (whole units)
# Returns:   1, with a message on stderr, when no logical volume is found
function get_lv_size_from_device {
  local device="$1"
  local logical_volume
  logical_volume="$(get_lv_from_device "${device}")"
  # Without a volume argument lvs would report on every LV it can see, so an
  # empty lookup must not fall through to the lvs call.
  if [[ -z "${logical_volume}" ]]; then
    echo "No logical volume found for device ${device}" >&2
    return 1
  fi
  # xargs trims the padding lvs puts around the value; cut drops the fraction.
  lvs "${logical_volume}" -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1
}
# Helper function to get the crush weight for an osd device
# Globals:   BLOCK_DB_SIZE (read) - optional size string such as "5GB" or
#            "5GiB"; when non-empty it is added to the logical volume size
# Arguments: $1 - OSD device, passed on to get_lv_size_from_device
# Outputs:   the combined size converted from KiB to TiB, two decimals
# Returns:   1, with a message on stderr, when BLOCK_DB_SIZE cannot be parsed
function get_osd_crush_weight_from_device {
  local device="$1"
  local lv_size db_size db_bytes
  # A failed or empty size lookup yields a weight of 0.00 rather than an error.
  lv_size="$(get_lv_size_from_device "${device}")" || true
  lv_size="${lv_size:-0}" # KiB
  if [[ -n "${BLOCK_DB_SIZE:-}" ]]; then
    # Keep the text before the first 'B' ("5GB" -> "5G"), then drop a trailing
    # 'i' ("5Gi" -> "5G") so numfmt's iec parser accepts both spellings.
    db_size="${BLOCK_DB_SIZE%%B*}"
    db_size="${db_size%i}"
    if ! db_bytes="$(numfmt --from=iec "${db_size}")"; then
      echo "Unable to parse BLOCK_DB_SIZE '${BLOCK_DB_SIZE}'" >&2
      return 1
    fi
    # Integer division keeps the value usable in shell arithmetic even when
    # the byte count is not a multiple of 1024.
    lv_size=$((lv_size + db_bytes / 1024)) # KiB
  fi
  awk -v kib="${lv_size}" 'BEGIN {printf("%.2f\n", kib/1073741824)}' # KiB to TiB
}
# Helper function to get a cluster FSID from a physical device # Helper function to get a cluster FSID from a physical device
function get_cluster_fsid_from_device { function get_cluster_fsid_from_device {
device="$1" device="$1"