diff --git a/ceph-client/templates/bin/pool/_init.sh.tpl b/ceph-client/templates/bin/pool/_init.sh.tpl
index bf8c44c65..6ce3d23cf 100644
--- a/ceph-client/templates/bin/pool/_init.sh.tpl
+++ b/ceph-client/templates/bin/pool/_init.sh.tpl
@@ -67,13 +67,6 @@ create_crushrule {{ .name }} {{ .crush_rule }} {{ .failure_domain }} {{ .device_
 {{- end }}
 {{- end }}
 
-function reweight_osds () {
-  for OSD_ID in $(ceph --cluster "${CLUSTER}" osd df | awk '$3 == "0" {print $1}'); do
-    OSD_WEIGHT=$(ceph --cluster "${CLUSTER}" osd df --format json-pretty| grep -A7 "\bosd.${OSD_ID}\b" | awk '/"kb"/{ gsub(",",""); d= $2/1073741824 ; r = sprintf("%.2f", d); print r }');
-    ceph --cluster "${CLUSTER}" osd crush reweight osd.${OSD_ID} ${OSD_WEIGHT};
-  done
-}
-
 function enable_autoscaling () {
   if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
     ceph mgr module enable pg_autoscaler
@@ -81,6 +74,28 @@ function enable_autoscaling () {
   fi
 }
 
+# Set every cluster-wide OSD flag listed in CLUSTER_SET_FLAGS (space separated).
+function set_cluster_flags () {
+  local flag
+  if [[ -n "${CLUSTER_SET_FLAGS}" ]]; then
+    # Word splitting is intentional: the variable holds a space-separated list.
+    for flag in ${CLUSTER_SET_FLAGS}; do
+      ceph osd set "${flag}"
+    done
+  fi
+}
+
+# Unset every cluster-wide OSD flag listed in CLUSTER_UNSET_FLAGS (space separated).
+function unset_cluster_flags () {
+  local flag
+  if [[ -n "${CLUSTER_UNSET_FLAGS}" ]]; then
+    # Word splitting is intentional: the variable holds a space-separated list.
+    for flag in ${CLUSTER_UNSET_FLAGS}; do
+      ceph osd unset "${flag}"
+    done
+  fi
+}
+
 function create_pool () {
   POOL_APPLICATION=$1
   POOL_NAME=$2
@@ -162,8 +177,6 @@ function manage_pool () {
   ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
 }
 
-reweight_osds
-
 {{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }}
 {{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }}
 {{ $targetQuota := .Values.conf.pool.target.quota | default 100 }}
@@ -175,6 +188,8 @@ if [[ -z "$(ceph osd versions | grep ceph\ version | grep -v nautilus)" ]]; then
 else
   cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
 fi
+set_cluster_flags
+unset_cluster_flags
 {{- range $pool := .Values.conf.pool.spec -}}
 {{- with $pool }}
 {{- if .crush_rule }}
diff --git a/ceph-client/templates/job-rbd-pool.yaml b/ceph-client/templates/job-rbd-pool.yaml
index 47c8bc947..351ef761d 100644
--- a/ceph-client/templates/job-rbd-pool.yaml
+++ b/ceph-client/templates/job-rbd-pool.yaml
@@ -52,6 +52,10 @@ spec:
               value: "ceph"
             - name: ENABLE_AUTOSCALER
               value: {{ .Values.conf.features.pg_autoscaler | quote }}
+            - name: CLUSTER_SET_FLAGS
+              value: {{ .Values.conf.features.cluster_flags.set | quote }}
+            - name: CLUSTER_UNSET_FLAGS
+              value: {{ .Values.conf.features.cluster_flags.unset | quote }}
           command:
             - /tmp/pool-init.sh
           volumeMounts:
diff --git a/ceph-client/values.yaml b/ceph-client/values.yaml
index f78e28f71..a94df4d8f 100644
--- a/ceph-client/values.yaml
+++ b/ceph-client/values.yaml
@@ -255,6 +255,10 @@ conf:
     mds: true
     mgr: true
     pg_autoscaler: true
+    cluster_flags:
+      # List of flags to set or unset separated by spaces
+      set: ""
+      unset: ""
   pool:
     # NOTE(portdirect): this drives a simple approximation of
     # https://ceph.com/pgcalc/, the `target.osd` key should be set to match the
diff --git a/ceph-osd/templates/bin/osd/_directory.sh.tpl b/ceph-osd/templates/bin/osd/_directory.sh.tpl
index 69d8a3172..18385d1f1 100644
--- a/ceph-osd/templates/bin/osd/_directory.sh.tpl
+++ b/ceph-osd/templates/bin/osd/_directory.sh.tpl
@@ -64,9 +64,6 @@ if [[ -n "$(find /var/lib/ceph/osd -type d -empty ! -name "lost+found")" ]]; th
   # init data directory
   ceph-osd -i ${OSD_ID} --mkfs --osd-uuid ${UUID} --mkjournal --osd-journal ${OSD_JOURNAL} --setuser ceph --setgroup ceph
   # add the osd to the crush map
-  # NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
-  OSD_WEIGHT=0
-
   # NOTE(supamatt): add or move the OSD's CRUSH location
   crush_location
 fi
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl
index 68e150efb..7ccb8e1fe 100644
--- a/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl
+++ b/ceph-osd/templates/bin/osd/ceph-volume/_block.sh.tpl
@@ -83,8 +83,8 @@ else
     --no-systemd ${OSD_ID} ${OSD_FSID}
 fi
 
-# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
-OSD_WEIGHT=0
+# NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
+OSD_WEIGHT=$(get_osd_crush_weight_from_device "${OSD_DEVICE}")
 
 # NOTE(supamatt): add or move the OSD's CRUSH location
 crush_location
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl
index 80a16bbeb..a3110ac56 100644
--- a/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl
+++ b/ceph-osd/templates/bin/osd/ceph-volume/_bluestore.sh.tpl
@@ -89,8 +89,8 @@ else
   fi
 fi
 
-# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
-OSD_WEIGHT=0
+# NOTE(stevetaylor): Set the OSD's crush weight (use noin flag to prevent rebalancing if necessary)
+OSD_WEIGHT=$(get_osd_crush_weight_from_device "${OSD_DEVICE}")
 
 # NOTE(supamatt): add or move the OSD's CRUSH location
 crush_location
diff --git a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl
index 39adc1bd8..2a8394616 100644
--- a/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl
+++ b/ceph-osd/templates/bin/osd/ceph-volume/_common.sh.tpl
@@ -289,6 +289,34 @@ function get_lvm_tag_from_device {
   get_lvm_tag_from_volume ${logical_volume} ${tag}
 }
 
+# Helper function to get the size of a logical volume
+# Prints the size of the logical volume backing device $1 in whole KiB
+function get_lv_size_from_device {
+  local device="$1"
+  local logical_volume
+  logical_volume="$(get_lv_from_device "${device}")"
+
+  # --units k --nosuffix reports KiB as a bare number; drop the fractional part
+  lvs "${logical_volume}" -o LV_SIZE --noheadings --units k --nosuffix | xargs | cut -d'.' -f1
+}
+
+# Helper function to get the crush weight for an osd device
+# Prints the size of device $1 (plus BLOCK_DB_SIZE when set) in TiB with two decimals
+function get_osd_crush_weight_from_device {
+  local device="$1"
+  local lv_size db_bytes
+  lv_size="$(get_lv_size_from_device "${device}")" # KiB
+
+  if [[ -n "${BLOCK_DB_SIZE}" ]]; then
+    # Strip the trailing "B" so numfmt accepts the value, then convert to bytes
+    db_bytes="$(numfmt --from=iec "${BLOCK_DB_SIZE%%B*}")"
+    # Integer shell arithmetic: awk may print a fraction or an exponent, which $(( )) rejects
+    lv_size=$((lv_size + db_bytes / 1024)) # KiB
+  fi
+
+  echo "${lv_size}" | awk '{printf("%.2f\n", $1/1073741824)}' # KiB to TiB
+}
+
 # Helper function to get a cluster FSID from a physical device
 function get_cluster_fsid_from_device {
   device="$1"