Enable Ceph charts to be rack aware for CRUSH
Add support for a rack level CRUSH map. Rack level CRUSH support is enabled by using the "rack_replicated_rule" crush rule. Change-Id: I4df224f2821872faa2eddec2120832e9a22f4a7c
This commit is contained in:
parent
5d356f9265
commit
5ce9f2eb3b
@ -37,6 +37,10 @@ if ! ceph --cluster "${CLUSTER}" osd crush rule ls | grep -q "^same_host$"; then
|
||||
ceph --cluster "${CLUSTER}" osd crush rule create-simple same_host default osd
|
||||
fi
|
||||
|
||||
if ! ceph --cluster "${CLUSTER}" osd crush rule ls | grep -q "^rack_replicated_rule$"; then
|
||||
ceph --cluster "${CLUSTER}" osd crush rule create-simple rack_replicated_rule default rack
|
||||
fi
|
||||
|
||||
function reweight_osds () {
|
||||
for OSD_ID in $(ceph --cluster "${CLUSTER}" osd df | awk '$3 == "0" {print $1}'); do
|
||||
OSD_WEIGHT=$(ceph --cluster "${CLUSTER}" osd df --format json-pretty| grep -A7 "\bosd.${OSD_ID}\b" | awk '/"kb"/{ gsub(",",""); d= $2/1073741824 ; r = sprintf("%.2f", d); print r }');
|
||||
|
@ -128,9 +128,13 @@ conf:
|
||||
pg_per_osd: 100
|
||||
protected: true
|
||||
default:
|
||||
#NOTE(portdirect): this should be 'same_host' for a single node
|
||||
# cluster to be in a healthy state
|
||||
# NOTE(supamatt): Accepted values are:
|
||||
# same_host for a single node
|
||||
# replicated_rule for a multi node
|
||||
# rack_replicated_rule for a multi node in multiple (>=3) racks
|
||||
# Ceph cluster must be in a healthy state.
|
||||
crush_rule: replicated_rule
|
||||
|
||||
#NOTE(portdirect): this section describes the pools that will be managed by
|
||||
# the ceph pool management job, as it tunes the pgs and crush rule, based on
|
||||
# the above.
|
||||
|
@ -126,13 +126,26 @@ OSD_PATH="${OSD_PATH_BASE}-${OSD_ID}"
|
||||
OSD_KEYRING="${OSD_PATH}/keyring"
|
||||
# NOTE(supamatt): set the initial crush weight of the OSD to 0 to prevent automatic rebalancing
|
||||
OSD_WEIGHT=0
|
||||
ceph \
|
||||
--cluster "${CLUSTER}" \
|
||||
--name="osd.${OSD_ID}" \
|
||||
--keyring="${OSD_KEYRING}" \
|
||||
osd \
|
||||
crush \
|
||||
create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${CRUSH_LOCATION}
|
||||
if [ "x${CRUSH_RULE}" == "xrack_replicated_rule" ]; then
|
||||
RACK_LOCATION=$(echo rack_$(echo ${HOSTNAME} | cut -c ${RACK_REGEX}))
|
||||
CRUSH_LOCATION=$(echo "root=default rack=${RACK_LOCATION} host=${HOSTNAME}")
|
||||
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
|
||||
osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${CRUSH_LOCATION} || true
|
||||
RACK_LOCATION_CHECK=$(ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" osd find ${OSD_ID} | awk -F'"' '/rack/{print $4}')
|
||||
if [ "x${RACK_LOCATION_CHECK}" != x${RACK_LOCATION} ]; then
|
||||
# NOTE(supamatt): Manually move the buckets for previously configured CRUSH configurations
|
||||
# as create-or-move may not appropriately move them.
|
||||
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
|
||||
osd crush add-bucket ${RACK_LOCATION} rack || true
|
||||
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
|
||||
osd crush move ${RACK_LOCATION} root=default || true
|
||||
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
|
||||
osd crush move ${HOSTNAME} rack=${RACK_LOCATION} || true
|
||||
fi
|
||||
else
|
||||
ceph --cluster "${CLUSTER}" --name="osd.${OSD_ID}" --keyring="${OSD_KEYRING}" \
|
||||
osd crush create-or-move -- "${OSD_ID}" "${OSD_WEIGHT}" ${CRUSH_LOCATION} || true
|
||||
fi
|
||||
|
||||
if [ "${OSD_BLUESTORE:-0}" -ne 1 ]; then
|
||||
if [ -n "${OSD_JOURNAL}" ]; then
|
||||
|
@ -179,6 +179,10 @@ spec:
|
||||
value: "ceph"
|
||||
- name: CEPH_GET_ADMIN_KEY
|
||||
value: "1"
|
||||
- name: CRUSH_RULE
|
||||
value: {{ .Values.conf.pool.default.crush_rule }}
|
||||
- name: RACK_REGEX
|
||||
value: {{ .Values.conf.pool.default.rack_regex }}
|
||||
command:
|
||||
- /tmp/osd-start.sh
|
||||
lifecycle:
|
||||
|
@ -107,6 +107,18 @@ conf:
|
||||
osd_mount_options_xfs: "rw,noatime,largeio,inode64,swalloc,logbufs=8,logbsize=256k,allocsize=4M"
|
||||
osd_journal_size: 10240
|
||||
|
||||
pool:
|
||||
default:
|
||||
# NOTE(supamatt): Accepted values are:
|
||||
# same_host for a single node
|
||||
# replicated_rule for a multi node
|
||||
# rack_replicated_rule for a multi node in multiple (>=3) racks
|
||||
# Ceph cluster must be in a healthy state.
|
||||
crush_rule: replicated_rule
|
||||
# NOTE(supamatt): By default use the first 8 characters of the hostname to
|
||||
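# define the rack type bucket names for CRUSH. Despite its name, the value is a character range passed to `cut -c` (e.g. "1-8"), not a regular expression.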
|
||||
rack_regex: "1-8"
|
||||
|
||||
storage:
|
||||
# NOTE(portdirect): for homogeneous clusters the `osd` key can be used to
|
||||
# define OSD pods that will be deployed across the cluster.
|
||||
|
Loading…
Reference in New Issue
Block a user