c3f921c916
This is to fix the logic to disable the autosclaer on pools as its not considering newly created pools. Change-Id: I76fe106918d865b6443453b13e3a4bd6fc35206a
287 lines
11 KiB
Smarty
287 lines
11 KiB
Smarty
#!/bin/bash
|
|
|
|
{{/*
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/}}
|
|
|
|
set -ex
|
|
export LC_ALL=C
|
|
|
|
: "${ADMIN_KEYRING:=/etc/ceph/${CLUSTER}.client.admin.keyring}"
|
|
|
|
if [[ ! -e /etc/ceph/${CLUSTER}.conf ]]; then
|
|
echo "ERROR- /etc/ceph/${CLUSTER}.conf must exist; get it from your existing mon"
|
|
exit 1
|
|
fi
|
|
|
|
if [[ ! -e ${ADMIN_KEYRING} ]]; then
|
|
echo "ERROR- ${ADMIN_KEYRING} must exist; get it from your existing mon"
|
|
exit 1
|
|
fi
|
|
|
|
function wait_for_pgs () {
|
|
echo "#### Start: Checking pgs ####"
|
|
|
|
pgs_ready=0
|
|
query='map({state: .state}) | group_by(.state) | map({state: .[0].state, count: length}) | .[] | select(.state | contains("active") | not)'
|
|
|
|
if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
|
|
query=".pg_stats | ${query}"
|
|
fi
|
|
|
|
# Loop until all pgs are active
|
|
while [[ $pgs_ready -lt 3 ]]; do
|
|
pgs_state=$(ceph --cluster ${CLUSTER} pg ls -f json | jq -c "${query}")
|
|
if [[ $(jq -c '. | select(.state | contains("peer") or contains("activating") | not)' <<< "${pgs_state}") ]]; then
|
|
# If inactive PGs aren't peering, fail
|
|
echo "Failure, found inactive PGs that aren't peering"
|
|
exit 1
|
|
fi
|
|
if [[ "${pgs_state}" ]]; then
|
|
pgs_ready=0
|
|
else
|
|
(( pgs_ready+=1 ))
|
|
fi
|
|
sleep 3
|
|
done
|
|
}
|
|
|
|
function check_recovery_flags () {
|
|
echo "### Start: Checking for flags that will prevent recovery"
|
|
|
|
# Ensure there are no flags set that will prevent recovery of degraded PGs
|
|
if [[ $(ceph osd stat | grep "norecover\|nobackfill\|norebalance") ]]; then
|
|
ceph osd stat
|
|
echo "Flags are set that prevent recovery of degraded PGs"
|
|
exit 1
|
|
fi
|
|
}
|
|
|
|
function check_osd_count() {
|
|
echo "#### Start: Checking OSD count ####"
|
|
noup_flag=$(ceph osd stat | awk '/noup/ {print $2}')
|
|
osd_stat=$(ceph osd stat -f json)
|
|
num_osd=$(jq '.osdmap.num_osds' <<< "$osd_stat")
|
|
num_in_osds=$(jq '.osdmap.num_in_osds' <<< "$osd_stat")
|
|
num_up_osds=$(jq '.osdmap.num_up_osds' <<< "$osd_stat")
|
|
EXPECTED_OSDS={{.Values.conf.pool.target.osd}}
|
|
REQUIRED_PERCENT_OF_OSDS={{.Values.conf.pool.target.required_percent_of_osds}}
|
|
|
|
if [ ${num_up_osds} -gt ${EXPECTED_OSDS} ]; then
|
|
echo "The expected amount of OSDs (${EXPECTED_OSDS}) is less than available OSDs (${num_up_osds}). Please, correct the value (.Values.conf.pool.target.osd)."
|
|
exit 1
|
|
fi
|
|
|
|
MIN_OSDS=$(($EXPECTED_OSDS*$REQUIRED_PERCENT_OF_OSDS/100))
|
|
if [ ${MIN_OSDS} -lt 1 ]; then
|
|
MIN_OSDS=1
|
|
fi
|
|
|
|
if [ "${noup_flag}" ]; then
|
|
osd_status=$(ceph osd dump -f json | jq -c '.osds[] | .state')
|
|
count=0
|
|
for osd in $osd_status; do
|
|
if [[ "$osd" == *"up"* || "$osd" == *"new"* ]]; then
|
|
((count=count+1))
|
|
fi
|
|
done
|
|
echo "Caution: noup flag is set. ${count} OSDs in up/new state. Required number of OSDs: ${MIN_OSDS}."
|
|
if [ $MIN_OSDS -gt $count ]; then
|
|
exit 1
|
|
fi
|
|
else
|
|
if [ "${num_osd}" -eq 0 ]; then
|
|
echo "There are no osds in the cluster"
|
|
exit 1
|
|
elif [ "${num_in_osds}" -ge "${MIN_OSDS}" ] && [ "${num_up_osds}" -ge "${MIN_OSDS}" ]; then
|
|
echo "Required number of OSDs (${MIN_OSDS}) are UP and IN status"
|
|
else
|
|
echo "Required number of OSDs (${MIN_OSDS}) are NOT UP and IN status. Cluster shows OSD count=${num_osd}, UP=${num_up_osds}, IN=${num_in_osds}"
|
|
exit 1
|
|
fi
|
|
fi
|
|
}
|
|
|
|
function create_crushrule () {
|
|
CRUSH_NAME=$1
|
|
CRUSH_RULE=$2
|
|
CRUSH_FAILURE_DOMAIN=$3
|
|
CRUSH_DEVICE_CLASS=$4
|
|
if ! ceph --cluster "${CLUSTER}" osd crush rule ls | grep -q "^\$CRUSH_NAME$"; then
|
|
ceph --cluster "${CLUSTER}" osd crush rule $CRUSH_RULE $CRUSH_NAME default $CRUSH_FAILURE_DOMAIN $CRUSH_DEVICE_CLASS || true
|
|
fi
|
|
}
|
|
|
|
# Set mons to use the msgr2 protocol on nautilus
|
|
if [[ -z "$(ceph mon versions | grep ceph\ version | grep -v nautilus)" ]]; then
|
|
ceph --cluster "${CLUSTER}" mon enable-msgr2
|
|
fi
|
|
|
|
check_osd_count
|
|
{{- range $crush_rule := .Values.conf.pool.crush_rules -}}
|
|
{{- with $crush_rule }}
|
|
create_crushrule {{ .name }} {{ .crush_rule }} {{ .failure_domain }} {{ .device_class }}
|
|
{{- end }}
|
|
{{- end }}
|
|
|
|
function reweight_osds () {
|
|
OSD_DF_OUTPUT=$(ceph --cluster "${CLUSTER}" osd df --format json-pretty)
|
|
for OSD_ID in $(ceph --cluster "${CLUSTER}" osd ls); do
|
|
OSD_EXPECTED_WEIGHT=$(echo "${OSD_DF_OUTPUT}" | grep -A7 "\bosd.${OSD_ID}\b" | awk '/"kb"/{ gsub(",",""); d= $2/1073741824 ; r = sprintf("%.2f", d); print r }');
|
|
OSD_WEIGHT=$(echo "${OSD_DF_OUTPUT}" | grep -A3 "\bosd.${OSD_ID}\b" | awk '/crush_weight/{print $2}' | cut -d',' -f1)
|
|
if [[ "${OSD_EXPECTED_WEIGHT}" != "0.00" ]] && [[ "${OSD_WEIGHT}" != "${OSD_EXPECTED_WEIGHT}" ]]; then
|
|
ceph --cluster "${CLUSTER}" osd crush reweight osd.${OSD_ID} ${OSD_EXPECTED_WEIGHT};
|
|
fi
|
|
done
|
|
}
|
|
|
|
function enable_or_disable_autoscaling () {
|
|
if [[ "${ENABLE_AUTOSCALER}" == "true" ]]; then
|
|
ceph mgr module enable pg_autoscaler
|
|
ceph config set global osd_pool_default_pg_autoscale_mode on
|
|
else
|
|
ceph mgr module disable pg_autoscaler
|
|
fi
|
|
}
|
|
|
|
function set_cluster_flags () {
|
|
if [[ ! -z "${CLUSTER_SET_FLAGS}" ]]; then
|
|
for flag in ${CLUSTER_SET_FLAGS}; do
|
|
ceph osd set ${flag}
|
|
done
|
|
fi
|
|
}
|
|
|
|
function unset_cluster_flags () {
|
|
if [[ ! -z "${CLUSTER_UNSET_FLAGS}" ]]; then
|
|
for flag in ${CLUSTER_UNSET_FLAGS}; do
|
|
ceph osd unset ${flag}
|
|
done
|
|
fi
|
|
}
|
|
|
|
function create_pool () {
|
|
POOL_APPLICATION=$1
|
|
POOL_NAME=$2
|
|
POOL_REPLICATION=$3
|
|
POOL_PLACEMENT_GROUPS=$4
|
|
POOL_CRUSH_RULE=$5
|
|
POOL_PROTECTION=$6
|
|
if ! ceph --cluster "${CLUSTER}" osd pool stats "${POOL_NAME}" > /dev/null 2>&1; then
|
|
ceph --cluster "${CLUSTER}" osd pool create "${POOL_NAME}" ${POOL_PLACEMENT_GROUPS}
|
|
while [ $(ceph --cluster "${CLUSTER}" -s | grep creating -c) -gt 0 ]; do echo -n .;sleep 1; done
|
|
ceph --cluster "${CLUSTER}" osd pool application enable "${POOL_NAME}" "${POOL_APPLICATION}"
|
|
fi
|
|
|
|
if [[ -z "$(ceph osd versions | grep ceph\ version | grep -v nautilus)" ]] && [[ "${ENABLE_AUTOSCALER}" == "true" ]] ; then
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_autoscale_mode on
|
|
else
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" pg_autoscale_mode off
|
|
fi
|
|
#
|
|
# Make sure pool is not protected after creation AND expansion so we can manipulate its settings.
|
|
# Final protection settings are applied once parameters (size, pg) have been adjusted.
|
|
#
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nosizechange false
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nopgchange false
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nodelete false
|
|
#
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" size ${POOL_REPLICATION}
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" crush_rule "${POOL_CRUSH_RULE}"
|
|
# set pg_num to pool
|
|
if [[ -z "$(ceph osd versions | grep ceph\ version | grep -v nautilus)" ]]; then
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" "pg_num" "${POOL_PLACEMENT_GROUPS}"
|
|
else
|
|
for PG_PARAM in pg_num pgp_num; do
|
|
CURRENT_PG_VALUE=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" "${PG_PARAM}" | awk "/^${PG_PARAM}:/ { print \$NF }")
|
|
if [ "${POOL_PLACEMENT_GROUPS}" -gt "${CURRENT_PG_VALUE}" ]; then
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" "${PG_PARAM}" "${POOL_PLACEMENT_GROUPS}"
|
|
fi
|
|
done
|
|
fi
|
|
|
|
#This is to handle cluster expansion case where replication may change from intilization
|
|
if [ ${POOL_REPLICATION} -gt 1 ]; then
|
|
EXPECTED_POOLMINSIZE=$[${POOL_REPLICATION}-1]
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" min_size ${EXPECTED_POOLMINSIZE}
|
|
fi
|
|
#
|
|
# Handling of .Values.conf.pool.target.protected:
|
|
# Possible settings
|
|
# - true | 1 = Protect the pools after they get created
|
|
# - false | 0 = Do not protect the pools once they get created and let Ceph defaults apply
|
|
# - Absent = Do not protect the pools once they get created and let Ceph defaults apply
|
|
#
|
|
# If protection is not requested through values.yaml, just use the Ceph defaults. With Luminous we do not
|
|
# apply any protection to the pools when they get created.
|
|
#
|
|
# Note: If the /etc/ceph/ceph.conf file modifies the defaults the deployment will fail on pool creation
|
|
# - nosizechange = Do not allow size and min_size changes on the pool
|
|
# - nodelete = Do not allow deletion of the pool
|
|
#
|
|
if [ "x${POOL_PROTECTION}" == "xtrue" ] || [ "x${POOL_PROTECTION}" == "x1" ]; then
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nosizechange true
|
|
ceph --cluster "${CLUSTER}" osd pool set "${POOL_NAME}" nodelete true
|
|
fi
|
|
}
|
|
|
|
function manage_pool () {
|
|
POOL_APPLICATION=$1
|
|
POOL_NAME=$2
|
|
POOL_REPLICATION=$3
|
|
TOTAL_DATA_PERCENT=$4
|
|
TARGET_PG_PER_OSD=$5
|
|
POOL_CRUSH_RULE=$6
|
|
TARGET_QUOTA=$7
|
|
POOL_PROTECTION=$8
|
|
CLUSTER_CAPACITY=$9
|
|
TOTAL_OSDS={{.Values.conf.pool.target.osd}}
|
|
POOL_PLACEMENT_GROUPS=$(/tmp/pool-calc.py ${POOL_REPLICATION} ${TOTAL_OSDS} ${TOTAL_DATA_PERCENT} ${TARGET_PG_PER_OSD})
|
|
create_pool "${POOL_APPLICATION}" "${POOL_NAME}" "${POOL_REPLICATION}" "${POOL_PLACEMENT_GROUPS}" "${POOL_CRUSH_RULE}" "${POOL_PROTECTION}"
|
|
POOL_REPLICAS=$(ceph --cluster "${CLUSTER}" osd pool get "${POOL_NAME}" size | awk '{print $2}')
|
|
POOL_QUOTA=$(python -c "print(int($CLUSTER_CAPACITY * $TOTAL_DATA_PERCENT * $TARGET_QUOTA / $POOL_REPLICAS / 100 / 100))")
|
|
ceph --cluster "${CLUSTER}" osd pool set-quota "${POOL_NAME}" max_bytes $POOL_QUOTA
|
|
}
|
|
|
|
set_cluster_flags
|
|
unset_cluster_flags
|
|
reweight_osds
|
|
|
|
{{ $targetPGperOSD := .Values.conf.pool.target.pg_per_osd }}
|
|
{{ $crushRuleDefault := .Values.conf.pool.default.crush_rule }}
|
|
{{ $targetQuota := .Values.conf.pool.target.quota | default 100 }}
|
|
{{ $targetProtection := .Values.conf.pool.target.protected | default "false" | quote | lower }}
|
|
cluster_capacity=0
|
|
if [[ -z "$(ceph osd versions | grep ceph\ version | grep -v nautilus)" ]]; then
|
|
cluster_capacity=$(ceph --cluster "${CLUSTER}" df | grep "TOTAL" | awk '{print $2 substr($3, 1, 1)}' | numfmt --from=iec)
|
|
enable_or_disable_autoscaling
|
|
else
|
|
cluster_capacity=$(ceph --cluster "${CLUSTER}" df | head -n3 | tail -n1 | awk '{print $1 substr($2, 1, 1)}' | numfmt --from=iec)
|
|
fi
|
|
{{- range $pool := .Values.conf.pool.spec -}}
|
|
{{- with $pool }}
|
|
{{- if .crush_rule }}
|
|
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ .crush_rule }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
|
|
{{ else }}
|
|
manage_pool {{ .application }} {{ .name }} {{ .replication }} {{ .percent_total_data }} {{ $targetPGperOSD }} {{ $crushRuleDefault }} {{ $targetQuota }} {{ $targetProtection }} ${cluster_capacity}
|
|
{{- end }}
|
|
{{- end }}
|
|
{{- end }}
|
|
|
|
{{- if .Values.conf.pool.crush.tunables }}
|
|
ceph --cluster "${CLUSTER}" osd crush tunables {{ .Values.conf.pool.crush.tunables }}
|
|
{{- end }}
|
|
|
|
wait_for_pgs
|
|
check_recovery_flags
|