ea038c5c85
This PS updates the "post-apply" job and adds execution of the "wait_for_pods" function as the first step of the job.

Change-Id: I98644981094cb4fb7cc348b80628006ab59cb77f
#!/bin/bash

{{/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}

export LC_ALL=C

: "${ADMIN_KEYRING:=/etc/ceph/${CLUSTER}.client.admin.keyring}"

if [[ ! -f /etc/ceph/${CLUSTER}.conf ]]; then
  echo "ERROR- /etc/ceph/${CLUSTER}.conf must exist; get it from your existing mon"
  exit 1
fi

if [[ ! -f ${ADMIN_KEYRING} ]]; then
  echo "ERROR- ${ADMIN_KEYRING} must exist; get it from your existing mon"
  exit 1
fi

ceph --cluster ${CLUSTER} -s
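
# wait_for_pods <namespace> [timeout]
# Polls until no OSD pod is Pending, all containers of the running OSD pods
# are ready, all init containers in the namespace are ready and no pod is
# Terminating or PodInitializing. Exits non-zero if this does not happen
# within the timeout (default 1800 seconds).
# Example invocation (namespace value is only an assumption here; the job
# passes $CEPH_NAMESPACE): wait_for_pods ceph 1800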
function wait_for_pods() {
  end=$(date +%s)
  timeout=${2:-1800}
  end=$((end + timeout))
  while true; do
    kubectl get pods --namespace=$1 -l component=osd -o json | jq -r \
      '.items[].status.phase' | grep Pending > /dev/null && \
      PENDING="True" || PENDING="False"
    query='.items[]|select(.status.phase=="Running")'
    pod_query="$query|.status.containerStatuses[].ready"
    init_query="$query|.status.initContainerStatuses[].ready"
    kubectl get pods --namespace=$1 -l component=osd -o json | jq -r "$pod_query" | \
      grep false > /dev/null && READY="False" || READY="True"
    kubectl get pods --namespace=$1 -o json | jq -r "$init_query" | \
      grep false > /dev/null && INIT_READY="False" || INIT_READY="True"
    kubectl get pods --namespace=$1 | grep -E 'Terminating|PodInitializing' \
      > /dev/null && UNKNOWN="True" || UNKNOWN="False"
    [ $INIT_READY == "True" -a $UNKNOWN == "False" -a $PENDING == "False" -a $READY == "True" ] && \
      break || true
    sleep 5
    now=$(date +%s)
    if [ $now -gt $end ] ; then
      echo "Containers failed to start after $timeout seconds"
      echo
      kubectl get pods --namespace $1 -o wide
      echo
      if [ $PENDING == "True" ] ; then
        echo "Some pods are in pending state:"
        kubectl get pods --field-selector=status.phase=Pending -n $1 -o wide
      fi
      [ $READY == "False" ] && echo "Some pods are not ready"
      exit -1
    fi
  done
}
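
# check_ds
# For every ceph-osd daemonset in $CEPH_NAMESPACE, compares
# currentNumberScheduled, desiredNumberScheduled, numberAvailable,
# numberReady and updatedNumberScheduled; a daemonset is reported as ready
# only when all five counters match.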
function check_ds() {
  for ds in `kubectl get ds --namespace=$CEPH_NAMESPACE -l component=osd --no-headers=true|awk '{print $1}'`
  do
    ds_query=`kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status`
    if echo $ds_query |grep -i "numberAvailable" ;then
      currentNumberScheduled=`echo $ds_query|jq -r .currentNumberScheduled`
      desiredNumberScheduled=`echo $ds_query|jq -r .desiredNumberScheduled`
      numberAvailable=`echo $ds_query|jq -r .numberAvailable`
      numberReady=`echo $ds_query|jq -r .numberReady`
      updatedNumberScheduled=`echo $ds_query|jq -r .updatedNumberScheduled`
      ds_check=`echo "$currentNumberScheduled $desiredNumberScheduled $numberAvailable $numberReady $updatedNumberScheduled"| \
        tr ' ' '\n'|sort -u|wc -l`
      if [ $ds_check != 1 ]; then
        echo "some pods under daemonset $ds are not yet ready"
        exit
      else
        echo "all pods under daemonset $ds are ready"
      fi
    else
      echo "there are no osds under daemonset $ds"
    fi
  done
}
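
# wait_for_inactive_pgs
# Blocks until `ceph pg ls` reports only "active+*" placement groups; on
# Nautilus the trailing summary lines of the output are stripped before
# the check.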
function wait_for_inactive_pgs () {
  echo "#### Start: Checking for inactive pgs ####"

  # Loop until all pgs are active
  if [[ $(ceph tell mon.* version | egrep -q "nautilus"; echo $?) -eq 0 ]]; then
    while [[ `ceph --cluster ${CLUSTER} pg ls | tail -n +2 | head -n -2 | grep -v "active+"` ]]
    do
      sleep 3
      ceph -s
    done
  else
    while [[ `ceph --cluster ${CLUSTER} pg ls | tail -n +2 | grep -v "active+"` ]]
    do
      sleep 3
      ceph -s
    done
  fi
}
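
# wait_for_degraded_objects
# Blocks until `ceph -s` no longer reports degraded objects.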
function wait_for_degraded_objects () {
  echo "#### Start: Checking for degraded objects ####"

  # Loop until no degraded objects
  while [[ ! -z "`ceph --cluster ${CLUSTER} -s | grep degraded`" ]]
  do
    sleep 3
    ceph -s
  done
}
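
# restart_by_rack
# Walks the CRUSH tree rack by rack: deletes the OSD pods on every host in
# the rack, waits for them to come back, then waits for inactive PGs and
# degraded objects to clear before moving on to the next rack.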
function restart_by_rack() {

  racks=`ceph osd tree | awk '/rack/{print $4}'`
  echo "Racks under ceph cluster are: $racks"
  for rack in $racks
  do
    hosts_in_rack=(`ceph osd tree | sed -n "/rack $rack/,/rack/p" | awk '/host/{print $4}' | tr '\n' ' '|sed 's/ *$//g'`)
    echo "hosts under rack "$rack" are: ${hosts_in_rack[@]}"
    echo "hosts count under $rack are: ${#hosts_in_rack[@]}"
    for host in ${hosts_in_rack[@]}
    do
      echo "host is : $host"
      if [[ ! -z "$host" ]]; then
        pods_on_host=`kubectl get po -n $CEPH_NAMESPACE -l component=osd -o wide |grep $host|awk '{print $1}'`
        echo "Restarting the pods under host $host"
        kubectl delete po -n $CEPH_NAMESPACE $pods_on_host
      fi
    done
    echo "waiting for the pods under rack $rack to restart"
    wait_for_pods $CEPH_NAMESPACE
    echo "waiting for inactive pgs after osds are restarted in rack $rack"
    wait_for_inactive_pgs
    wait_for_degraded_objects
    ceph -s
  done
}
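
# Make sure the OSD pods that already exist are up before checking whether
# any daemonset still needs to roll out a new revision.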
wait_for_pods $CEPH_NAMESPACE

require_upgrade=0
max_release=0
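
# A daemonset needs a restart when updatedNumberScheduled differs from
# desiredNumberScheduled; max_release tracks the highest observedGeneration
# seen across the OSD daemonsets.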
for ds in `kubectl get ds --namespace=$CEPH_NAMESPACE -l component=osd --no-headers=true|awk '{print $1}'`
do
  updatedNumberScheduled=`kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status.updatedNumberScheduled`
  desiredNumberScheduled=`kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status.desiredNumberScheduled`
  if [[ $updatedNumberScheduled != $desiredNumberScheduled ]]; then
    if kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status|grep -i "numberAvailable" ;then
      require_upgrade=$((require_upgrade+1))
      _release=`kubectl get ds -n $CEPH_NAMESPACE $ds -o json|jq -r .status.observedGeneration`
      max_release=$(( max_release > _release ? max_release : _release ))
    fi
  fi
done

echo "Latest revision of the helm chart(s) is : $max_release"
if [[ $max_release -gt 1 ]]; then
  if [[ $require_upgrade -gt 0 ]]; then
    echo "waiting for inactive pgs and degraded objects before upgrade"
    wait_for_inactive_pgs
    wait_for_degraded_objects
    ceph -s
    ceph osd "set" noout
    echo "lets restart the osds rack by rack"
    restart_by_rack
    ceph osd "unset" noout
  fi

  # lets check all the ceph-osd daemonsets
  echo "checking DS"
  check_ds
else
  echo "No revisions found for upgrade"
fi