Rabbit: Ensure node has joined cluster on initial startup
This PS extends the rabbit startup logic to ensure nodes have actually joined the cluster on startup. Change-Id: Ib876d9abd89209d0a7972983bdf4daacf5f8f582 Signed-off-by: Pete Birley <pete@port.direct>
This commit is contained in:
parent
819cf51083
commit
9b5b901104
@ -18,4 +18,8 @@ limitations under the License.
|
|||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
exec rabbitmqctl status
|
if [ -f /run/rabbit-disable-readiness ]; then
|
||||||
|
exit 1
|
||||||
|
else
|
||||||
|
exec rabbitmqctl status
|
||||||
|
fi
|
||||||
|
@ -29,10 +29,15 @@ function check_rabbit_node_health () {
|
|||||||
rabbitmq-diagnostics node_health_check -n "${CLUSTER_SEED_NAME}" -t 10 &>/dev/null
|
rabbitmq-diagnostics node_health_check -n "${CLUSTER_SEED_NAME}" -t 10 &>/dev/null
|
||||||
}
|
}
|
||||||
|
|
||||||
function check_rabbit_node_ready () {
|
get_node_name () {
|
||||||
TARGET_POD=$1
|
TARGET_POD=$1
|
||||||
POD_NAME_PREFIX="$(echo "${MY_POD_NAME}" | awk 'BEGIN{FS=OFS="-"}{NF--; print}')"
|
POD_NAME_PREFIX="$(echo "${MY_POD_NAME}" | awk 'BEGIN{FS=OFS="-"}{NF--; print}')"
|
||||||
CLUSTER_SEED_NAME="$(echo "${RABBITMQ_NODENAME}" | awk -F "@${MY_POD_NAME}." "{ print \$1 \"@${POD_NAME_PREFIX}-${TARGET_POD}.\" \$2 }")"
|
echo "${RABBITMQ_NODENAME}" | awk -F "@${MY_POD_NAME}." "{ print \$1 \"@${POD_NAME_PREFIX}-${TARGET_POD}.\" \$2 }"
|
||||||
|
}
|
||||||
|
|
||||||
|
function check_rabbit_node_ready () {
|
||||||
|
TARGET_POD=$1
|
||||||
|
CLUSTER_SEED_NAME="$(get_node_name ${TARGET_POD})"
|
||||||
CLUSTER_SEED_HOST="$(echo "${CLUSTER_SEED_NAME}" | awk -F '@' '{ print $NF }')"
|
CLUSTER_SEED_HOST="$(echo "${CLUSTER_SEED_NAME}" | awk -F '@' '{ print $NF }')"
|
||||||
check_rabbit_node_health "${CLUSTER_SEED_NAME}" && \
|
check_rabbit_node_health "${CLUSTER_SEED_NAME}" && \
|
||||||
check_if_open "${CLUSTER_SEED_HOST}" "${PORT_HTTP}" && \
|
check_if_open "${CLUSTER_SEED_HOST}" "${PORT_HTTP}" && \
|
||||||
@ -56,7 +61,39 @@ if ! [ "${POD_INCREMENT}" -eq "0" ] && ! [ -d "/var/lib/rabbitmq/mnesia" ] ; the
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
rm -fv /run/rabbit-disable-liveness-probe
|
|
||||||
|
function reset_rabbit () {
|
||||||
|
rabbitmqctl shutdown || true
|
||||||
|
rm -rf /var/lib/rabbitmq/*
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Start RabbitMQ, but disable readiness from being reported so the pod is not
|
||||||
|
# marked as up prematurely.
|
||||||
|
touch /run/rabbit-disable-readiness
|
||||||
|
rabbitmq-server &
|
||||||
|
|
||||||
|
# Wait for server to start, and reset if it does not
|
||||||
|
END=$(($(date +%s) + 180))
|
||||||
|
while ! rabbitmqctl -q cluster_status; do
|
||||||
|
sleep 5
|
||||||
|
NOW=$(date +%s)
|
||||||
|
[ $NOW -gt $END ] && reset_rabbit
|
||||||
|
done
|
||||||
|
|
||||||
|
# Wait for server to join cluster, reset if it does not
|
||||||
|
POD_INCREMENT=$(echo "${MY_POD_NAME}" | awk -F '-' '{print $NF}')
|
||||||
|
END=$(($(date +%s) + 180))
|
||||||
|
while ! rabbitmqctl -l --node $(get_node_name 0) -q cluster_status | grep -q "$(get_node_name ${POD_INCREMENT})"; do
|
||||||
|
sleep 5
|
||||||
|
NOW=$(date +%s)
|
||||||
|
[ $NOW -gt $END ] && reset_rabbit
|
||||||
|
done
|
||||||
|
|
||||||
|
# Shut down the initial server
|
||||||
|
rabbitmqctl shutdown
|
||||||
|
|
||||||
|
rm -fv /run/rabbit-disable-readiness /run/rabbit-disable-liveness-probe
|
||||||
fi
|
fi
|
||||||
|
|
||||||
exec rabbitmq-server
|
exec rabbitmq-server
|
||||||
|
@ -16,7 +16,7 @@ See the License for the specific language governing permissions and
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
*/}}
|
*/}}
|
||||||
|
|
||||||
set -e
|
set -ex
|
||||||
|
|
||||||
# Extract connection details
|
# Extract connection details
|
||||||
RABBIT_HOSTNAME=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $2}' \
|
RABBIT_HOSTNAME=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $2}' \
|
||||||
@ -24,22 +24,30 @@ RABBIT_HOSTNAME=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $2}' \
|
|||||||
RABBIT_PORT=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $2}' \
|
RABBIT_PORT=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $2}' \
|
||||||
| awk -F'[:/]' '{print $2}'`
|
| awk -F'[:/]' '{print $2}'`
|
||||||
|
|
||||||
|
set +x
|
||||||
# Extract Admin User credentials
|
# Extract Admin User credentials
|
||||||
RABBITMQ_ADMIN_USERNAME=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $1}' \
|
RABBITMQ_ADMIN_USERNAME=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $1}' \
|
||||||
| awk -F'[//:]' '{print $4}'`
|
| awk -F'[//:]' '{print $4}'`
|
||||||
RABBITMQ_ADMIN_PASSWORD=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $1}' \
|
RABBITMQ_ADMIN_PASSWORD=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $1}' \
|
||||||
| awk -F'[//:]' '{print $5}'`
|
| awk -F'[//:]' '{print $5}'`
|
||||||
|
set -x
|
||||||
|
|
||||||
function rabbit_check_node_count () {
|
function rabbitmqadmin_authed () {
|
||||||
echo "Checking node count "
|
set +x
|
||||||
NODES_IN_CLUSTER=$(rabbitmqadmin \
|
rabbitmqadmin \
|
||||||
--host="${RABBIT_HOSTNAME}" \
|
--host="${RABBIT_HOSTNAME}" \
|
||||||
--port="${RABBIT_PORT}" \
|
--port="${RABBIT_PORT}" \
|
||||||
--username="${RABBITMQ_ADMIN_USERNAME}" \
|
--username="${RABBITMQ_ADMIN_USERNAME}" \
|
||||||
--password="${RABBITMQ_ADMIN_PASSWORD}" \
|
--password="${RABBITMQ_ADMIN_PASSWORD}" \
|
||||||
list nodes -f bash | wc -w)
|
$@
|
||||||
|
set -x
|
||||||
|
}
|
||||||
|
|
||||||
|
function rabbit_check_node_count () {
|
||||||
|
echo "Checking node count "
|
||||||
|
NODES_IN_CLUSTER=$(rabbitmqadmin_authed list nodes -f bash | wc -w)
|
||||||
if [ "$NODES_IN_CLUSTER" -eq "$RABBIT_REPLICA_COUNT" ]; then
|
if [ "$NODES_IN_CLUSTER" -eq "$RABBIT_REPLICA_COUNT" ]; then
|
||||||
echo "Number of nodes in cluster match number of desired pods ($NODES_IN_CLUSTER)"
|
echo "Number of nodes in cluster ($NODES_IN_CLUSTER) match number of desired pods ($NODES_IN_CLUSTER)"
|
||||||
else
|
else
|
||||||
echo "Number of nodes in cluster ($NODES_IN_CLUSTER) does not match number of desired pods ($RABBIT_REPLICA_COUNT)"
|
echo "Number of nodes in cluster ($NODES_IN_CLUSTER) does not match number of desired pods ($RABBIT_REPLICA_COUNT)"
|
||||||
exit 1
|
exit 1
|
||||||
@ -49,13 +57,9 @@ function rabbit_check_node_count () {
|
|||||||
rabbit_check_node_count
|
rabbit_check_node_count
|
||||||
|
|
||||||
function rabbit_find_partitions () {
|
function rabbit_find_partitions () {
|
||||||
rabbitmqadmin \
|
NODE_INFO=$(mktemp)
|
||||||
--host="${RABBIT_HOSTNAME}" \
|
rabbitmqadmin_authed list nodes -f pretty_json | tee "${NODE_INFO}"
|
||||||
--port="${RABBIT_PORT}" \
|
cat "${NODE_INFO}" | python -c "
|
||||||
--username="${RABBITMQ_ADMIN_USERNAME}" \
|
|
||||||
--password="${RABBITMQ_ADMIN_PASSWORD}" \
|
|
||||||
list nodes -f raw_json | \
|
|
||||||
python -c "
|
|
||||||
import json, sys, traceback
|
import json, sys, traceback
|
||||||
print('Checking cluster partitions')
|
print('Checking cluster partitions')
|
||||||
obj=json.load(sys.stdin)
|
obj=json.load(sys.stdin)
|
||||||
@ -66,31 +70,20 @@ for num, node in enumerate(obj):
|
|||||||
raise Exception('cluster partition found: %s' % partition)
|
raise Exception('cluster partition found: %s' % partition)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
print('Error: partition key not found for node %s' % node)
|
print('Error: partition key not found for node %s' % node)
|
||||||
sys.exit(1)
|
|
||||||
print('No cluster partitions found')
|
print('No cluster partitions found')
|
||||||
"
|
"
|
||||||
|
rm -vf "${NODE_INFO}"
|
||||||
}
|
}
|
||||||
|
|
||||||
rabbit_find_partitions
|
rabbit_find_partitions
|
||||||
|
|
||||||
function rabbit_check_users_match () {
|
function rabbit_check_users_match () {
|
||||||
echo "Checking users match on all nodes"
|
echo "Checking users match on all nodes"
|
||||||
NODES=$(rabbitmqadmin \
|
NODES=$(rabbitmqadmin_authed list nodes -f bash)
|
||||||
--host="${RABBIT_HOSTNAME}" \
|
|
||||||
--port="${RABBIT_PORT}" \
|
|
||||||
--username="${RABBITMQ_ADMIN_USERNAME}" \
|
|
||||||
--password="${RABBITMQ_ADMIN_PASSWORD}" \
|
|
||||||
list nodes -f bash)
|
|
||||||
USER_LIST=$(mktemp --directory)
|
USER_LIST=$(mktemp --directory)
|
||||||
echo "Found the following nodes: ${NODES}"
|
echo "Found the following nodes: ${NODES}"
|
||||||
for NODE in ${NODES}; do
|
for NODE in ${NODES}; do
|
||||||
echo "Checking Node: ${NODE#*@}"
|
echo "Checking Node: ${NODE#*@}"
|
||||||
rabbitmqadmin \
|
rabbitmqadmin_authed list users -f bash > ${USER_LIST}/${NODE#*@}
|
||||||
--host=${NODE#*@} \
|
|
||||||
--port="${RABBIT_PORT}" \
|
|
||||||
--username="${RABBITMQ_ADMIN_USERNAME}" \
|
|
||||||
--password="${RABBITMQ_ADMIN_PASSWORD}" \
|
|
||||||
list users -f bash > ${USER_LIST}/${NODE#*@}
|
|
||||||
done
|
done
|
||||||
cd ${USER_LIST}; diff -q --from-file $(ls ${USER_LIST})
|
cd ${USER_LIST}; diff -q --from-file $(ls ${USER_LIST})
|
||||||
echo "User lists match for all nodes"
|
echo "User lists match for all nodes"
|
||||||
|
@ -30,13 +30,21 @@ RABBITMQ_ADMIN_USERNAME=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $
|
|||||||
RABBITMQ_ADMIN_PASSWORD=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $1}' \
|
RABBITMQ_ADMIN_PASSWORD=`echo $RABBITMQ_ADMIN_CONNECTION | awk -F'[@]' '{print $1}' \
|
||||||
| awk -F'[//:]' '{print $5}'`
|
| awk -F'[//:]' '{print $5}'`
|
||||||
|
|
||||||
function active_rabbit_nodes () {
|
set -ex
|
||||||
|
|
||||||
|
function rabbitmqadmin_authed () {
|
||||||
|
set +x
|
||||||
rabbitmqadmin \
|
rabbitmqadmin \
|
||||||
--host="${RABBIT_HOSTNAME}" \
|
--host="${RABBIT_HOSTNAME}" \
|
||||||
--port="${RABBIT_PORT}" \
|
--port="${RABBIT_PORT}" \
|
||||||
--username="${RABBITMQ_ADMIN_USERNAME}" \
|
--username="${RABBITMQ_ADMIN_USERNAME}" \
|
||||||
--password="${RABBITMQ_ADMIN_PASSWORD}" \
|
--password="${RABBITMQ_ADMIN_PASSWORD}" \
|
||||||
list nodes -f bash | wc -w
|
$@
|
||||||
|
set -x
|
||||||
|
}
|
||||||
|
|
||||||
|
function active_rabbit_nodes () {
|
||||||
|
rabbitmqadmin_authed list nodes -f bash | wc -w
|
||||||
}
|
}
|
||||||
|
|
||||||
until test "$(active_rabbit_nodes)" -ge "$RABBIT_REPLICA_COUNT"; do
|
until test "$(active_rabbit_nodes)" -ge "$RABBIT_REPLICA_COUNT"; do
|
||||||
|
Loading…
x
Reference in New Issue
Block a user