From af270934d44ab3f0eb2462cde7626eb2c6a1f967 Mon Sep 17 00:00:00 2001 From: Pete Birley Date: Thu, 18 Jul 2019 14:15:45 -0500 Subject: [PATCH] Rabbit: Eradicate potential crashes in wait job while upgrading cluster When upgrading/reconfiguring a rabbit cluster, it's possible that the nodes will not return the cluster status for some time. This patch set allows us to cope with this much more gracefully than simply crashing a few times before proceeding. Change-Id: Ibf525df9e3a9362282f70e5dbb136430734181fd Signed-off-by: Pete Birley --- rabbitmq/templates/bin/_rabbitmq-wait-for-cluster.sh.tpl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rabbitmq/templates/bin/_rabbitmq-wait-for-cluster.sh.tpl b/rabbitmq/templates/bin/_rabbitmq-wait-for-cluster.sh.tpl index 10fd86f67..21d7613fd 100644 --- a/rabbitmq/templates/bin/_rabbitmq-wait-for-cluster.sh.tpl +++ b/rabbitmq/templates/bin/_rabbitmq-wait-for-cluster.sh.tpl @@ -59,6 +59,10 @@ function sorted_node_list () { if test "$(active_rabbit_nodes)" -gt "$RABBIT_REPLICA_COUNT"; then echo "There are more nodes registed in the cluster than desired, pruning the cluster" PRIMARY_NODE="$(sorted_node_list | awk '{ print $1; exit }')" + until rabbitmqctl -l -n "${PRIMARY_NODE}" cluster_status >/dev/null 2>&1 ; do + echo "Waiting for primary node to return cluster status" + sleep 10 + done echo "Current cluster:" rabbitmqctl -l -n "${PRIMARY_NODE}" cluster_status NODES_TO_REMOVE="$(sorted_node_list | awk "{print substr(\$0, index(\$0,\$$((RABBIT_REPLICA_COUNT+1))))}")"