RabbitMQ: Make cluster formation more robust

This PS updates the rabbitmq chart to make cluster formation
more robust. With the previous implementation it was possible
to form multiple discrete clusters within a single deployment
of the chart (e.g. if a network partition existed during formation).

Change-Id: Ie241d29230419ff829d9fbb22fa1a01275926903
Signed-off-by: Pete Birley <pete@port.direct>
This commit is contained in:
Pete Birley 2019-03-20 20:29:12 -05:00 committed by Pete Birley
parent 2aea1e4fe8
commit b5e8c41d2e
5 changed files with 81 additions and 2 deletions

View File

@ -18,4 +18,8 @@ limitations under the License.
set -e
# rabbitmq-start.sh creates /run/rabbit-disable-liveness-probe while it waits
# for lower-ordinal pods to come up. While that flag file exists the probe
# reports success without querying RabbitMQ; otherwise the probe result is
# the exit status of `rabbitmqctl status`.
if [ ! -f /run/rabbit-disable-liveness-probe ]; then
  exec rabbitmqctl status
fi
exit 0

View File

@ -0,0 +1,21 @@
#!/usr/bin/env bash
{{/*
Copyright 2017 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
# Abort on the first failing command.
set -e
# Readiness is the exit status of `rabbitmqctl status`; exec replaces this
# shell with that command, so the probe sees its status directly.
exec rabbitmqctl status

View File

@ -18,4 +18,45 @@ limitations under the License.
# -e: abort on the first failing command; -x: trace each command as it runs.
set -ex
#######################################
# Check whether a TCP port accepts connections.
# Arguments: $1 - host name or address
#            $2 - TCP port
# Returns:   0 if the connection could be opened within 10 seconds,
#            non-zero otherwise.
#######################################
function check_if_open () {
  local host=$1
  local port=$2
  # Redirecting from bash's /dev/tcp/<host>/<port> pseudo-path opens a TCP
  # connection. Host and port are handed to the child shell as positional
  # parameters instead of being spliced into the command string, so their
  # contents are never re-parsed as shell code.
  timeout 10 bash -c 'true &>/dev/null </dev/tcp/"$1"/"$2"' _ "${host}" "${port}"
}
#######################################
# Run the RabbitMQ node health check against the given node.
# Arguments: $1 - node name passed to rabbitmq-diagnostics via -n
# Outputs:   none (stdout and stderr of the check are discarded)
# Returns:   exit status of rabbitmq-diagnostics
#######################################
function check_rabbit_node_health () {
  # Kept local so the caller's variables are not overwritten.
  local node_name=$1
  # -t 10 is passed through to rabbitmq-diagnostics as its timeout option.
  rabbitmq-diagnostics node_health_check -n "${node_name}" -t 10 &>/dev/null
}
#######################################
# Check that a sibling RabbitMQ pod is healthy and listening.
#
# The sibling's node name is derived from this pod's own node name by
# swapping the pod name for "<pod name prefix>-<ordinal>", where the prefix
# is MY_POD_NAME with its last "-<field>" removed.
#
# Globals:   MY_POD_NAME, RABBITMQ_NODENAME, PORT_HTTP, PORT_AMPQ,
#            PORT_CLUSTERING (all read)
# Arguments: $1 - ordinal of the pod to check
# Outputs:   an error on stderr if RABBITMQ_NODENAME does not contain
#            "@<MY_POD_NAME>."
# Returns:   0 if the node passes its health check and all three ports accept
#            connections, non-zero otherwise
#######################################
function check_rabbit_node_ready () {
  local target_pod=$1
  local marker="@${MY_POD_NAME}."
  local pod_name_prefix node_user node_domain seed_name seed_host

  # Without the marker no sibling name can be derived; fail explicitly rather
  # than probing a nonsensical host.
  if [[ "${RABBITMQ_NODENAME}" != *"${marker}"* ]]; then
    echo "RABBITMQ_NODENAME '${RABBITMQ_NODENAME}' does not contain '${marker}'" >&2
    return 1
  fi

  # Parameter expansion avoids forking awk on every poll. The quoted marker is
  # matched literally, so "." in the pod name is not treated as a wildcard.
  pod_name_prefix=${MY_POD_NAME%-*}
  node_user=${RABBITMQ_NODENAME%%"${marker}"*}
  node_domain=${RABBITMQ_NODENAME#*"${marker}"}
  seed_name="${node_user}@${pod_name_prefix}-${target_pod}.${node_domain}"
  # Host part is everything after the last "@".
  seed_host=${seed_name##*@}

  check_rabbit_node_health "${seed_name}" && \
    check_if_open "${seed_host}" "${PORT_HTTP}" && \
    check_if_open "${seed_host}" "${PORT_AMPQ}" && \
    check_if_open "${seed_host}" "${PORT_CLUSTERING}"
}
# Ordinal of this pod: whatever follows the last "-" in the pod name.
POD_INCREMENT="${MY_POD_NAME##*-}"

# Block until every lower-ordinal pod passes check_rabbit_node_ready, polling
# every 5 seconds. Each pod gets its own 900 second budget; the script exits
# with status 1 as soon as one of them overruns it.
function wait_for_lower_ordinal_pods () {
  local deadline
  for (( TARGET_POD = 0; TARGET_POD < POD_INCREMENT; TARGET_POD++ )); do
    deadline=$(( $(date +%s) + 900 ))
    until check_rabbit_node_ready "${TARGET_POD}"; do
      sleep 5
      if (( $(date +%s) > deadline )); then
        echo "RabbitMQ pod ${TARGET_POD} not ready in time"
        exit 1
      fi
    done
  done
}

# Pod 0, and any pod that already has a mnesia directory, starts immediately.
# Every other pod first waits for the pods before it.
if ! { [ "${POD_INCREMENT}" -eq "0" ] || [ -d "/var/lib/rabbitmq/mnesia" ]; }; then
  echo 'This is not the 1st rabbit pod & has not been initialised'
  # The wait below can take a while, so switch the liveness probe off for its
  # duration; the flag file is removed again once the wait has finished.
  touch /run/rabbit-disable-liveness-probe
  POD_NAME_PREFIX="$(echo "${MY_POD_NAME}" | awk 'BEGIN{FS=OFS="-"}{NF--; print}')"
  wait_for_lower_ordinal_pods
  rm -fv /run/rabbit-disable-liveness-probe
fi

# Hand the process over to the broker.
exec rabbitmq-server

View File

@ -30,6 +30,8 @@ data:
{{ tuple "bin/_rabbitmq-test.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
rabbitmq-liveness.sh: |
{{ tuple "bin/_rabbitmq-liveness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
rabbitmq-readiness.sh: |
{{ tuple "bin/_rabbitmq-readiness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
rabbitmq-start.sh: |
{{ tuple "bin/_rabbitmq-start.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
rabbitmq-cookie.sh: |

View File

@ -67,6 +67,7 @@ metadata:
spec:
serviceName: {{ tuple "oslo_messaging" "discovery" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
replicas: {{ $envAll.Values.pod.replicas.server }}
# Pods are launched in parallel rather than one at a time; start-up ordering
# is handled by rabbitmq-start.sh, where each pod with a non-zero ordinal and
# no existing mnesia directory waits for the lower-ordinal pods to be ready.
podManagementPolicy: "Parallel"
selector:
matchLabels:
{{ tuple $envAll "rabbitmq" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 6 }}
@ -183,12 +184,18 @@ spec:
value: ".{{ tuple "oslo_messaging" "discovery" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}"
- name: RABBITMQ_ERLANG_COOKIE
value: "{{ $envAll.Values.endpoints.oslo_messaging.auth.erlang_cookie }}"
# Ports that rabbitmq-start.sh probes on lower-ordinal pods before starting
# the local broker.
- name: PORT_HTTP
value: "{{ tuple "oslo_messaging" "internal" "http" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
# NOTE(review): "AMPQ" looks like a transposition of "AMQP". rabbitmq-start.sh
# reads the variable under this exact name, so rename both together or not at all.
- name: PORT_AMPQ
value: "{{ tuple "oslo_messaging" "internal" "amqp" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
# Computed as the AMQP port plus 20000; presumably RabbitMQ's inter-node
# (clustering) port - confirm against the RabbitMQ networking defaults.
- name: PORT_CLUSTERING
value: "{{ add (tuple "oslo_messaging" "internal" "amqp" . | include "helm-toolkit.endpoints.endpoint_port_lookup") 20000 }}"
readinessProbe:
initialDelaySeconds: 10
timeoutSeconds: 10
exec:
command:
- /tmp/rabbitmq-liveness.sh
- /tmp/rabbitmq-readiness.sh
livenessProbe:
initialDelaySeconds: 30
timeoutSeconds: 10
@ -202,6 +209,10 @@ spec:
mountPath: /tmp/rabbitmq-start.sh
subPath: rabbitmq-start.sh
readOnly: true
- name: rabbitmq-bin
mountPath: /tmp/rabbitmq-readiness.sh
subPath: rabbitmq-readiness.sh
readOnly: true
- name: rabbitmq-bin
mountPath: /tmp/rabbitmq-liveness.sh
subPath: rabbitmq-liveness.sh