RabbitMQ: Make cluster formation more robust
This PS updates the rabbitmq chart to make cluster formation more robust. With the previous implementation it was possible to form multiple discrete clusters within a single deployment of the chart (e.g. if a network partition existed during formation). Change-Id: Ie241d29230419ff829d9fbb22fa1a01275926903 Signed-off-by: Pete Birley <pete@port.direct>
This commit is contained in:
parent
2aea1e4fe8
commit
b5e8c41d2e
@ -18,4 +18,8 @@ limitations under the License.
|
||||
|
||||
set -e
|
||||
|
||||
exec rabbitmqctl status
|
||||
if [ -f /run/rabbit-disable-liveness-probe ]; then
|
||||
exit 0
|
||||
else
|
||||
exec rabbitmqctl status
|
||||
fi
|
||||
|
21
rabbitmq/templates/bin/_rabbitmq-readiness.sh.tpl
Normal file
21
rabbitmq/templates/bin/_rabbitmq-readiness.sh.tpl
Normal file
@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
{{/*
|
||||
Copyright 2017 The Openstack-Helm Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/}}
|
||||
|
||||
set -e
|
||||
|
||||
exec rabbitmqctl status
|
@ -18,4 +18,45 @@ limitations under the License.
|
||||
|
||||
set -ex
|
||||
|
||||
# Probe whether a TCP port on a host accepts connections.
# Arguments: $1 - host name or address
#            $2 - TCP port
# Returns:   the exit status of `timeout`: 0 when the connection could be
#            opened within 10 seconds, non-zero otherwise.
function check_if_open () {
  local host=$1
  local port=$2
  # /dev/tcp/<host>/<port> is a bash redirection feature, so the probe runs in
  # an explicit bash child that `timeout` can kill. Host and port are passed as
  # positional arguments rather than spliced into the script text, so unusual
  # characters in either value cannot alter the command being run.
  timeout 10 bash -c 'true &>/dev/null </dev/tcp/"$1"/"$2"' _ "${host}" "${port}"
}
|
||||
|
||||
# Run the RabbitMQ node health check against a (possibly remote) node.
# Arguments: $1 - Erlang node name to check, passed to rabbitmq-diagnostics -n
# Outputs:   nothing; stdout and stderr of the check are discarded
# Returns:   exit status of rabbitmq-diagnostics (0 when the check passed)
function check_rabbit_node_health () {
  local node_name=$1
  # NOTE(review): newer RabbitMQ releases appear to deprecate
  # `node_health_check` - confirm against the deployed RabbitMQ version.
  rabbitmq-diagnostics node_health_check -n "${node_name}" -t 10 &>/dev/null
}
|
||||
|
||||
# Check whether a sibling RabbitMQ pod is healthy and reachable.
# Globals:   MY_POD_NAME, RABBITMQ_NODENAME, PORT_HTTP, PORT_AMPQ,
#            PORT_CLUSTERING (all read only)
# Arguments: $1 - ordinal of the target pod (the trailing "-N" of its name)
# Returns:   0 when the node health check passes and the HTTP, AMQP and
#            clustering ports all accept connections; non-zero as soon as
#            any of those checks fails (later checks are then skipped).
function check_rabbit_node_ready () {
  local target_pod=$1
  local pod_name_prefix seed_name seed_host
  # Drop the trailing "-<ordinal>" from this pod's name to get the shared prefix.
  pod_name_prefix="${MY_POD_NAME%-*}"
  # Rewrite this pod's node name so that it points at the target pod: the
  # "@<my pod>." part is replaced by "@<prefix>-<target ordinal>.".
  seed_name="$(echo "${RABBITMQ_NODENAME}" | awk -F "@${MY_POD_NAME}." "{ print \$1 \"@${pod_name_prefix}-${target_pod}.\" \$2 }")"
  # The host is everything after the last "@" of the node name.
  seed_host="${seed_name##*@}"
  check_rabbit_node_health "${seed_name}" && \
    check_if_open "${seed_host}" "${PORT_HTTP}" && \
    check_if_open "${seed_host}" "${PORT_AMPQ}" && \
    check_if_open "${seed_host}" "${PORT_CLUSTERING}"
}
|
||||
|
||||
# Ordinal of this pod: the text after the last "-" of MY_POD_NAME.
POD_INCREMENT="${MY_POD_NAME##*-}"
# Only a pod that is not ordinal 0 and has no mnesia directory yet waits for
# its lower-numbered siblings before the server is started.
if ! [ "${POD_INCREMENT}" -eq "0" ] && ! [ -d "/var/lib/rabbitmq/mnesia" ] ; then
  echo 'This is not the 1st rabbit pod & has not been initialised'
  # disable liveness probe as it may take some time for the pod to come online.
  touch /run/rabbit-disable-liveness-probe
  # Wait for every pod with a lower ordinal, in order.
  for TARGET_POD in $(seq 0 +1 $((POD_INCREMENT - 1 ))); do
    # Each target pod gets its own 900 second deadline.
    END=$(($(date +%s) + 900))
    while ! check_rabbit_node_ready "${TARGET_POD}"; do
      sleep 5
      if [ "$(date +%s)" -gt "$END" ]; then
        echo "RabbitMQ pod ${TARGET_POD} not ready in time"
        exit 1
      fi
    done
  done
  # All lower-ordinal pods are ready: re-enable the liveness probe.
  rm -fv /run/rabbit-disable-liveness-probe
fi
|
||||
|
||||
exec rabbitmq-server
|
||||
|
@ -30,6 +30,8 @@ data:
|
||||
{{ tuple "bin/_rabbitmq-test.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
rabbitmq-liveness.sh: |
|
||||
{{ tuple "bin/_rabbitmq-liveness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
rabbitmq-readiness.sh: |
|
||||
{{ tuple "bin/_rabbitmq-readiness.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
rabbitmq-start.sh: |
|
||||
{{ tuple "bin/_rabbitmq-start.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
|
||||
rabbitmq-cookie.sh: |
|
||||
|
@ -67,6 +67,7 @@ metadata:
|
||||
spec:
|
||||
serviceName: {{ tuple "oslo_messaging" "discovery" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
|
||||
replicas: {{ $envAll.Values.pod.replicas.server }}
|
||||
podManagementPolicy: "Parallel"
|
||||
selector:
|
||||
matchLabels:
|
||||
{{ tuple $envAll "rabbitmq" "server" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 6 }}
|
||||
@ -183,12 +184,18 @@ spec:
|
||||
value: ".{{ tuple "oslo_messaging" "discovery" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}"
|
||||
- name: RABBITMQ_ERLANG_COOKIE
|
||||
value: "{{ $envAll.Values.endpoints.oslo_messaging.auth.erlang_cookie }}"
|
||||
- name: PORT_HTTP
|
||||
value: "{{ tuple "oslo_messaging" "internal" "http" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
|
||||
- name: PORT_AMPQ
|
||||
value: "{{ tuple "oslo_messaging" "internal" "amqp" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}"
|
||||
- name: PORT_CLUSTERING
|
||||
value: "{{ add (tuple "oslo_messaging" "internal" "amqp" . | include "helm-toolkit.endpoints.endpoint_port_lookup") 20000 }}"
|
||||
readinessProbe:
|
||||
initialDelaySeconds: 10
|
||||
timeoutSeconds: 10
|
||||
exec:
|
||||
command:
|
||||
- /tmp/rabbitmq-liveness.sh
|
||||
- /tmp/rabbitmq-readiness.sh
|
||||
livenessProbe:
|
||||
initialDelaySeconds: 30
|
||||
timeoutSeconds: 10
|
||||
@ -202,6 +209,10 @@ spec:
|
||||
mountPath: /tmp/rabbitmq-start.sh
|
||||
subPath: rabbitmq-start.sh
|
||||
readOnly: true
|
||||
- name: rabbitmq-bin
|
||||
mountPath: /tmp/rabbitmq-readiness.sh
|
||||
subPath: rabbitmq-readiness.sh
|
||||
readOnly: true
|
||||
- name: rabbitmq-bin
|
||||
mountPath: /tmp/rabbitmq-liveness.sh
|
||||
subPath: rabbitmq-liveness.sh
|
||||
|
Loading…
Reference in New Issue
Block a user