From a87810db7e5bccdc863dd5cb5158ca5193eb5fd3 Mon Sep 17 00:00:00 2001 From: Matt Crees Date: Tue, 7 Feb 2023 09:56:43 +0000 Subject: [PATCH] Set RabbitMQ ha-promote-on-shutdown=always Changes the default value of `rabbitmq_ha_promote_on_shutdown` to `"always"`. We are seeing issues with RabbitMQ failing to recover automatically when nodes are restarted. https://www.rabbitmq.com/ha.html#cluster-shutdown Rather than waiting for operator interventions, it is better we allow recovery to happen, even if that means we may lose some messages. A few failed and timed-out operations are better than a totally broken cloud. This is achieved using ha-promote-on-shutdown=always. Note, when a node failure is detected, this is already the default behaviour from 3.7.5 onwards: https://www.rabbitmq.com/ha.html#promoting-unsynchronised-mirrors Related-Bug: #1954925 Change-Id: I484a81163f703fa27112df22473d657e2a9ab964 --- ansible/roles/rabbitmq/defaults/main.yml | 2 +- ...a-promote-on-shutdown-always-e8db9ad15fd1b8fb.yaml | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100755 releasenotes/notes/rabbitmq-set-ha-promote-on-shutdown-always-e8db9ad15fd1b8fb.yaml diff --git a/ansible/roles/rabbitmq/defaults/main.yml b/ansible/roles/rabbitmq/defaults/main.yml index 4c059eadb0..656689a915 100644 --- a/ansible/roles/rabbitmq/defaults/main.yml +++ b/ansible/roles/rabbitmq/defaults/main.yml @@ -88,7 +88,7 @@ rabbitmq_cluster_partition_handling: "pause_minority" # The rabbitmq default for ha queues is "when-synced" # More details see: # https://www.rabbitmq.com/ha.html#promoting-unsynchronised-mirrors -rabbitmq_ha_promote_on_shutdown: +rabbitmq_ha_promote_on_shutdown: "always" # The number of rabbitmq replicas should follow this advice: # https://www.rabbitmq.com/ha.html#replication-factor # This means, if you have three rabbit nodes, we request two diff --git a/releasenotes/notes/rabbitmq-set-ha-promote-on-shutdown-always-e8db9ad15fd1b8fb.yaml 
b/releasenotes/notes/rabbitmq-set-ha-promote-on-shutdown-always-e8db9ad15fd1b8fb.yaml new file mode 100755 index 0000000000..71962b3493 --- /dev/null +++ b/releasenotes/notes/rabbitmq-set-ha-promote-on-shutdown-always-e8db9ad15fd1b8fb.yaml @@ -0,0 +1,11 @@ +--- +upgrade: + - | + The RabbitMQ variable `rabbitmq_ha_promote_on_shutdown` now defaults to + `"always"`. This only has an effect if + `om_enable_rabbitmq_high_availability` is set to `True`. When + `ha-promote-on-shutdown` is set to `always`, queue mirrors are promoted on + shutdown even if they aren't fully synced. This means that we value + availability over the risk of losing some messages. Note that the contents + of the RabbitMQ definitions.json are now changed, meaning RabbitMQ + containers will be restarted on next deploy/upgrade.