From 94f3ce0c78998e29fcc034a9b0844f9d6d602807 Mon Sep 17 00:00:00 2001 From: John Garbutt Date: Fri, 17 Dec 2021 16:20:32 +0000 Subject: [PATCH] RabbitMQ: Support setting ha-promote-on-shutdown By default ha-promote-on-shutdown=when-synced. However, we are seeing issues with RabbitMQ automatically recovering when nodes are restarted. https://www.rabbitmq.com/ha.html#cluster-shutdown Rather than waiting for operator intervention, it is better we allow recovery to happen, even if that means we may lose some messages. A few failed and timed out operations is better than a totally broken cloud. This is achieved using ha-promote-on-shutdown=always. Note, when a node failure is detected, this is already the default behaviour from 3.7.5 onwards: https://www.rabbitmq.com/ha.html#promoting-unsynchronised-mirrors This patch adds the option to change the ha-promote-on-shutdown definition, using the flag `rabbitmq_ha_promote_on_shutdown`. This value is unset by default to avoid any unexpected changes to the RabbitMQ definitions.json file, as that would trigger an unexpected restart of RabbitMQ during the next deploy. 
Related-Bug: #1954925 Change-Id: I2146bda2c72ddac2c9923c6941b0596395fd9ab5 --- ansible/roles/rabbitmq/defaults/main.yml | 5 +++++ .../roles/rabbitmq/templates/definitions.json.j2 | 4 ++-- ...tmq-ha-promote-on-shutdown-9099c6643f2d0cce.yaml | 13 +++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 releasenotes/notes/rabbitmq-ha-promote-on-shutdown-9099c6643f2d0cce.yaml diff --git a/ansible/roles/rabbitmq/defaults/main.yml b/ansible/roles/rabbitmq/defaults/main.yml index 388369f58b..06aaf8a9c4 100644 --- a/ansible/roles/rabbitmq/defaults/main.yml +++ b/ansible/roles/rabbitmq/defaults/main.yml @@ -84,6 +84,11 @@ rabbitmq_server_additional_erl_args: "+S 2:2 +sbwt none +sbwtdcpu none +sbwtdio rabbitmq_tls_options: {} # To avoid split-brain rabbitmq_cluster_partition_handling: "pause_minority" +# For consistency use "when-synced", for availability use "always" +# The rabbitmq default for ha queues is "when-synced" +# More details see: +# https://www.rabbitmq.com/ha.html#promoting-unsynchronised-mirrors +rabbitmq_ha_promote_on_shutdown: rabbitmq_extra_config: {} #################### diff --git a/ansible/roles/rabbitmq/templates/definitions.json.j2 b/ansible/roles/rabbitmq/templates/definitions.json.j2 index 450a04df49..d04a0deabd 100644 --- a/ansible/roles/rabbitmq/templates/definitions.json.j2 +++ b/ansible/roles/rabbitmq/templates/definitions.json.j2 @@ -18,8 +18,8 @@ ], {% if om_enable_rabbitmq_high_availability | bool %} "policies":[ - {"vhost": "/", "name": "ha-all", "pattern": "^(?!(amq\\.)|(.*_fanout_)|(reply_)).*", "apply-to": "all", "definition": {"ha-mode":"all"}, "priority":0}{% if project_name == 'outward_rabbitmq' %}, - {"vhost": "{{ murano_agent_rabbitmq_vhost }}", "name": "ha-all", "pattern": "^(?!(amq\\.)|(.*_fanout_)|(reply_)).*", "apply-to": "all", "definition": {"ha-mode":"all"}, "priority":0} + {"vhost": "/", "name": "ha-all", "pattern": "^(?!(amq\\.)|(.*_fanout_)|(reply_)).*", "apply-to": "all", "definition": 
{"ha-mode":"all"{% if rabbitmq_ha_promote_on_shutdown is not none %},"ha-promote-on-shutdown":"{{ rabbitmq_ha_promote_on_shutdown }}"{% endif %}}, "priority":0}{% if project_name == 'outward_rabbitmq' %}, + {"vhost": "{{ murano_agent_rabbitmq_vhost }}", "name": "ha-all", "pattern": "^(?!(amq\\.)|(.*_fanout_)|(reply_)).*", "apply-to": "all", "definition": {"ha-mode":"all"{% if rabbitmq_ha_promote_on_shutdown is not none %},"ha-promote-on-shutdown":"{{ rabbitmq_ha_promote_on_shutdown }}"{% endif %}}, "priority":0} {% endif %} ] {% else %} diff --git a/releasenotes/notes/rabbitmq-ha-promote-on-shutdown-9099c6643f2d0cce.yaml b/releasenotes/notes/rabbitmq-ha-promote-on-shutdown-9099c6643f2d0cce.yaml new file mode 100644 index 0000000000..c97d3b68a7 --- /dev/null +++ b/releasenotes/notes/rabbitmq-ha-promote-on-shutdown-9099c6643f2d0cce.yaml @@ -0,0 +1,13 @@ +--- +features: + - | + The config option `rabbitmq_ha_promote_on_shutdown` has been added, which + allows changing the RabbitMQ definition `ha-promote-on-shutdown`. By + default `ha-promote-on-shutdown` is "when-synced". We recommend changing + this to be "always". This basically means we don't mind losing some + messages, instead we give priority to rabbitmq availability. This is most + relevant when restarting rabbitmq, such as when upgrading. Note that + setting the value of this flag, even to the default value of "when-synced", + will cause RabbitMQ to be restarted on the next deploy. + For more details please see: + https://www.rabbitmq.com/ha.html#cluster-shutdown