From 0cd5b027c985a8f6d3368ae0dc08b65f67f67fe0 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Mon, 7 Jun 2021 12:56:36 +0100 Subject: [PATCH] Fix RabbitMQ restart ordering The host list order seen during Ansible handlers may differ to the usual play host list order, due to race conditions in notifying handlers. This means that restart_services.yml for RabbitMQ may be included in a different order than the rabbitmq group, resulting in a node other than the 'first' being restarted first. This can cause some nodes to fail to join the cluster. The include_tasks loop was introduced in [1]. This change fixes the issue by splitting the handler into two tasks, and restarting the first node before all others. [1] https://review.opendev.org/c/openstack/kolla-ansible/+/763137 Change-Id: I1823301d5889589bfd48326ed7de03c6061ea5ba Closes-Bug: #1930293 --- ansible/roles/rabbitmq/handlers/main.yml | 18 +++++++++++++++++- .../notes/bug-1930293-d8a524f2070e6779.yaml | 5 +++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/bug-1930293-d8a524f2070e6779.yaml diff --git a/ansible/roles/rabbitmq/handlers/main.yml b/ansible/roles/rabbitmq/handlers/main.yml index f3b78de856..cd5e39eb57 100644 --- a/ansible/roles/rabbitmq/handlers/main.yml +++ b/ansible/roles/rabbitmq/handlers/main.yml @@ -1,5 +1,19 @@ --- -- name: Restart rabbitmq container +# NOTE(mgoddard): These tasks perform a 'full stop upgrade', which is necessary when moving between +# major releases. In future kolla-ansible releases we may be able to change this to a rolling +# restart. 
For info on this process see https://www.rabbitmq.com/upgrade.html + +- name: Restart first rabbitmq container + vars: + service_name: "rabbitmq" + service: "{{ rabbitmq_services[service_name] }}" + include_tasks: 'restart_services.yml' + when: + - kolla_action != "config" + - inventory_hostname == groups[service.group] | first + listen: Restart rabbitmq container + +- name: Restart remaining rabbitmq containers vars: service_name: "rabbitmq" service: "{{ rabbitmq_services[service_name] }}" @@ -7,4 +21,6 @@ when: - kolla_action != "config" - inventory_hostname == item + - inventory_hostname != groups[service.group] | first loop: "{{ groups[service.group] }}" + listen: Restart rabbitmq container diff --git a/releasenotes/notes/bug-1930293-d8a524f2070e6779.yaml b/releasenotes/notes/bug-1930293-d8a524f2070e6779.yaml new file mode 100644 index 0000000000..f16c156556 --- /dev/null +++ b/releasenotes/notes/bug-1930293-d8a524f2070e6779.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes more-than-2-node RabbitMQ upgrade failing randomly. + `LP#1930293 <https://launchpad.net/bugs/1930293>`__.