Coordinate haproxy and keepalived restarts
Keepalived and haproxy cooperate to provide control plane HA in kolla-ansible deployments. Care must be taken to avoid prolonged loss of availability during reconfigurations and upgrades. This patch aims to provide that care. There is nothing special about a keepalived upgrade compared to a reconfigure, hence it is simplified to run the same code as deploy. The broken safe-upgrade logic is replaced by common handler code whose goal is to ensure that we take down the current master only after the backups are ready. This change introduces a switch to the kolla_docker module that allows ignoring missing containers (as they are logically stopped). The switch is named ignore_missing. All tests are included. Change-Id: I22ddec5f7ee4a7d3d502649a158a7e005fe29c48
This commit is contained in:
parent
4c4ad2b87b
commit
c2d0bf30ea
@ -936,8 +936,10 @@ class DockerWorker(object):
|
||||
graceful_timeout = 10
|
||||
container = self.check_container()
|
||||
if not container:
|
||||
self.module.fail_json(
|
||||
msg="No such container: {} to stop".format(name))
|
||||
ignore_missing = self.params.get('ignore_missing')
|
||||
if not ignore_missing:
|
||||
self.module.fail_json(
|
||||
msg="No such container: {} to stop".format(name))
|
||||
elif not container['Status'].startswith('Exited '):
|
||||
self.changed = True
|
||||
self.dc.stop(name, timeout=graceful_timeout)
|
||||
@ -1069,6 +1071,7 @@ def generate_module():
|
||||
dimensions=dict(required=False, type='dict', default=dict()),
|
||||
tty=dict(required=False, type='bool', default=False),
|
||||
client_timeout=dict(required=False, type='int', default=120),
|
||||
ignore_missing=dict(required=False, type='bool', default=False),
|
||||
)
|
||||
required_if = [
|
||||
['action', 'pull_image', ['image']],
|
||||
|
@ -1,5 +1,51 @@
|
||||
---
|
||||
- name: Restart haproxy container
|
||||
# NOTE(yoctozepto): this handler dance is to ensure we delay restarting master
|
||||
# keepalived and haproxy, which control the VIP address, until we have working backups.
|
||||
# This could be improved by checking that the backup keepalived instances do not report the FAULT state.
|
||||
# Master node is handled specially to let it close down connections and only then
|
||||
# drop the VIP address by stopping keepalived service.
|
||||
|
||||
# NOTE(yoctozepto): we need fresh VIP address placement info (facts may be old)
|
||||
- name: Check IP addresses on the API interface
|
||||
vars:
|
||||
version: "{{ '6' if api_address_family == 'ipv6' else '4' }}"
|
||||
become: true
|
||||
command: ip -{{ version }} -o addr show dev {{ api_interface }}
|
||||
register: ip_addr_output
|
||||
changed_when: false
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
listen:
|
||||
- Restart haproxy container
|
||||
- Restart keepalived container
|
||||
|
||||
- name: Group HA nodes by status
|
||||
vars:
|
||||
re_safe_address: "{{ kolla_internal_vip_address | regex_escape }}"
|
||||
group_by:
|
||||
key: kolla_ha_is_master_{{ ip_addr_output.stdout is regex('\b' + re_safe_address + '\b') }}
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
listen:
|
||||
- Restart haproxy container
|
||||
- Restart keepalived container
|
||||
|
||||
- name: Stop backup keepalived container
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "stop_container"
|
||||
# NOTE(yoctozepto): backup node might not have keepalived yet - ignore
|
||||
ignore_missing: true
|
||||
common_options: "{{ docker_common_options }}"
|
||||
name: "keepalived"
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
- groups.kolla_ha_is_master_False is defined
|
||||
- inventory_hostname in groups.kolla_ha_is_master_False
|
||||
listen:
|
||||
- Restart keepalived container
|
||||
|
||||
- name: Restart backup haproxy container
|
||||
vars:
|
||||
service_name: "haproxy"
|
||||
service: "{{ haproxy_services[service_name] }}"
|
||||
@ -14,12 +60,20 @@
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
- inventory_hostname in groups[service.group]
|
||||
- service.enabled | bool
|
||||
- groups.kolla_ha_is_master_False is defined
|
||||
- inventory_hostname in groups.kolla_ha_is_master_False
|
||||
listen:
|
||||
- Restart haproxy container
|
||||
- Restart keepalived container
|
||||
notify:
|
||||
- Waiting for haproxy to start
|
||||
- Wait for backup haproxy to start
|
||||
|
||||
- name: Restart keepalived container
|
||||
- name: Wait for backup haproxy to start
|
||||
wait_for:
|
||||
host: "{{ api_interface_address }}"
|
||||
port: "{{ haproxy_monitor_port }}"
|
||||
|
||||
- name: Start backup keepalived container
|
||||
vars:
|
||||
service_name: "keepalived"
|
||||
service: "{{ haproxy_services[service_name] }}"
|
||||
@ -34,17 +88,92 @@
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
- inventory_hostname in groups[service.group]
|
||||
- service.enabled | bool
|
||||
- groups.kolla_ha_is_master_False is defined
|
||||
- inventory_hostname in groups.kolla_ha_is_master_False
|
||||
listen:
|
||||
- Restart keepalived container
|
||||
notify:
|
||||
- Waiting for virtual IP to appear
|
||||
- Wait for virtual IP to appear
|
||||
|
||||
- name: Waiting for haproxy to start
|
||||
# NOTE(yoctozepto): This is to ensure haproxy can close any open connections
|
||||
# to the VIP address.
|
||||
- name: Stop master haproxy container
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "stop_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
name: "haproxy"
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
- groups.kolla_ha_is_master_True is defined
|
||||
- inventory_hostname in groups.kolla_ha_is_master_True
|
||||
listen:
|
||||
- Restart keepalived container
|
||||
|
||||
- name: Stop master keepalived container
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "stop_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
name: "keepalived"
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
- groups.kolla_ha_is_master_True is defined
|
||||
- inventory_hostname in groups.kolla_ha_is_master_True
|
||||
listen:
|
||||
- Restart keepalived container
|
||||
|
||||
- name: Start master haproxy container
|
||||
vars:
|
||||
service_name: "haproxy"
|
||||
service: "{{ haproxy_services[service_name] }}"
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "recreate_or_restart_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
name: "{{ service.container_name }}"
|
||||
image: "{{ service.image }}"
|
||||
privileged: "{{ service.privileged | default(False) }}"
|
||||
volumes: "{{ service.volumes }}"
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
- groups.kolla_ha_is_master_True is defined
|
||||
- inventory_hostname in groups.kolla_ha_is_master_True
|
||||
listen:
|
||||
- Restart haproxy container
|
||||
- Restart keepalived container
|
||||
notify:
|
||||
- Wait for master haproxy to start
|
||||
|
||||
- name: Wait for master haproxy to start
|
||||
wait_for:
|
||||
host: "{{ api_interface_address }}"
|
||||
port: "{{ haproxy_monitor_port }}"
|
||||
|
||||
- name: Waiting for virtual IP to appear
|
||||
- name: Start master keepalived container
|
||||
vars:
|
||||
service_name: "keepalived"
|
||||
service: "{{ haproxy_services[service_name] }}"
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "recreate_or_restart_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
name: "{{ service.container_name }}"
|
||||
image: "{{ service.image }}"
|
||||
privileged: "{{ service.privileged | default(False) }}"
|
||||
volumes: "{{ service.volumes }}"
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
- groups.kolla_ha_is_master_True is defined
|
||||
- inventory_hostname in groups.kolla_ha_is_master_True
|
||||
listen:
|
||||
- Restart keepalived container
|
||||
notify:
|
||||
- Wait for virtual IP to appear
|
||||
|
||||
- name: Wait for virtual IP to appear
|
||||
wait_for:
|
||||
host: "{{ kolla_internal_vip_address }}"
|
||||
port: "{{ haproxy_monitor_port }}"
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
- name: Deploy haproxy containers
|
||||
- name: Check haproxy containers
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "compare_container"
|
||||
|
@ -1,22 +1,2 @@
|
||||
---
|
||||
- import_tasks: config-host.yml
|
||||
|
||||
- import_tasks: config.yml
|
||||
|
||||
- name: Stopping all slave keepalived containers
|
||||
vars:
|
||||
key: "{{ 'ipv6' if api_address_family == 'ipv6' else 'ipv4_secondaries' }}"
|
||||
addresses: "{{ hostvars[inventory_hostname]['ansible_' + api_interface].get(key, []) | map(attribute='address') | list }}"
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "stop_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
name: "keepalived"
|
||||
when: kolla_internal_vip_address not in addresses
|
||||
notify:
|
||||
- Restart keepalived container
|
||||
|
||||
# NOTE(yoctozepto): haproxy role handlers should not be flushed early.
|
||||
# site.yml handles all haproxy things in a dedicated play.
|
||||
# This is to avoid extra haproxy service restart.
|
||||
# See: https://bugs.launchpad.net/kolla-ansible/+bug/1875228
|
||||
- import_tasks: deploy.yml
|
||||
|
@ -0,0 +1,5 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Makes haproxy and keepalived restarts during Kolla-Ansible actions more
|
||||
robust, especially in multinode scenarios (HA).
|
@ -94,6 +94,7 @@ class ModuleArgsTest(base.BaseTestCase):
|
||||
tty=dict(required=False, type='bool', default=False),
|
||||
client_timeout=dict(required=False, type='int', default=120),
|
||||
healthcheck=dict(required=False, type='dict'),
|
||||
ignore_missing=dict(required=False, type='bool', default=False),
|
||||
)
|
||||
required_if = [
|
||||
['action', 'pull_image', ['image']],
|
||||
@ -175,7 +176,15 @@ FAKE_DATA = {
|
||||
'Image': 'myregistrydomain.com:5000/ubuntu:16.04',
|
||||
'ImageID': 'sha256:c5f1cf30',
|
||||
'Labels': {},
|
||||
'Names': '/my_container'}
|
||||
'Names': '/my_container'},
|
||||
{'Created': 1463578195,
|
||||
'Status': 'Exited (0) 2 hours ago',
|
||||
'HostConfig': {'NetworkMode': 'default'},
|
||||
'Id': 'e40d8e7188',
|
||||
'Image': 'myregistrydomain.com:5000/ubuntu:16.04',
|
||||
'ImageID': 'sha256:c5f1cf30',
|
||||
'Labels': {},
|
||||
'Names': '/exited_container'},
|
||||
],
|
||||
|
||||
'container_inspect': {
|
||||
@ -396,6 +405,18 @@ class TestContainer(base.BaseTestCase):
|
||||
self.assertTrue(self.dw.changed)
|
||||
self.dw.dc.containers.assert_called_once_with(all=True)
|
||||
self.dw.dc.stop.assert_called_once_with('my_container', timeout=10)
|
||||
self.dw.module.fail_json.assert_not_called()
|
||||
|
||||
def test_stop_container_already_stopped(self):
|
||||
self.dw = get_DockerWorker({'name': 'exited_container',
|
||||
'action': 'stop_container'})
|
||||
self.dw.dc.containers.return_value = self.fake_data['containers']
|
||||
self.dw.stop_container()
|
||||
|
||||
self.assertFalse(self.dw.changed)
|
||||
self.dw.dc.containers.assert_called_once_with(all=True)
|
||||
self.dw.module.fail_json.assert_not_called()
|
||||
self.dw.dc.stop.assert_not_called()
|
||||
|
||||
def test_stop_container_not_exists(self):
|
||||
self.dw = get_DockerWorker({'name': 'fake_container',
|
||||
@ -405,9 +426,22 @@ class TestContainer(base.BaseTestCase):
|
||||
|
||||
self.assertFalse(self.dw.changed)
|
||||
self.dw.dc.containers.assert_called_once_with(all=True)
|
||||
self.dw.dc.stop.assert_not_called()
|
||||
self.dw.module.fail_json.assert_called_once_with(
|
||||
msg="No such container: fake_container to stop")
|
||||
|
||||
def test_stop_container_not_exists_ignore_missing(self):
|
||||
self.dw = get_DockerWorker({'name': 'fake_container',
|
||||
'action': 'stop_container',
|
||||
'ignore_missing': True})
|
||||
self.dw.dc.containers.return_value = self.fake_data['containers']
|
||||
self.dw.stop_container()
|
||||
|
||||
self.assertFalse(self.dw.changed)
|
||||
self.dw.dc.containers.assert_called_once_with(all=True)
|
||||
self.dw.dc.stop.assert_not_called()
|
||||
self.dw.module.fail_json.assert_not_called()
|
||||
|
||||
def test_stop_and_remove_container(self):
|
||||
self.dw = get_DockerWorker({'name': 'my_container',
|
||||
'action': 'stop_and_remove_container'})
|
||||
|
Loading…
Reference in New Issue
Block a user