Fix multiple issues with MariaDB handling
These affected both deploy (and reconfigure) and upgrade resulting in WSREP issues, failed deploys or need to recover the cluster. This patch makes sure k-a does not abruptly terminate nodes to break cluster. This is achieved by cleaner separation between stages (bootstrap, restart current, deploy new) and 3 phases for restarts (to keep the quorum). Upgrade actions, which operate on a healthy cluster, went to its section. Service restart was refactored. We no longer rely on the master/slave distinction as all nodes are masters in Galera. Closes-bug: #1857908 Closes-bug: #1859145 Change-Id: I83600c69141714fc412df0976f49019a857655f5
This commit is contained in:
parent
ac62b560ff
commit
9f14ad651a
@ -17,16 +17,10 @@
|
||||
restart_policy: no
|
||||
volumes: "{{ service.volumes }}"
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
when:
|
||||
- bootstrap_host is defined
|
||||
- bootstrap_host == inventory_hostname
|
||||
listen: Bootstrap MariaDB cluster
|
||||
notify:
|
||||
- restart mariadb
|
||||
|
||||
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
|
||||
# https://github.com/ansible/ansible-modules-core/issues/2788
|
||||
- name: wait first mariadb container
|
||||
# NOTE(yoctozepto): We have to loop this to avoid breaking on connection resets
|
||||
- name: Wait for first MariaDB service port liveness
|
||||
wait_for:
|
||||
host: "{{ api_interface_address }}"
|
||||
port: "{{ mariadb_port }}"
|
||||
@ -37,157 +31,60 @@
|
||||
until: check_mariadb_port is success
|
||||
retries: 10
|
||||
delay: 6
|
||||
when:
|
||||
- bootstrap_host is defined
|
||||
- bootstrap_host == inventory_hostname
|
||||
listen: Bootstrap MariaDB cluster
|
||||
|
||||
- name: Wait for MariaDB to become operational
|
||||
- name: Wait for first MariaDB service to sync WSREP
|
||||
become: true
|
||||
command: >-
|
||||
docker exec {{ mariadb_service.container_name }}
|
||||
mysql -uroot -p{{ database_password }}
|
||||
--silent --skip-column-names
|
||||
-e 'SHOW STATUS LIKE "wsrep_evs_state"'
|
||||
-e 'SHOW STATUS LIKE "wsrep_local_state_comment"'
|
||||
changed_when: false
|
||||
register: result
|
||||
until: '"OPERATIONAL" in result.stdout'
|
||||
until: result.stdout == "wsrep_local_state_comment\tSynced"
|
||||
retries: 10
|
||||
delay: 6
|
||||
no_log: true
|
||||
when:
|
||||
- bootstrap_host is defined
|
||||
- bootstrap_host == inventory_hostname
|
||||
listen: Bootstrap MariaDB cluster
|
||||
|
||||
- name: restart slave mariadb
|
||||
vars:
|
||||
service_name: "mariadb"
|
||||
service: "{{ mariadb_services[service_name] }}"
|
||||
- name: Creating haproxy mysql user
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "recreate_or_restart_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
name: "{{ service.container_name }}"
|
||||
image: "{{ service.image }}"
|
||||
volumes: "{{ service.volumes }}"
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
kolla_toolbox:
|
||||
module_name: mysql_user
|
||||
module_args:
|
||||
login_host: "{{ api_interface_address }}"
|
||||
login_port: "{{ mariadb_port }}"
|
||||
login_user: "{{ database_user }}"
|
||||
login_password: "{{ database_password }}"
|
||||
name: "haproxy"
|
||||
password: ""
|
||||
host: "%"
|
||||
priv: "*.*:USAGE"
|
||||
listen: Bootstrap MariaDB cluster
|
||||
|
||||
- name: Restart MariaDB on existing cluster members
|
||||
include_tasks: 'restart_services.yml'
|
||||
when:
|
||||
- groups.mariadb_port_alive_True is defined
|
||||
- inventory_hostname in groups.mariadb_port_alive_True
|
||||
- groups.mariadb_port_alive_True.index(inventory_hostname) % 3 == item
|
||||
- kolla_action != "config"
|
||||
- inventory_hostname != master_host
|
||||
- not mariadb_recover | default(false)
|
||||
listen: restart mariadb
|
||||
loop:
|
||||
- 0
|
||||
- 1
|
||||
- 2
|
||||
|
||||
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
|
||||
# https://github.com/ansible/ansible-modules-core/issues/2788
|
||||
- name: wait for slave mariadb
|
||||
wait_for:
|
||||
host: "{{ api_interface_address }}"
|
||||
port: "{{ mariadb_port }}"
|
||||
connect_timeout: 1
|
||||
timeout: 60
|
||||
search_regex: "MariaDB"
|
||||
register: check_mariadb_port
|
||||
until: check_mariadb_port is success
|
||||
retries: 10
|
||||
delay: 6
|
||||
- name: Start MariaDB on new nodes
|
||||
include_tasks: 'restart_services.yml'
|
||||
when:
|
||||
- bootstrap_host is not defined or bootstrap_host != inventory_hostname
|
||||
- groups.mariadb_port_alive_False is defined
|
||||
- inventory_hostname in groups.mariadb_port_alive_False
|
||||
- kolla_action != "config"
|
||||
- inventory_hostname != master_host
|
||||
- not mariadb_recover | default(false)
|
||||
listen: restart mariadb
|
||||
|
||||
- name: run upgrade on slave
|
||||
vars:
|
||||
service_name: "mariadb"
|
||||
service: "{{ mariadb_services[service_name] }}"
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "start_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
detach: False
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
environment:
|
||||
KOLLA_UPGRADE:
|
||||
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
|
||||
DB_HOST: "{{ api_interface_address }}"
|
||||
DB_PORT: "{{ mariadb_port }}"
|
||||
DB_ROOT_PASSWORD: "{{ database_password }}"
|
||||
image: "{{ service.image }}"
|
||||
labels:
|
||||
UPGRADE:
|
||||
name: "upgrade_mariadb"
|
||||
restart_policy: no
|
||||
volumes: "{{ service.volumes }}"
|
||||
no_log: true
|
||||
when:
|
||||
- kolla_action == "upgrade"
|
||||
- inventory_hostname != master_host
|
||||
- not mariadb_recover | default(false)
|
||||
listen: restart mariadb
|
||||
|
||||
- name: restart master mariadb
|
||||
vars:
|
||||
service_name: "mariadb"
|
||||
service: "{{ mariadb_services[service_name] }}"
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "recreate_or_restart_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
name: "{{ service.container_name }}"
|
||||
image: "{{ service.image }}"
|
||||
volumes: "{{ service.volumes }}"
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
- inventory_hostname == master_host
|
||||
- not mariadb_recover | default(false)
|
||||
listen: restart mariadb
|
||||
|
||||
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
|
||||
# https://github.com/ansible/ansible-modules-core/issues/2788
|
||||
- name: Waiting for master mariadb
|
||||
wait_for:
|
||||
host: "{{ api_interface_address }}"
|
||||
port: "{{ mariadb_port }}"
|
||||
connect_timeout: 1
|
||||
timeout: 60
|
||||
search_regex: "MariaDB"
|
||||
register: check_mariadb_port
|
||||
until: check_mariadb_port is success
|
||||
retries: 10
|
||||
delay: 6
|
||||
when:
|
||||
- kolla_action != "config"
|
||||
- inventory_hostname == master_host
|
||||
- not mariadb_recover | default(false)
|
||||
listen: restart mariadb
|
||||
|
||||
- name: run upgrade on master
|
||||
vars:
|
||||
service_name: "mariadb"
|
||||
service: "{{ mariadb_services[service_name] }}"
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "start_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
detach: False
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
environment:
|
||||
KOLLA_UPGRADE:
|
||||
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
|
||||
DB_HOST: "{{ api_interface_address }}"
|
||||
DB_PORT: "{{ mariadb_port }}"
|
||||
DB_ROOT_PASSWORD: "{{ database_password }}"
|
||||
image: "{{ service.image }}"
|
||||
labels:
|
||||
UPGRADE:
|
||||
name: "upgrade_mariadb"
|
||||
restart_policy: no
|
||||
volumes: "{{ service.volumes }}"
|
||||
no_log: true
|
||||
when:
|
||||
- kolla_action == "upgrade"
|
||||
- inventory_hostname == master_host
|
||||
- not mariadb_recover | default(false)
|
||||
listen: restart mariadb
|
||||
- name: Ensure MariaDB is running normally on bootstrap host
|
||||
include_tasks: 'restart_services.yml'
|
||||
listen: Bootstrap MariaDB cluster
|
||||
|
@ -1,13 +1,9 @@
|
||||
---
|
||||
- name: Set a fact about the master host
|
||||
set_fact:
|
||||
master_host: "{{ groups['mariadb'][0] }}"
|
||||
|
||||
- include_tasks: lookup_cluster.yml
|
||||
|
||||
- include_tasks: bootstrap_cluster.yml
|
||||
when:
|
||||
- not has_cluster | bool
|
||||
- not mariadb_cluster_exists
|
||||
- inventory_hostname == groups['mariadb'][0]
|
||||
|
||||
- include_tasks: recover_cluster.yml
|
||||
|
@ -1,6 +1,5 @@
|
||||
---
|
||||
- name: Set a fact about the master host
|
||||
set_fact:
|
||||
master_host: "{{ groups['mariadb'][0] }}"
|
||||
|
||||
- import_tasks: check-containers.yml
|
||||
|
||||
# NOTE(yoctozepto): handlers prerequisite
|
||||
- import_tasks: lookup_cluster.yml
|
||||
|
@ -1,25 +1,5 @@
|
||||
---
|
||||
- name: Cleaning up temp file on localhost
|
||||
file:
|
||||
path: /tmp/kolla_mariadb_cluster
|
||||
state: absent
|
||||
delegate_to: localhost
|
||||
changed_when: False
|
||||
check_mode: no
|
||||
run_once: True
|
||||
|
||||
# NOTE(mnasiadka): Due to the way that we are setting fact has_cluster - content needs to be ''
|
||||
- name: Creating temp file on localhost
|
||||
copy:
|
||||
content: ''
|
||||
dest: /tmp/kolla_mariadb_cluster
|
||||
mode: 0644
|
||||
delegate_to: localhost
|
||||
changed_when: False
|
||||
check_mode: no
|
||||
run_once: True
|
||||
|
||||
- name: Creating mariadb volume
|
||||
- name: Create MariaDB volume
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "create_volume"
|
||||
@ -27,25 +7,59 @@
|
||||
name: "mariadb"
|
||||
register: mariadb_volume
|
||||
|
||||
- name: Writing hostname of host with existing cluster files to temp file
|
||||
copy:
|
||||
content: "{{ ansible_hostname }}"
|
||||
dest: /tmp/kolla_mariadb_cluster
|
||||
mode: 0644
|
||||
delegate_to: localhost
|
||||
changed_when: False
|
||||
check_mode: no
|
||||
when: mariadb_volume is not changed
|
||||
- name: Divide hosts by their MariaDB volume availability
|
||||
group_by:
|
||||
key: mariadb_had_volume_{{ mariadb_volume is not changed }}
|
||||
|
||||
- name: Registering host from temp file
|
||||
- name: Establish whether the cluster has already existed
|
||||
set_fact:
|
||||
has_cluster: "{{ lookup('file', '/tmp/kolla_mariadb_cluster') | length > 0 }}"
|
||||
mariadb_cluster_exists: "{{ groups.mariadb_had_volume_True is defined }}"
|
||||
|
||||
- name: Cleaning up temp file on localhost
|
||||
file:
|
||||
path: /tmp/kolla_mariadb_cluster
|
||||
state: absent
|
||||
delegate_to: localhost
|
||||
changed_when: False
|
||||
check_mode: no
|
||||
run_once: True
|
||||
- block:
|
||||
- name: Check MariaDB service port liveness
|
||||
wait_for:
|
||||
host: "{{ api_interface_address }}"
|
||||
port: "{{ mariadb_port }}"
|
||||
connect_timeout: 1
|
||||
timeout: 10
|
||||
search_regex: "MariaDB"
|
||||
register: check_mariadb_port_liveness
|
||||
ignore_errors: yes
|
||||
|
||||
- name: Divide hosts by their MariaDB service port liveness
|
||||
group_by:
|
||||
key: mariadb_port_alive_{{ check_mariadb_port_liveness is success }}
|
||||
|
||||
- block:
|
||||
- name: Check MariaDB service WSREP sync status
|
||||
become: true
|
||||
command: >-
|
||||
docker exec {{ mariadb_service.container_name }}
|
||||
mysql -uroot -p{{ database_password }}
|
||||
--silent --skip-column-names
|
||||
-e 'SHOW STATUS LIKE "wsrep_local_state_comment"'
|
||||
changed_when: false
|
||||
register: check_mariadb_sync_status
|
||||
no_log: true
|
||||
|
||||
# NOTE(yoctozepto): this is extracted separately to properly escape
|
||||
# the TAB character which likes to go wrong due to interaction between
|
||||
# Python/Ansible/Jinja2/YAML, the way below works
|
||||
- name: Extract MariaDB service WSREP sync status
|
||||
set_fact:
|
||||
mariadb_sync_status: "{{ check_mariadb_sync_status.stdout.split('\t')[1] }}"
|
||||
|
||||
- name: Divide hosts by their MariaDB service WSREP sync status
|
||||
group_by:
|
||||
key: mariadb_sync_status_{{ mariadb_sync_status }}
|
||||
|
||||
- name: Fail when MariaDB service is not synced
|
||||
fail:
|
||||
msg: MariaDB service is not synced. Please wait for WSREP sync before proceeding.
|
||||
when:
|
||||
- groups.mariadb_sync_status_Synced is not defined or
|
||||
inventory_hostname not in groups.mariadb_sync_status_Synced
|
||||
when:
|
||||
- groups.mariadb_port_alive_True is defined
|
||||
- inventory_hostname in groups.mariadb_port_alive_True
|
||||
when: not mariadb_recover | default(False)
|
||||
|
@ -1,7 +1,7 @@
|
||||
---
|
||||
- fail:
|
||||
msg: "MariaDB cluster was not found. Is your inventory correct?"
|
||||
when: not has_cluster | bool
|
||||
when: not mariadb_cluster_exists
|
||||
|
||||
- name: Cleaning up temp file on mariadb hosts
|
||||
file:
|
||||
@ -97,8 +97,6 @@
|
||||
|
||||
- set_fact:
|
||||
bootstrap_host: "{{ mariadb_recover_inventory_name }}"
|
||||
master_host: "{{ mariadb_recover_inventory_name }}"
|
||||
changed_when: true
|
||||
|
||||
- name: Copying grastate.dat file from MariaDB container in bootstrap host
|
||||
become: true
|
||||
|
@ -1,19 +1,4 @@
|
||||
---
|
||||
- name: Creating haproxy mysql user
|
||||
become: true
|
||||
kolla_toolbox:
|
||||
module_name: mysql_user
|
||||
module_args:
|
||||
login_host: "{{ api_interface_address }}"
|
||||
login_port: "{{ mariadb_port }}"
|
||||
login_user: "{{ database_user }}"
|
||||
login_password: "{{ database_password }}"
|
||||
name: "haproxy"
|
||||
password: ""
|
||||
host: "%"
|
||||
priv: "*.*:USAGE"
|
||||
run_once: True
|
||||
|
||||
- import_tasks: wait_for_loadbalancer.yml
|
||||
|
||||
- name: Creating the Mariabackup database
|
||||
@ -65,7 +50,3 @@
|
||||
run_once: True
|
||||
when:
|
||||
- enable_mariabackup | bool
|
||||
|
||||
- name: Cleaning up facts
|
||||
set_fact:
|
||||
delegate_host: "bootstraped"
|
||||
|
46
ansible/roles/mariadb/tasks/restart_services.yml
Normal file
46
ansible/roles/mariadb/tasks/restart_services.yml
Normal file
@ -0,0 +1,46 @@
|
||||
---
|
||||
- name: Restart MariaDB container
|
||||
vars:
|
||||
service_name: "mariadb"
|
||||
service: "{{ mariadb_services[service_name] }}"
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "recreate_or_restart_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
name: "{{ service.container_name }}"
|
||||
image: "{{ service.image }}"
|
||||
volumes: "{{ service.volumes }}"
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
|
||||
# NOTE(yoctozepto): We have to loop this to avoid breaking on connection resets
|
||||
- name: Wait for MariaDB service port liveness
|
||||
wait_for:
|
||||
host: "{{ api_interface_address }}"
|
||||
port: "{{ mariadb_port }}"
|
||||
connect_timeout: 1
|
||||
timeout: 60
|
||||
search_regex: "MariaDB"
|
||||
register: check_mariadb_port
|
||||
until: check_mariadb_port is success
|
||||
retries: 10
|
||||
delay: 6
|
||||
|
||||
- name: Wait for MariaDB service to sync WSREP
|
||||
become: true
|
||||
command: >-
|
||||
docker exec {{ mariadb_service.container_name }}
|
||||
mysql -uroot -p{{ database_password }}
|
||||
--silent --skip-column-names
|
||||
-e 'SHOW STATUS LIKE "wsrep_local_state_comment"'
|
||||
changed_when: false
|
||||
register: result
|
||||
until: result.stdout == "wsrep_local_state_comment\tSynced"
|
||||
retries: 10
|
||||
delay: 6
|
||||
no_log: true
|
||||
when:
|
||||
# NOTE(yoctozepto): we don't want to wait for new nodes to fully sync
|
||||
# with an existing cluster as this could take time
|
||||
- not mariadb_cluster_exists or
|
||||
(groups.mariadb_port_alive_True is defined and
|
||||
inventory_hostname in groups.mariadb_port_alive_True)
|
@ -1,2 +1,26 @@
|
||||
---
|
||||
- include_tasks: deploy.yml
|
||||
|
||||
- name: Run upgrade in MariaDB container
|
||||
vars:
|
||||
service_name: "mariadb"
|
||||
service: "{{ mariadb_services[service_name] }}"
|
||||
become: true
|
||||
kolla_docker:
|
||||
action: "start_container"
|
||||
common_options: "{{ docker_common_options }}"
|
||||
detach: False
|
||||
dimensions: "{{ service.dimensions }}"
|
||||
environment:
|
||||
KOLLA_UPGRADE:
|
||||
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
|
||||
DB_HOST: "{{ api_interface_address }}"
|
||||
DB_PORT: "{{ mariadb_port }}"
|
||||
DB_ROOT_PASSWORD: "{{ database_password }}"
|
||||
image: "{{ service.image }}"
|
||||
labels:
|
||||
UPGRADE:
|
||||
name: "upgrade_mariadb"
|
||||
restart_policy: no
|
||||
volumes: "{{ service.volumes }}"
|
||||
no_log: true
|
||||
|
@ -0,0 +1,17 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes MariaDB issues in multinode scenarios which affected
|
||||
deployment, reconfiguration, upgrade and Galera cluster resizing.
|
||||
They were usually manifested by WSREP issues in various places
|
||||
and could lead to need to recover the Galera cluster.
|
||||
Note these issues were due to how MariaDB was handled during
|
||||
Kolla Ansible runs and did not affect Galera cluster during normal
|
||||
operations unless MariaDB was later touched by Kolla Ansible.
|
||||
Users wishing to run actions on their Galera clusters using
|
||||
Kolla Ansible are strongly advised to update.
|
||||
For details please see the following Launchpad bug records:
|
||||
`bug 1857908
|
||||
<https://bugs.launchpad.net/kolla-ansible/+bug/1857908>`__ and
|
||||
`bug 1859145
|
||||
<https://bugs.launchpad.net/kolla-ansible/+bug/1859145>`__.
|
Loading…
x
Reference in New Issue
Block a user