Fixes for MariaDB bootstrap and recovery

* Fix wsrep sequence number detection. Log message format is
  'WSREP: Recovered position: <UUID>:<seqno>' but we were picking out
  the UUID rather than the sequence number. This is as good as random.

* Add become: true to log file reading and removal since
  I4a5ebcedaccb9261dbc958ec67e8077d7980e496 added become: true to the
  'docker cp' command which creates it.

* Don't run handlers during recovery. If the config files change we
  would end up restarting the cluster twice.

* Wait for wsrep recovery container completion (don't detach). This
  avoids a potential race between wsrep recovery and the subsequent
  'stop_container'.

* Finally, we now wait for the bootstrap host to report that it is in
  an OPERATIONAL state. Without this we can see errors where the
  MariaDB cluster is not ready when used by other services.

Change-Id: Iaf7862be1affab390f811fc485fd0eb6879fd583
Closes-Bug: #1834467
This commit is contained in:
Mark Goddard 2019-06-27 12:17:17 +01:00
parent 54856a873f
commit 86f373a198
2 changed files with 51 additions and 8 deletions

View File

@ -42,6 +42,24 @@
- bootstrap_host == inventory_hostname
listen: Bootstrap MariaDB cluster
- name: Wait for MariaDB to become operational
become: true
command: >-
docker exec {{ mariadb_service.container_name }}
mysql -uroot -p{{ database_password }}
--silent --skip-column-names
-e 'SHOW STATUS LIKE "wsrep_evs_state"'
changed_when: false
register: result
until: '"OPERATIONAL" in result.stdout'
retries: 10
delay: 6
no_log: true
when:
- bootstrap_host is defined
- bootstrap_host == inventory_hostname
listen: Bootstrap MariaDB cluster
- name: restart slave mariadb
vars:
service_name: "mariadb"
@ -57,6 +75,7 @@
when:
- kolla_action != "config"
- inventory_hostname != master_host
- not mariadb_recover | default(false)
listen: restart mariadb
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
@ -75,6 +94,7 @@
when:
- kolla_action != "config"
- inventory_hostname != master_host
- not mariadb_recover | default(false)
listen: restart mariadb
- name: run upgrade on slave
@ -103,6 +123,7 @@
when:
- kolla_action == "upgrade"
- inventory_hostname != master_host
- not mariadb_recover | default(false)
listen: restart mariadb
- name: restart master mariadb
@ -120,6 +141,7 @@
when:
- kolla_action != "config"
- inventory_hostname == master_host
- not mariadb_recover | default(false)
listen: restart mariadb
# TODO(jeffrey4l), remove the task check when the wait_for bug is fixed
@ -138,6 +160,7 @@
when:
- kolla_action != "config"
- inventory_hostname == master_host
- not mariadb_recover | default(false)
listen: restart mariadb
- name: run upgrade on master
@ -166,4 +189,5 @@
when:
- kolla_action == "upgrade"
- inventory_hostname == master_host
- not mariadb_recover | default(false)
listen: restart mariadb

View File

@ -26,36 +26,37 @@
name: "{{ mariadb_service.container_name }}"
action: "stop_container"
# Run wsrep recovery with detach=false to block until completion. Use a
# different container name to avoid the mariadb container being removed.
- name: Run MariaDB wsrep recovery
become: true
kolla_docker:
action: "start_container"
common_options: "{{ docker_common_options }}"
detach: false
environment:
KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
BOOTSTRAP_ARGS: "--wsrep-recover"
image: "{{ mariadb_service.image }}"
labels:
BOOTSTRAP:
name: "{{ mariadb_service.container_name }}"
name: mariadb_wsrep_recovery
restart_policy: "never"
volumes: "{{ mariadb_service.volumes }}"
- name: Stop MariaDB containers
become: true
kolla_docker:
name: "{{ mariadb_service.container_name }}"
action: "stop_container"
- name: Copying MariaDB log file to /tmp
become: true
shell: "docker cp {{ mariadb_service.container_name }}:/var/log/kolla/mariadb/mariadb.log /tmp/mariadb_tmp.log"
# Look for sequence number in logs. Format is:
# WSREP: Recovered position: <UUID>:<seqno>.
- name: Get MariaDB wsrep recovery seqno
shell: tail -n 200 /tmp/mariadb_tmp.log | awk -F" " '$0~/Recovered position/{print $NF;exit;}' | awk -F":" '{print $1}'
become: true
shell: tail -n 200 /tmp/mariadb_tmp.log | awk -F" " '$0~/Recovered position/{print $NF;exit;}' | awk -F":" '{print $2}'
register: wsrep_recovery_seqno
- name: Removing MariaDB log file from /tmp
become: true
file:
path: /tmp/mariadb_tmp.log
state: absent
@ -104,6 +105,7 @@
- bootstrap_host == inventory_hostname
- name: Set grastate.dat file from MariaDB container in bootstrap host
become: true
lineinfile:
dest: /tmp/kolla_mariadb_grastate.dat
regexp: 'safe_to_bootstrap:(.*)$'
@ -162,6 +164,23 @@
- bootstrap_host is defined
- bootstrap_host == inventory_hostname
- name: Wait for MariaDB to become operational
become: true
command: >-
docker exec {{ mariadb_service.container_name }}
mysql -uroot -p{{ database_password }}
--silent --skip-column-names
-e 'SHOW STATUS LIKE "wsrep_evs_state"'
changed_when: false
register: result
until: '"OPERATIONAL" in result.stdout'
retries: 10
delay: 6
no_log: true
when:
- bootstrap_host is defined
- bootstrap_host == inventory_hostname
- name: Restart slave MariaDB container
become: true
kolla_docker: