Improve fault tolerance of MariaDB startup
* Changed podManagementPolicy to Parallel in order to allow recovery from the second or third master. After rebooting the whole cluster on purpose or after a power failure, a primary node the cluster can synchronize with is required. This is usually done automatically by selecting the node with the highest transaction id. The current implementation of the stateful set starts the nodes in sequence, preventing the start of further nodes if the process terminates with an error state. Because of this, the cluster may not come up if the first or second node is not in primary state. * Elects the first node started in primary state as bootstrap source. * Displays warnings and runs mysqld with wsrep-recover on crashed nodes. * Introduces the FORCE_RECOVERY argument for crash recovery. In case the primary selection fails, the cluster bootstrap process must be manually initiated from the most advanced node (highest committed transaction id). This information is available from the grastate.dat file in case of a clean shutdown. On crashed nodes an InnoDB recovery is required to get the last committed transaction id. start.sh handles both cases and gives instructions on how to recover the cluster after a hard failure. If FORCE_RECOVERY is set to the name of a pod (mariadb-0, mariadb-1, ...), the bootstrap process will be initiated from the specified node. DocImpact Closes-Bug: #1716461 Change-Id: I96a8cb52124f64920a7d9cf21a8924ede78ebf7b
This commit is contained in:
parent
3917369bda
commit
d78f8e0901
@ -19,7 +19,128 @@ set -xe
|
|||||||
|
|
||||||
# Bootstrap database
|
# Bootstrap database
|
||||||
CLUSTER_INIT_ARGS=""
|
CLUSTER_INIT_ARGS=""
|
||||||
if [ ! -d /var/lib/mysql/mysql ]; then
|
CLUSTER_CONFIG_PATH=/etc/mysql/conf.d/10-cluster-config.cnf
|
||||||
|
|
||||||
|
#######################################
# Print manual-recovery instructions for this node and abort startup.
#
# Called when no running cluster member could be reached and this node's
# grastate.dat does not allow it to bootstrap the cluster on its own.
#
# Globals:   POD_NAME (read) - name of this pod, shown in the banner
# Reads:     /var/lib/mysql/grastate.dat ("uuid:" and "seqno:" entries)
# Outputs:   the instruction banner on stderr
# Returns:   never returns; terminates the script with exit status 1
#######################################
function exitWithManualRecovery() {
  local uuid seqno

  # sed idiom: strip everything up to and including the key; 'tx' jumps to
  # label ':x' only when that substitution matched, so every other line hits
  # 'd' and is dropped. The result is just the value of the requested key.
  # The fallback keeps a failing sed from aborting the script (set -e) before
  # the operator has seen the instructions below.
  uuid=$(sed -e 's/^.*uuid:[\ ,\t]*//' -e 'tx' -e 'd' -e ':x' /var/lib/mysql/grastate.dat) \
    || uuid="unknown"
  seqno=$(sed -e 's/^.*seqno:[\ ,\t]*//' -e 'tx' -e 'd' -e ':x' /var/lib/mysql/grastate.dat) \
    || seqno="unknown"

  # Write to the already-open stderr descriptor instead of re-opening
  # /dev/stderr by path (which can fail or truncate depending on what fd 2 is).
  cat >&2 <<EOF
**********************************************************
* MANUAL RECOVERY ACTION REQUIRED *
**********************************************************

All cluster members are down and grastate.dat indicates that it's not
safe to start the cluster from this node. If you see this message on
all nodes, you have to do a manual recovery by following these steps:

a) Find the node with the highest WSREP seq#:

POD ${POD_NAME} uuid: ${uuid} seq: ${seqno}

If you see uuid 00000000-0000-0000-0000-000000000000 with
seq -1, the node crashed during DDL.

If seq is -1 you will find a DETECTED CRASH message
on your log. Check the output from InnoDB for the last
transaction id available.

b) Set environment variable FORCE_RECOVERY=<NAME OF POD>
to force bootstrapping from the specified node.

Remember to remove FORCE_RECOVERY after your nodes
are fully recovered! You may lose data otherwise.

You can ignore this message and wait for the next restart if at
least one node started without errors.
EOF

  exit 1
}
|
||||||
|
|
||||||
|
# Construct cluster config
#
# Builds the Galera peer list for this pod and writes it, together with this
# node's own address and name, to ${CLUSTER_CONFIG_PATH}.
#
# Reads:  MARIADB_REPLICAS, SERVICE_NAME, POD_NAME, POD_IP, WSREP_PORT,
#         CLUSTER_CONFIG_PATH and the DNS domain reported by `hostname -d`.
# Writes: MEMBERS (comma-separated "<pod>.<domain>:<port>" entries for every
#         pod ordinal 0..MARIADB_REPLICAS-1 except this pod) and the config
#         file itself, which is overwritten on every start.

# The DNS domain does not change while the script runs; look it up once
# instead of forking `hostname` on every loop iteration.
POD_DOMAIN="$(hostname -d)"

MEMBERS=""
for ((i = 1; i <= MARIADB_REPLICAS; i++)); do
  # Pod ordinals are zero-based while the loop counter is one-based.
  NUM=$((i - 1))
  CANDIDATE_POD="${SERVICE_NAME}-${NUM}.${POD_DOMAIN}"
  # A node must not list itself among its peers.
  if [[ "${CANDIDATE_POD}" != "${POD_NAME}.${POD_DOMAIN}" ]]; then
    if [[ -n "${MEMBERS}" ]]; then
      MEMBERS+=,
    fi
    MEMBERS+="${CANDIDATE_POD}:${WSREP_PORT}"
  fi
done

echo "Writing cluster config for ${POD_NAME} to ${CLUSTER_CONFIG_PATH}"
cat > "${CLUSTER_CONFIG_PATH}" <<EOF
[mysqld]
wsrep_cluster_address="gcomm://${MEMBERS}"
wsrep_node_address=${POD_IP}
wsrep_node_name=${POD_NAME}.${POD_DOMAIN}
EOF
|
||||||
|
|
||||||
|
# Warn loudly whenever FORCE_RECOVERY is set to anything other than blanks.
# "${FORCE_RECOVERY// }" removes every space, so a value consisting only of
# spaces counts as unset. The banner is informational only; startup continues.
if [[ -n "${FORCE_RECOVERY// }" ]]; then
  # Write to the already-open stderr descriptor instead of re-opening
  # /dev/stderr by path (which can fail or truncate depending on what fd 2 is).
  cat >&2 <<EOF
**********************************************************
* !!! FORCE_RECOVERY WARNING !!! *
**********************************************************

POD is starting with FORCE_RECOVERY defined. Remember to unset this
variable after recovery! You may end up in recovering from a node
with old data on a crash!

You have been warned ;-)

**********************************************************
* FORCE_RECOVERY WARNING *
**********************************************************
EOF

fi
|
||||||
|
|
||||||
|
if [ -d /var/lib/mysql/mysql -a -f /var/lib/mysql/grastate.dat ]; then
|
||||||
|
|
||||||
|
# Node already initialized
|
||||||
|
|
||||||
|
if [ "$(sed -e 's/^.*seqno:[\ ,\t]*//' -e 'tx' -e 'd' -e ':x' /var/lib/mysql/grastate.dat)" = "-1" ]; then
|
||||||
|
cat >/dev/stderr <<EOF
|
||||||
|
**********************************************************
|
||||||
|
* DETECTED CRASH *
|
||||||
|
**********************************************************
|
||||||
|
|
||||||
|
Trying to recover from a previous crash by running with wsrep-recover...
|
||||||
|
EOF
|
||||||
|
mysqld --wsrep_cluster_address=gcomm:// --wsrep-recover
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Check if we can find a cluster memeber."
|
||||||
|
if ! mysql --defaults-file=/etc/mysql/admin_user.cnf \
|
||||||
|
--connect-timeout 2 \
|
||||||
|
-e 'select 1'; then
|
||||||
|
# No other nodes are running
|
||||||
|
if [ -z "${FORCE_RECOVERY// }" -a "$(sed -e 's/^.*safe_to_bootstrap:[\ ,\t]*//' -e 'tx' -e 'd' -e ':x' /var/lib/mysql/grastate.dat)" = "1" ]; then
|
||||||
|
echo 'Bootstrapping from this node.'
|
||||||
|
CLUSTER_INIT_ARGS=--wsrep-new-cluster
|
||||||
|
elif [ "x${FORCE_RECOVERY}x" = "x${POD_NAME}x" ]; then
|
||||||
|
echo 'Forced recovery bootstrap from this node.'
|
||||||
|
CLUSTER_INIT_ARGS=--wsrep-new-cluster
|
||||||
|
cp -f /var/lib/mysql/grastate.dat /var/lib/mysql/grastate.bak
|
||||||
|
cat >/var/lib/mysql/grastate.dat <<EOF
|
||||||
|
`grep -v 'safe_to_bootstrap:' /var/lib/mysql/grastate.bak`
|
||||||
|
safe_to_bootstrap: 1
|
||||||
|
EOF
|
||||||
|
chown -R mysql:mysql /var/lib/mysql/grastate.dat
|
||||||
|
else
|
||||||
|
exitWithManualRecovery
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
elif [ ! -d /var/lib/mysql/mysql -o "x${FORCE_BOOTSTRAP}" = "xtrue" ]; then
|
||||||
if [ "x${POD_NAME}" = "x${SERVICE_NAME}-0" ]; then
|
if [ "x${POD_NAME}" = "x${SERVICE_NAME}-0" ]; then
|
||||||
echo No data found for pod 0
|
echo No data found for pod 0
|
||||||
if [ "x${FORCE_BOOTSTRAP}" = "xtrue" ]; then
|
if [ "x${FORCE_BOOTSTRAP}" = "xtrue" ]; then
|
||||||
@ -43,32 +164,6 @@ if [ ! -d /var/lib/mysql/mysql ]; then
|
|||||||
chown -R mysql:mysql /var/lib/mysql
|
chown -R mysql:mysql /var/lib/mysql
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Construct cluster config
|
|
||||||
CLUSTER_CONFIG_PATH=/etc/mysql/conf.d/10-cluster-config.cnf
|
|
||||||
|
|
||||||
MEMBERS=""
|
|
||||||
for i in $(seq 1 ${MARIADB_REPLICAS}); do
|
|
||||||
if [ "$i" -eq "1" ]; then
|
|
||||||
NUM="0"
|
|
||||||
else
|
|
||||||
NUM="$(expr $i - 1)"
|
|
||||||
fi
|
|
||||||
CANDIDATE_POD="${SERVICE_NAME}-$NUM.$(hostname -d)"
|
|
||||||
if [ "x${CANDIDATE_POD}" != "x${POD_NAME}.$(hostname -d)" ]; then
|
|
||||||
if [ -n "${MEMBERS}" ]; then
|
|
||||||
MEMBERS+=,
|
|
||||||
fi
|
|
||||||
MEMBERS+="${CANDIDATE_POD}:${WSREP_PORT}"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Writing cluster config for ${POD_NAME} to ${CLUSTER_CONFIG_PATH}"
|
|
||||||
cat >> ${CLUSTER_CONFIG_PATH} << EOF
|
|
||||||
[mysqld]
|
|
||||||
wsrep_cluster_address="gcomm://${MEMBERS}"
|
|
||||||
wsrep_node_address=${POD_IP}
|
|
||||||
wsrep_node_name=${POD_NAME}.$(hostname -d)
|
|
||||||
EOF
|
|
||||||
|
|
||||||
if [ "x${CLUSTER_BOOTSTRAP}" = "xtrue" ]; then
|
if [ "x${CLUSTER_BOOTSTRAP}" = "xtrue" ]; then
|
||||||
mysql_install_db --user=mysql --datadir=/var/lib/mysql
|
mysql_install_db --user=mysql --datadir=/var/lib/mysql
|
||||||
|
@ -27,6 +27,7 @@ metadata:
|
|||||||
name: mariadb
|
name: mariadb
|
||||||
spec:
|
spec:
|
||||||
serviceName: "{{ tuple "oslo_db" "discovery" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}"
|
serviceName: "{{ tuple "oslo_db" "discovery" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}"
|
||||||
|
podManagementPolicy: "Parallel"
|
||||||
replicas: {{ .Values.pod.replicas.server }}
|
replicas: {{ .Values.pod.replicas.server }}
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
@ -74,6 +75,8 @@ spec:
|
|||||||
fieldPath: metadata.name
|
fieldPath: metadata.name
|
||||||
- name: FORCE_BOOTSTRAP
|
- name: FORCE_BOOTSTRAP
|
||||||
value: {{ .Values.force_bootstrap | quote }}
|
value: {{ .Values.force_bootstrap | quote }}
|
||||||
|
- name: FORCE_RECOVERY
|
||||||
|
value: {{ .Values.force_recovey | quote }}
|
||||||
- name: BOOTSTRAP_FILE
|
- name: BOOTSTRAP_FILE
|
||||||
value: {{ printf "/tmp/%s.sql" (randAlphaNum 8) }}
|
value: {{ printf "/tmp/%s.sql" (randAlphaNum 8) }}
|
||||||
- name: MARIADB_REPLICAS
|
- name: MARIADB_REPLICAS
|
||||||
|
Loading…
Reference in New Issue
Block a user