Merge "Improve fault tolerance of MariaDB startup"
This commit is contained in:
commit
28a649bbbe
@ -19,7 +19,128 @@ set -xe
|
|||||||
|
|
||||||
# Bootstrap database
|
# Bootstrap database
|
||||||
CLUSTER_INIT_ARGS=""
|
CLUSTER_INIT_ARGS=""
|
||||||
if [ ! -d /var/lib/mysql/mysql ]; then
|
CLUSTER_CONFIG_PATH=/etc/mysql/conf.d/10-cluster-config.cnf
|
||||||
|
|
||||||
|
# Print manual-recovery instructions to stderr and abort the startup script.
#
# Invoked when no running cluster member could be contacted and this node is
# neither marked safe to bootstrap nor selected through FORCE_RECOVERY.
#
# Globals:
#   POD_NAME (read)  - name of this pod, shown next to its uuid/seqno
#   UUID     (write) - value following "uuid:" in grastate.dat
#   SEQNO    (write) - value following "seqno:" in grastate.dat
# Files:
#   /var/lib/mysql/grastate.dat (read)
# Outputs:
#   The recovery banner, written to stderr.
# Returns:
#   Never returns; the script exits with status 1.
function exitWithManualRecovery() {

  # The sed program strips everything up to and including the key and its
  # separators; "tx" jumps to label x (so the line is printed) only when that
  # substitution succeeded, and every other line is deleted.
  UUID=$(sed -e 's/^.*uuid:[\ ,\t]*//' -e 'tx' -e 'd' -e ':x' /var/lib/mysql/grastate.dat)
  SEQNO=$(sed -e 's/^.*seqno:[\ ,\t]*//' -e 'tx' -e 'd' -e ':x' /var/lib/mysql/grastate.dat)

  # The pod is identified via POD_NAME, the variable used everywhere else in
  # this script; the previous PODNAME spelling always expanded to nothing.
  cat >/dev/stderr <<EOF
**********************************************************
* MANUAL RECOVERY ACTION REQUIRED *
**********************************************************

All cluster members are down and grastate.dat indicates that it's not
safe to start the cluster from this node. If you see this message on
all nodes, you have to do a manual recovery by following these steps:

a) Find the node with the highest WSREP seq#:

POD ${POD_NAME} uuid: ${UUID} seq: ${SEQNO}

If you see uuid 00000000-0000-0000-0000-000000000000 with
seq -1, the node crashed during DDL.

If seq is -1 you will find a DETECTED CRASH message
on your log. Check the output from InnoDB for the last
transaction id available.

b) Set environment variable FORCE_RECOVERY=<NAME OF POD>
to force bootstrapping from the specified node.

Remember to remove FORCE_RECOVERY after your nodes
are fully recovered! You may lose data otherwise.

You can ignore this message and wait for the next restart if at
least one node started without errors.
EOF

  exit 1
}
|
||||||
|
|
||||||
|
# Construct cluster config
#
# Collect "<SERVICE_NAME>-<ordinal>.<hostname -d output>:<WSREP_PORT>" for
# every replica except this pod, then write that list together with this
# node's address and name to ${CLUSTER_CONFIG_PATH}. The file is truncated
# first, so each start produces a fresh config.
MEMBERS=""
for i in $(seq 1 ${MARIADB_REPLICAS}); do
  # seq counts from 1 while the pod ordinals start at 0.
  NUM="$((i - 1))"
  CANDIDATE_POD="${SERVICE_NAME}-${NUM}.$(hostname -d)"

  # Leave this pod out of its own peer list.
  if [[ "${CANDIDATE_POD}" == "${POD_NAME}.$(hostname -d)" ]]; then
    continue
  fi

  # A comma separator is inserted only when the list already has an entry.
  MEMBERS="${MEMBERS:+${MEMBERS},}${CANDIDATE_POD}:${WSREP_PORT}"
done

echo "Writing cluster config for ${POD_NAME} to ${CLUSTER_CONFIG_PATH}"
cat > "${CLUSTER_CONFIG_PATH}" <<EOF
[mysqld]
wsrep_cluster_address="gcomm://${MEMBERS}"
wsrep_node_address=${POD_IP}
wsrep_node_name=${POD_NAME}.$(hostname -d)
EOF
|
||||||
|
|
||||||
|
# Warn loudly on stderr whenever FORCE_RECOVERY contains anything other than
# spaces, so the operator is reminded to unset it once recovery is complete.
if [[ -n "${FORCE_RECOVERY// }" ]]; then
  cat >/dev/stderr <<EOF
**********************************************************
* !!! FORCE_RECOVERY WARNING !!! *
**********************************************************

POD is starting with FORCE_RECOVERY defined. Remember to unset this
variable after recovery! You may end up in recovering from a node
with old data on a crash!

You have been warned ;-)

**********************************************************
* FORCE_RECOVERY WARNING *
**********************************************************
EOF

fi
|
||||||
|
|
||||||
|
if [ -d /var/lib/mysql/mysql -a -f /var/lib/mysql/grastate.dat ]; then
|
||||||
|
|
||||||
|
# Node already initialized
|
||||||
|
|
||||||
|
if [ "$(sed -e 's/^.*seqno:[\ ,\t]*//' -e 'tx' -e 'd' -e ':x' /var/lib/mysql/grastate.dat)" = "-1" ]; then
|
||||||
|
cat >/dev/stderr <<EOF
|
||||||
|
**********************************************************
|
||||||
|
* DETECTED CRASH *
|
||||||
|
**********************************************************
|
||||||
|
|
||||||
|
Trying to recover from a previous crash by running with wsrep-recover...
|
||||||
|
EOF
|
||||||
|
mysqld --wsrep_cluster_address=gcomm:// --wsrep-recover
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Check if we can find a cluster member."
|
||||||
|
if ! mysql --defaults-file=/etc/mysql/admin_user.cnf \
|
||||||
|
--connect-timeout 2 \
|
||||||
|
-e 'select 1'; then
|
||||||
|
# No other nodes are running
|
||||||
|
if [ -z "${FORCE_RECOVERY// }" -a "$(sed -e 's/^.*safe_to_bootstrap:[\ ,\t]*//' -e 'tx' -e 'd' -e ':x' /var/lib/mysql/grastate.dat)" = "1" ]; then
|
||||||
|
echo 'Bootstrapping from this node.'
|
||||||
|
CLUSTER_INIT_ARGS=--wsrep-new-cluster
|
||||||
|
elif [ "x${FORCE_RECOVERY}x" = "x${POD_NAME}x" ]; then
|
||||||
|
echo 'Forced recovery bootstrap from this node.'
|
||||||
|
CLUSTER_INIT_ARGS=--wsrep-new-cluster
|
||||||
|
cp -f /var/lib/mysql/grastate.dat /var/lib/mysql/grastate.bak
|
||||||
|
cat >/var/lib/mysql/grastate.dat <<EOF
|
||||||
|
`grep -v 'safe_to_bootstrap:' /var/lib/mysql/grastate.bak`
|
||||||
|
safe_to_bootstrap: 1
|
||||||
|
EOF
|
||||||
|
chown -R mysql:mysql /var/lib/mysql/grastate.dat
|
||||||
|
else
|
||||||
|
exitWithManualRecovery
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
elif [ ! -d /var/lib/mysql/mysql -o "x${FORCE_BOOTSTRAP}" = "xtrue" ]; then
|
||||||
if [ "x${POD_NAME}" = "x${SERVICE_NAME}-0" ]; then
|
if [ "x${POD_NAME}" = "x${SERVICE_NAME}-0" ]; then
|
||||||
echo No data found for pod 0
|
echo No data found for pod 0
|
||||||
if [ "x${FORCE_BOOTSTRAP}" = "xtrue" ]; then
|
if [ "x${FORCE_BOOTSTRAP}" = "xtrue" ]; then
|
||||||
@ -43,32 +164,6 @@ if [ ! -d /var/lib/mysql/mysql ]; then
|
|||||||
chown -R mysql:mysql /var/lib/mysql
|
chown -R mysql:mysql /var/lib/mysql
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Construct cluster config
|
|
||||||
CLUSTER_CONFIG_PATH=/etc/mysql/conf.d/10-cluster-config.cnf
|
|
||||||
|
|
||||||
MEMBERS=""
|
|
||||||
for i in $(seq 1 ${MARIADB_REPLICAS}); do
|
|
||||||
if [ "$i" -eq "1" ]; then
|
|
||||||
NUM="0"
|
|
||||||
else
|
|
||||||
NUM="$(expr $i - 1)"
|
|
||||||
fi
|
|
||||||
CANDIDATE_POD="${SERVICE_NAME}-$NUM.$(hostname -d)"
|
|
||||||
if [ "x${CANDIDATE_POD}" != "x${POD_NAME}.$(hostname -d)" ]; then
|
|
||||||
if [ -n "${MEMBERS}" ]; then
|
|
||||||
MEMBERS+=,
|
|
||||||
fi
|
|
||||||
MEMBERS+="${CANDIDATE_POD}:${WSREP_PORT}"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Writing cluster config for ${POD_NAME} to ${CLUSTER_CONFIG_PATH}"
|
|
||||||
cat >> ${CLUSTER_CONFIG_PATH} << EOF
|
|
||||||
[mysqld]
|
|
||||||
wsrep_cluster_address="gcomm://${MEMBERS}"
|
|
||||||
wsrep_node_address=${POD_IP}
|
|
||||||
wsrep_node_name=${POD_NAME}.$(hostname -d)
|
|
||||||
EOF
|
|
||||||
|
|
||||||
if [ "x${CLUSTER_BOOTSTRAP}" = "xtrue" ]; then
|
if [ "x${CLUSTER_BOOTSTRAP}" = "xtrue" ]; then
|
||||||
mysql_install_db --user=mysql --datadir=/var/lib/mysql
|
mysql_install_db --user=mysql --datadir=/var/lib/mysql
|
||||||
|
@ -27,6 +27,7 @@ metadata:
|
|||||||
name: mariadb
|
name: mariadb
|
||||||
spec:
|
spec:
|
||||||
serviceName: "{{ tuple "oslo_db" "discovery" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}"
|
serviceName: "{{ tuple "oslo_db" "discovery" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}"
|
||||||
|
podManagementPolicy: "Parallel"
|
||||||
replicas: {{ .Values.pod.replicas.server }}
|
replicas: {{ .Values.pod.replicas.server }}
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
@ -74,6 +75,8 @@ spec:
|
|||||||
fieldPath: metadata.name
|
fieldPath: metadata.name
|
||||||
- name: FORCE_BOOTSTRAP
|
- name: FORCE_BOOTSTRAP
|
||||||
value: {{ .Values.force_bootstrap | quote }}
|
value: {{ .Values.force_bootstrap | quote }}
|
||||||
|
- name: FORCE_RECOVERY
|
||||||
|
value: {{ .Values.force_recovey | quote }}
|
||||||
- name: BOOTSTRAP_FILE
|
- name: BOOTSTRAP_FILE
|
||||||
value: {{ printf "/tmp/%s.sql" (randAlphaNum 8) }}
|
value: {{ printf "/tmp/%s.sql" (randAlphaNum 8) }}
|
||||||
- name: MARIADB_REPLICAS
|
- name: MARIADB_REPLICAS
|
||||||
|
Loading…
x
Reference in New Issue
Block a user