Mariadb: Enhance mariadb backup
* Add capability to retry uploading backup to remote server configured number of times and delay the retires randomly between configured minimum/maximum seconds. * Enhanced error checking, logging and retrying logic. Change-Id: Ida3649420bdd6d39ac6ba7412c8c7078a75e0a10
This commit is contained in:
parent
4665ebd35f
commit
47795919cb
@ -15,7 +15,7 @@ apiVersion: v1
|
||||
appVersion: v1.0.0
|
||||
description: OpenStack-Helm Helm-Toolkit
|
||||
name: helm-toolkit
|
||||
version: 0.2.28
|
||||
version: 0.2.29
|
||||
home: https://docs.openstack.org/openstack-helm
|
||||
icon: https://www.openstack.org/themes/openstack/images/project-mascots/OpenStack-Helm/OpenStack_Project_OpenStackHelm_vertical.png
|
||||
sources:
|
||||
|
@ -40,11 +40,14 @@
|
||||
# export OS_PROJECT_DOMAIN_NAME Keystone domain the user belongs to
|
||||
# export OS_IDENTITY_API_VERSION Keystone API version to use
|
||||
#
|
||||
# The following variables are optional:
|
||||
# export RGW_TIMEOUT Number of seconds to wait for the
|
||||
# connection to the RGW to be available
|
||||
# when sending a backup to the RGW. Default
|
||||
# is 1800 (30 minutes).
|
||||
# export REMOTE_BACKUP_RETRIES Number of retries to send backup to remote
|
||||
# in case of any temporary failures.
|
||||
# export MIN_DELAY_SEND_REMOTE Minimum seconds to delay before sending backup
|
||||
# to remote to stagger backups being sent to RGW
|
||||
# export MAX_DELAY_SEND_REMOTE Maximum seconds to delay before sending backup
|
||||
# to remote to stagger backups being sent to RGW.
|
||||
# A random number between min and max delay is generated
|
||||
# to set the delay.
|
||||
#
|
||||
# The database-specific functions that need to be implemented are:
|
||||
# dump_databases_to_directory <directory> <err_logfile> [scope]
|
||||
@ -81,7 +84,7 @@ set -x
|
||||
|
||||
log_backup_error_exit() {
|
||||
MSG=$1
|
||||
ERRCODE=$2
|
||||
ERRCODE=${2:-0}
|
||||
log ERROR "${DB_NAME}_backup" "${DB_NAMESPACE} namespace: ${MSG}"
|
||||
rm -f $ERR_LOG_FILE
|
||||
rm -rf $TMP_DIR
|
||||
@ -107,6 +110,13 @@ log() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Generate a random number between MIN_DELAY_SEND_REMOTE and
|
||||
# MAX_DELAY_SEND_REMOTE
|
||||
random_number() {
|
||||
diff=$((${MAX_DELAY_SEND_REMOTE} - ${MIN_DELAY_SEND_REMOTE} + 1))
|
||||
echo $(($(( ${RANDOM} % ${diff} )) + ${MIN_DELAY_SEND_REMOTE} ))
|
||||
}
|
||||
|
||||
#Get the day delta since the archive file backup
|
||||
seconds_difference() {
|
||||
ARCHIVE_DATE=$( date --date="$1" +%s )
|
||||
@ -135,9 +145,17 @@ send_to_remote_server() {
|
||||
if [[ $? -ne 0 ]]; then
|
||||
# Find the swift URL from the keystone endpoint list
|
||||
SWIFT_URL=$(openstack catalog show object-store -c endpoints | grep public | awk '{print $4}')
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log WARN "${DB_NAME}_backup" "Unable to get object-store enpoints from keystone catalog."
|
||||
return 2
|
||||
fi
|
||||
|
||||
# Get a token from keystone
|
||||
TOKEN=$(openstack token issue -f value -c id)
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log WARN "${DB_NAME}_backup" "Unable to get keystone token."
|
||||
return 2
|
||||
fi
|
||||
|
||||
# Create the container
|
||||
RES_FILE=$(mktemp -p /tmp)
|
||||
@ -146,28 +164,28 @@ send_to_remote_server() {
|
||||
-H "X-Storage-Policy: ${STORAGE_POLICY}" 2>&1 > $RES_FILE
|
||||
|
||||
if [[ $? -ne 0 || $(grep "HTTP" $RES_FILE | awk '{print $2}') -ge 400 ]]; then
|
||||
log ERROR "${DB_NAME}_backup" "Error creating container ${CONTAINER_NAME}"
|
||||
log WARN "${DB_NAME}_backup" "Unable to create container ${CONTAINER_NAME}"
|
||||
cat $RES_FILE
|
||||
rm -f $RES_FILE
|
||||
return 1
|
||||
return 2
|
||||
fi
|
||||
rm -f $RES_FILE
|
||||
|
||||
swift stat $CONTAINER_NAME
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log ERROR "${DB_NAME}_backup" "Error retrieving container ${CONTAINER_NAME} details after creation."
|
||||
return 1
|
||||
log WARN "${DB_NAME}_backup" "Unable to retrieve container ${CONTAINER_NAME} details after creation."
|
||||
return 2
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo $RESULT | grep "HTTP 401"
|
||||
echo $RESULT | grep -E "HTTP 401|HTTP 403"
|
||||
if [[ $? -eq 0 ]]; then
|
||||
log ERROR "${DB_NAME}_backup" "Access denied by keystone: ${RESULT}"
|
||||
return 1
|
||||
else
|
||||
echo $RESULT | grep -E "ConnectionError|Failed to discover available identity versions|Service Unavailable"
|
||||
echo $RESULT | grep -E "ConnectionError|Failed to discover available identity versions|Service Unavailable|HTTP 50"
|
||||
if [[ $? -eq 0 ]]; then
|
||||
log ERROR "${DB_NAME}_backup" "Could not reach the RGW: ${RESULT}"
|
||||
log WARN "${DB_NAME}_backup" "Could not reach the RGW: ${RESULT}"
|
||||
# In this case, keystone or the site/node may be temporarily down.
|
||||
# Return slightly different error code so the calling code can retry
|
||||
return 2
|
||||
@ -179,11 +197,15 @@ send_to_remote_server() {
|
||||
fi
|
||||
|
||||
# Create an object to store the file
|
||||
openstack object create --name $FILE $CONTAINER_NAME $FILEPATH/$FILE || log ERROR "${DB_NAME}_backup" "Cannot create container object ${FILE}!"
|
||||
openstack object create --name $FILE $CONTAINER_NAME $FILEPATH/$FILE
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log WARN "${DB_NAME}_backup" "Cannot create container object ${FILE}!"
|
||||
return 2
|
||||
fi
|
||||
openstack object show $CONTAINER_NAME $FILE
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log ERROR "${DB_NAME}_backup" "Error retrieving container object $FILE after creation."
|
||||
return 1
|
||||
log WARN "${DB_NAME}_backup" "Unable to retrieve container object $FILE after creation."
|
||||
return 2
|
||||
fi
|
||||
|
||||
log INFO "${DB_NAME}_backup" "Created file $FILE in container $CONTAINER_NAME successfully."
|
||||
@ -198,16 +220,8 @@ store_backup_remotely() {
|
||||
FILEPATH=$1
|
||||
FILE=$2
|
||||
|
||||
# If the RGW_TIMEOUT has already been set, use that value, otherwise give it
|
||||
# a default value.
|
||||
if [[ -z $RGW_TIMEOUT ]]; then
|
||||
RGW_TIMEOUT=1800
|
||||
fi
|
||||
|
||||
ERROR_SEEN=false
|
||||
DONE=false
|
||||
TIMEOUT_EXP=$(( $(date +%s) + $RGW_TIMEOUT ))
|
||||
while [[ $DONE == "false" ]]; do
|
||||
count=1
|
||||
while [[ ${count} -le ${REMOTE_BACKUP_RETRIES} ]]; do
|
||||
# Store the new archive to the remote backup storage facility.
|
||||
send_to_remote_server $FILEPATH $FILE
|
||||
SEND_RESULT="$?"
|
||||
@ -215,32 +229,29 @@ store_backup_remotely() {
|
||||
# Check if successful
|
||||
if [[ $SEND_RESULT -eq 0 ]]; then
|
||||
log INFO "${DB_NAME}_backup" "Backup file ${FILE} successfully sent to RGW."
|
||||
DONE=true
|
||||
return 0
|
||||
elif [[ $SEND_RESULT -eq 2 ]]; then
|
||||
# Temporary failure occurred. We need to retry if we have not timed out
|
||||
if [[ ${count} -ge ${REMOTE_BACKUP_RETRIES} ]]; then
|
||||
log ERROR "${DB_NAME}_backup" "Backup file ${FILE} could not be sent to the RGW in " \
|
||||
"${REMOTE_BACKUP_RETRIES} retries. Errors encountered. Exiting."
|
||||
break
|
||||
fi
|
||||
# Temporary failure occurred. We need to retry
|
||||
log WARN "${DB_NAME}_backup" "Backup file ${FILE} could not be sent to RGW due to connection issue."
|
||||
DELTA=$(( TIMEOUT_EXP - $(date +%s) ))
|
||||
if [[ $DELTA -lt 0 ]]; then
|
||||
DONE=true
|
||||
log ERROR "${DB_NAME}_backup" "Timed out waiting for RGW to become available."
|
||||
ERROR_SEEN=true
|
||||
else
|
||||
log INFO "${DB_NAME}_backup" "Sleeping 30 seconds waiting for RGW to become available..."
|
||||
sleep 30
|
||||
sleep_time=$(random_number)
|
||||
log INFO "${DB_NAME}_backup" "Sleeping ${sleep_time} seconds waiting for RGW to become available..."
|
||||
sleep ${sleep_time}
|
||||
log INFO "${DB_NAME}_backup" "Retrying..."
|
||||
fi
|
||||
else
|
||||
log ERROR "${DB_NAME}_backup" "Backup file ${FILE} could not be sent to the RGW."
|
||||
ERROR_SEEN=true
|
||||
DONE=true
|
||||
log ERROR "${DB_NAME}_backup" "Backup file ${FILE} could not be sent to the RGW. Errors encountered. Exiting."
|
||||
break
|
||||
fi
|
||||
|
||||
# Increment the counter
|
||||
count=$((count+1))
|
||||
done
|
||||
|
||||
if [[ $ERROR_SEEN == "true" ]]; then
|
||||
log ERROR "${DB_NAME}_backup" "Errors encountered. Exiting."
|
||||
return 1
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
remove_old_local_archives() {
|
||||
@ -270,7 +281,7 @@ remove_old_remote_archives() {
|
||||
|
||||
openstack object list $CONTAINER_NAME > $BACKUP_FILES
|
||||
if [[ $? -ne 0 ]]; then
|
||||
log_backup_error_exit "Could not obtain a list of current backup files in the RGW" 1
|
||||
log_backup_error_exit "Could not obtain a list of current backup files in the RGW"
|
||||
fi
|
||||
|
||||
# Filter out other types of backup files
|
||||
@ -280,7 +291,7 @@ remove_old_remote_archives() {
|
||||
ARCHIVE_DATE=$( echo $ARCHIVE_FILE | awk -F/ '{print $NF}' | cut -d'.' -f 4)
|
||||
if [[ "$(seconds_difference ${ARCHIVE_DATE})" -gt "$((${REMOTE_DAYS_TO_KEEP}*86400))" ]]; then
|
||||
log INFO "${DB_NAME}_backup" "Deleting file ${ARCHIVE_FILE} from the RGW"
|
||||
openstack object delete $CONTAINER_NAME $ARCHIVE_FILE || log_backup_error_exit "Cannot delete container object ${ARCHIVE_FILE}!" 1
|
||||
openstack object delete $CONTAINER_NAME $ARCHIVE_FILE || log_backup_error_exit "Cannot delete container object ${ARCHIVE_FILE}!"
|
||||
fi
|
||||
done
|
||||
|
||||
@ -349,6 +360,13 @@ backup_databases() {
|
||||
|
||||
REMOTE_BACKUP=$(echo $REMOTE_BACKUP_ENABLED | sed 's/"//g')
|
||||
if $REMOTE_BACKUP; then
|
||||
# Remove Quotes from the constants which were added due to reading
|
||||
# from secret.
|
||||
export REMOTE_BACKUP_RETRIES=$(echo $REMOTE_BACKUP_RETRIES | sed 's/"//g')
|
||||
export MIN_DELAY_SEND_REMOTE=$(echo $MIN_DELAY_SEND_REMOTE | sed 's/"//g')
|
||||
export MAX_DELAY_SEND_REMOTE=$(echo $MAX_DELAY_SEND_REMOTE | sed 's/"//g')
|
||||
export REMOTE_DAYS_TO_KEEP=$(echo $REMOTE_DAYS_TO_KEEP | sed 's/"//g')
|
||||
|
||||
store_backup_remotely $ARCHIVE_DIR $TARBALL_FILE
|
||||
if [[ $? -ne 0 ]]; then
|
||||
# This error should print first, then print the summary as the last
|
||||
@ -368,7 +386,6 @@ backup_databases() {
|
||||
fi
|
||||
|
||||
#Only delete the old archive after a successful archive
|
||||
export REMOTE_DAYS_TO_KEEP=$(echo $REMOTE_DAYS_TO_KEEP | sed 's/"//g')
|
||||
if [[ "$REMOTE_DAYS_TO_KEEP" -gt 0 ]]; then
|
||||
remove_old_remote_archives
|
||||
fi
|
||||
|
@ -15,7 +15,7 @@ apiVersion: v1
|
||||
appVersion: v10.2.31
|
||||
description: OpenStack-Helm MariaDB
|
||||
name: mariadb
|
||||
version: 0.2.10
|
||||
version: 0.2.11
|
||||
home: https://mariadb.com/kb/en/
|
||||
icon: http://badges.mariadb.org/mariadb-badge-180x60.png
|
||||
sources:
|
||||
|
@ -24,6 +24,9 @@ export DB_NAMESPACE=${MARIADB_POD_NAMESPACE}
|
||||
export DB_NAME="mariadb"
|
||||
export LOCAL_DAYS_TO_KEEP=${MARIADB_LOCAL_BACKUP_DAYS_TO_KEEP}
|
||||
export REMOTE_DAYS_TO_KEEP=${MARIADB_REMOTE_BACKUP_DAYS_TO_KEEP}
|
||||
export REMOTE_BACKUP_RETRIES=${NUMBER_OF_RETRIES_SEND_BACKUP_TO_REMOTE}
|
||||
export MIN_DELAY_SEND_REMOTE=${MIN_DELAY_SEND_BACKUP_TO_REMOTE}
|
||||
export MAX_DELAY_SEND_REMOTE=${MAX_DELAY_SEND_BACKUP_TO_REMOTE}
|
||||
export ARCHIVE_DIR=${MARIADB_BACKUP_BASE_DIR}/db/${DB_NAMESPACE}/${DB_NAME}/archive
|
||||
|
||||
# Dump all the database files to existing $TMP_DIR and save logs to $LOG_FILE
|
||||
|
@ -97,6 +97,12 @@ spec:
|
||||
value: {{ .Values.conf.backup.remote_backup.container_name | quote }}
|
||||
- name: STORAGE_POLICY
|
||||
value: "{{ .Values.conf.backup.remote_backup.storage_policy }}"
|
||||
- name: NUMBER_OF_RETRIES_SEND_BACKUP_TO_REMOTE
|
||||
value: {{ .Values.conf.backup.remote_backup.number_of_retries | quote }}
|
||||
- name: MIN_DELAY_SEND_BACKUP_TO_REMOTE
|
||||
value: {{ .Values.conf.backup.remote_backup.delay_range.min | quote }}
|
||||
- name: MAX_DELAY_SEND_BACKUP_TO_REMOTE
|
||||
value: {{ .Values.conf.backup.remote_backup.delay_range.max | quote }}
|
||||
{{- with $env := dict "ksUserSecret" $envAll.Values.secrets.identity.mariadb }}
|
||||
{{- include "helm-toolkit.snippets.keystone_openrc_env_vars" $env | indent 16 }}
|
||||
{{- end }}
|
||||
|
@ -23,5 +23,8 @@ data:
|
||||
REMOTE_BACKUP_CONTAINER: {{ $envAll.Values.conf.backup.remote_backup.container_name | b64enc }}
|
||||
REMOTE_BACKUP_DAYS_TO_KEEP: {{ $envAll.Values.conf.backup.remote_backup.days_to_keep | quote | b64enc }}
|
||||
REMOTE_BACKUP_STORAGE_POLICY: {{ $envAll.Values.conf.backup.remote_backup.storage_policy | b64enc }}
|
||||
REMOTE_BACKUP_RETRIES: {{ $envAll.Values.conf.backup.remote_backup.number_of_retries | quote | b64enc }}
|
||||
REMOTE_BACKUP_SEND_DELAY_MIN: {{ $envAll.Values.conf.backup.remote_backup.delay_range.min | quote | b64enc }}
|
||||
REMOTE_BACKUP_SEND_DELAY_MAX: {{ $envAll.Values.conf.backup.remote_backup.delay_range.max | quote | b64enc }}
|
||||
...
|
||||
{{- end }}
|
||||
|
@ -327,6 +327,10 @@ conf:
|
||||
container_name: mariadb
|
||||
days_to_keep: 14
|
||||
storage_policy: default-placement
|
||||
number_of_retries: 5
|
||||
delay_range:
|
||||
min: 30
|
||||
max: 60
|
||||
database:
|
||||
mysql_histfile: "/dev/null"
|
||||
my: |
|
||||
|
@ -35,4 +35,5 @@ helm-toolkit:
|
||||
- 0.2.26 Revert Set Security Context to ks-user job
|
||||
- 0.2.27 Correct private key size input for Certificates and remove minor version support
|
||||
- 0.2.28 Set Security context to ks-user job at pod and container level
|
||||
- 0.2.29 Enhance mariadb backup
|
||||
...
|
||||
|
@ -26,4 +26,5 @@ mariadb:
|
||||
- 0.2.8 Helm 3 - Fix Job labels
|
||||
- 0.2.9 Update htk requirements
|
||||
- 0.2.10 Fix Python exceptions
|
||||
- 0.2.11 Enhance mariadb backup
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user