
There is an additional error status, 'Service Unavailable', which can indicate that the service is temporarily unavailable. Add that error status to the retry list so that the upload is retried in case the issue is resolved during the backup timeframe. Change-Id: I9e2fc1a9b33dea3858de06b10d512da98a635015
398 lines
16 KiB
Smarty
Executable File
398 lines
16 KiB
Smarty
Executable File
{{- define "helm-toolkit.scripts.db-backup-restore.backup_main" }}
|
|
#!/bin/bash
|
|
|
|
# This file contains a database backup framework which database scripts
|
|
# can use to perform a backup. The idea here is that the database-specific
|
|
# functions will be implemented by the various databases using this script
|
|
# (like mariadb, postgresql or etcd for example). The database-specific
|
|
# script will need to first "source" this file like this:
|
|
# source /tmp/backup_main.sh
|
|
#
|
|
# Then the script should call the main backup function (backup_databases):
|
|
# backup_databases [scope]
|
|
# [scope] is an optional parameter, defaulted to "all". If only one specific
|
|
# database is required to be backed up then this parameter will
|
|
# contain the name of the database; otherwise all are backed up.
|
|
#
|
|
# The framework will require the following variables to be exported:
|
|
#
|
|
# export DB_NAMESPACE Namespace where the database(s) reside
|
|
# export DB_NAME Name of the database system
|
|
# export LOCAL_DAYS_TO_KEEP Number of days to keep the local backups
|
|
# export REMOTE_DAYS_TO_KEEP Number of days to keep the remote backups
|
|
# export ARCHIVE_DIR Local location where the backup tarballs should
|
|
# be stored. (full directory path)
|
|
# export BACK_UP_MODE Determines the mode of backup taken.
|
|
# export REMOTE_BACKUP_ENABLED "true" if remote backup enabled; false
|
|
# otherwise
|
|
# export CONTAINER_NAME Name of the container on the RGW to store
|
|
# the backup tarball.
|
|
# export STORAGE_POLICY Name of the storage policy defined on the
|
|
# RGW which is intended to store backups.
|
|
# RGW access variables:
|
|
# export OS_REGION_NAME Name of the region the RGW resides in
|
|
# export OS_AUTH_URL Keystone URL associated with the RGW
|
|
# export OS_PROJECT_NAME Name of the project associated with the
|
|
# keystone user
|
|
# export OS_USERNAME Name of the keystone user
|
|
# export OS_PASSWORD Password of the keystone user
|
|
# export OS_USER_DOMAIN_NAME Keystone domain the user belongs to
|
|
# export OS_PROJECT_DOMAIN_NAME Keystone domain the project belongs to
|
|
# export OS_IDENTITY_API_VERSION Keystone API version to use
|
|
#
|
|
# The following variables are optional:
|
|
# export RGW_TIMEOUT Number of seconds to wait for the
|
|
# connection to the RGW to be available
|
|
# when sending a backup to the RGW. Default
|
|
# is 1800 (30 minutes).
|
|
#
|
|
# The database-specific functions that need to be implemented are:
|
|
# dump_databases_to_directory <directory> <err_logfile> [scope]
|
|
# where:
|
|
# <directory> is the full directory path to dump the database files
|
|
# into. This is a temporary directory for this backup only.
|
|
# <err_logfile> is the full directory path where error logs are to be
|
|
# written by the application.
|
|
# [scope] set to "all" if all databases are to be backed up; or
|
|
# set to the name of a specific database to be backed up.
|
|
# This optional parameter is defaulted to "all".
|
|
# returns: 0 if no errors; 1 if any errors occurred
|
|
#
|
|
# This function is expected to dump the database file(s) to the specified
|
|
# directory path. If this function completes successfully (returns 0), the
|
|
# framework will automatically tar/zip the files in that directory and
|
|
# name the tarball appropriately according to the proper conventions.
|
|
#
|
|
# The functions in this file will take care of:
|
|
# 1) Calling "dump_databases_to_directory" and then compressing the files,
|
|
# naming the tarball properly, and then storing it locally at the specified
|
|
# local directory.
|
|
# 2) Sending the tarball built to the remote gateway, to be stored in the
|
|
# container configured to store database backups.
|
|
# 3) Removing local backup tarballs which are older than the number of days
|
|
# specified by the "LOCAL_DAYS_TO_KEEP" variable.
|
|
# 4) Removing remote backup tarballs (from the remote gateway) which are older
|
|
# than the number of days specified by the "REMOTE_DAYS_TO_KEEP" variable.
|
|
#
|
|
|
|
# Note: not using set -e in this script because more elaborate error handling
|
|
# is needed.
|
|
set -x
|
|
|
|
# Log a fatal backup error, remove the temporary artifacts of the current
# backup run and terminate the shell.
# Globals (read):
#   DB_NAME, DB_NAMESPACE   used to build the log entry
#   ERR_LOG_FILE, TMP_DIR   temporary log file / directory to delete
# Params: $1 error message
# Params: $2 exit code (optional, defaults to 0)
#   NOTE(review): previously an omitted exit code made the shell exit with
#   the status of the trailing "rm" (0 in practice). The default of 0 keeps
#   that behaviour for callers that omit the code - confirm that exiting 0
#   on those error paths is really intended.
# Returns: never returns; exits the shell with the given exit code.
log_backup_error_exit() {
  local msg=$1
  local errcode=${2:-0}

  log ERROR "${DB_NAME}_backup" "${DB_NAMESPACE} namespace: ${msg}"

  # Quote the paths (and skip them when unset) so that a path containing
  # whitespace or glob characters cannot make rm delete something else.
  if [[ -n "${ERR_LOG_FILE:-}" ]]; then
    rm -f -- "${ERR_LOG_FILE}"
  fi
  if [[ -n "${TMP_DIR:-}" ]]; then
    rm -rf -- "${TMP_DIR}"
  fi

  exit "${errcode}"
}
|
|
|
|
# Log a message to stdout or append it to a file.
# The line format is: "<mm-dd-yy HH:MM:SS> <level>: <hostname> <service>: <message>"
# TODO: This could be converted into a mail alert or an alert sent to a
#       monitoring system.
# Params: $1 log level
# Params: $2 service
# Params: $3 message
# Params: $4 destination file (optional); when empty the line goes to stdout
log() {
  # Locals keep this helper from clobbering same-named variables (e.g. MSG)
  # that callers may still be using.
  local level=$1
  local service=$2
  local msg=$3
  local dest=${4:-}
  local stamp line

  stamp=$(date +"%m-%d-%y %H:%M:%S")
  line="${stamp} ${level}: $(hostname) ${service}: ${msg}"

  if [[ -z "${dest}" ]]; then
    printf '%s\n' "${line}"
  else
    # Quoted so that a destination containing whitespace is a valid target
    # instead of an "ambiguous redirect".
    printf '%s\n' "${line}" >>"${dest}"
  fi
}
|
|
|
|
# Get the number of seconds elapsed since the given archive timestamp.
# Params: $1 timestamp in any format understood by "date --date"
#            (the archives use e.g. 2020-01-31T12:00:00Z)
# Output: the age in seconds on stdout. 0 is printed when the timestamp is
#         empty, cannot be parsed, or lies in the future, so that such an
#         archive is never considered "old".
seconds_difference() {
  local archive_epoch now_epoch delta

  # Bail out early on a missing/unparseable date; continuing would evaluate
  # an arithmetic expression with an empty operand and print nothing.
  if [[ -z "${1:-}" ]] || ! archive_epoch=$(date --date="$1" +%s); then
    echo 0
    return 0
  fi

  now_epoch=$(date +%s)
  delta=$(( now_epoch - archive_epoch ))
  if (( delta < 0 )); then
    delta=0
  fi
  echo "${delta}"
}
|
|
|
|
# Send the specified tarball file at the specified filepath to the
# remote gateway. If the backup container is not in the container list it is
# created first (with the configured storage policy).
# Globals (read): CONTAINER_NAME, STORAGE_POLICY, DB_NAME
# Params: $1 directory that contains the tarball
# Params: $2 tarball file name (also used as the object name)
# Returns: 0 if the object exists in the container after the upload
#          1 on a permanent failure (access denied, container could not be
#            created, object not found after the upload, ...)
#          2 if the container list could not be fetched because of an error
#            that looks temporary, so that the caller can retry
send_to_remote_server() {
  local filepath=$1
  local file=$2
  local result swift_url token res_file curl_rc http_code trace_was_on

  # Grab the list of containers on the remote site
  if result=$(openstack container list 2>&1); then
    if ! grep -q -F -- "${CONTAINER_NAME}" <<<"${result}"; then
      # Find the swift URL from the keystone endpoint list
      swift_url=$(openstack catalog show object-store -c endpoints \
        | awk '/public/ { print $4; exit }')

      # The token must not end up in the "set -x" trace, so tracing is
      # switched off while the token is fetched and used.
      trace_was_on=false
      if [[ $- == *x* ]]; then
        trace_was_on=true
        set +x
      fi

      # Get a token from keystone
      token=$(openstack token issue -f value -c id)

      # Create the container. Both stdout (response headers, because of -i)
      # and stderr (curl diagnostics) are captured in the result file.
      res_file=$(mktemp -p /tmp)
      curl -g -i -X PUT "${swift_url}/${CONTAINER_NAME}" \
        -H "X-Auth-Token: ${token}" \
        -H "X-Storage-Policy: ${STORAGE_POLICY}" >"${res_file}" 2>&1
      curl_rc=$?

      if [[ "${trace_was_on}" == "true" ]]; then
        set -x
      fi

      # A response may carry several status lines (e.g. "100 Continue"
      # followed by the final one); only the last one is decisive.
      http_code=$(awk '/^HTTP\// { code = $2 } END { print code }' "${res_file}")

      if [[ ${curl_rc} -ne 0 ]] \
        || { [[ "${http_code}" =~ ^[0-9]+$ ]] && [[ "${http_code}" -ge 400 ]]; }; then
        log ERROR "${DB_NAME}_backup" "Error creating container ${CONTAINER_NAME}"
        cat "${res_file}"
        rm -f -- "${res_file}"
        return 1
      fi
      rm -f -- "${res_file}"

      if ! swift stat "${CONTAINER_NAME}"; then
        log ERROR "${DB_NAME}_backup" "Error retrieving container ${CONTAINER_NAME} details after creation."
        return 1
      fi
    fi
  else
    if grep -q "HTTP 401" <<<"${result}"; then
      log ERROR "${DB_NAME}_backup" "Access denied by keystone: ${result}"
      return 1
    elif grep -q -E "ConnectionError|Failed to discover available identity versions|Service Unavailable" <<<"${result}"; then
      log ERROR "${DB_NAME}_backup" "Could not reach the RGW: ${result}"
      # In this case, keystone or the site/node may be temporarily down.
      # Return slightly different error code so the calling code can retry
      return 2
    else
      log ERROR "${DB_NAME}_backup" "Could not get container list: ${result}"
      return 1
    fi
  fi

  # Create an object to store the file. A failure is only logged here; the
  # "object show" below decides whether the upload is considered successful.
  openstack object create --name "${file}" "${CONTAINER_NAME}" "${filepath}/${file}" \
    || log ERROR "${DB_NAME}_backup" "Cannot create container object ${file}!"

  if ! openstack object show "${CONTAINER_NAME}" "${file}"; then
    log ERROR "${DB_NAME}_backup" "Error retrieving container object ${file} after creation."
    return 1
  fi

  log INFO "${DB_NAME}_backup" "Created file ${file} in container ${CONTAINER_NAME} successfully."
  return 0
}
|
|
|
|
# This function attempts to store the built tarball to the remote gateway,
# with built-in logic to handle error cases like:
# 1) Network connectivity issues - retries every 30 seconds until RGW_TIMEOUT
#    seconds (default 1800) have passed
# 2) Authorization and other errors - immediately logs an ERROR and returns
# Params: $1 directory that contains the tarball
# Params: $2 tarball file name
# Returns: 0 if the tarball was sent successfully; 1 otherwise
store_backup_remotely() {
  local filepath=$1
  local file=$2
  local send_rc deadline
  local error_seen=false

  # If the RGW_TIMEOUT has already been set, use that value, otherwise give it
  # a default value.
  : "${RGW_TIMEOUT:=1800}"
  deadline=$(( $(date +%s) + RGW_TIMEOUT ))

  while true; do
    # Store the new archive to the remote backup storage facility.
    send_to_remote_server "${filepath}" "${file}"
    # Save the status right away: every later command (including a [[ ]]
    # test) overwrites $?, which would make the "2" check below unreachable.
    send_rc=$?

    if [[ ${send_rc} -eq 0 ]]; then
      log INFO "${DB_NAME}_backup" "Backup file ${file} successfully sent to RGW."
      break
    elif [[ ${send_rc} -eq 2 ]]; then
      # Temporary failure occurred. We need to retry if we have not timed out
      log WARN "${DB_NAME}_backup" "Backup file ${file} could not be sent to RGW due to connection issue."
      if (( $(date +%s) > deadline )); then
        log ERROR "${DB_NAME}_backup" "Timed out waiting for RGW to become available."
        error_seen=true
        break
      fi
      log INFO "${DB_NAME}_backup" "Sleeping 30 seconds waiting for RGW to become available..."
      sleep 30
      log INFO "${DB_NAME}_backup" "Retrying..."
    else
      log ERROR "${DB_NAME}_backup" "Backup file ${file} could not be sent to the RGW."
      error_seen=true
      break
    fi
  done

  if [[ "${error_seen}" == "true" ]]; then
    log ERROR "${DB_NAME}_backup" "Errors encountered. Exiting."
    return 1
  fi
  return 0
}
|
|
|
|
# Remove the local backup tarballs (*.gz in ARCHIVE_DIR) whose timestamp is
# older than LOCAL_DAYS_TO_KEEP days.
# The timestamp is the last dot-separated field before ".tar.gz", which
# covers both naming conventions:
#   <db name>.<namespace>.<scope>.<timestamp>.tar.gz
#   <db name>.<namespace>.<scope>.<backup mode>.<timestamp>.tar.gz
# Globals (read): ARCHIVE_DIR, LOCAL_DAYS_TO_KEEP, DB_NAME
# Returns: 0; a file that cannot be removed is logged, not treated as fatal.
remove_old_local_archives() {
  local archive_file archive_name archive_date max_age

  log INFO "${DB_NAME}_backup" "Deleting backups older than ${LOCAL_DAYS_TO_KEEP} days"

  # Without a positive retention period nothing must be deleted.
  if [[ ! "${LOCAL_DAYS_TO_KEEP}" =~ ^[0-9]+$ ]] || (( LOCAL_DAYS_TO_KEEP <= 0 )); then
    log WARN "${DB_NAME}_backup" "LOCAL_DAYS_TO_KEEP is not a positive number; not deleting any local backups."
    return 0
  fi

  if [[ ! -d "${ARCHIVE_DIR}" ]]; then
    return 0
  fi

  max_age=$(( LOCAL_DAYS_TO_KEEP * 86400 ))

  for archive_file in "${ARCHIVE_DIR}"/*.gz; do
    # An unmatched glob stays as the literal pattern; skip it.
    [[ -e "${archive_file}" ]] || continue

    # Strip the directory and the ".tar.gz" suffix, then take the last field.
    archive_name=${archive_file##*/}
    archive_name=${archive_name%.tar.gz}
    archive_date=${archive_name##*.}

    if [[ "$(seconds_difference "${archive_date}")" -gt "${max_age}" ]]; then
      log INFO "${DB_NAME}_backup" "Deleting file ${archive_file}."
      if ! rm -rf -- "${archive_file}"; then
        # Log error but don't exit so we can finish the script
        # because at this point we haven't sent backup to RGW yet
        log ERROR "${DB_NAME}_backup" "Cannot remove ${archive_file}"
      fi
    else
      log INFO "${DB_NAME}_backup" "Keeping file ${archive_file}."
    fi
  done

  return 0
}
|
|
|
|
# Remove this database's backup tarballs from the remote gateway container
# when their timestamp is older than REMOTE_DAYS_TO_KEEP days.
# Only objects whose listing line contains both DB_NAME and DB_NAMESPACE are
# considered. The timestamp is the last dot-separated field before ".tar.gz",
# which covers both naming conventions:
#   <db name>.<namespace>.<scope>.<timestamp>.tar.gz
#   <db name>.<namespace>.<scope>.<backup mode>.<timestamp>.tar.gz
# Globals (read): CONTAINER_NAME, REMOTE_DAYS_TO_KEEP, DB_NAME, DB_NAMESPACE
# Exits (via log_backup_error_exit) if the object list cannot be obtained or
# an object cannot be deleted.
remove_old_remote_archives() {
  local backup_files db_backup_files archive_file archive_name archive_date max_age
  local -a archive_list=()

  log INFO "${DB_NAME}_backup" "Deleting backups older than ${REMOTE_DAYS_TO_KEEP} days"

  # Without a positive retention period nothing must be deleted.
  if [[ ! "${REMOTE_DAYS_TO_KEEP}" =~ ^[0-9]+$ ]] || (( REMOTE_DAYS_TO_KEEP <= 0 )); then
    log WARN "${DB_NAME}_backup" "REMOTE_DAYS_TO_KEEP is not a positive number; not deleting any remote backups."
    return 0
  fi
  max_age=$(( REMOTE_DAYS_TO_KEEP * 86400 ))

  backup_files=$(mktemp -p /tmp)
  db_backup_files=$(mktemp -p /tmp)

  if ! openstack object list "${CONTAINER_NAME}" > "${backup_files}"; then
    # log_backup_error_exit ends the script, so clean up first.
    rm -f -- "${backup_files}" "${db_backup_files}"
    log_backup_error_exit "Could not obtain a list of current backup files in the RGW" 1
  fi

  # Filter out other types of backup files. The listing is a table, so the
  # object name is the second whitespace-separated column.
  grep -F -- "${DB_NAME}" "${backup_files}" \
    | grep -F -- "${DB_NAMESPACE}" \
    | awk '{print $2}' > "${db_backup_files}"

  mapfile -t archive_list < "${db_backup_files}"

  # Cleanup now; the names are held in memory from here on.
  rm -f -- "${backup_files}" "${db_backup_files}"

  for archive_file in "${archive_list[@]}"; do
    [[ -n "${archive_file}" ]] || continue

    # Strip any path and the ".tar.gz" suffix, then take the last field.
    archive_name=${archive_file##*/}
    archive_name=${archive_name%.tar.gz}
    archive_date=${archive_name##*.}

    if [[ "$(seconds_difference "${archive_date}")" -gt "${max_age}" ]]; then
      log INFO "${DB_NAME}_backup" "Deleting file ${archive_file} from the RGW"
      openstack object delete "${CONTAINER_NAME}" "${archive_file}" \
        || log_backup_error_exit "Cannot delete container object ${archive_file}!" 1
    fi
  done

  return 0
}
|
|
|
|
# Print the final status banner of a backup run.
# Tracing is turned off just for a clearer printout of the backup status
# (for manual backups, mainly) and turned back on afterwards.
# Params: $1 headline to print inside the banner
# Globals (read): TARBALL_FILE, ARCHIVE_SIZE
_backup_print_summary() {
  set +x
  echo "=================================================================="
  echo "$1"
  echo "Backup archive name: $TARBALL_FILE"
  echo "Backup archive size: $ARCHIVE_SIZE"
  echo "=================================================================="
  set -x
}

# Main function to backup the databases.
# Params: $1 optional "scope" parameter indicating what database to back up.
#            Defaults to "all".
# Globals (read): ARCHIVE_DIR, DB_NAME, DB_NAMESPACE, BACK_UP_MODE,
#                 LOCAL_DAYS_TO_KEEP, REMOTE_DAYS_TO_KEEP,
#                 REMOTE_BACKUP_ENABLED
# Globals (written/exported): TMP_DIR and ERR_LOG_FILE (temporary dump
#                 directory and error log, both removed once the tarball is
#                 built), LOCAL_DAYS_TO_KEEP and REMOTE_DAYS_TO_KEEP (with
#                 any double quotes stripped)
# Steps: dump the databases via dump_databases_to_directory, build the
#        tarball in ARCHIVE_DIR, prune old local archives, then (only when
#        REMOTE_BACKUP_ENABLED is "true") send the tarball to the RGW and
#        prune old remote archives.
# Exits the shell (via log_backup_error_exit) when the dump or the tarball
# creation fails, and with 0 when only the remote upload failed.
backup_databases() {
  SCOPE=${1:-"all"}
  local dump_rc

  # Create necessary directories if they do not exist.
  mkdir -p "${ARCHIVE_DIR}" || log_backup_error_exit "Cannot create directory ${ARCHIVE_DIR}!"

  # Assign first and export afterwards: "export VAR=$(cmd)" returns the
  # status of export, which would hide a mktemp failure.
  TMP_DIR=$(mktemp -d) || log_backup_error_exit "Cannot create temp directory!"
  export TMP_DIR

  # Create temporary log file
  ERR_LOG_FILE=$(mktemp -p /tmp) || log_backup_error_exit "Cannot create log file!"
  export ERR_LOG_FILE

  # It is expected that this function will dump the database files to the $TMP_DIR
  dump_databases_to_directory "${TMP_DIR}" "${ERR_LOG_FILE}" "${SCOPE}"
  dump_rc=$?

  # If successful, there should be at least one file in the TMP_DIR
  if [[ ${dump_rc} -ne 0 || -z "$(ls "${TMP_DIR}")" ]]; then
    cat "${ERR_LOG_FILE}"
    log_backup_error_exit "Backup of the ${DB_NAME} database failed and needs attention."
  fi

  log INFO "${DB_NAME}_backup" "Databases dumped successfully. Creating tarball..."

  # The timestamp carries the "Z" (UTC) designator, so it has to be taken in
  # UTC; the retention checks parse it back as UTC.
  NOW=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  if [[ -z "${BACK_UP_MODE}" ]]; then
    TARBALL_FILE="${DB_NAME}.${DB_NAMESPACE}.${SCOPE}.${NOW}.tar.gz"
  else
    TARBALL_FILE="${DB_NAME}.${DB_NAMESPACE}.${SCOPE}.${BACK_UP_MODE}.${NOW}.tar.gz"
  fi

  cd "${TMP_DIR}" || log_backup_error_exit "Cannot change to directory $TMP_DIR"

  #Archive the current database files
  if ! tar zcvf "${ARCHIVE_DIR}/${TARBALL_FILE}" *; then
    # Do not leave a truncated tarball behind that looks like a valid backup.
    rm -f -- "${ARCHIVE_DIR}/${TARBALL_FILE}"
    log_backup_error_exit "Backup tarball could not be created."
  fi

  # Get the size of the file
  ARCHIVE_SIZE=$(ls -l "${ARCHIVE_DIR}/${TARBALL_FILE}" | awk '{print $5}')

  log INFO "${DB_NAME}_backup" "Tarball $TARBALL_FILE created successfully."

  cd "${ARCHIVE_DIR}" || log ERROR "${DB_NAME}_backup" "Cannot change to directory ${ARCHIVE_DIR}"

  # Remove the temporary directory and files as they are no longer needed.
  rm -rf -- "${TMP_DIR}"
  rm -f -- "${ERR_LOG_FILE}"

  #Only delete the old archive after a successful archive
  LOCAL_DAYS_TO_KEEP=${LOCAL_DAYS_TO_KEEP//\"/}
  export LOCAL_DAYS_TO_KEEP
  if [[ "${LOCAL_DAYS_TO_KEEP}" -gt 0 ]]; then
    remove_old_local_archives
  fi

  # Compare against the literal "true" instead of executing the value: an
  # empty or unset value would otherwise run as an empty command, which
  # succeeds and would enable the remote backup.
  REMOTE_BACKUP=${REMOTE_BACKUP_ENABLED//\"/}
  if [[ "${REMOTE_BACKUP}" == "true" ]]; then
    if ! store_backup_remotely "${ARCHIVE_DIR}" "${TARBALL_FILE}"; then
      # This error should print first, then print the summary as the last
      # thing that the user sees in the output.
      log ERROR "${DB_NAME}_backup" "Backup ${TARBALL_FILE} could not be sent to remote RGW."
      _backup_print_summary "Local backup successful, but could not send to remote RGW."
      # Because the local backup was successful, exit with 0 so the pod will not
      # continue to restart and fill the disk with more backups. The ERRORs are
      # logged and alerting system should catch those errors and flag the operator.
      exit 0
    fi

    #Only delete the old archive after a successful archive
    REMOTE_DAYS_TO_KEEP=${REMOTE_DAYS_TO_KEEP//\"/}
    export REMOTE_DAYS_TO_KEEP
    if [[ "${REMOTE_DAYS_TO_KEEP}" -gt 0 ]]; then
      remove_old_remote_archives
    fi

    _backup_print_summary "Local backup and backup to remote RGW successful!"
  else
    # Remote backup is not enabled. This is ok; at least we have a local backup.
    log INFO "${DB_NAME}_backup" "Skipping remote backup, as it is not enabled."
    _backup_print_summary "Local backup successful!"
  fi
}
|
|
{{- end }}
|