Postgresql backup/restore enhancements

1) Added a new backup container for accessing RGW via Openstack Swift API.
2) Modified the backup script so that tarballed databases can be sent to the RGW.
3) Added new script to send the database backup to the RGW.
4) Modified the restore script so that databases can be retrieved from the RGW.
5) Added new script to retrieve the database backups from the RGW.

Change-Id: Id17a8fcb63f5614ea038c58acdc256fb4e05f434
This commit is contained in:
Cliff Parsons 2019-04-23 13:36:07 -05:00 committed by Parsons, Cliff (cp769u)
parent 9057c770a6
commit 382d113a87
10 changed files with 878 additions and 151 deletions

View File

@ -17,9 +17,12 @@
export PGPASSWORD=$(cat /etc/postgresql/admin_user.conf \
| grep postgres | awk -F: '{print $5}')
# Note: not using set -e in this script because more elaborate error handling
# is needed.
set -x
PG_DUMPALL_OPTIONS=$POSTGRESQL_BACKUP_PG_DUMPALL_OPTIONS
TMP_DIR=/tmp/pg_backup
BACKUPS_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/current
ARCHIVE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/archive
LOG_FILE=/tmp/dberror.log
@ -28,64 +31,101 @@ PG_DUMPALL="pg_dumpall \
-U $POSTGRESQL_BACKUP_USER \
-h $POSTGRESQL_SERVICE_HOST"
#Get the day delta since the archive file backup
seconds_difference() {
archive_date=$( date --date="$1" +%s )
if [ "$?" -ne 0 ]
then
second_delta=0
fi
current_date=$( date +%s )
second_delta=$(($current_date-$archive_date))
if [ "$second_delta" -lt 0 ]
then
second_delta=0
fi
echo $second_delta
}
source /tmp/common_backup_restore.sh
#Create backups directory if it does not exists.
mkdir -p $BACKUPS_DIR $ARCHIVE_DIR
# Create necessary directories if they do not exist.
mkdir -p $BACKUPS_DIR || log_backup_error_exit "Cannot create directory ${BACKUPS_DIR}!"
mkdir -p $ARCHIVE_DIR || log_backup_error_exit "Cannot create directory ${ARCHIVE_DIR}!"
mkdir -p $TMP_DIR || log_backup_error_exit "Cannot create directory ${TMP_DIR}!"
# Remove temporary directory contents.
rm -rf $BACKUPS_DIR/* || log_backup_error_exit "Cannot clear ${BACKUPS_DIR} directory contents!"
rm -rf $TMP_DIR/* || log_backup_error_exit "Cannot clear ${TMP_DIR} directory contents!"
NOW=$(date +"%Y-%m-%dT%H:%M:%SZ")
SQL_FILE=postgres.$POSTGRESQL_POD_NAMESPACE.all
TARBALL_FILE=${SQL_FILE}.${NOW}.tar.gz
cd $TMP_DIR || log_backup_error_exit "Cannot change to directory $TMP_DIR"
rm -f $LOG_FILE
#Dump all databases
DATE=$(date +"%Y-%m-%dT%H:%M:%SZ")
$PG_DUMPALL --file=$BACKUPS_DIR/postgres.all.sql 2>>$LOG_FILE
if [[ $? -eq 0 && -s "$BACKUPS_DIR/postgres.all.sql" ]]
$PG_DUMPALL --file=${TMP_DIR}/${SQL_FILE}.sql 2>>$LOG_FILE
if [[ $? -eq 0 && -s "${TMP_DIR}/${SQL_FILE}.sql" ]]
then
#Archive the current databases files
pushd $BACKUPS_DIR 1>/dev/null
tar zcvf $ARCHIVE_DIR/postgres.all.${DATE}.tar.gz *
ARCHIVE_RET=$?
popd 1>/dev/null
#Remove the current backup
if [ -d $BACKUPS_DIR ]
log INFO postgresql_backup "Databases dumped successfully. Creating tarball..."
#Archive the current database files
tar zcvf $ARCHIVE_DIR/$TARBALL_FILE *
if [[ $? -ne 0 ]]
then
rm -rf $BACKUPS_DIR/*.sql
log_backup_error_exit "Backup tarball could not be created."
fi
log INFO postgresql_backup "Tarball $TARBALL_FILE created successfully."
# Remove the sql files as they are no longer needed.
rm -rf $TMP_DIR/*
if {{ .Values.conf.backup.remote_backup.enabled }}
then
# Copy the tarball back to the BACKUPS_DIR so that the other container
# can access it for sending it to remote storage.
cp $ARCHIVE_DIR/$TARBALL_FILE $BACKUPS_DIR/$TARBALL_FILE
if [[ $? -ne 0 ]]
then
log_backup_error_exit "Backup tarball could not be copied to backup directory ${BACKUPS_DIR}."
fi
# Sleep for a few seconds to allow the file system to get caught up...also to
# help prevent race condition where the other container grabs the backup_completed
# token and the backup file hasn't completed writing to disk.
sleep 30
# Note: this next line is the trigger that tells the other container to
# start sending to remote storage. After this backup is sent to remote
# storage, the other container will delete the "current" backup.
touch $BACKUPS_DIR/backup_completed
else
# Remote backup is not enabled. This is ok; at least we have a local backup.
log INFO postgresql_backup "Skipping remote backup, as it is not enabled."
fi
else
#TODO: This can be convert into mail alert of alert send to a monitoring system
echo "Backup of postgresql failed and need attention."
cat $LOG_FILE
exit 1
rm $LOG_FILE
log_backup_error_exit "Backup of the postgresql database failed and needs attention."
fi
#Only delete the old archive after a successful archive
if [ $ARCHIVE_RET -eq 0 ]
then
if [ "$POSTGRESQL_BACKUP_DAYS_TO_KEEP" -gt 0 ]
then
echo "Deleting backups older than $POSTGRESQL_BACKUP_DAYS_TO_KEEP days"
log INFO postgresql_backup "Deleting backups older than ${POSTGRESQL_BACKUP_DAYS_TO_KEEP} days"
if [ -d $ARCHIVE_DIR ]
then
for archive_file in $(ls -1 $ARCHIVE_DIR/*.gz)
for ARCHIVE_FILE in $(ls -1 $ARCHIVE_DIR/*.gz)
do
archive_date=$( echo $archive_file | awk -F/ '{print $NF}' | cut -d'.' -f 3)
if [ "$(seconds_difference $archive_date)" -gt "$(($POSTGRESQL_BACKUP_DAYS_TO_KEEP*86400))" ]
ARCHIVE_DATE=$( echo $ARCHIVE_FILE | awk -F/ '{print $NF}' | cut -d'.' -f 4)
if [ "$(seconds_difference $ARCHIVE_DATE)" -gt "$(($POSTGRESQL_BACKUP_DAYS_TO_KEEP*86400))" ]
then
rm -rf $archive_file
log INFO postgresql_backup "Deleting file $ARCHIVE_FILE."
rm -rf $ARCHIVE_FILE
if [[ $? -ne 0 ]]
then
  # The old archive could not be deleted: empty the "current" backup
  # directory and abort. An explicit status is passed so the failure is
  # reported as a non-zero exit.
  rm -rf "${BACKUPS_DIR:?}"/*
  log_backup_error_exit "Cannot remove ${ARCHIVE_FILE}" 1
fi
else
log INFO postgresql_backup "Keeping file ${ARCHIVE_FILE}."
fi
done
fi
fi
fi
# Turn off trace just for a clearer printout of backup status - for manual backups, mainly.
set +x
echo "=================================================================="
echo "Backup successful!"
echo "Backup archive name: $TARBALL_FILE"
echo "=================================================================="

View File

@ -0,0 +1,94 @@
#!/bin/bash
# Copyright 2018 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Do not use set -x here because the manual backup or restore pods may be using
# these functions, and it will distort the command output to have tracing on.
# Log a message at ERROR level through log() and terminate the script.
# Arguments:
#   $1 - message to log
#   $2 - exit status (optional, defaults to 1)
log_backup_error_exit() {
  local msg=$1
  # Default to a failing status: callers that pass only a message would
  # otherwise hit a bare `exit`, which returns log()'s successful status.
  local errcode=${2:-1}
  log ERROR postgresql_backup "${msg}"
  exit "$errcode"
}
# Write one timestamped log line to stdout, or append it to a file.
# TODO: This could be turned into a mail alert or an alert sent to a
# monitoring system.
# Arguments:
#   $1 - log level
#   $2 - service
#   $3 - message
#   $4 - destination file (optional); when empty the line goes to stdout
log() {
  # Locals keep this helper from overwriting same-named variables in the
  # scripts that source this file.
  local level=$1
  local service=$2
  local msg=$3
  local dest=$4
  local stamp line
  stamp=$(date +"%m-%d-%y %H:%M:%S")
  line="${stamp} ${level}: $(hostname) ${service}: ${msg}"
  if [ -z "$dest" ]
  then
    echo "$line"
  else
    # Quoted so a destination path containing whitespace is still one word.
    echo "$line" >>"$dest"
  fi
}
# Print the number of seconds elapsed since the given timestamp.
# Arguments:
#   $1 - timestamp in a format accepted by `date --date`
# Outputs:
#   elapsed seconds on stdout; 0 when the timestamp cannot be parsed or
#   lies in the future
seconds_difference() {
  local archive_date current_date second_delta
  if ! archive_date=$( date --date="$1" +%s )
  then
    # Unparseable timestamp: report "no age" and stop here, rather than
    # doing arithmetic on an empty operand.
    echo 0
    return 0
  fi
  current_date=$( date +%s )
  second_delta=$(( current_date - archive_date ))
  if [ "$second_delta" -lt 0 ]
  then
    second_delta=0
  fi
  echo "$second_delta"
}
# Wait for a file to be available on the file system (written by the other
# container).
# Arguments:
#   $1 - path, or glob pattern, to wait for; a pattern is expanded on every
#        poll
#   $2 - "true" to wait (virtually) forever; any other value, or omitted,
#        gives up after 300 seconds
# Returns:
#   0 once something matches, 1 on timeout
wait_for_file() {
  # Locals: scripts that source this file use DONE/DELTA/TIMEOUT_EXP for
  # their own wait loops, so this helper must not overwrite them.
  local wait_file=$1
  local no_timeout=${2:-false}
  local timeout=300
  local deadline remaining

  if [[ $no_timeout == "true" ]]
  then
    # Such a large value to virtually never timeout
    timeout=999999999
  fi
  deadline=$(( $(date +%s) + timeout ))

  while true
  do
    remaining=$(( deadline - $(date +%s) ))
    # ${wait_file} is deliberately unquoted so that a glob pattern expands.
    if [[ "$(ls -l ${wait_file} 2>/dev/null | wc -l)" -gt 0 ]]
    then
      return 0
    elif [[ $remaining -lt 0 ]]
    then
      echo "Timed out waiting for file ${wait_file}."
      return 1
    else
      echo "Still waiting ...will time out in ${remaining} seconds..."
      sleep 5
    fi
  done
}

View File

@ -0,0 +1,81 @@
#!/bin/bash
# Copyright 2018 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
set -x

RESTORE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/restore
ARCHIVE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/archive

source /tmp/common_backup_restore.sh

# Keep processing requests for the life of the pod.
while true
do
  # Wait until a restore request file is present on the disk
  echo "Waiting for a restore request..."
  NO_TIMEOUT=true
  # The pattern is quoted so it always arrives as one argument (wait_for_file
  # expands it itself). Unquoted, two pending request files would push
  # NO_TIMEOUT out of the second position.
  wait_for_file "$RESTORE_DIR/*_request" $NO_TIMEOUT
  echo "Done waiting. Request received"

  CONTAINER_NAME={{ .Values.conf.backup.remote_backup.container_name }}

  if [[ -e $RESTORE_DIR/archive_listing_request ]]
  then
    # We've finished consuming the request, so delete the request file.
    rm -rf $RESTORE_DIR/*_request

    openstack container show $CONTAINER_NAME
    if [[ $? -eq 0 ]]
    then
      # Fetch the listing on its own so that a failure of the openstack
      # client is what $? reports; inside a pipeline only the status of the
      # last stage (awk) would be seen.
      OBJECT_LIST=$(openstack object list $CONTAINER_NAME)
      if [[ $? != 0 ]]
      then
        echo "Container object listing could not be obtained." >> $RESTORE_DIR/archive_list_error
      else
        # Get the list, ensuring that we only pick up postgres backups from
        # the requested namespace. The response is built under a name that
        # does not match archive_list_* and then renamed, so the requester
        # never sees a partially written response file.
        echo "$OBJECT_LIST" | grep postgres | grep $POSTGRESQL_POD_NAMESPACE | awk '{print $2}' > $RESTORE_DIR/.archive_list_response.tmp
        mv $RESTORE_DIR/.archive_list_response.tmp $RESTORE_DIR/archive_list_response
        echo "Archive listing successfully retrieved."
      fi
    else
      echo "Container $CONTAINER_NAME does not exist." >> $RESTORE_DIR/archive_list_error
    fi
  elif [[ -e $RESTORE_DIR/get_archive_request ]]
  then
    # The request file contains the name of the archive to fetch.
    ARCHIVE=$(cat $RESTORE_DIR/get_archive_request)
    echo "Request for archive $ARCHIVE received."

    # We've finished consuming the request, so delete the request file.
    rm -rf $RESTORE_DIR/*_request

    openstack object save --file $RESTORE_DIR/$ARCHIVE $CONTAINER_NAME $ARCHIVE
    if [[ $? != 0 ]]
    then
      echo "Archive $ARCHIVE could not be retrieved." >> $RESTORE_DIR/archive_error
    else
      echo "Archive $ARCHIVE successfully retrieved."
    fi

    # Signal to the other container that the archive is available.
    touch $RESTORE_DIR/archive_response
  else
    rm -rf $RESTORE_DIR/*_request
    echo "Invalid request received."
  fi

  sleep 5
done

View File

@ -0,0 +1,208 @@
#!/bin/bash
# Copyright 2018 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Note: not using set -e because more elaborate error handling is required.
set -x

# Load the shared helpers first: log_backup_error_exit is used by the mkdir
# guard below and must already be defined when that line runs.
source /tmp/common_backup_restore.sh

BACKUPS_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/current

# Create the working backups directory if the other container didn't already,
# and if this container creates it first, ensure that permissions are writable
# for the other container (running as "postgres" user) in the same "postgres"
# group.
mkdir -p $BACKUPS_DIR || log_backup_error_exit "Cannot create directory ${BACKUPS_DIR}!" 1
chmod 775 $BACKUPS_DIR
# Upload one backup file to the remote object store, creating the target
# container first when the container listing does not mention it.
# Arguments:
#   $1 - directory holding the file
#   $2 - file name (also used as the object name)
# Globals written: FILEPATH, FILE, CONTAINER_NAME, RESULT
# Returns:
#   0 - object created and confirmed with "object show"
#   1 - permanent failure
#   2 - ConnectionError while listing containers (caller may retry)
send_to_storage() {
  FILEPATH=$1
  FILE=$2
  CONTAINER_NAME={{ .Values.conf.backup.remote_backup.container_name }}

  # Grab the list of containers on the remote site; classify the failure
  # from the captured output when the listing cannot be obtained.
  if ! RESULT=$(openstack container list 2>&1)
  then
    if echo $RESULT | grep "HTTP 401"
    then
      log ERROR postgresql_backup "Could not access keystone: HTTP 401"
      return 1
    fi
    if echo $RESULT | grep "ConnectionError"
    then
      log ERROR postgresql_backup "Could not access keystone: ConnectionError"
      # In this case, keystone or the site/node may be temporarily down.
      # Return slightly different error code so the calling code can retry
      return 2
    fi
    log ERROR postgresql_backup "Could not get container list: ${RESULT}"
    return 1
  fi

  # Listing succeeded: create the container when it is not in the listing.
  if ! echo $RESULT | grep $CONTAINER_NAME
  then
    openstack container create $CONTAINER_NAME || log ERROR postgresql_backup "Cannot create container ${CONTAINER_NAME}!"
    if ! openstack container show $CONTAINER_NAME
    then
      log ERROR postgresql_backup "Error retrieving container $CONTAINER_NAME after creation."
      return 1
    fi
  fi

  # Create an object to store the file, then confirm that it can be read back.
  openstack object create --name $FILE $CONTAINER_NAME $FILEPATH/$FILE || log ERROR postgresql_backup "Cannot create container object ${FILE}!"
  if ! openstack object show $CONTAINER_NAME $FILE
  then
    log ERROR postgresql_backup "Error retrieving container object $FILE after creation."
    return 1
  fi

  log INFO postgresql_backup "Created file $FILE in container $CONTAINER_NAME successfully."
  return 0
}
# Main section: wait for the backup container to signal completion, ship the
# tarball to the RGW (retrying while the RGW is unreachable), then prune old
# remote backups.
if {{ .Values.conf.backup.remote_backup.enabled }}
then
  WAIT_FOR_BACKUP_TIMEOUT=1800
  WAIT_FOR_RGW_AVAIL_TIMEOUT=1800

  # Wait until a backup file is ready to ship to RGW, or until we time out.
  DONE=false
  TIMEOUT_EXP=$(( $(date +%s) + $WAIT_FOR_BACKUP_TIMEOUT ))
  while [[ $DONE == "false" ]]
  do
    log INFO postgresql_backup "Waiting for a backup file to be written to the disk."
    sleep 5
    DELTA=$(( TIMEOUT_EXP - $(date +%s) ))
    ls -l ${BACKUPS_DIR}/backup_completed
    if [[ $? -eq 0 ]]
    then
      DONE=true
    elif [[ $DELTA -lt 0 ]]
    then
      # Timed out; the missing tarball is reported by the check below.
      DONE=true
    fi
  done

  log INFO postgresql_backup "Done waiting."
  FILE_TO_SEND=$(ls $BACKUPS_DIR/*.tar.gz)

  ERROR_SEEN=false

  if [[ $FILE_TO_SEND != "" ]]
  then
    if [[ $(echo $FILE_TO_SEND | wc -w) -gt 1 ]]
    then
      # There should only be one backup file to send - this is an error
      log_backup_error_exit "More than one backup file found (${FILE_TO_SEND}) - can only handle 1!" 1
    fi

    # Get just the filename from the file (strip the path)
    FILE=$(basename $FILE_TO_SEND)

    log INFO postgresql_backup "Backup file ${BACKUPS_DIR}/${FILE} found."

    DONE=false
    TIMEOUT_EXP=$(( $(date +%s) + $WAIT_FOR_RGW_AVAIL_TIMEOUT ))
    while [[ $DONE == "false" ]]
    do
      # Store the new archive to the remote backup storage facility.
      send_to_storage $BACKUPS_DIR $FILE

      # Capture the status once: every [[ ]] test overwrites $?, so reading
      # $? again in the elif would never see the retryable status 2.
      SEND_RC=$?

      # Check if successful
      if [[ $SEND_RC -eq 0 ]]
      then
        log INFO postgresql_backup "Backup file ${BACKUPS_DIR}/${FILE} successfully sent to RGW. Deleting from current backup directory."
        DONE=true
      elif [[ $SEND_RC -eq 2 ]]
      then
        # Temporary failure occurred. We need to retry if we haven't timed out
        log WARN postgresql_backup "Backup file ${BACKUPS_DIR}/${FILE} could not be sent to RGW due to connection issue."
        DELTA=$(( TIMEOUT_EXP - $(date +%s) ))
        if [[ $DELTA -lt 0 ]]
        then
          DONE=true
          log ERROR postgresql_backup "Timed out waiting for RGW to become available."
          ERROR_SEEN=true
        else
          log INFO postgresql_backup "Sleeping 30 seconds waiting for RGW to become available..."
          sleep 30
          log INFO postgresql_backup "Retrying..."
        fi
      else
        log ERROR postgresql_backup "Backup file ${BACKUPS_DIR}/${FILE} could not be sent to the RGW."
        ERROR_SEEN=true
        DONE=true
      fi
    done
  else
    log ERROR postgresql_backup "No backup file found in $BACKUPS_DIR."
    ERROR_SEEN=true
  fi

  if [[ $ERROR_SEEN == "true" ]]
  then
    log ERROR postgresql_backup "Errors encountered. Exiting."
    exit 1
  fi

  # At this point, we should remove the files in current dir.
  # If an error occurred, then we need the file to remain there for future
  # container restarts, and maybe it will eventually succeed.
  rm -rf $BACKUPS_DIR/*

  #Only delete an old archive after a successful archive
  if [ "${POSTGRESQL_BACKUP_DAYS_TO_KEEP}" -gt 0 ]
  then
    log INFO postgresql_backup "Deleting backups older than ${POSTGRESQL_BACKUP_DAYS_TO_KEEP} days"
    BACKUP_FILES=/tmp/backup_files
    PG_BACKUP_FILES=/tmp/pg_backup_files

    # CONTAINER_NAME is the global assigned inside send_to_storage, which
    # has always run by the time this point is reached.
    openstack object list $CONTAINER_NAME > $BACKUP_FILES
    if [[ $? != 0 ]]
    then
      log_backup_error_exit "Could not obtain a list of current backup files in the RGW" 1
    fi

    # Filter out other types of files like mariadb, etcd backups etc..
    cat $BACKUP_FILES | grep postgres | grep $POSTGRESQL_POD_NAMESPACE | awk '{print $2}' > $PG_BACKUP_FILES

    for ARCHIVE_FILE in $(cat $PG_BACKUP_FILES)
    do
      # The timestamp is the 4th dot-separated field of the object name.
      ARCHIVE_DATE=$( echo $ARCHIVE_FILE | awk -F/ '{print $NF}' | cut -d'.' -f 4)
      if [ "$(seconds_difference ${ARCHIVE_DATE})" -gt "$((${POSTGRESQL_BACKUP_DAYS_TO_KEEP}*86400))" ]
      then
        log INFO postgresql_backup "Deleting file ${ARCHIVE_FILE} from the RGW"
        openstack object delete $CONTAINER_NAME $ARCHIVE_FILE || log_backup_error_exit "Cannot delete container object ${ARCHIVE_FILE}!" 1
      fi
    done
  fi
else
  log INFO postgresql_backup "Remote backup is not enabled"
  exit 0
fi

View File

@ -17,11 +17,6 @@
export PGPASSWORD=$(cat /etc/postgresql/admin_user.conf \
| grep postgres | awk -F: '{print $5}')
log_error() {
echo $1
exit 1
}
ARCHIVE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/archive
RESTORE_DIR=${POSTGRESQL_BACKUP_BASE_DIR}/db/${POSTGRESQL_POD_NAMESPACE}/postgres/restore
POSTGRESQL_HOST=$(cat /etc/postgresql/admin_user.conf | cut -d: -f 1)
@ -29,29 +24,19 @@ LOG_FILE=/tmp/dbrestore.log
ARGS=("$@")
PSQL="psql -U $POSTGRESQL_BACKUP_USER -h $POSTGRESQL_HOST"
source /tmp/common_backup_restore.sh
usage() {
ret_val=$1
echo "Usage:"
echo "Restore command options"
echo "============================="
echo "help"
echo "list_archives"
echo "list_databases <archive_filename>"
echo "restore <archive_filename> [<db_name> | ALL]"
exit $ret_val
}
#Delete file
delete_files() {
files_to_delete=("$@")
for f in "${files_to_delete[@]}"
do
if [ -f $f ]
then
echo "Deleting file $f."
rm -rf $f
fi
done
echo "list_archives [remote]"
echo "list_databases <archive_filename> [remote]"
echo "restore <archive_filename> <db_specifier> [remote]"
echo " where <db_specifier> = <dbname> | ALL"
clean_and_exit $ret_val ""
}
#Extract Single Database SQL Dump from pg_dumpall dump file
@ -60,36 +45,153 @@ extract_single_db_dump() {
${RESTORE_DIR}/$2.sql
}
#Exit cleanly with some message and return code
# Arguments:
#   $1 - exit status
#   $2 - message to print before exiting (optional; nothing is printed when
#        it is empty)
clean_and_exit() {
  local retcode=$1
  local msg=$2

  #Cleanup Restore Directory
  # ":?" aborts instead of expanding to "/*" should RESTORE_DIR be empty.
  rm -rf "${RESTORE_DIR:?}"/*

  if [[ -n "${msg}" ]]
  then
    # Printed verbatim: left unquoted, relayed error text containing "*" or
    # runs of spaces would be glob-expanded or collapsed.
    printf '%s\n' "${msg}"
  fi
  exit "${retcode:-0}"
}
# Ask the other container for the list of archives held in the RGW and wait
# for its answer. The request is a marker file in the restore directory; the
# answer arrives as an archive_list_* file there. Exits through
# clean_and_exit when no answer arrives or an error file is returned.
retrieve_remote_listing() {
  local wait_rc

  # Remove the last response, if there was any
  rm -rf $RESTORE_DIR/archive_list_*

  # Signal by creating a file in the restore directory
  touch $RESTORE_DIR/archive_listing_request

  # Wait until the archive listing has been retrieved from the other container.
  echo "Waiting for archive listing..."
  wait_for_file $RESTORE_DIR/archive_list_*
  wait_rc=$?

  if [[ $wait_rc -eq 1 ]]
  then
    clean_and_exit 1 "Request failed - container did not respond. Archive listing is NOT available."
  fi

  # A readable error file means the other container reported a failure.
  if ERR=$(cat $RESTORE_DIR/archive_list_error 2>/dev/null)
  then
    clean_and_exit 1 "Request failed - ${ERR}"
  fi

  echo "Done waiting. Archive list is available."
}
# Ask the other container to download one archive from the RGW into the
# restore directory and wait for its answer. Exits through clean_and_exit
# when no answer arrives, an error file is returned, or the archive is not
# present afterwards.
# Arguments:
#   $1 - archive name (stored in the global ARCHIVE)
retrieve_remote_archive() {
  local wait_rc
  ARCHIVE=$1

  # Remove the last response, if there was any
  rm -rf $RESTORE_DIR/archive_*

  # Signal by creating a file in the restore directory containing the archive
  # name.
  echo "$ARCHIVE" > $RESTORE_DIR/get_archive_request

  # Wait until the archive has been retrieved from the other container.
  echo "Waiting for requested archive ${ARCHIVE}..."
  wait_for_file $RESTORE_DIR/archive_*
  wait_rc=$?

  if [[ $wait_rc -eq 1 ]]
  then
    clean_and_exit 1 "Request failed - container did not respond. Archive ${ARCHIVE} is NOT available."
  fi

  # A readable error file means the other container reported a failure.
  if ERR=$(cat $RESTORE_DIR/archive_error 2>/dev/null)
  then
    clean_and_exit 1 "Request failed - ${ERR}"
  fi

  # The response marker has served its purpose; the archive itself is checked next.
  rm -rf $RESTORE_DIR/archive_response
  if [[ ! -e $RESTORE_DIR/$ARCHIVE ]]
  then
    clean_and_exit 1 "Request failed - Archive $ARCHIVE is NOT available."
  else
    echo "Done waiting. Archive $ARCHIVE is available."
  fi
}
#Display all archives
list_archives() {
if [ -d ${ARCHIVE_DIR} ]
REMOTE=$1
if [[ "x${REMOTE^^}" == "xREMOTE" ]]
then
archives=$(find ${ARCHIVE_DIR}/ -iname "*.gz" -print)
echo "All Archives"
echo "=================================="
retrieve_remote_listing
if [[ -e $RESTORE_DIR/archive_list_response ]]
then
echo
echo "All Archives from RGW Data Store"
echo "=============================================="
cat $RESTORE_DIR/archive_list_response
clean_and_exit 0 ""
else
clean_and_exit 1 "Archives could not be retrieved from the RGW."
fi
elif [[ "x${REMOTE}" == "x" ]]
then
if [ -d $ARCHIVE_DIR ]
then
archives=$(find $ARCHIVE_DIR/ -iname "*.gz" -print)
echo
echo "All Local Archives"
echo "=============================================="
for archive in $archives
do
echo $archive | cut -d '/' -f 8
done
clean_and_exit 0 ""
else
log_error "Archive directory is not available."
clean_and_exit 1 "Local archive directory is not available."
fi
else
usage 1
fi
}
#Return all databases from an archive
get_databases() {
archive_file=$1
if [ -e ${ARCHIVE_DIR}/${archive_file} ]
ARCHIVE_FILE=$1
REMOTE=$2
if [[ "x$REMOTE" == "xremote" ]]
then
files_to_purge=$(find $RESTORE_DIR/ -iname "*.sql" -print)
delete_files $files_to_purge
tar zxvf ${ARCHIVE_DIR}/${archive_file} -C ${RESTORE_DIR} 1>/dev/null
if [ -e ${RESTORE_DIR}/postgres.all.sql ]
retrieve_remote_archive $ARCHIVE_FILE
elif [[ "x$REMOTE" == "x" ]]
then
DBS=$( grep 'CREATE DATABASE' ${RESTORE_DIR}/postgres.all.sql | awk '{ print $3 }' )
else
DBS=" "
if [ -e $ARCHIVE_DIR/$ARCHIVE_FILE ]
then
cp $ARCHIVE_DIR/$ARCHIVE_FILE $RESTORE_DIR/$ARCHIVE_FILE
if [[ $? != 0 ]]
then
clean_and_exit 1 "Could not copy local archive to restore directory."
fi
else
clean_and_exit 1 "Local archive file could not be found."
fi
else
usage 1
fi
echo "Decompressing archive $ARCHIVE_FILE..."
cd $RESTORE_DIR
tar zxvf - < $RESTORE_DIR/$ARCHIVE_FILE 1>/dev/null
SQL_FILE=postgres.$POSTGRESQL_POD_NAMESPACE.all.sql
if [ -e $RESTORE_DIR/$SQL_FILE ]
then
DBS=$( grep 'CREATE DATABASE' $RESTORE_DIR/$SQL_FILE | awk '{ print $3 }' )
else
DBS=" "
fi
@ -97,14 +199,21 @@ get_databases() {
#Display all databases from an archive
list_databases() {
archive_file=$1
get_databases $archive_file
#echo $DBS
ARCHIVE_FILE=$1
REMOTE=$2
WHERE="local"
if [[ "x${REMOTE}" != "x" ]]
then
WHERE="remote"
fi
get_databases $ARCHIVE_FILE $REMOTE
if [ -n "$DBS" ]
then
echo " "
echo "Databases in the archive $archive_file"
echo "================================================================="
echo "Databases in the $WHERE archive $ARCHIVE_FILE"
echo "================================================================================"
for db in $DBS
do
echo $db
@ -112,7 +221,6 @@ list_databases() {
else
echo "There is no database in the archive."
fi
}
create_db_if_not_exist() {
@ -125,61 +233,48 @@ create_db_if_not_exist() {
#Restore a single database dump from pg_dumpall dump.
restore_single_db() {
single_db_name=$1
if [ -z "$single_db_name" ]
SINGLE_DB_NAME=$1
if [ -z "$SINGLE_DB_NAME" ]
then
usage 1
fi
if [ -f ${ARCHIVE_DIR}/${archive_file} ]
SQL_FILE=postgres.$POSTGRESQL_POD_NAMESPACE.all.sql
if [ -f $RESTORE_DIR/$SQL_FILE ]
then
files_to_purge=$(find $RESTORE_DIR/ -iname "*.sql" -print)
delete_files $files_to_purge
tar zxvf ${ARCHIVE_DIR}/${archive_file} -C ${RESTORE_DIR} 1>/dev/null
if [ -f ${RESTORE_DIR}/postgres.all.sql ]
then
extract_single_db_dump ${RESTORE_DIR}/postgres.all.sql $single_db_name
if [[ -f ${RESTORE_DIR}/${single_db_name}.sql && -s ${RESTORE_DIR}/${single_db_name}.sql ]]
extract_single_db_dump $RESTORE_DIR/$SQL_FILE $SINGLE_DB_NAME
if [[ -f $RESTORE_DIR/$SINGLE_DB_NAME.sql && -s $RESTORE_DIR/$SINGLE_DB_NAME.sql ]]
then
create_db_if_not_exist $single_db_name
$PSQL -d $single_db_name -f ${RESTORE_DIR}/${single_db_name}.sql 2>>$LOG_FILE
$PSQL -d $SINGLE_DB_NAME -f ${RESTORE_DIR}/${SINGLE_DB_NAME}.sql 2>>$LOG_FILE >> $LOG_FILE
if [ "$?" -eq 0 ]
then
echo "Database Restore Successful."
else
log_error "Database Restore Failed."
clean_and_exit 1 "Database Restore Failed."
fi
else
log_error "Database Dump For $single_db_name is empty or not available."
clean_and_exit 1 "Database Dump For $SINGLE_DB_NAME is empty or not available."
fi
else
log_error "Database file for dump_all not available to restore from"
fi
else
log_error "Archive does not exist"
clean_and_exit 1 "Database file for dump_all not available to restore from"
fi
}
#Restore all the databases
restore_all_dbs() {
if [ -f ${ARCHIVE_DIR}/${archive_file} ]
SQL_FILE=postgres.$POSTGRESQL_POD_NAMESPACE.all.sql
if [ -f $RESTORE_DIR/$SQL_FILE ]
then
files_to_purge=$(find $RESTORE_DIR/ -iname "*.sql" -print)
delete_files $files_to_purge
tar zxvf ${ARCHIVE_DIR}/${archive_file} -C ${RESTORE_DIR} 1>/dev/null
if [ -f ${RESTORE_DIR}/postgres.all.sql ]
then
$PSQL postgres -f ${RESTORE_DIR}/postgres.all.sql 2>>$LOG_FILE
$PSQL postgres -f $RESTORE_DIR/$SQL_FILE 2>>$LOG_FILE >> $LOG_FILE
if [ "$?" -eq 0 ]
then
echo "Database Restore successful."
else
log_error "Database Restore failed."
clean_and_exit 1 "Database Restore failed."
fi
else
log_error "There is no database file available to restore from"
fi
else
log_error "Archive does not exist"
clean_and_exit 1 "There is no database file available to restore from"
fi
}
@ -199,16 +294,21 @@ is_Option() {
}
#Main
#Create Restore Directory
#Create Restore Directory if it's not created already
mkdir -p $RESTORE_DIR
if [ ${#ARGS[@]} -gt 3 ]
#Cleanup Restore Directory
rm -rf $RESTORE_DIR/*
if [ ${#ARGS[@]} -gt 4 ]
then
usage 0
usage 1
elif [ ${#ARGS[@]} -eq 1 ]
then
if [ "${ARGS[0]}" == "list_archives" ]
then
list_archives
clean_and_exit 0 ""
elif [ "${ARGS[0]}" == "help" ]
then
usage 0
@ -220,40 +320,53 @@ then
if [ "${ARGS[0]}" == "list_databases" ]
then
list_databases ${ARGS[1]}
clean_and_exit 0 ""
elif [ "${ARGS[0]}" == "list_archives" ]
then
list_archives ${ARGS[1]}
clean_and_exit 0 ""
else
usage 1
fi
elif [ ${#ARGS[@]} -eq 3 ]
elif [[ ${#ARGS[@]} -eq 3 ]] || [[ ${#ARGS[@]} -eq 4 ]]
then
if [ "${ARGS[0]}" != "restore" ]
if [ "${ARGS[0]}" == "list_databases" ]
then
list_databases ${ARGS[1]} ${ARGS[2]}
clean_and_exit 0 ""
elif [ "${ARGS[0]}" != "restore" ]
then
usage 1
else
if [ -f ${ARCHIVE_DIR}/${ARGS[1]} ]
ARCHIVE=${ARGS[1]}
DB_SPEC=${ARGS[2]}
REMOTE=""
if [ ${#ARGS[@]} -eq 4 ]
then
REMOTE=${ARGS[3]}
fi
#Get all the databases in that archive
get_databases ${ARGS[1]}
get_databases $ARCHIVE $REMOTE
#check if the requested database is available in the archive
if [ $(is_Option "$DBS" ${ARGS[2]}) -eq 1 ]
if [ $(is_Option "$DBS" $DB_SPEC) -eq 1 ]
then
echo "Restoring Database ${ARGS[2]} And Grants"
restore_single_db ${ARGS[2]}
echo "Restoring Database $DB_SPEC And Grants"
restore_single_db $DB_SPEC
echo "Tail ${LOG_FILE} for restore log."
elif [ "$( echo ${ARGS[2]} | tr '[a-z]' '[A-Z]')" == "ALL" ]
clean_and_exit 0 ""
elif [ "$( echo $DB_SPEC | tr '[a-z]' '[A-Z]')" == "ALL" ]
then
echo "Restoring All The Database."
echo "Restoring All The Databases. This could take a few minutes..."
restore_all_dbs
echo "Tail ${LOG_FILE} for restore log."
clean_and_exit 0 "Tail ${LOG_FILE} for restore log."
else
echo "There is no database with that name"
fi
else
echo "Archive file not found"
clean_and_exit 1 "There is no database with that name"
fi
fi
else
usage 1
fi
exit 0
clean_and_exit 0 "Done"

View File

@ -34,6 +34,12 @@ data:
{{- if .Values.conf.backup.enabled }}
backup_postgresql.sh: {{ tuple "bin/_backup_postgresql.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }}
restore_postgresql.sh: {{ tuple "bin/_restore_postgresql.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }}
remote_store_postgresql.sh: {{ tuple "bin/_remote_store_postgresql.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }}
remote_retrieve_postgresql.sh: {{ tuple "bin/_remote_retrieve_postgresql.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }}
common_backup_restore.sh: {{ tuple "bin/_common_backup_restore.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }}
{{- end }}
{{- if .Values.manifests.job_ks_user }}
ks-user.sh: {{ include "helm-toolkit.scripts.keystone_user" . | b64enc }}
{{- end }}
set_password.sh: {{ tuple "bin/_set_password.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }}
patroni_conversion.sh: {{ tuple "bin/_patroni_conversion.sh.tpl" . | include "helm-toolkit.utils.template" | b64enc }}

View File

@ -29,6 +29,12 @@ metadata:
labels:
{{ tuple $envAll "postgresql-backup" "backup" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
spec:
{{- if .Values.jobs.backup_postgresql.backoffLimit }}
backoffLimit: {{ .Values.jobs.backup_postgresql.backoffLimit }}
{{- end }}
{{- if .Values.jobs.backup_postgresql.activeDeadlineSeconds }}
activeDeadlineSeconds: {{ .Values.jobs.backup_postgresql.activeDeadlineSeconds }}
{{- end }}
schedule: {{ .Values.jobs.backup_postgresql.cron | quote }}
successfulJobsHistoryLimit: {{ .Values.jobs.backup_postgresql.history.success }}
failedJobsHistoryLimit: {{ .Values.jobs.backup_postgresql.history.failed }}
@ -79,6 +85,10 @@ spec:
volumeMounts:
- name: pod-tmp
mountPath: /tmp
- mountPath: /tmp/common_backup_restore.sh
name: postgresql-bin
readOnly: true
subPath: common_backup_restore.sh
- mountPath: /tmp/backup_postgresql.sh
name: postgresql-bin
readOnly: true
@ -89,6 +99,33 @@ spec:
mountPath: /etc/postgresql/admin_user.conf
subPath: admin_user.conf
readOnly: true
- name: postgresql-remote-store
{{ tuple $envAll "postgresql_remote_store" | include "helm-toolkit.snippets.image" | indent 14 }}
command:
- /tmp/remote_store_postgresql.sh
env:
{{- with $env := dict "ksUserSecret" $envAll.Values.secrets.identity.postgresql }}
{{- include "helm-toolkit.snippets.keystone_openrc_env_vars" $env | indent 16 }}
{{- end }}
- name: POSTGRESQL_BACKUP_BASE_DIR
value: {{ .Values.conf.backup.base_path }}
- name: POSTGRESQL_BACKUP_DAYS_TO_KEEP
value: "{{ .Values.conf.backup.days_of_backup_to_keep }}"
- name: POSTGRESQL_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
volumeMounts:
- mountPath: /tmp/common_backup_restore.sh
name: postgresql-bin
readOnly: true
subPath: common_backup_restore.sh
- mountPath: /tmp/remote_store_postgresql.sh
name: postgresql-bin
readOnly: true
subPath: remote_store_postgresql.sh
- mountPath: {{ .Values.conf.backup.base_path }}
name: postgresql-backup-dir
restartPolicy: OnFailure
serviceAccount: {{ $serviceAccountName }}
serviceAccountName: {{ $serviceAccountName }}

View File

@ -0,0 +1,22 @@
{{/*
Copyright 2019 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.job_ks_user }}
{{- $backoffLimit := .Values.jobs.ks_user.backoffLimit }}
{{- $activeDeadlineSeconds := .Values.jobs.ks_user.activeDeadlineSeconds }}
{{- $ksUserJob := dict "envAll" . "serviceName" "postgresql" "secretBin" "postgresql-bin" "backoffLimit" $backoffLimit "activeDeadlineSeconds" $activeDeadlineSeconds -}}
{{ $ksUserJob | include "helm-toolkit.manifests.job_ks_user" }}
{{- end }}

View File

@ -0,0 +1,64 @@
{{/*
This manifest results in up to two secrets being created:
  1) Keystone "postgresql" secret, which is needed to access the cluster
     (remote or same cluster) for storing postgresql backups. If the
     cluster is remote, the auth_url would be non-null.
  2) Keystone "admin" secret, which is needed to create the "postgresql"
     keystone account mentioned above. This may not be needed if the
     account is in a remote cluster (auth_url is non-null in that case).
     It is only rendered when .Values.manifests.job_ks_user is true.

Nothing is rendered unless .Values.conf.backup.remote_backup.enabled is true.

Every value under "data" is piped through b64enc, because Kubernetes
requires the values of a Secret's "data" map to be base64-encoded.
*/}}

{{- if .Values.conf.backup.remote_backup.enabled }}

{{- $envAll := . }}
{{- $userClass := "postgresql" }}
{{- $secretName := index $envAll.Values.secrets.identity $userClass }}
---
apiVersion: v1
kind: Secret
metadata:
  name: {{ $secretName }}
type: Opaque
data:
{{- $identityClass := .Values.endpoints.identity.auth.postgresql }}
{{- /* Use the explicit auth_url when one is set; otherwise look up the internal identity endpoint. */}}
{{- if $identityClass.auth_url }}
  OS_AUTH_URL: {{ $identityClass.auth_url | b64enc }}
{{- else }}
  OS_AUTH_URL: {{ tuple "identity" "internal" "api" $envAll | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | b64enc }}
{{- end }}
  OS_REGION_NAME: {{ $identityClass.region_name | b64enc }}
  OS_INTERFACE: {{ $identityClass.interface | default "internal" | b64enc }}
  OS_PROJECT_DOMAIN_NAME: {{ $identityClass.project_domain_name | b64enc }}
  OS_PROJECT_NAME: {{ $identityClass.project_name | b64enc }}
  OS_USER_DOMAIN_NAME: {{ $identityClass.user_domain_name | b64enc }}
  OS_USERNAME: {{ $identityClass.username | b64enc }}
  OS_PASSWORD: {{ $identityClass.password | b64enc }}
  OS_DEFAULT_DOMAIN: {{ $identityClass.default_domain_id | default "default" | b64enc }}
...
{{- /* The admin secret is only rendered together with the ks-user job manifest. */}}
{{- if .Values.manifests.job_ks_user }}
{{- $userClass := "admin" }}
{{- $secretName := index $envAll.Values.secrets.identity $userClass }}
---
apiVersion: v1
kind: Secret
metadata:
  name: {{ $secretName }}
type: Opaque
data:
{{- $identityClass := index .Values.endpoints.identity.auth $userClass }}
{{- /* The explicit auth_url must be base64-encoded like every other "data" entry. */}}
{{- if $identityClass.auth_url }}
  OS_AUTH_URL: {{ $identityClass.auth_url | b64enc }}
{{- else }}
  OS_AUTH_URL: {{ tuple "identity" "internal" "api" $envAll | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | b64enc }}
{{- end }}
  OS_REGION_NAME: {{ $identityClass.region_name | b64enc }}
  OS_INTERFACE: {{ $identityClass.interface | default "internal" | b64enc }}
  OS_PROJECT_DOMAIN_NAME: {{ $identityClass.project_domain_name | b64enc }}
  OS_PROJECT_NAME: {{ $identityClass.project_name | b64enc }}
  OS_USER_DOMAIN_NAME: {{ $identityClass.user_domain_name | b64enc }}
  OS_USERNAME: {{ $identityClass.username | b64enc }}
  OS_PASSWORD: {{ $identityClass.password | b64enc }}
  OS_DEFAULT_DOMAIN: {{ $identityClass.default_domain_id | default "default" | b64enc }}
...
{{- end }}
{{- end }}

View File

@ -117,6 +117,13 @@ pod:
limits:
memory: "1024Mi"
cpu: "2000m"
ks_user:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
# using dockerhub patroni: https://hub.docker.com/r/openstackhelm/patroni/tags/
images:
@ -124,8 +131,10 @@ images:
postgresql: "docker.io/openstackhelm/patroni:latest-ubuntu_xenial"
dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
image_repo_sync: docker.io/docker:17.07.0
ks_user: docker.io/openstackhelm/heat:stein-ubuntu_bionic
prometheus_postgresql_exporter: docker.io/wrouesnel/postgres_exporter:v0.4.6
prometheus_postgresql_exporter_create_user: "docker.io/postgres:9.5"
postgresql_remote_store: docker.io/openstackhelm/heat:stein-ubuntu_bionic
pull_policy: "IfNotPresent"
local_registry:
active: false
@ -169,8 +178,9 @@ dependencies:
- endpoint: node
service: local_image_registry
static:
postgresql:
jobs: null
backup_postgresql:
jobs:
- postgresql-ks-user
tests:
services:
- endpoint: internal
@ -208,10 +218,17 @@ volume:
jobs:
backup_postgresql:
# activeDeadlineSeconds == 0 means no deadline
activeDeadlineSeconds: 0
backoffLimit: 6
cron: "0 0 * * *"
history:
success: 3
failed: 1
ks_user:
# activeDeadlineSeconds == 0 means no deadline
activeDeadlineSeconds: 0
backoffLimit: 6
network_policy:
postgresql:
@ -350,10 +367,13 @@ conf:
watchdog:
mode: off # Allowed values: off, automatic, required
backup:
enabled: true
enabled: false
base_path: /var/backup
days_of_backup_to_keep: 3
pg_dumpall_options: null
remote_backup:
enabled: false
container_name: postgresql
exporter:
queries:
pg_replication:
@ -397,6 +417,9 @@ secrets:
server: postgresql-server-pki
exporter: postgresql-exporter
audit: postgresql-audit
identity:
admin: keystone-admin-user
postgresql: postgresql-backup-user
endpoints:
cluster_domain_suffix: cluster.local
@ -457,12 +480,51 @@ endpoints:
port:
metrics:
default: 9187
identity:
name: backup-storage-auth
namespace: openstack
auth:
admin:
# Auth URL of null indicates local authentication
# HTK will form the URL unless specified here
auth_url: null
region_name: RegionOne
username: admin
password: password
project_name: admin
user_domain_name: default
project_domain_name: default
postgresql:
# Auth URL of null indicates local authentication
# HTK will form the URL unless specified here
auth_url: null
role: admin
region_name: RegionOne
username: postgresql-backup-user
password: password
project_name: service
user_domain_name: service
project_domain_name: service
hosts:
default: keystone
internal: keystone-api
host_fqdn_override:
default: null
path:
default: /v3
scheme:
default: 'http'
port:
api:
default: 80
internal: 5000
manifests:
configmap_bin: true
configmap_etc: true
job_image_repo_sync: true
network_policy: false
job_ks_user: false
secret_admin: true
secret_replica: true
secret_server: true