From 37a8b1e9d01cd1d8a128f3c23a2900e607cbd791 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Mon, 20 Jan 2025 12:08:06 -0800 Subject: [PATCH] Handle borg 1.2 rc 1 for warnings behavior Borg 1.2 exits 0 for success, exits 1 for backup completion with warnings and exits >1 for proper errors. The most common cause of a warning appears to be a file changing while it is backed up. This causes paste02 backups to consistently email us because our borg backup script exits 1 which we treat as a failure. Update the script so that when borg 1.2.8 is in use we treat rc 1 as a success with warnings rather than failure with warnings. This should make our cron job quieter and match the old 1.1.18 behavior. To better test this we also drop the backup exclusion for the borg backup log file (which we write to as we back up so it is very likely to change during backups). Change-Id: Iab69f0d5951247897d204dcb0a2face424472db0 --- .../borg-backup/templates/borg-backup.j2 | 30 ++++++++++++++----- .../zuul/templates/group_vars/all.yaml.j2 | 5 ---- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/playbooks/roles/borg-backup/templates/borg-backup.j2 b/playbooks/roles/borg-backup/templates/borg-backup.j2 index 9bfe8816e9..ee754110cc 100644 --- a/playbooks/roles/borg-backup/templates/borg-backup.j2 +++ b/playbooks/roles/borg-backup/templates/borg-backup.j2 @@ -39,6 +39,8 @@ ${BORG_CREATE} \ backup_exit=$? +# Default stream_exit to success as we may not backup any streams +stream_exit=0 for f in $(shopt -s nullglob; echo /etc/borg-streams/*) do stream_name=$(basename $f) @@ -52,24 +54,38 @@ do info "Note that problems in the ssh connectivity might cause the streaming script to fail. You may need to check both halves of the streaming backup." stream_exit=${_status[0]} elif [[ ${_status[1]} -ne 0 ]]; then + # We don't check BORG_VERSION here because streaming backups should + # never have the file change under them while backing up. info "Borg failed (rc: ${_status[1]})!" 
stream_exit=${_status[1]} - else - stream_exit=0 fi - (( backup_exit = backup_exit || stream_exit )) done -if [ ${backup_exit} -eq 0 ]; then +BORG_VERSION=$(${BORG} --version) +# Default to failure +final_exit=1 + +if [ ${backup_exit} -eq 0 ] && [ ${stream_exit} -eq 0 ] ; then info "Backup finished successfully" + final_exit=0 +elif [ ${backup_exit} -eq 1 ] && [ ${stream_exit} -eq 0 ] && \ + [ ! "${BORG_VERSION}" \< "borg 1.2" ] ; then + # Borg 1.2 and newer exit with rc 1 if warnings occur. The most common + # warning is for files that change while being backed up. We treat that + # as a successful backup if it occurs. + # Note: Use not less than so that all variants of 1.1.xy don't match. + # Using greater than we'd get comparisons like 1.1.8 is greater + # than 1.1.18. + info "Backup finished with warnings." + final_exit=0 else info "Backup finished with errors" if [ ${BORG_UNDER_CRON:-0} -eq 1 ]; then echo "Backups failed on host $(hostname) at $(date)." | \ mail -s "ACTION REQUIRED: Backup failed on $(hostname)" infra-root@openstack.org fi + # Attempt to preserve as much information about the error code as possible. + (( final_exit = backup_exit || stream_exit )) fi - -exit ${backup_exit} - +exit ${final_exit} diff --git a/playbooks/zuul/templates/group_vars/all.yaml.j2 b/playbooks/zuul/templates/group_vars/all.yaml.j2 index 5cc64bea06..cb80bcbd6a 100644 --- a/playbooks/zuul/templates/group_vars/all.yaml.j2 +++ b/playbooks/zuul/templates/group_vars/all.yaml.j2 @@ -12,8 +12,3 @@ iptables_test_public_tcp_ports: {{ iptables_test_public_tcp_ports }} iptables_egress_rules: - -o lo -j ACCEPT - -p tcp -m tcp --dport 25 --tcp-flags FIN,SYN,RST,ACK SYN -j REJECT --reject-with tcp-reset -# This is the file we log our backups to. This means the file is being -# changed while backups run which can lead to a warning and non zero exit -# code from borg. Just ignore it as we don't need to backup the file. 
-borg_backup_excludes_extra: - - /var/log/borg-backup-borg-backup01.region.provider.opendev.org.log