From 3f23ded7d063122d0a7931e26b9203b49dd7d6ef Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Fri, 4 May 2018 10:30:49 -0500 Subject: [PATCH] Improve collectd tool to handle additional unrecoverable tar error There are cases when tar reports a different error string when it runs out of space while tar'ing up a tarball: "tar: Error is not recoverable". This update adds that string to the current list of strings that suggest collect errored out due to space limitation. Now that there are 3 error strings to look for, the block of script code that checked for these error strings was re-factored to search a list of error strings which makes the implementation cleaner and more maintainable. If new error message strings are found in the future then such strings need only be added to this new list. Change-Id: Ifc677740c33372b48a4856ce00caea052580788d Signed-off-by: Eric MacDonald Signed-off-by: Scott Little --- tools/collector/scripts/collect_utils | 39 ++++++++++++++++++--------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/tools/collector/scripts/collect_utils b/tools/collector/scripts/collect_utils index 0e91013ff..141f83342 100755 --- a/tools/collector/scripts/collect_utils +++ b/tools/collector/scripts/collect_utils @@ -45,6 +45,7 @@ WARN_HOSTNAME=201 # Failure Strings FAIL_OUT_OF_SPACE_STR="No space left on device" +FAIL_TAR_OUT_OF_SPACE_STR="tar: Error is not recoverable" FAIL_INSUFFICIENT_SPACE_STR="Not enough space on device" # The minimum amount of % free space on /scratch to allow collect to proceed @@ -195,29 +196,41 @@ function log_slabinfo() # ########################################################################### +listOfOutOfSpaceErrors=( +"${FAIL_OUT_OF_SPACE_STR}" +"${FAIL_TAR_OUT_OF_SPACE_STR}" +"${FAIL_INSUFFICIENT_SPACE_STR}" +) + function collect_errors() { local host=${1} local RC=0 - # Look for "No space left on device" error - grep -q "${FAIL_OUT_OF_SPACE_STR}" ${COLLECT_ERROR_LOG} + if [ -e "${COLLECT_ERROR_LOG}" ] ; then 
- if [ "$?" == "0" ] ; then + ## now loop through known space related error strings + index=0 + while [ "x${listOfOutOfSpaceErrors[index]}" != "x" ] + do + grep -q "${listOfOutOfSpaceErrors[index]}" ${COLLECT_ERROR_LOG} + if [ "$?" == "0" ] ; then - string="failed to collect from ${host} (reason:${FAIL_OUT_OF_SPACE}:${FAIL_OUT_OF_SPACE_STR})" + string="failed to collect from ${host} (reason:${FAIL_OUT_OF_SPACE}:${FAIL_OUT_OF_SPACE_STR})" - # /var/log/user.log it - logger -t ${COLLECT_TAG} "${string}" + # /var/log/user.log it + logger -t ${COLLECT_TAG} "${string}" - # logs that show up in the foreground - echo "${string}" - echo "Increase available space in ${host}:${COLLECT_BASE_DIR} and retry operation." - - # return error code - RC=1 + # logs that show up in the foreground + echo "${string}" + echo "Increase available space in ${host}:${COLLECT_BASE_DIR} and retry operation." + # return error code + RC=1 + break + fi + index=$(($index+1)) + done fi - return ${RC} }