3f23ded7d0
There are cases when tar reports a different error string when it runs out of space while tar'ing up a tarball ; "tar: error is not recoverable". This update adds that string to the current list of strings that suggest collect errored out due to space limitation. Now that there are 3 error strings to look for, the block of script code that checked for these error strings was re-factored to search a list of error strings which makes the implementation cleaner and more maintainable. If new error message strings are found in the future then such strings need only be added to this new list. Change-Id: Ifc677740c33372b48a4856ce00caea052580788d Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com> Signed-off-by: Scott Little <scott.little@windriver.com>
237 lines
6.1 KiB
Bash
Executable File
237 lines
6.1 KiB
Bash
Executable File
#! /bin/bash
|
|
#
|
|
# Copyright (c) 2013-2014 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
##########################################################################################
|
|
|
|
DEBUG=false
|
|
|
|
# Fail Codes
|
|
PASS=0
|
|
FAIL=1
|
|
RETRY=2
|
|
|
|
FAIL_NODETYPE=3
|
|
|
|
FAIL_TIMEOUT=10
|
|
FAIL_TIMEOUT1=11
|
|
FAIL_TIMEOUT2=12
|
|
FAIL_TIMEOUT3=13
|
|
FAIL_TIMEOUT4=14
|
|
FAIL_TIMEOUT5=15
|
|
FAIL_TIMEOUT6=16
|
|
FAIL_TIMEOUT7=17
|
|
FAIL_TIMEOUT8=18
|
|
FAIL_TIMEOUT9=19
|
|
|
|
FAIL_PASSWORD=30
|
|
FAIL_PERMISSION=31
|
|
FAIL_CLEANUP=32
|
|
FAIL_UNREACHABLE=33
|
|
FAIL_HOSTNAME=34
|
|
FAIL_INACTIVE=35
|
|
FAIL_PERMISSION_SKIP=36
|
|
FAIL_OUT_OF_SPACE=37
|
|
FAIL_INSUFFICIENT_SPACE=38
|
|
FAIL_OUT_OF_SPACE_LOCAL=39
|
|
FAIL_CREATE=39
|
|
|
|
# Warnings are above 200
|
|
WARN_WARNING=200
|
|
WARN_HOSTNAME=201
|
|
|
|
# Failure Strings
|
|
FAIL_OUT_OF_SPACE_STR="No space left on device"
|
|
FAIL_TAR_OUT_OF_SPACE_STR="tar: Error is not recoverable"
|
|
FAIL_INSUFFICIENT_SPACE_STR="Not enough space on device"
|
|
|
|
# The minimum amount of % free space on /scratch to allow collect to proceed
|
|
MIN_PERCENT_SPACE_REQUIRED=75
|
|
|
|
# Log file path/names
|
|
COLLECT_LOG=/var/log/collect.log
|
|
COLLECT_ERROR_LOG=/tmp/collect_error.log
|
|
|
|
# Load up the nova openrc file if its possible
|
|
KEYRING_PATH="/opt/platform/.keyring"
|
|
if [ -e ${KEYRING_PATH} ] ; then
|
|
CRED=`find /opt/platform/.keyring -name .CREDENTIAL`
|
|
if [ ! -z "${CRED}" ] ; then
|
|
NOVAOPENRC="/etc/nova/openrc"
|
|
if [ -e ${NOVAOPENRC} ] ; then
|
|
ACTIVE=true
|
|
source ${NOVAOPENRC} 2>/dev/null 1>/dev/null
|
|
fi
|
|
fi
|
|
fi
|
|
|
|
# get the node and subfunction types
|
|
nodetype=""
|
|
subfunction=""
|
|
PLATFORM_CONF=/etc/platform/platform.conf
|
|
if [ -e ${PLATFORM_CONF} ] ; then
|
|
source ${PLATFORM_CONF}
|
|
fi
|
|
|
|
if [ "${nodetype}" != "controller" -a "${nodetype}" != "compute" -a "${nodetype}" != "storage" ] ; then
|
|
logger -t ${COLLECT_TAG} "could not identify nodetype ($nodetype)"
|
|
exit $FAIL_NODETYPE
|
|
fi
|
|
|
|
NODETYPE=$nodetype
|
|
SUBFUNCTION=$subfunction
|
|
|
|
# Setup an expect command completion file.
|
|
# This is used to force serialization of expect
|
|
# sequences and highlight command completion
|
|
collect_done="collect done"
|
|
cmd_done_sig="expect done"
|
|
cmd_done_file="/usr/local/sbin/expect_done"
|
|
|
|
# Compression Commands
|
|
TAR_ZIP_CMD="tar -cvzf"
|
|
TAR_UZIP_CMD="tar -xvzf"
|
|
TAR_CMD="tar -cvhf"
|
|
UNTAR_CMD="tar -xvf"
|
|
ZIP_CMD="gzip"
|
|
NICE_CMD="/usr/bin/nice -n19"
|
|
IONICE_CMD="/usr/bin/ionice -c2 -n7"
|
|
COLLECT_TAG="COLLECT"
|
|
|
|
STARTDATE_OPTION="--start-date"
|
|
ENDDATE_OPTION="--end-date"
|
|
|
|
|
|
PROCESS_DETAIL_CMD="ps -e -H -o ruser,tid,pid,ppid,flags,stat,policy,rtprio,nice,priority,rss:10,vsz:10,sz:10,psr,stime,tty,cputime,wchan:14,cmd"
|
|
BUILD_INFO_CMD="cat /etc/build.info"
|
|
|
|
################################################################################
|
|
# Log Debug, Info or Error log message to syslog
|
|
################################################################################
|
|
function log
|
|
{
|
|
logger -t ${COLLECT_TAG} $@
|
|
}
|
|
|
|
function ilog
|
|
{
|
|
echo "$@"
|
|
logger -t ${COLLECT_TAG} $@
|
|
#logger -p local3.info -t ${COLLECT_TAG} $@
|
|
}
|
|
|
|
function elog
|
|
{
|
|
echo "Error: $@"
|
|
logger -t ${COLLECT_TAG} $@
|
|
}
|
|
|
|
function wlog
|
|
{
|
|
echo "Warning: $@"
|
|
logger -t ${COLLECT_TAG} $@
|
|
}
|
|
|
|
function set_debug_mode()
|
|
{
|
|
DEBUG=${1}
|
|
}
|
|
|
|
function dlog()
|
|
{
|
|
if [ "$DEBUG" == true ] ; then
|
|
logger -t ${COLLECT_TAG} $@
|
|
echo "Debug: $@"
|
|
fi
|
|
}
|
|
|
|
|
|
function delimiter()
|
|
{
|
|
echo "--------------------------------------------------------------------" >> ${1} 2>>${COLLECT_ERROR_LOG}
|
|
echo "`date` : ${myhostname} : ${2}" >> ${1} 2>>${COLLECT_ERROR_LOG}
|
|
echo "--------------------------------------------------------------------" >> ${1} 2>>${COLLECT_ERROR_LOG}
|
|
}
|
|
|
|
function log_slabinfo()
|
|
{
|
|
PAGE_SIZE=$(getconf PAGE_SIZE)
|
|
cat /proc/slabinfo | awk -v page_size_B=${PAGE_SIZE} '
|
|
BEGIN {page_KiB = page_size_B/1024; TOT_KiB = 0;}
|
|
(NF == 17) {
|
|
gsub(/[<>]/, "");
|
|
printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8s\n",
|
|
$2, $3, $4, $5, $6, $7, $8, $10, $11, $12, $13, $15, $16, $17, "KiB");
|
|
}
|
|
(NF == 16) {
|
|
num_objs=$3; obj_per_slab=$5; pages_per_slab=$6;
|
|
KiB = (obj_per_slab > 0) ? page_KiB*num_objs/obj_per_slab*pages_per_slab : 0;
|
|
TOT_KiB += KiB;
|
|
printf("%-22s %11d %8d %8d %10d %12d %1s %5d %10d %12d %1s %12d %9d %11d %8d\n",
|
|
$1, $2, $3, $4, $5, $6, $7, $9, $10, $11, $12, $14, $15, $16, KiB);
|
|
}
|
|
END {
|
|
printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8d\n",
|
|
"TOTAL", "-", "-", "-", "-", "-", ":", "-", "-", "-", ":", "-", "-", "-", TOT_KiB);
|
|
}
|
|
' >> ${1} 2>>${COLLECT_ERROR_LOG}
|
|
}
|
|
###########################################################################
|
|
#
|
|
# Name : collect_errors
|
|
#
|
|
# Description: search COLLECT_ERROR_LOG for "No space left on device" logs
|
|
# Return 0 if no such logs are found.
|
|
# Return 1 if such logs are found
|
|
#
|
|
# Assumptions: Caller should assume a non-zero return as an indication of
|
|
# a corrupt or incomplete collect log
|
|
#
|
|
# Create logs and screen echos that record the error for the user.
|
|
#
|
|
# May look for other errors in the future
|
|
#
|
|
###########################################################################
|
|
|
|
listOfOutOfSpaceErrors=(
|
|
"${FAIL_OUT_OF_SPACE_STR}"
|
|
"${FAIL_TAR_OUT_OF_SPACE_STR}"
|
|
"${FAIL_INSUFFICIENT_SPACE_STR}"
|
|
)
|
|
|
|
function collect_errors()
|
|
{
|
|
local host=${1}
|
|
local RC=0
|
|
|
|
if [ -e "${COLLECT_ERROR_LOG}" ] ; then
|
|
|
|
## now loop through known space related error strings
|
|
index=0
|
|
while [ "x${listOfOutOfSpaceErrors[index]}" != "x" ]
|
|
do
|
|
grep -q "${listOfOutOfSpaceErrors[index]}" ${COLLECT_ERROR_LOG}
|
|
if [ "$?" == "0" ] ; then
|
|
|
|
string="failed to collect from ${host} (reason:${FAIL_OUT_OF_SPACE}:${FAIL_OUT_OF_SPACE_STR})"
|
|
|
|
# /var/log/user.log it
|
|
logger -t ${COLLECT_TAG} "${string}"
|
|
|
|
# logs that show up in the foreground
|
|
echo "${string}"
|
|
echo "Increase available space in ${host}:${COLLECT_BASE_DIR} and retry operation."
|
|
|
|
# return error code
|
|
RC=1
|
|
break
|
|
fi
|
|
index=$(($index+1))
|
|
done
|
|
fi
|
|
return ${RC}
|
|
}
|