Merge "Configuration options for error regex and log file in the config now"

This commit is contained in:
Jenkins 2013-07-29 18:23:39 +00:00 committed by Gerrit Code Review
commit 3349013dff
3 changed files with 63 additions and 23 deletions

View File

@ -25,13 +25,6 @@ from ConfigParser import ConfigParser
from swift.common.utils import backward, get_logger
# To search for more types of errors, add the regex to the list below
error_re = [
re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
]
def get_devices(device_dir, logger):
devices = []
for line in open('/proc/mounts').readlines():
@ -61,12 +54,17 @@ def get_devices(device_dir, logger):
return devices
def get_errors(minutes):
def get_errors(error_re, log_file_pattern, minutes):
# Assuming log rotation is being used, we need to examine
# recently rotated files in case the rotation occurred
# just before the script was run - the data we are
# looking for may have been rotated out.
log_files = [f for f in glob.glob('/var/log/kern.*[!.][!g][!z]')]
#
# The previously hard-coded glob did not work with every
# distro's out-of-the-box logrotate and syslog setup, so the
# pattern is now taken from the config, where it can be set
# to whatever glob matches the local log files.
log_files = [f for f in glob.glob(log_file_pattern)]
log_files.sort()
now_time = datetime.datetime.now()
@ -143,13 +141,30 @@ if __name__ == '__main__':
device_dir = conf.get('device_dir', '/srv/node')
minutes = int(conf.get('minutes', 60))
error_limit = int(conf.get('error_limit', 1))
log_file_pattern = conf.get('log_file_pattern',
'/var/log/kern.*[!.][!g][!z]')
error_re = []
for conf_key in conf:
if conf_key.startswith('regex_pattern_'):
error_pattern = conf[conf_key]
try:
r = re.compile(error_pattern)
except re.error:
sys.exit('Error: unable to compile regex pattern "%s"' %
error_pattern)
error_re.append(r)
if not error_re:
error_re = [
re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
]
conf['log_name'] = conf.get('log_name', 'drive-audit')
logger = get_logger(conf, log_route='drive-audit')
devices = get_devices(device_dir, logger)
logger.debug("Devices found: %s" % str(devices))
if not devices:
logger.error("Error: No devices found!")
errors = get_errors(minutes)
errors = get_errors(error_re, log_file_pattern, minutes)
logger.debug("Errors found: %s" % str(errors))
unmounts = 0
for kernel_device, count in errors.items():

View File

@ -156,20 +156,30 @@ settings:
[drive-audit]
================== ========== ===========================================
Option Default Description
------------------ ---------- -------------------------------------------
log_facility LOG_LOCAL0 Syslog log facility
log_level INFO Log level
device_dir /srv/node Directory devices are mounted under
minutes 60 Number of minutes to look back in
`/var/log/kern.log`
error_limit 1 Number of errors to find before a device
is unmounted
================== ========== ===========================================
================== ============== ===========================================
Option Default Description
------------------ -------------- -------------------------------------------
log_facility LOG_LOCAL0 Syslog log facility
log_level INFO Log level
device_dir /srv/node Directory devices are mounted under
minutes 60 Number of minutes to look back in
`/var/log/kern.log`
error_limit 1 Number of errors to find before a device
is unmounted
log_file_pattern /var/log/kern* Location of the log file with globbing
pattern to check against device errors
regex_pattern_X (see below) Regular expression patterns to be used to
locate device blocks with errors in the
log file
================== ============== ===========================================
This script has only been tested on Ubuntu 10.04, so if you are using a
different distro or OS, some care should be taken before using in production.
The default regex patterns used to locate device blocks with errors are
``\berror\b.*\b(sd[a-z]{1,2}\d?)\b`` and ``\b(sd[a-z]{1,2}\d?)\b.*\berror\b``.
You can override these defaults by providing new expressions using the
format ``regex_pattern_X = regex_expression``, where ``X`` is a number.
If any ``regex_pattern_X`` option is set, only the configured patterns are
used; the defaults are not applied in addition to them.
This script has been tested on Ubuntu 10.04 and Ubuntu 12.04, so if you are
using a different distro or OS, some care should be taken before using in production.
--------------
Cluster Health

View File

@ -5,3 +5,18 @@
# log_address = /dev/log
# minutes = 60
# error_limit = 1
#
# Location of the log file with globbing
# pattern to check against device errors.
# log_file_pattern = /var/log/kern.*[!.][!g][!z]
#
# Regular expression patterns to be used to locate
# device blocks with errors in the log file. Currently
# the default ones are as follows:
# \berror\b.*\b(sd[a-z]{1,2}\d?)\b
# \b(sd[a-z]{1,2}\d?)\b.*\berror\b
# You can override the defaults (setting any pattern
# replaces all of them) by providing new expressions
# using the format below:
# Format: regex_pattern_X = regex_expression
# Example:
# regex_pattern_1 = \berror\b.*\b(dm-[0-9]{1,2}\d?)\b