diff --git a/bin/swift-drive-audit b/bin/swift-drive-audit index bda4cd2af7..c9c456d3af 100755 --- a/bin/swift-drive-audit +++ b/bin/swift-drive-audit @@ -16,12 +16,14 @@ import datetime import glob +import locale import os import re import subprocess import sys +import six from six.moves.configparser import ConfigParser from swift.common.utils import backward, get_logger, dump_recon_cache, \ @@ -57,7 +59,8 @@ def get_devices(device_dir, logger): return devices -def get_errors(error_re, log_file_pattern, minutes, logger): +def get_errors(error_re, log_file_pattern, minutes, logger, + log_file_encoding): # Assuming log rotation is being used, we need to examine # recently rotated files in case the rotation occurred # just before the script is being run - the data we are @@ -87,12 +90,14 @@ def get_errors(error_re, log_file_pattern, minutes, logger): reached_old_logs = False for path in log_files: try: - f = open(path) + f = open(path, 'rb') except IOError: logger.error("Error: Unable to open " + path) print("Unable to open " + path) sys.exit(1) for line in backward(f): + if not six.PY2: + line = line.decode(log_file_encoding, 'surrogateescape') if '[ 0.000000]' in line \ or 'KERNEL supported cpus:' in line \ or 'BIOS-provided physical RAM map:' in line: @@ -105,7 +110,7 @@ def get_errors(error_re, log_file_pattern, minutes, logger): if log_time_entry[0] == 'Dec' and prev_entry_month == 'Jan': year -= 1 prev_entry_month = log_time_entry[0] - log_time_string = '%s %s' % (year, ' '.join(log_time_entry)) + log_time_string = '%d %s' % (year, ' '.join(log_time_entry)) try: log_time = datetime.datetime.strptime( log_time_string, '%Y %b %d %H:%M:%S') @@ -154,6 +159,9 @@ if __name__ == '__main__': recon_cache_path = conf.get('recon_cache_path', "/var/cache/swift") log_file_pattern = conf.get('log_file_pattern', '/var/log/kern.*[!.][!g][!z]') + log_file_encoding = conf.get('log_file_encoding', 'auto') + if log_file_encoding == 'auto': + log_file_encoding = locale.getpreferredencoding() log_to_console = config_true_value(conf.get('log_to_console', False)) error_re = [] for conf_key in conf: @@ -181,7 +189,8 @@ if __name__ == '__main__': total_errors = 0 for device in devices: recon_errors[device['mount_point']] = 0 - errors = get_errors(error_re, log_file_pattern, minutes, logger) + errors = get_errors(error_re, log_file_pattern, minutes, logger, + log_file_encoding) logger.debug("Errors found: %s" % str(errors)) unmounts = 0 for kernel_device, count in errors.items(): diff --git a/etc/drive-audit.conf-sample b/etc/drive-audit.conf-sample index 7f16517cb3..69de6defbf 100644 --- a/etc/drive-audit.conf-sample +++ b/etc/drive-audit.conf-sample @@ -26,6 +26,10 @@ # pattern to check against device errors. # log_file_pattern = /var/log/kern.*[!.][!g][!z] # +# On Python 3, the encoding to use when reading the log file. Defaults +# to the result of locale.getpreferredencoding(), like Python's open(). +# log_file_encoding = auto +# # Regular expression patterns to be used to locate # device blocks with errors in the log file. Currently # the default ones are as follows: