Fix object auditor recon and logging

1. Nothing is logged until at least one audit takes longer than log_time.
   If the audit runtime never exceeds this value (3600 seconds by
   default), nothing is logged and the recon entry is never updated.
   This happens especially on very fast disks with low usage and/or if
   only a few disks are audited (for example, when using the --devices
   parameter on the command line).

   This patch changes this to log and update the recon cache entry
   at least once after the first device audit.

2. If device_dirs is set, the recon entry is deleted after all
   devices are audited. This patch removes that deletion so the
   recon entry is kept.

Change-Id: Ifa504d21389b3a5f7eaf914b19d6e26543dac121
This commit is contained in:
Christian Schwede 2014-06-13 10:33:03 +00:00
parent b4121d8f36
commit ba38ba5df5
2 changed files with 9 additions and 7 deletions

View File

@ -47,6 +47,7 @@ class AuditorWorker(object):
self.max_files_per_second = float(self.zero_byte_only_at_fps)
self.auditor_type = 'ZBF'
self.log_time = int(conf.get('log_time', 3600))
self.last_logged = 0
self.files_running_time = 0
self.bytes_running_time = 0
self.bytes_processed = 0
@ -91,7 +92,7 @@ class AuditorWorker(object):
self.files_running_time, self.max_files_per_second)
self.total_files_processed += 1
now = time.time()
if now - reported >= self.log_time:
if now - self.last_logged >= self.log_time:
self.logger.info(_(
'Object audit (%(type)s). '
'Since %(start_time)s: Locally: %(passes)d passed, '
@ -122,6 +123,7 @@ class AuditorWorker(object):
self.quarantines = 0
self.errors = 0
self.bytes_processed = 0
self.last_logged = now
time_auditing += (now - loop_time)
# Avoid divide by zero during very short runs
elapsed = (time.time() - begin) or 0.000001
@ -138,12 +140,6 @@ class AuditorWorker(object):
'frate': self.total_files_processed / elapsed,
'brate': self.total_bytes_processed / elapsed,
'audit': time_auditing, 'audit_rate': time_auditing / elapsed})
# Clear recon cache entry if device_dirs is set
if device_dirs:
cache_entry = self.create_recon_nested_dict(
'object_auditor_stats_%s' % (self.auditor_type),
device_dirs, {})
dump_recon_cache(cache_entry, self.rcache, self.logger)
if self.stats_sizes:
self.logger.info(
_('Object audit stats: %s') % json.dumps(self.stats_buckets))

View File

@ -175,6 +175,8 @@ class TestAuditor(unittest.TestCase):
def test_generic_exception_handling(self):
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
self.rcache, self.devices)
# pretend that we logged (and reset counters) just now
auditor_worker.last_logged = time.time()
timestamp = str(normalize_timestamp(time.time()))
pre_errors = auditor_worker.errors
data = '0' * 1024
@ -249,6 +251,8 @@ class TestAuditor(unittest.TestCase):
self.rcache, self.devices)
timestamp = str(normalize_timestamp(time.time()))
pre_quarantines = auditor_worker.quarantines
# pretend that we logged (and reset counters) just now
auditor_worker.last_logged = time.time()
data = '0' * 1024
etag = md5()
with self.disk_file.create() as writer:
@ -268,6 +272,8 @@ class TestAuditor(unittest.TestCase):
def test_object_run_once_multi_devices(self):
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
self.rcache, self.devices)
# pretend that we logged (and reset counters) just now
auditor_worker.last_logged = time.time()
timestamp = str(normalize_timestamp(time.time()))
pre_quarantines = auditor_worker.quarantines
data = '0' * 10