From ba38ba5df54b8eb24a371f5ffd19a44a5b7eeed7 Mon Sep 17 00:00:00 2001 From: Christian Schwede Date: Fri, 13 Jun 2014 10:33:03 +0000 Subject: [PATCH] Fix object auditor recon and logging 1. Nothing is logged until at least one audit needs more than log_time. If the audit runtime never exceeds this value (which is 3600 seconds by default), nothing is logged and the recon entry is never updated. This happens especially on very fast disks with low usage and/or if only a few disks are audited (for example, using the --devices parameter on the command line). This patch changes this to log and update the recon cache entry at least once after the first device audit. 2. If device_dirs is set, the recon entry is deleted after all devices are audited. This patch removes that deletion. Change-Id: Ifa504d21389b3a5f7eaf914b19d6e26543dac121 --- swift/obj/auditor.py | 10 +++------- test/unit/obj/test_auditor.py | 6 ++++++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/swift/obj/auditor.py b/swift/obj/auditor.py index 13d34c18c4..3e0afd52b0 100644 --- a/swift/obj/auditor.py +++ b/swift/obj/auditor.py @@ -47,6 +47,7 @@ class AuditorWorker(object): self.max_files_per_second = float(self.zero_byte_only_at_fps) self.auditor_type = 'ZBF' self.log_time = int(conf.get('log_time', 3600)) + self.last_logged = 0 self.files_running_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 @@ -91,7 +92,7 @@ class AuditorWorker(object): self.files_running_time, self.max_files_per_second) self.total_files_processed += 1 now = time.time() - if now - reported >= self.log_time: + if now - self.last_logged >= self.log_time: self.logger.info(_( 'Object audit (%(type)s). 
' 'Since %(start_time)s: Locally: %(passes)d passed, ' @@ -122,6 +123,7 @@ class AuditorWorker(object): self.quarantines = 0 self.errors = 0 self.bytes_processed = 0 + self.last_logged = now time_auditing += (now - loop_time) # Avoid divide by zero during very short runs elapsed = (time.time() - begin) or 0.000001 @@ -138,12 +140,6 @@ class AuditorWorker(object): 'frate': self.total_files_processed / elapsed, 'brate': self.total_bytes_processed / elapsed, 'audit': time_auditing, 'audit_rate': time_auditing / elapsed}) - # Clear recon cache entry if device_dirs is set - if device_dirs: - cache_entry = self.create_recon_nested_dict( - 'object_auditor_stats_%s' % (self.auditor_type), - device_dirs, {}) - dump_recon_cache(cache_entry, self.rcache, self.logger) if self.stats_sizes: self.logger.info( _('Object audit stats: %s') % json.dumps(self.stats_buckets)) diff --git a/test/unit/obj/test_auditor.py b/test/unit/obj/test_auditor.py index ea14540bd2..37f16ed4f8 100644 --- a/test/unit/obj/test_auditor.py +++ b/test/unit/obj/test_auditor.py @@ -175,6 +175,8 @@ class TestAuditor(unittest.TestCase): def test_generic_exception_handling(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) + # pretend that we logged (and reset counters) just now + auditor_worker.last_logged = time.time() timestamp = str(normalize_timestamp(time.time())) pre_errors = auditor_worker.errors data = '0' * 1024 @@ -249,6 +251,8 @@ class TestAuditor(unittest.TestCase): self.rcache, self.devices) timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines + # pretend that we logged (and reset counters) just now + auditor_worker.last_logged = time.time() data = '0' * 1024 etag = md5() with self.disk_file.create() as writer: @@ -268,6 +272,8 @@ class TestAuditor(unittest.TestCase): def test_object_run_once_multi_devices(self): auditor_worker = auditor.AuditorWorker(self.conf, self.logger, self.rcache, self.devices) + # 
pretend that we logged (and reset counters) just now + auditor_worker.last_logged = time.time() timestamp = str(normalize_timestamp(time.time())) pre_quarantines = auditor_worker.quarantines data = '0' * 10