diff --git a/bin/swift-recon b/bin/swift-recon index b72cc2841d..6cc27fb040 100755 --- a/bin/swift-recon +++ b/bin/swift-recon @@ -19,6 +19,21 @@ import sys import os +def seconds2timeunit(seconds): + elapsed = seconds + unit = 'seconds' + if elapsed >= 60: + elapsed = elapsed / 60.0 + unit = 'minutes' + if elapsed >= 60: + elapsed = elapsed / 60.0 + unit = 'hours' + if elapsed >= 24: + elapsed = elapsed / 24.0 + unit = 'days' + return elapsed, unit + + class Scout(object): """ Obtain swift recon information @@ -261,6 +276,10 @@ class SwiftRecon(object): recon = Scout("replication/%s" % self.server_type, self.verbose, self.suppress_errors, self.timeout) print "[%s] Checking on replication" % self._ptime() + least_recent_time = 9999999999 + least_recent_url = None + most_recent_time = 0 + most_recent_url = None for url, response, status in self.pool.imap(recon.scout, hosts): if status == 200: stats['replication_time'].append( @@ -269,6 +288,13 @@ class SwiftRecon(object): if repl_stats: for stat_key in ['attempted', 'failure', 'success']: stats[stat_key].append(repl_stats.get(stat_key)) + last = response.get('replication_last', 0) + if last < least_recent_time: + least_recent_time = last + least_recent_url = url + if last > most_recent_time: + most_recent_time = last + most_recent_url = url for k in stats: if stats[k]: if k != 'replication_time': @@ -282,6 +308,25 @@ class SwiftRecon(object): print "[%s] - No hosts returned valid data." % k else: print "[%s] - No hosts returned valid data." % k + if least_recent_url is not None: + host = urlparse(url).netloc + if not least_recent_time: + print 'Oldest completion was NEVER by %s.' % host + else: + elapsed = time.time() - least_recent_time + elapsed, elapsed_unit = seconds2timeunit(elapsed) + print 'Oldest completion was %s (%d %s ago) by %s.' % ( + time.strftime('%Y-%m-%d %H:%M:%S', + time.gmtime(least_recent_time)), + elapsed, elapsed_unit, host) + if most_recent_url is not None: + host = urlparse(url).netloc + elapsed = time.time() - most_recent_time + elapsed, elapsed_unit = seconds2timeunit(elapsed) + print 'Most recent completion was %s (%d %s ago) by %s.' % ( + time.strftime('%Y-%m-%d %H:%M:%S', + time.gmtime(most_recent_time)), + elapsed, elapsed_unit, host) print "=" * 79 def object_replication_check(self, hosts): @@ -295,9 +340,20 @@ class SwiftRecon(object): recon = Scout("replication", self.verbose, self.suppress_errors, self.timeout) print "[%s] Checking on replication" % self._ptime() + least_recent_time = 9999999999 + least_recent_url = None + most_recent_time = 0 + most_recent_url = None for url, response, status in self.pool.imap(recon.scout, hosts): if status == 200: stats[url] = response['object_replication_time'] + last = response.get('object_replication_last', 0) + if last < least_recent_time: + least_recent_time = last + least_recent_url = url + if last > most_recent_time: + most_recent_time = last + most_recent_url = url times = [x for x in stats.values() if x is not None] if len(stats) > 0 and len(times) > 0: computed = self._gen_stats(times, 'replication_time') @@ -307,6 +363,25 @@ class SwiftRecon(object): print "[replication_time] - No hosts returned valid data." else: print "[replication_time] - No hosts returned valid data." + if least_recent_url is not None: + host = urlparse(url).netloc + if not least_recent_time: + print 'Oldest completion was NEVER by %s.' % host + else: + elapsed = time.time() - least_recent_time + elapsed, elapsed_unit = seconds2timeunit(elapsed) + print 'Oldest completion was %s (%d %s ago) by %s.' % ( + time.strftime('%Y-%m-%d %H:%M:%S', + time.gmtime(least_recent_time)), + elapsed, elapsed_unit, host) + if most_recent_url is not None: + host = urlparse(url).netloc + elapsed = time.time() - most_recent_time + elapsed, elapsed_unit = seconds2timeunit(elapsed) + print 'Most recent completion was %s (%d %s ago) by %s.' % ( + time.strftime('%Y-%m-%d %H:%M:%S', + time.gmtime(most_recent_time)), + elapsed, elapsed_unit, host) print "=" * 79 def updater_check(self, hosts): diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index ed16177ed5..72eaa920b8 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -194,7 +194,8 @@ class Replicator(Daemon): % self.stats) dump_recon_cache( {'replication_stats': self.stats, - 'replication_time': time.time() - self.stats['start']}, + 'replication_time': time.time() - self.stats['start'], + 'replication_last': time.time()}, self.rcache, self.logger) self.logger.info(' '.join(['%s:%s' % item for item in self.stats.items() if item[0] in diff --git a/swift/common/middleware/recon.py b/swift/common/middleware/recon.py index 4bcb8f6db6..aa3bd963bb 100644 --- a/swift/common/middleware/recon.py +++ b/swift/common/middleware/recon.py @@ -122,14 +122,17 @@ class ReconMiddleware(object): """get replication info""" if recon_type == 'account': return self._from_recon_cache(['replication_time', - 'replication_stats'], + 'replication_stats', + 'replication_last'], self.account_recon_cache) elif recon_type == 'container': return self._from_recon_cache(['replication_time', - 'replication_stats'], + 'replication_stats', + 'replication_last'], self.container_recon_cache) elif recon_type == 'object': - return self._from_recon_cache(['object_replication_time'], + return self._from_recon_cache(['object_replication_time', + 'object_replication_last'], self.object_recon_cache) else: return None diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index 56cceb5b95..fcf6b66459 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -647,7 +647,8 @@ class ObjectReplicator(Daemon): self.logger.info( _("Object replication complete (once). (%.02f minutes)"), total) if not (override_partitions or override_devices): - dump_recon_cache({'object_replication_time': total}, + dump_recon_cache({'object_replication_time': total, + 'object_replication_last': time.time()}, self.rcache, self.logger) def run_forever(self, *args, **kwargs): @@ -661,7 +662,8 @@ class ObjectReplicator(Daemon): total = (time.time() - start) / 60 self.logger.info( _("Object replication complete. (%.02f minutes)"), total) - dump_recon_cache({'object_replication_time': total}, + dump_recon_cache({'object_replication_time': total, + 'object_replication_last': time.time()}, self.rcache, self.logger) self.logger.debug(_('Replication sleeping for %s seconds.'), self.run_pause) diff --git a/test/unit/common/middleware/test_recon.py b/test/unit/common/middleware/test_recon.py index 5bcc936d9b..4d12f8d318 100644 --- a/test/unit/common/middleware/test_recon.py +++ b/test/unit/common/middleware/test_recon.py @@ -388,11 +388,13 @@ class TestReconSuccess(TestCase): "remove": 0, "rsync": 0, "start": 1333044050.855202, "success": 2, "ts_repl": 0 }, - "replication_time": 0.2615511417388916} + "replication_time": 0.2615511417388916, + "replication_last": 1357969645.25} self.fakecache.fakeout = from_cache_response rv = self.app.get_replication_info('account') self.assertEquals(self.fakecache.fakeout_calls, - [((['replication_time', 'replication_stats'], + [((['replication_time', 'replication_stats', + 'replication_last'], '/var/cache/swift/account.recon'), {})]) self.assertEquals(rv, {"replication_stats": { "attempted": 1, "diff": 0, @@ -402,7 +404,8 @@ class TestReconSuccess(TestCase): "remove": 0, "rsync": 0, "start": 1333044050.855202, "success": 2, "ts_repl": 0 }, - "replication_time": 0.2615511417388916}) + "replication_time": 0.2615511417388916, + "replication_last": 1357969645.25}) def test_get_replication_info_container(self): from_cache_response = {"replication_time": 200.0, @@ -413,12 +416,14 @@ class TestReconSuccess(TestCase): "no_change": 358, "remote_merge": 0, "remove": 0, "rsync": 0, "start": 5.5, "success": 358, - "ts_repl": 0}} + "ts_repl": 0}, + "replication_last": 1357969645.25} self.fakecache.fakeout_calls = [] self.fakecache.fakeout = from_cache_response rv = self.app.get_replication_info('container') self.assertEquals(self.fakecache.fakeout_calls, - [((['replication_time', 'replication_stats'], + [((['replication_time', 'replication_stats', + 'replication_last'], '/var/cache/swift/container.recon'), {})]) self.assertEquals(rv, {"replication_time": 200.0, "replication_stats": { @@ -428,17 +433,21 @@ class TestReconSuccess(TestCase): "no_change": 358, "remote_merge": 0, "remove": 0, "rsync": 0, "start": 5.5, "success": 358, - "ts_repl": 0}}) + "ts_repl": 0}, + "replication_last": 1357969645.25}) def test_get_replication_object(self): - from_cache_response = {"object_replication_time": 200.0} + from_cache_response = {"object_replication_time": 200.0, + "object_replication_last": 1357962809.15} self.fakecache.fakeout_calls = [] self.fakecache.fakeout = from_cache_response rv = self.app.get_replication_info('object') self.assertEquals(self.fakecache.fakeout_calls, - [((['object_replication_time'], + [((['object_replication_time', + 'object_replication_last'], '/var/cache/swift/object.recon'), {})]) - self.assertEquals(rv, {'object_replication_time': 200.0}) + self.assertEquals(rv, {'object_replication_time': 200.0, + 'object_replication_last': 1357962809.15}) def test_get_updater_info_container(self): from_cache_response = {"container_updater_sweep": 18.476239919662476}