swift-recon: Added oldest and most recent replication pass reporting
I've been doing this with cluster-wide log searches for far too long. This adds support for reporting the oldest replication pass completion as well as the most recent. This is quite useful for finding those odd replicators that have hung up for some reason and need intervention. Change-Id: I7fd7260eca162d6b085f3e82aaa3cf90670f2d53
This commit is contained in:
parent
b8626f9667
commit
a88b412e17
@ -19,6 +19,21 @@ import sys
|
||||
import os
|
||||
|
||||
|
||||
def seconds2timeunit(seconds):
    """
    Scale a duration in seconds up to the largest fitting time unit.

    The value is promoted seconds -> minutes -> hours -> days, stopping at
    the first unit in which it is smaller than the next conversion factor.

    :param seconds: duration in seconds (int or float)
    :returns: tuple of (elapsed, unit) where unit is one of 'seconds',
              'minutes', 'hours' or 'days'. Values below 60 are returned
              unchanged; scaled values are floats.
    """
    elapsed = seconds
    unit = 'seconds'
    # Each entry is (divisor to reach the next unit, name of that unit).
    steps = ((60.0, 'minutes'), (60.0, 'hours'), (24.0, 'days'))
    for factor, next_unit in steps:
        # Stop as soon as the value no longer fills one whole next unit.
        if not elapsed >= factor:
            break
        elapsed = elapsed / factor
        unit = next_unit
    return elapsed, unit
|
||||
|
||||
|
||||
class Scout(object):
|
||||
"""
|
||||
Obtain swift recon information
|
||||
@ -261,6 +276,10 @@ class SwiftRecon(object):
|
||||
recon = Scout("replication/%s" % self.server_type, self.verbose,
|
||||
self.suppress_errors, self.timeout)
|
||||
print "[%s] Checking on replication" % self._ptime()
|
||||
least_recent_time = 9999999999
|
||||
least_recent_url = None
|
||||
most_recent_time = 0
|
||||
most_recent_url = None
|
||||
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||
if status == 200:
|
||||
stats['replication_time'].append(
|
||||
@ -269,6 +288,13 @@ class SwiftRecon(object):
|
||||
if repl_stats:
|
||||
for stat_key in ['attempted', 'failure', 'success']:
|
||||
stats[stat_key].append(repl_stats.get(stat_key))
|
||||
last = response.get('replication_last', 0)
|
||||
if last < least_recent_time:
|
||||
least_recent_time = last
|
||||
least_recent_url = url
|
||||
if last > most_recent_time:
|
||||
most_recent_time = last
|
||||
most_recent_url = url
|
||||
for k in stats:
|
||||
if stats[k]:
|
||||
if k != 'replication_time':
|
||||
@ -282,6 +308,25 @@ class SwiftRecon(object):
|
||||
print "[%s] - No hosts returned valid data." % k
|
||||
else:
|
||||
print "[%s] - No hosts returned valid data." % k
|
||||
if least_recent_url is not None:
|
||||
host = urlparse(url).netloc
|
||||
if not least_recent_time:
|
||||
print 'Oldest completion was NEVER by %s.' % host
|
||||
else:
|
||||
elapsed = time.time() - least_recent_time
|
||||
elapsed, elapsed_unit = seconds2timeunit(elapsed)
|
||||
print 'Oldest completion was %s (%d %s ago) by %s.' % (
|
||||
time.strftime('%Y-%m-%d %H:%M:%S',
|
||||
time.gmtime(least_recent_time)),
|
||||
elapsed, elapsed_unit, host)
|
||||
if most_recent_url is not None:
|
||||
host = urlparse(url).netloc
|
||||
elapsed = time.time() - most_recent_time
|
||||
elapsed, elapsed_unit = seconds2timeunit(elapsed)
|
||||
print 'Most recent completion was %s (%d %s ago) by %s.' % (
|
||||
time.strftime('%Y-%m-%d %H:%M:%S',
|
||||
time.gmtime(most_recent_time)),
|
||||
elapsed, elapsed_unit, host)
|
||||
print "=" * 79
|
||||
|
||||
def object_replication_check(self, hosts):
|
||||
@ -295,9 +340,20 @@ class SwiftRecon(object):
|
||||
recon = Scout("replication", self.verbose, self.suppress_errors,
|
||||
self.timeout)
|
||||
print "[%s] Checking on replication" % self._ptime()
|
||||
least_recent_time = 9999999999
|
||||
least_recent_url = None
|
||||
most_recent_time = 0
|
||||
most_recent_url = None
|
||||
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||
if status == 200:
|
||||
stats[url] = response['object_replication_time']
|
||||
last = response.get('object_replication_last', 0)
|
||||
if last < least_recent_time:
|
||||
least_recent_time = last
|
||||
least_recent_url = url
|
||||
if last > most_recent_time:
|
||||
most_recent_time = last
|
||||
most_recent_url = url
|
||||
times = [x for x in stats.values() if x is not None]
|
||||
if len(stats) > 0 and len(times) > 0:
|
||||
computed = self._gen_stats(times, 'replication_time')
|
||||
@ -307,6 +363,25 @@ class SwiftRecon(object):
|
||||
print "[replication_time] - No hosts returned valid data."
|
||||
else:
|
||||
print "[replication_time] - No hosts returned valid data."
|
||||
if least_recent_url is not None:
|
||||
host = urlparse(url).netloc
|
||||
if not least_recent_time:
|
||||
print 'Oldest completion was NEVER by %s.' % host
|
||||
else:
|
||||
elapsed = time.time() - least_recent_time
|
||||
elapsed, elapsed_unit = seconds2timeunit(elapsed)
|
||||
print 'Oldest completion was %s (%d %s ago) by %s.' % (
|
||||
time.strftime('%Y-%m-%d %H:%M:%S',
|
||||
time.gmtime(least_recent_time)),
|
||||
elapsed, elapsed_unit, host)
|
||||
if most_recent_url is not None:
|
||||
host = urlparse(url).netloc
|
||||
elapsed = time.time() - most_recent_time
|
||||
elapsed, elapsed_unit = seconds2timeunit(elapsed)
|
||||
print 'Most recent completion was %s (%d %s ago) by %s.' % (
|
||||
time.strftime('%Y-%m-%d %H:%M:%S',
|
||||
time.gmtime(most_recent_time)),
|
||||
elapsed, elapsed_unit, host)
|
||||
print "=" * 79
|
||||
|
||||
def updater_check(self, hosts):
|
||||
|
@ -194,7 +194,8 @@ class Replicator(Daemon):
|
||||
% self.stats)
|
||||
dump_recon_cache(
|
||||
{'replication_stats': self.stats,
|
||||
'replication_time': time.time() - self.stats['start']},
|
||||
'replication_time': time.time() - self.stats['start'],
|
||||
'replication_last': time.time()},
|
||||
self.rcache, self.logger)
|
||||
self.logger.info(' '.join(['%s:%s' % item for item in
|
||||
self.stats.items() if item[0] in
|
||||
|
@ -122,14 +122,17 @@ class ReconMiddleware(object):
|
||||
"""get replication info"""
|
||||
if recon_type == 'account':
|
||||
return self._from_recon_cache(['replication_time',
|
||||
'replication_stats'],
|
||||
'replication_stats',
|
||||
'replication_last'],
|
||||
self.account_recon_cache)
|
||||
elif recon_type == 'container':
|
||||
return self._from_recon_cache(['replication_time',
|
||||
'replication_stats'],
|
||||
'replication_stats',
|
||||
'replication_last'],
|
||||
self.container_recon_cache)
|
||||
elif recon_type == 'object':
|
||||
return self._from_recon_cache(['object_replication_time'],
|
||||
return self._from_recon_cache(['object_replication_time',
|
||||
'object_replication_last'],
|
||||
self.object_recon_cache)
|
||||
else:
|
||||
return None
|
||||
|
@ -647,7 +647,8 @@ class ObjectReplicator(Daemon):
|
||||
self.logger.info(
|
||||
_("Object replication complete (once). (%.02f minutes)"), total)
|
||||
if not (override_partitions or override_devices):
|
||||
dump_recon_cache({'object_replication_time': total},
|
||||
dump_recon_cache({'object_replication_time': total,
|
||||
'object_replication_last': time.time()},
|
||||
self.rcache, self.logger)
|
||||
|
||||
def run_forever(self, *args, **kwargs):
|
||||
@ -661,7 +662,8 @@ class ObjectReplicator(Daemon):
|
||||
total = (time.time() - start) / 60
|
||||
self.logger.info(
|
||||
_("Object replication complete. (%.02f minutes)"), total)
|
||||
dump_recon_cache({'object_replication_time': total},
|
||||
dump_recon_cache({'object_replication_time': total,
|
||||
'object_replication_last': time.time()},
|
||||
self.rcache, self.logger)
|
||||
self.logger.debug(_('Replication sleeping for %s seconds.'),
|
||||
self.run_pause)
|
||||
|
@ -388,11 +388,13 @@ class TestReconSuccess(TestCase):
|
||||
"remove": 0, "rsync": 0,
|
||||
"start": 1333044050.855202,
|
||||
"success": 2, "ts_repl": 0 },
|
||||
"replication_time": 0.2615511417388916}
|
||||
"replication_time": 0.2615511417388916,
|
||||
"replication_last": 1357969645.25}
|
||||
self.fakecache.fakeout = from_cache_response
|
||||
rv = self.app.get_replication_info('account')
|
||||
self.assertEquals(self.fakecache.fakeout_calls,
|
||||
[((['replication_time', 'replication_stats'],
|
||||
[((['replication_time', 'replication_stats',
|
||||
'replication_last'],
|
||||
'/var/cache/swift/account.recon'), {})])
|
||||
self.assertEquals(rv, {"replication_stats": {
|
||||
"attempted": 1, "diff": 0,
|
||||
@ -402,7 +404,8 @@ class TestReconSuccess(TestCase):
|
||||
"remove": 0, "rsync": 0,
|
||||
"start": 1333044050.855202,
|
||||
"success": 2, "ts_repl": 0 },
|
||||
"replication_time": 0.2615511417388916})
|
||||
"replication_time": 0.2615511417388916,
|
||||
"replication_last": 1357969645.25})
|
||||
|
||||
def test_get_replication_info_container(self):
|
||||
from_cache_response = {"replication_time": 200.0,
|
||||
@ -413,12 +416,14 @@ class TestReconSuccess(TestCase):
|
||||
"no_change": 358, "remote_merge": 0,
|
||||
"remove": 0, "rsync": 0,
|
||||
"start": 5.5, "success": 358,
|
||||
"ts_repl": 0}}
|
||||
"ts_repl": 0},
|
||||
"replication_last": 1357969645.25}
|
||||
self.fakecache.fakeout_calls = []
|
||||
self.fakecache.fakeout = from_cache_response
|
||||
rv = self.app.get_replication_info('container')
|
||||
self.assertEquals(self.fakecache.fakeout_calls,
|
||||
[((['replication_time', 'replication_stats'],
|
||||
[((['replication_time', 'replication_stats',
|
||||
'replication_last'],
|
||||
'/var/cache/swift/container.recon'), {})])
|
||||
self.assertEquals(rv, {"replication_time": 200.0,
|
||||
"replication_stats": {
|
||||
@ -428,17 +433,21 @@ class TestReconSuccess(TestCase):
|
||||
"no_change": 358, "remote_merge": 0,
|
||||
"remove": 0, "rsync": 0,
|
||||
"start": 5.5, "success": 358,
|
||||
"ts_repl": 0}})
|
||||
"ts_repl": 0},
|
||||
"replication_last": 1357969645.25})
|
||||
|
||||
def test_get_replication_object(self):
|
||||
from_cache_response = {"object_replication_time": 200.0}
|
||||
from_cache_response = {"object_replication_time": 200.0,
|
||||
"object_replication_last": 1357962809.15}
|
||||
self.fakecache.fakeout_calls = []
|
||||
self.fakecache.fakeout = from_cache_response
|
||||
rv = self.app.get_replication_info('object')
|
||||
self.assertEquals(self.fakecache.fakeout_calls,
|
||||
[((['object_replication_time'],
|
||||
[((['object_replication_time',
|
||||
'object_replication_last'],
|
||||
'/var/cache/swift/object.recon'), {})])
|
||||
self.assertEquals(rv, {'object_replication_time': 200.0})
|
||||
self.assertEquals(rv, {'object_replication_time': 200.0,
|
||||
'object_replication_last': 1357962809.15})
|
||||
|
||||
def test_get_updater_info_container(self):
|
||||
from_cache_response = {"container_updater_sweep": 18.476239919662476}
|
||||
|
Loading…
Reference in New Issue
Block a user