Merge "Expand recon middleware support"

This commit is contained in:
Jenkins 2012-05-25 05:12:34 +00:00 committed by Gerrit Code Review
commit 676c338b7c
19 changed files with 1193 additions and 437 deletions

View File

@ -12,9 +12,9 @@ try:
except ImportError: except ImportError:
import json import json
from hashlib import md5 from hashlib import md5
import datetime
import eventlet import eventlet
import optparse import optparse
import time
import sys import sys
import os import os
@ -26,12 +26,7 @@ class Scout(object):
def __init__(self, recon_type, verbose=False, suppress_errors=False, def __init__(self, recon_type, verbose=False, suppress_errors=False,
timeout=5): timeout=5):
recon_uri = ["ringmd5", "async", "replication", "load", "diskusage", self.recon_type = recon_type
"unmounted", "quarantined", "sockstat"]
if recon_type not in recon_uri:
raise Exception("Invalid scout type requested")
else:
self.recon_type = recon_type
self.verbose = verbose self.verbose = verbose
self.suppress_errors = suppress_errors self.suppress_errors = suppress_errors
self.timeout = timeout self.timeout = timeout
@ -87,6 +82,44 @@ class SwiftRecon(object):
self.timeout = 5 self.timeout = 5
self.pool_size = 30 self.pool_size = 30
self.pool = eventlet.GreenPool(self.pool_size) self.pool = eventlet.GreenPool(self.pool_size)
self.check_types = ['account', 'container', 'object']
self.server_type = 'object'
def _gen_stats(self, stats, name=None):
""" compute various stats from a list of values """
cstats = [x for x in stats if x is not None]
if len(cstats) > 0:
ret_dict = {'low': min(cstats), 'high': max(cstats),
'total': sum(cstats), 'reported': len(cstats),
'number_none': len(stats) - len(cstats), 'name': name}
ret_dict['average'] = \
ret_dict['total'] / float(len(cstats))
ret_dict['perc_none'] = \
ret_dict['number_none'] * 100.0 / len(stats)
else:
ret_dict = {'reported': 0}
return ret_dict
def _print_stats(self, stats):
"""
print out formatted stats to console
:param stats: dict of stats generated by _gen_stats
"""
print '[%(name)s] low: %(low)d, high: %(high)d, avg: ' \
'%(average).1f, total: %(total)d, ' \
'Failed: %(perc_none).1f%%, no_result: %(number_none)d, ' \
'reported: %(reported)d' % stats
def _ptime(self, timev=None):
"""
:param timev: a unix timestamp or None
:returns: a pretty string of the current time or provided time
"""
if timev:
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timev))
else:
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
def get_devices(self, zone_filter, swift_dir, ring_name): def get_devices(self, zone_filter, swift_dir, ring_name):
""" """
@ -125,10 +158,9 @@ class SwiftRecon(object):
ring_sum = md5sum.hexdigest() ring_sum = md5sum.hexdigest()
recon = Scout("ringmd5", self.verbose, self.suppress_errors, recon = Scout("ringmd5", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print "[%s] Checking ring md5sums" % self._ptime()
print "[%s] Checking ring md5sum's on %s hosts..." % (now, len(hosts))
if self.verbose: if self.verbose:
print "-> On disk md5sum: %s" % ring_sum print "-> On disk %s md5sum: %s" % (ringfile, ring_sum)
for url, response, status in self.pool.imap(recon.scout, hosts): for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200: if status == 200:
stats[url] = response[ringfile] stats[url] = response[ringfile]
@ -152,23 +184,18 @@ class SwiftRecon(object):
:param hosts: set of hosts to check. in the format of: :param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
""" """
stats = {} scan = {}
recon = Scout("async", self.verbose, self.suppress_errors, recon = Scout("async", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print "[%s] Checking async pendings" % self._ptime()
print "[%s] Checking async pendings on %s hosts..." % (now, len(hosts))
for url, response, status in self.pool.imap(recon.scout, hosts): for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200: if status == 200:
stats[url] = response['async_pending'] scan[url] = response['async_pending']
if len(stats) > 0: stats = self._gen_stats(scan.values(), 'async_pending')
low = min(stats.values()) if stats['reported'] > 0:
high = max(stats.values()) self._print_stats(stats)
total = sum(stats.values())
average = total / len(stats)
print "Async stats: low: %d, high: %d, avg: %d, total: %d" % (low,
high, average, total)
else: else:
print "Error: No hosts available or returned valid information." print "[async_pending] - No hosts returned valid data."
print "=" * 79 print "=" * 79
def umount_check(self, hosts): def umount_check(self, hosts):
@ -181,9 +208,8 @@ class SwiftRecon(object):
stats = {} stats = {}
recon = Scout("unmounted", self.verbose, self.suppress_errors, recon = Scout("unmounted", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print "[%s] Getting unmounted drives from %s hosts..." % \ print "[%s] Getting unmounted drives from %s hosts..." % \
(now, len(hosts)) (self._ptime(), len(hosts))
for url, response, status in self.pool.imap(recon.scout, hosts): for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200: if status == 200:
for i in response: for i in response:
@ -193,31 +219,231 @@ class SwiftRecon(object):
print "Not mounted: %s on %s" % (stats[host], node) print "Not mounted: %s on %s" % (stats[host], node)
print "=" * 79 print "=" * 79
def expirer_check(self, hosts):
"""
Obtain and print expirer statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
stats = {'object_expiration_pass': [], 'expired_last_pass': []}
recon = Scout("expirer/%s" % self.server_type, self.verbose,
self.suppress_errors, self.timeout)
print "[%s] Checking on expirers" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
stats['object_expiration_pass'].append(
response.get('object_expiration_pass'))
stats['expired_last_pass'].append(
response.get('expired_last_pass'))
for k in stats:
if stats[k]:
computed = self._gen_stats(stats[k], name=k)
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[%s] - No hosts returned valid data." % k
else:
print "[%s] - No hosts returned valid data." % k
print "=" * 79
def replication_check(self, hosts): def replication_check(self, hosts):
""" """
Obtain and print replication statistics Obtain and print replication statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
stats = {'replication_time': [], 'failure': [], 'success': [],
'attempted': []}
recon = Scout("replication/%s" % self.server_type, self.verbose,
self.suppress_errors, self.timeout)
print "[%s] Checking on replication" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
stats['replication_time'].append(
response.get('replication_time'))
repl_stats = response['replication_stats']
if repl_stats:
for stat_key in ['attempted', 'failure', 'success']:
stats[stat_key].append(repl_stats.get(stat_key))
for k in stats:
if stats[k]:
if k != 'replication_time':
computed = self._gen_stats(stats[k],
name='replication_%s' % k)
else:
computed = self._gen_stats(stats[k], name=k)
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[%s] - No hosts returned valid data." % k
else:
print "[%s] - No hosts returned valid data." % k
print "=" * 79
def object_replication_check(self, hosts):
"""
Obtain and print replication statistics from object servers
:param hosts: set of hosts to check. in the format of: :param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
""" """
stats = {} stats = {}
recon = Scout("replication", self.verbose, self.suppress_errors, recon = Scout("replication", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print "[%s] Checking on replication" % self._ptime()
print "[%s] Checking replication times on %s hosts..." % \
(now, len(hosts))
for url, response, status in self.pool.imap(recon.scout, hosts): for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200: if status == 200:
stats[url] = response['object_replication_time'] stats[url] = response['object_replication_time']
if len(stats) > 0: times = [x for x in stats.values() if x is not None]
low = min(stats.values()) if len(stats) > 0 and len(times) > 0:
high = max(stats.values()) computed = self._gen_stats(times, 'replication_time')
total = sum(stats.values()) if computed['reported'] > 0:
average = total / len(stats) self._print_stats(computed)
print "[Replication Times] shortest: %s, longest: %s, avg: %s" % \ else:
(low, high, average) print "[replication_time] - No hosts returned valid data."
else: else:
print "Error: No hosts available or returned valid information." print "[replication_time] - No hosts returned valid data."
print "=" * 79
def updater_check(self, hosts):
"""
Obtain and print updater statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
stats = []
recon = Scout("updater/%s" % self.server_type, self.verbose,
self.suppress_errors, self.timeout)
print "[%s] Checking updater times" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
if response['%s_updater_sweep' % self.server_type]:
stats.append(response['%s_updater_sweep' %
self.server_type])
if len(stats) > 0:
computed = self._gen_stats(stats, name='updater_last_sweep')
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[updater_last_sweep] - No hosts returned valid data."
else:
print "[updater_last_sweep] - No hosts returned valid data."
print "=" * 79
def auditor_check(self, hosts):
"""
Obtain and print obj auditor statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
scan = {}
adone = '%s_auditor_pass_completed' % self.server_type
afail = '%s_audits_failed' % self.server_type
apass = '%s_audits_passed' % self.server_type
asince = '%s_audits_since' % self.server_type
recon = Scout("auditor/%s" % self.server_type, self.verbose,
self.suppress_errors, self.timeout)
print "[%s] Checking auditor stats" % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
scan[url] = response
if len(scan) < 1:
print "Error: No hosts available"
return
stats = {}
stats[adone] = [scan[i][adone] for i in scan
if scan[i][adone] is not None]
stats[afail] = [scan[i][afail] for i in scan
if scan[i][afail] is not None]
stats[apass] = [scan[i][apass] for i in scan
if scan[i][apass] is not None]
stats[asince] = [scan[i][asince] for i in scan
if scan[i][asince] is not None]
for k in stats:
if len(stats[k]) < 1:
print "[%s] - No hosts returned valid data." % k
else:
if k != asince:
computed = self._gen_stats(stats[k], k)
if computed['reported'] > 0:
self._print_stats(computed)
if len(stats[asince]) >= 1:
low = min(stats[asince])
high = max(stats[asince])
total = sum(stats[asince])
average = total / len(stats[asince])
print '[last_pass] oldest: %s, newest: %s, avg: %s' % \
(self._ptime(low), self._ptime(high), self._ptime(average))
print "=" * 79
def object_auditor_check(self, hosts):
"""
Obtain and print obj auditor statistics
:param hosts: set of hosts to check. in the format of:
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
"""
all_scan = {}
zbf_scan = {}
atime = 'audit_time'
bprocessed = 'bytes_processed'
passes = 'passes'
errors = 'errors'
quarantined = 'quarantined'
recon = Scout("auditor/object", self.verbose, self.suppress_errors,
self.timeout)
print "[%s] Checking auditor stats " % self._ptime()
for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200:
if response['object_auditor_stats_ALL']:
all_scan[url] = response['object_auditor_stats_ALL']
if response['object_auditor_stats_ZBF']:
zbf_scan[url] = response['object_auditor_stats_ZBF']
if len(all_scan) > 0:
stats = {}
stats[atime] = [all_scan[i][atime] for i in all_scan]
stats[bprocessed] = [all_scan[i][bprocessed] for i in all_scan]
stats[passes] = [all_scan[i][passes] for i in all_scan]
stats[errors] = [all_scan[i][errors] for i in all_scan]
stats[quarantined] = [all_scan[i][quarantined] for i in all_scan]
for k in stats:
if None in stats[k]:
stats[k] = [x for x in stats[k] if x is not None]
if len(stats[k]) < 1:
print "[Auditor %s] - No hosts returned valid data." % k
else:
computed = self._gen_stats(stats[k],
name='ALL_%s_last_path' % k)
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[ALL_auditor] - No hosts returned valid data."
else:
print "[ALL_auditor] - No hosts returned valid data."
if len(zbf_scan) > 0:
stats = {}
stats[atime] = [zbf_scan[i][atime] for i in zbf_scan]
stats[bprocessed] = [zbf_scan[i][bprocessed] for i in zbf_scan]
stats[errors] = [zbf_scan[i][errors] for i in zbf_scan]
stats[quarantined] = [zbf_scan[i][quarantined] for i in zbf_scan]
for k in stats:
if None in stats[k]:
stats[k] = [x for x in stats[k] if x is not None]
if len(stats[k]) < 1:
print "[Auditor %s] - No hosts returned valid data." % k
else:
computed = self._gen_stats(stats[k],
name='ZBF_%s_last_path' % k)
if computed['reported'] > 0:
self._print_stats(computed)
else:
print "[ZBF_auditor] - No hosts returned valid data."
else:
print "[ZBF_auditor] - No hosts returned valid data."
print "=" * 79 print "=" * 79
def load_check(self, hosts): def load_check(self, hosts):
@ -232,8 +458,7 @@ class SwiftRecon(object):
load15 = {} load15 = {}
recon = Scout("load", self.verbose, self.suppress_errors, recon = Scout("load", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print "[%s] Checking load averages" % self._ptime()
print "[%s] Checking load avg's on %s hosts..." % (now, len(hosts))
for url, response, status in self.pool.imap(recon.scout, hosts): for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200: if status == 200:
load1[url] = response['1m'] load1[url] = response['1m']
@ -242,14 +467,11 @@ class SwiftRecon(object):
stats = {"1m": load1, "5m": load5, "15m": load15} stats = {"1m": load1, "5m": load5, "15m": load15}
for item in stats: for item in stats:
if len(stats[item]) > 0: if len(stats[item]) > 0:
low = min(stats[item].values()) computed = self._gen_stats(stats[item].values(),
high = max(stats[item].values()) name='%s_load_avg' % item)
total = sum(stats[item].values()) self._print_stats(computed)
average = total / len(stats[item])
print "[%s load average] lowest: %s, highest: %s, avg: %s" % \
(item, low, high, average)
else: else:
print "Error: No hosts available or returned valid info." print "[%s_load_avg] - No hosts returned valid data." % item
print "=" * 79 print "=" * 79
def quarantine_check(self, hosts): def quarantine_check(self, hosts):
@ -264,8 +486,7 @@ class SwiftRecon(object):
acctq = {} acctq = {}
recon = Scout("quarantined", self.verbose, self.suppress_errors, recon = Scout("quarantined", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print "[%s] Checking quarantine" % self._ptime()
print "[%s] Checking quarantine on %s hosts..." % (now, len(hosts))
for url, response, status in self.pool.imap(recon.scout, hosts): for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200: if status == 200:
objq[url] = response['objects'] objq[url] = response['objects']
@ -274,14 +495,11 @@ class SwiftRecon(object):
stats = {"objects": objq, "containers": conq, "accounts": acctq} stats = {"objects": objq, "containers": conq, "accounts": acctq}
for item in stats: for item in stats:
if len(stats[item]) > 0: if len(stats[item]) > 0:
low = min(stats[item].values()) computed = self._gen_stats(stats[item].values(),
high = max(stats[item].values()) name='quarantined_%s' % item)
total = sum(stats[item].values()) self._print_stats(computed)
average = total / len(stats[item])
print ("[Quarantined %s] low: %d, high: %d, avg: %d, total: %d"
% (item, low, high, average, total))
else: else:
print "Error: No hosts available or returned valid info." print "No hosts returned valid data."
print "=" * 79 print "=" * 79
def socket_usage(self, hosts): def socket_usage(self, hosts):
@ -298,8 +516,7 @@ class SwiftRecon(object):
orphan = {} orphan = {}
recon = Scout("sockstat", self.verbose, self.suppress_errors, recon = Scout("sockstat", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print "[%s] Checking socket usage" % self._ptime()
print "[%s] Checking socket usage on %s hosts..." % (now, len(hosts))
for url, response, status in self.pool.imap(recon.scout, hosts): for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200: if status == 200:
inuse4[url] = response['tcp_in_use'] inuse4[url] = response['tcp_in_use']
@ -312,14 +529,10 @@ class SwiftRecon(object):
"orphan": orphan} "orphan": orphan}
for item in stats: for item in stats:
if len(stats[item]) > 0: if len(stats[item]) > 0:
low = min(stats[item].values()) computed = self._gen_stats(stats[item].values(), item)
high = max(stats[item].values()) self._print_stats(computed)
total = sum(stats[item].values())
average = total / len(stats[item])
print "[%s] low: %d, high: %d, avg: %d, total: %d" % \
(item, low, high, average, total)
else: else:
print "Error: No hosts or info available." print "No hosts returned valid data."
print "=" * 79 print "=" * 79
def disk_usage(self, hosts): def disk_usage(self, hosts):
@ -334,12 +547,10 @@ class SwiftRecon(object):
lows = [] lows = []
raw_total_used = [] raw_total_used = []
raw_total_avail = [] raw_total_avail = []
averages = []
percents = {} percents = {}
recon = Scout("diskusage", self.verbose, self.suppress_errors, recon = Scout("diskusage", self.verbose, self.suppress_errors,
self.timeout) self.timeout)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") print "[%s] Checking disk usage now" % self._ptime()
print "[%s] Checking disk usage on %s hosts..." % (now, len(hosts))
for url, response, status in self.pool.imap(recon.scout, hosts): for url, response, status in self.pool.imap(recon.scout, hosts):
if status == 200: if status == 200:
hostusage = [] hostusage = []
@ -357,11 +568,8 @@ class SwiftRecon(object):
#get per host hi/los for another day #get per host hi/los for another day
low = min(stats[url]) low = min(stats[url])
high = max(stats[url]) high = max(stats[url])
total = sum(stats[url])
average = total / len(stats[url])
highs.append(high) highs.append(high)
lows.append(low) lows.append(low)
averages.append(average)
for percent in stats[url]: for percent in stats[url]:
percents[int(percent)] = percents.get(int(percent), 0) + 1 percents[int(percent)] = percents.get(int(percent), 0) + 1
else: else:
@ -370,7 +578,6 @@ class SwiftRecon(object):
if len(lows) > 0: if len(lows) > 0:
low = min(lows) low = min(lows)
high = max(highs) high = max(highs)
average = sum(averages) / len(averages)
#dist graph shamelessly stolen from https://github.com/gholt/tcod #dist graph shamelessly stolen from https://github.com/gholt/tcod
print "Distribution Graph:" print "Distribution Graph:"
mul = 69.0 / max(percents.values()) mul = 69.0 / max(percents.values())
@ -380,12 +587,13 @@ class SwiftRecon(object):
raw_used = sum(raw_total_used) raw_used = sum(raw_total_used)
raw_avail = sum(raw_total_avail) raw_avail = sum(raw_total_avail)
raw_total = raw_used + raw_avail raw_total = raw_used + raw_avail
avg_used = 100.0 * raw_used / raw_total
print "Disk usage: space used: %s of %s" % (raw_used, raw_total) print "Disk usage: space used: %s of %s" % (raw_used, raw_total)
print "Disk usage: space free: %s of %s" % (raw_avail, raw_total) print "Disk usage: space free: %s of %s" % (raw_avail, raw_total)
print "Disk usage: lowest: %s%%, highest: %s%%, avg: %s%%" % \ print "Disk usage: lowest: %s%%, highest: %s%%, avg: %s%%" % \
(low, high, average) (low, high, avg_used)
else: else:
print "Error: No hosts available or returned valid information." print "No hosts returned valid data."
print "=" * 79 print "=" * 79
def main(self): def main(self):
@ -394,7 +602,13 @@ class SwiftRecon(object):
""" """
print "=" * 79 print "=" * 79
usage = ''' usage = '''
usage: %prog [-v] [--suppress] [-a] [-r] [-u] [-d] [-l] [--objmd5] usage: %prog <server_type> [-v] [--suppress] [-a] [-r] [-u] [-d]
[-l] [--md5] [--auditor] [--updater] [--expirer] [--sockstat]
<server_type>\taccount|container|object
Defaults to object server.
ex: %prog container -l --auditor
''' '''
args = optparse.OptionParser(usage) args = optparse.OptionParser(usage)
args.add_option('--verbose', '-v', action="store_true", args.add_option('--verbose', '-v', action="store_true",
@ -405,6 +619,12 @@ class SwiftRecon(object):
help="Get async stats") help="Get async stats")
args.add_option('--replication', '-r', action="store_true", args.add_option('--replication', '-r', action="store_true",
help="Get replication stats") help="Get replication stats")
args.add_option('--auditor', action="store_true",
help="Get auditor stats")
args.add_option('--updater', action="store_true",
help="Get updater stats")
args.add_option('--expirer', action="store_true",
help="Get expirer stats")
args.add_option('--unmounted', '-u', action="store_true", args.add_option('--unmounted', '-u', action="store_true",
help="Check cluster for unmounted devices") help="Check cluster for unmounted devices")
args.add_option('--diskusage', '-d', action="store_true", args.add_option('--diskusage', '-d', action="store_true",
@ -413,12 +633,12 @@ class SwiftRecon(object):
help="Get cluster load average stats") help="Get cluster load average stats")
args.add_option('--quarantined', '-q', action="store_true", args.add_option('--quarantined', '-q', action="store_true",
help="Get cluster quarantine stats") help="Get cluster quarantine stats")
args.add_option('--objmd5', action="store_true", args.add_option('--md5', action="store_true",
help="Get md5sums of object.ring.gz and compare to local copy") help="Get md5sum of servers ring and compare to local copy")
args.add_option('--sockstat', action="store_true", args.add_option('--sockstat', action="store_true",
help="Get cluster socket usage stats") help="Get cluster socket usage stats")
args.add_option('--all', action="store_true", args.add_option('--all', action="store_true",
help="Perform all checks. Equal to -arudlq --objmd5 --sockstat") help="Perform all checks. Equal to -arudlq --md5 --sockstat")
args.add_option('--zone', '-z', type="int", args.add_option('--zone', '-z', type="int",
help="Only query servers in specified zone") help="Only query servers in specified zone")
args.add_option('--timeout', '-t', type="int", metavar="SECONDS", args.add_option('--timeout', '-t', type="int", metavar="SECONDS",
@ -427,44 +647,88 @@ class SwiftRecon(object):
help="Default = /etc/swift") help="Default = /etc/swift")
options, arguments = args.parse_args() options, arguments = args.parse_args()
if len(sys.argv) <= 1: if len(sys.argv) <= 1 or len(arguments) > 1:
args.print_help() args.print_help()
sys.exit(0) sys.exit(0)
swift_dir = options.swiftdir if arguments:
obj_ring = os.path.join(swift_dir, 'object.ring.gz') if arguments[0] in self.check_types:
self.server_type = arguments[0]
else:
print "Invalid Server Type"
args.print_help()
sys.exit(1)
else:
self.server_type = 'object'
swift_dir = options.swiftdir
ring_file = os.path.join(swift_dir, '%s.ring.gz' % self.server_type)
self.verbose = options.verbose self.verbose = options.verbose
self.suppress_errors = options.suppress self.suppress_errors = options.suppress
self.timeout = options.timeout self.timeout = options.timeout
if options.zone: if options.zone:
hosts = self.get_devices(options.zone, swift_dir, 'object') hosts = self.get_devices(options.zone, swift_dir, self.server_type)
else: else:
hosts = self.get_devices(None, swift_dir, 'object') hosts = self.get_devices(None, swift_dir, self.server_type)
print "--> Starting reconnaissance on %s hosts" % len(hosts)
print "=" * 79
if options.all: if options.all:
self.async_check(hosts) if self.server_type == 'object':
self.async_check(hosts)
self.object_replication_check(hosts)
self.object_auditor_check(hosts)
self.updater_check(hosts)
self.expirer_check(hosts)
elif self.server_type == 'container':
self.replication_check(hosts)
self.auditor_check(hosts)
self.updater_check(hosts)
elif self.server_type == 'account':
self.replication_check(hosts)
self.auditor_check(hosts)
self.umount_check(hosts) self.umount_check(hosts)
self.replication_check(hosts)
self.load_check(hosts) self.load_check(hosts)
self.disk_usage(hosts) self.disk_usage(hosts)
self.get_ringmd5(hosts, obj_ring) self.get_ringmd5(hosts, ring_file)
self.quarantine_check(hosts) self.quarantine_check(hosts)
self.socket_usage(hosts) self.socket_usage(hosts)
else: else:
if options.async: if options.async:
self.async_check(hosts) if self.server_type == 'object':
self.async_check(hosts)
else:
print "Error: Can't check async's on non object servers."
if options.unmounted: if options.unmounted:
self.umount_check(hosts) self.umount_check(hosts)
if options.replication: if options.replication:
self.replication_check(hosts) if self.server_type == 'object':
self.object_replication_check(hosts)
else:
self.replication_check(hosts)
if options.auditor:
if self.server_type == 'object':
self.object_auditor_check(hosts)
else:
self.auditor_check(hosts)
if options.updater:
if self.server_type == 'account':
print "Error: Can't check updaters on account servers."
else:
self.updater_check(hosts)
if options.expirer:
if self.server_type == 'object':
self.expirer_check(hosts)
else:
print "Error: Can't check expired on non object servers."
if options.loadstats: if options.loadstats:
self.load_check(hosts) self.load_check(hosts)
if options.diskusage: if options.diskusage:
self.disk_usage(hosts) self.disk_usage(hosts)
if options.objmd5: if options.md5:
self.get_ringmd5(hosts, obj_ring) self.get_ringmd5(hosts, ring_file)
if options.quarantined: if options.quarantined:
self.quarantine_check(hosts) self.quarantine_check(hosts)
if options.sockstat: if options.sockstat:

View File

@ -5,17 +5,11 @@ swift-recon-cron.py
import os import os
import sys import sys
import optparse
from tempfile import NamedTemporaryFile
try:
import simplejson as json
except ImportError:
import json
from ConfigParser import ConfigParser from ConfigParser import ConfigParser
from swift.common.utils import get_logger, dump_recon_cache from swift.common.utils import get_logger, dump_recon_cache
def async_count(device_dir, logger): def get_async_count(device_dir, logger):
async_count = 0 async_count = 0
for i in os.listdir(device_dir): for i in os.listdir(device_dir):
asyncdir = os.path.join(device_dir, i, "async_pending") asyncdir = os.path.join(device_dir, i, "async_pending")
@ -53,14 +47,13 @@ def main():
print str(e) print str(e)
sys.exit(1) sys.exit(1)
try: try:
asyncs = async_count(device_dir, logger) asyncs = get_async_count(device_dir, logger)
except Exception: except Exception:
logger.exception( logger.exception(
_('Exception during recon-cron while accessing devices')) _('Exception during recon-cron while accessing devices'))
try:
dump_recon_cache('async_pending', asyncs, cache_file) dump_recon_cache({'async_pending': asyncs}, cache_file, logger)
except Exception:
logger.exception(_('Exception dumping recon cache'))
try: try:
os.rmdir(lock_dir) os.rmdir(lock_dir)
except Exception: except Exception:

View File

@ -25,7 +25,7 @@
.SH SYNOPSIS .SH SYNOPSIS
.LP .LP
.B swift-recon .B swift-recon
\ [-v] [--suppress] [-a] [-r] [-u] [-d] [-l] [--objmd5] \ <server_type> [-v] [--suppress] [-a] [-r] [-u] [-d] [-l] [--md5] [--auditor] [--updater] [--expirer] [--sockstat]
.SH DESCRIPTION .SH DESCRIPTION
.PP .PP
@ -40,6 +40,8 @@ more information in the example section below.
.SH OPTIONS .SH OPTIONS
.RS 0 .RS 0
.PD 1 .PD 1
.IP "\fB<server_type>\fR"
account|container|object - Defaults to object server.
.IP "\fB-h, --help\fR" .IP "\fB-h, --help\fR"
show this help message and exit show this help message and exit
.IP "\fB-v, --verbose\fR" .IP "\fB-v, --verbose\fR"
@ -48,6 +50,12 @@ Print verbose information
Suppress most connection related errors Suppress most connection related errors
.IP "\fB-a, --async\fR" .IP "\fB-a, --async\fR"
Get async stats Get async stats
.IP "\fB--auditor\fR"
Get auditor stats
.IP "\fB--updater\fR"
Get updater stats
.IP "\fB--expirer\fR"
Get expirer stats
.IP "\fB-r, --replication\fR" .IP "\fB-r, --replication\fR"
Get replication stats Get replication stats
.IP "\fB-u, --unmounted\fR" .IP "\fB-u, --unmounted\fR"
@ -58,10 +66,10 @@ Get disk usage stats
Get cluster load average stats Get cluster load average stats
.IP "\fB-q, --quarantined\fR" .IP "\fB-q, --quarantined\fR"
Get cluster quarantine stats Get cluster quarantine stats
.IP "\fB--objmd5\fR" .IP "\fB--md5\fR"
Get md5sums of object.ring.gz and compare to local copy Get md5sum of servers ring and compare to local cop
.IP "\fB--all\fR" .IP "\fB--all\fR"
Perform all checks. Equivalent to -arudlq --objmd5 Perform all checks. Equivalent to -arudlq --md5
.IP "\fB-z ZONE, --zone=ZONE\fR" .IP "\fB-z ZONE, --zone=ZONE\fR"
Only query servers in specified zone Only query servers in specified zone
.IP "\fB--swiftdir=PATH\fR" .IP "\fB--swiftdir=PATH\fR"

View File

@ -248,37 +248,50 @@ allows it to be more easily consumed by third party utilities::
Cluster Telemetry and Monitoring Cluster Telemetry and Monitoring
-------------------------------- --------------------------------
Various metrics and telemetry can be obtained from the object servers using Various metrics and telemetry can be obtained from the account, container, and
the recon server middleware and the swift-recon cli. To do so update your object servers using the recon server middleware and the swift-recon cli. To do
object-server.conf to enable the recon middleware by adding a pipeline entry so update your account, container, or object servers pipelines to include recon
and setting its one option:: and add the associated filter config.
object-server.conf sample::
[pipeline:main] [pipeline:main]
pipeline = recon object-server pipeline = recon object-server
[filter:recon]
use = egg:swift#recon
recon_cache_path = /var/cache/swift
container-server.conf sample::
[pipeline:main]
pipeline = recon container-server
[filter:recon]
use = egg:swift#recon
recon_cache_path = /var/cache/swift
account-server.conf sample::
[pipeline:main]
pipeline = recon account-server
[filter:recon] [filter:recon]
use = egg:swift#recon use = egg:swift#recon
recon_cache_path = /var/cache/swift recon_cache_path = /var/cache/swift
The recon_cache_path simply sets the directory where stats for a few items will The recon_cache_path simply sets the directory where stats for a few items will
be stored. Depending on the method of deployment you may need to create this be stored. Depending on the method of deployment you may need to create this
directory manually and ensure that swift has read/write. directory manually and ensure that swift has read/write access.
If you wish to enable reporting of replication times you can enable recon Finally, if you also wish to track asynchronous pending on your object
support in the object-replicator section of the object-server.conf:: servers you will need to setup a cronjob to run the swift-recon-cron script
periodically on your object servers::
[object-replicator]
...
recon_enable = yes
recon_cache_path = /var/cache/swift
Finally if you also wish to track asynchronous pending's you will need to setup
a cronjob to run the swift-recon-cron script periodically::
*/5 * * * * swift /usr/bin/swift-recon-cron /etc/swift/object-server.conf */5 * * * * swift /usr/bin/swift-recon-cron /etc/swift/object-server.conf
Once enabled a GET request for "/recon/<metric>" to the object server will Once the recon middleware is enabled a GET request for "/recon/<metric>" to
return a json formatted response:: the server will return a json formatted response::
fhines@ubuntu:~$ curl -i http://localhost:6030/recon/async fhines@ubuntu:~$ curl -i http://localhost:6030/recon/async
HTTP/1.1 200 OK HTTP/1.1 200 OK
@ -288,30 +301,39 @@ return a json formatted response::
{"async_pending": 0} {"async_pending": 0}
The following metrics and telemetry are currently exposed: The following metrics and telemetry are currently exposed::
================== ==================================================== ======================== ========================================================================================
Request URI Description Request URI Description
------------------ ---------------------------------------------------- ------------------------ ----------------------------------------------------------------------------------------
/recon/load returns 1,5, and 15 minute load average /recon/load returns 1,5, and 15 minute load average
/recon/async returns count of async pending /recon/mem returns /proc/meminfo
/recon/mem returns /proc/meminfo /recon/mounted returns *ALL* currently mounted filesystems
/recon/replication returns last logged object replication time /recon/unmounted returns all unmounted drives if mount_check = True
/recon/mounted returns *ALL* currently mounted filesystems /recon/diskusage returns disk utilization for storage devices
/recon/unmounted returns all unmounted drives if mount_check = True /recon/ringmd5 returns object/container/account ring md5sums
/recon/diskusage returns disk utilization for storage devices /recon/quarantined returns # of quarantined objects/accounts/containers
/recon/ringmd5 returns object/container/account ring md5sums /recon/sockstat returns consumable info from /proc/net/sockstat|6
/recon/quarantined returns # of quarantined objects/accounts/containers /recon/devices returns list of devices and devices dir i.e. /srv/node
/recon/sockstat returns consumable info from /proc/net/sockstat|6 /recon/async returns count of async pending
================== ==================================================== /recon/replication returns object replication times (for backward compatability)
/recon/replication/<type> returns replication info for given type (account, container, object)
/recon/auditor/<type> returns auditor stats on last reported scan for given type (account, container, object)
/recon/updater/<type> returns last updater sweep times for given type (container, object)
========================= =======================================================================================
This information can also be queried via the swift-recon command line utility:: This information can also be queried via the swift-recon command line utility::
fhines@ubuntu:~$ swift-recon -h fhines@ubuntu:~$ swift-recon -h
===============================================================================
Usage: Usage:
usage: swift-recon [-v] [--suppress] [-a] [-r] [-u] [-d] [-l] [--objmd5] usage: swift-recon <server_type> [-v] [--suppress] [-a] [-r] [-u] [-d]
[-l] [--md5] [--auditor] [--updater] [--expirer] [--sockstat]
<server_type> account|container|object
Defaults to object server.
ex: swift-recon container -l --auditor
Options: Options:
-h, --help show this help message and exit -h, --help show this help message and exit
@ -319,28 +341,32 @@ This information can also be queried via the swift-recon command line utility::
--suppress Suppress most connection related errors --suppress Suppress most connection related errors
-a, --async Get async stats -a, --async Get async stats
-r, --replication Get replication stats -r, --replication Get replication stats
--auditor Get auditor stats
--updater Get updater stats
--expirer Get expirer stats
-u, --unmounted Check cluster for unmounted devices -u, --unmounted Check cluster for unmounted devices
-d, --diskusage Get disk usage stats -d, --diskusage Get disk usage stats
-l, --loadstats Get cluster load average stats -l, --loadstats Get cluster load average stats
-q, --quarantined Get cluster quarantine stats -q, --quarantined Get cluster quarantine stats
--objmd5 Get md5sums of object.ring.gz and compare to local --md5 Get md5sum of servers ring and compare to local copy
copy
--sockstat Get cluster socket usage stats --sockstat Get cluster socket usage stats
--all Perform all checks. Equivalent to -arudlq --objmd5 --all Perform all checks. Equal to -arudlq --md5 --sockstat
--socketstat
-z ZONE, --zone=ZONE Only query servers in specified zone -z ZONE, --zone=ZONE Only query servers in specified zone
-t SECONDS, --timeout=SECONDS
Time to wait for a response from a server
--swiftdir=SWIFTDIR Default = /etc/swift --swiftdir=SWIFTDIR Default = /etc/swift
For example, to obtain quarantine stats from all hosts in zone "3":: For example, to obtain container replication info from all hosts in zone "3"::
fhines@ubuntu:~$ swift-recon -q --zone 3 fhines@ubuntu:~$ swift-recon container -r --zone 3
=============================================================================== ===============================================================================
[2011-10-18 19:36:00] Checking quarantine dirs on 1 hosts... --> Starting reconnaissance on 1 hosts
[Quarantined objects] low: 4, high: 4, avg: 4, total: 4
[Quarantined accounts] low: 0, high: 0, avg: 0, total: 0
[Quarantined containers] low: 0, high: 0, avg: 0, total: 0
=============================================================================== ===============================================================================
[2012-04-02 02:45:48] Checking on replication
[failure] low: 0.000, high: 0.000, avg: 0.000, reported: 1
[success] low: 486.000, high: 486.000, avg: 486.000, reported: 1
[replication_time] low: 20.853, high: 20.853, avg: 20.853, reported: 1
[attempted] low: 243.000, high: 243.000, avg: 243.000, reported: 1
--------------------------- ---------------------------
Reporting Metrics to StatsD Reporting Metrics to StatsD

View File

@ -22,7 +22,7 @@
# db_preallocation = off # db_preallocation = off
[pipeline:main] [pipeline:main]
pipeline = account-server pipeline = recon account-server
[app:account-server] [app:account-server]
use = egg:swift#account use = egg:swift#account
@ -33,6 +33,10 @@ use = egg:swift#account
# set log_requests = True # set log_requests = True
# auto_create_account_prefix = . # auto_create_account_prefix = .
[filter:recon]
use = egg:swift#recon
# recon_cache_path = /var/cache/swift
[account-replicator] [account-replicator]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
# log_name = account-replicator # log_name = account-replicator
@ -54,6 +58,7 @@ use = egg:swift#account
# reclaim_age = 604800 # reclaim_age = 604800
# Time in seconds to wait between replication passes # Time in seconds to wait between replication passes
# run_pause = 30 # run_pause = 30
# recon_cache_path = /var/cache/swift
[account-auditor] [account-auditor]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
@ -62,6 +67,9 @@ use = egg:swift#account
# log_level = INFO # log_level = INFO
# Will audit, at most, 1 account per device per interval # Will audit, at most, 1 account per device per interval
# interval = 1800 # interval = 1800
# log_facility = LOG_LOCAL0
# log_level = INFO
# recon_cache_path = /var/cache/swift
[account-reaper] [account-reaper]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):

View File

@ -25,7 +25,7 @@
# db_preallocation = off # db_preallocation = off
[pipeline:main] [pipeline:main]
pipeline = container-server pipeline = recon container-server
[app:container-server] [app:container-server]
use = egg:swift#container use = egg:swift#container
@ -39,6 +39,10 @@ use = egg:swift#container
# allow_versions = False # allow_versions = False
# auto_create_account_prefix = . # auto_create_account_prefix = .
[filter:recon]
use = egg:swift#recon
#recon_cache_path = /var/cache/swift
[container-replicator] [container-replicator]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
# log_name = container-replicator # log_name = container-replicator
@ -55,7 +59,7 @@ use = egg:swift#container
# reclaim_age = 604800 # reclaim_age = 604800
# Time in seconds to wait between replication passes # Time in seconds to wait between replication passes
# run_pause = 30 # run_pause = 30
# recon_cache_path = /var/cache/swift
[container-updater] [container-updater]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
@ -70,6 +74,7 @@ use = egg:swift#container
# slowdown = 0.01 # slowdown = 0.01
# Seconds to suppress updating an account that has generated an error # Seconds to suppress updating an account that has generated an error
# account_suppression_time = 60 # account_suppression_time = 60
# recon_cache_path = /var/cache/swift
[container-auditor] [container-auditor]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
@ -78,6 +83,7 @@ use = egg:swift#container
# log_level = INFO # log_level = INFO
# Will audit, at most, 1 container per device per interval # Will audit, at most, 1 container per device per interval
# interval = 1800 # interval = 1800
# recon_cache_path = /var/cache/swift
[container-sync] [container-sync]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):

View File

@ -45,8 +45,8 @@ use = egg:swift#object
[filter:recon] [filter:recon]
use = egg:swift#recon use = egg:swift#recon
recon_cache_path = /var/cache/swift #recon_cache_path = /var/cache/swift
recon_lock_path = /var/lock #recon_lock_path = /var/lock
[object-replicator] [object-replicator]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
@ -68,10 +68,8 @@ recon_lock_path = /var/lock
# lockup_timeout = 1800 # lockup_timeout = 1800
# The replicator also performs reclamation # The replicator also performs reclamation
# reclaim_age = 604800 # reclaim_age = 604800
# enable logging of replication stats for recon
# recon_enable = no
# recon_cache_path = /var/cache/swift
# ring_check_interval = 15 # ring_check_interval = 15
# recon_cache_path = /var/cache/swift
[object-updater] [object-updater]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
@ -84,6 +82,7 @@ recon_lock_path = /var/lock
# conn_timeout = 0.5 # conn_timeout = 0.5
# slowdown will sleep that amount between objects # slowdown will sleep that amount between objects
# slowdown = 0.01 # slowdown = 0.01
# recon_cache_path = /var/cache/swift
[object-auditor] [object-auditor]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
@ -94,4 +93,4 @@ recon_lock_path = /var/lock
# bytes_per_second = 10000000 # bytes_per_second = 10000000
# log_time = 3600 # log_time = 3600
# zero_byte_files_per_second = 50 # zero_byte_files_per_second = 50
# recon_cache_path = /var/cache/swift

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
import time import time
from random import random from random import random
@ -20,7 +21,7 @@ import swift.common.db
from swift.account import server as account_server from swift.account import server as account_server
from swift.common.db import AccountBroker from swift.common.db import AccountBroker
from swift.common.utils import get_logger, audit_location_generator, \ from swift.common.utils import get_logger, audit_location_generator, \
TRUE_VALUES TRUE_VALUES, dump_recon_cache
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
from eventlet import Timeout from eventlet import Timeout
@ -40,6 +41,9 @@ class AccountAuditor(Daemon):
self.account_failures = 0 self.account_failures = 0
swift.common.db.DB_PREALLOCATION = \ swift.common.db.DB_PREALLOCATION = \
conf.get('db_preallocation', 'f').lower() in TRUE_VALUES conf.get('db_preallocation', 'f').lower() in TRUE_VALUES
self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift')
self.rcache = os.path.join(self.recon_cache_path, "account.recon")
def _one_audit_pass(self, reported): def _one_audit_pass(self, reported):
all_locs = audit_location_generator(self.devices, all_locs = audit_location_generator(self.devices,
@ -53,6 +57,12 @@ class AccountAuditor(Daemon):
{'time': time.ctime(reported), {'time': time.ctime(reported),
'passed': self.account_passes, 'passed': self.account_passes,
'failed': self.account_failures}) 'failed': self.account_failures})
self.account_audit(path)
dump_recon_cache({'account_audits_since': reported,
'account_audits_passed': self.account_passes,
'account_audits_failed':
self.account_failures},
self.rcache, self.logger)
reported = time.time() reported = time.time()
self.account_passes = 0 self.account_passes = 0
self.account_failures = 0 self.account_failures = 0
@ -75,6 +85,8 @@ class AccountAuditor(Daemon):
time.sleep(self.interval - elapsed) time.sleep(self.interval - elapsed)
self.logger.info( self.logger.info(
_('Account audit pass completed: %.02fs'), elapsed) _('Account audit pass completed: %.02fs'), elapsed)
dump_recon_cache({'account_auditor_pass_completed': elapsed},
self.rcache, self.logger)
def run_once(self, *args, **kwargs): def run_once(self, *args, **kwargs):
"""Run the account audit once.""" """Run the account audit once."""
@ -84,6 +96,8 @@ class AccountAuditor(Daemon):
elapsed = time.time() - begin elapsed = time.time() - begin
self.logger.info( self.logger.info(
_('Account audit "once" mode completed: %.02fs'), elapsed) _('Account audit "once" mode completed: %.02fs'), elapsed)
dump_recon_cache({'account_auditor_pass_completed': elapsed},
self.rcache, self.logger)
def account_audit(self, path): def account_audit(self, path):
""" """

View File

@ -31,7 +31,8 @@ from webob.exc import HTTPNotFound, HTTPNoContent, HTTPAccepted, \
import swift.common.db import swift.common.db
from swift.common.utils import get_logger, whataremyips, storage_directory, \ from swift.common.utils import get_logger, whataremyips, storage_directory, \
renamer, mkdirs, lock_parent_directory, TRUE_VALUES, unlink_older_than renamer, mkdirs, lock_parent_directory, TRUE_VALUES, unlink_older_than, \
dump_recon_cache
from swift.common import ring from swift.common import ring
from swift.common.bufferedhttp import BufferedHTTPConnection from swift.common.bufferedhttp import BufferedHTTPConnection
from swift.common.exceptions import DriveNotMounted, ConnectionTimeout from swift.common.exceptions import DriveNotMounted, ConnectionTimeout
@ -124,6 +125,11 @@ class Replicator(Daemon):
swift.common.db.DB_PREALLOCATION = \ swift.common.db.DB_PREALLOCATION = \
conf.get('db_preallocation', 'f').lower() in TRUE_VALUES conf.get('db_preallocation', 'f').lower() in TRUE_VALUES
self._zero_stats() self._zero_stats()
self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift')
self.recon_replicator = '%s.recon' % self.server_type
self.rcache = os.path.join(self.recon_cache_path,
self.recon_replicator)
def _zero_stats(self): def _zero_stats(self):
"""Zero out the stats.""" """Zero out the stats."""
@ -144,6 +150,9 @@ class Replicator(Daemon):
self.logger.info(_('Removed %(remove)d dbs') % self.stats) self.logger.info(_('Removed %(remove)d dbs') % self.stats)
self.logger.info(_('%(success)s successes, %(failure)s failures') self.logger.info(_('%(success)s successes, %(failure)s failures')
% self.stats) % self.stats)
dump_recon_cache({'replication_stats': self.stats,
'replication_time': time.time() - self.stats['start']
}, self.rcache, self.logger)
self.logger.info(' '.join(['%s:%s' % item for item in self.logger.info(' '.join(['%s:%s' % item for item in
self.stats.items() if item[0] in self.stats.items() if item[0] in
('no_change', 'hashmatch', 'rsync', 'diff', 'ts_repl', 'empty', ('no_change', 'hashmatch', 'rsync', 'diff', 'ts_repl', 'empty',

View File

@ -17,7 +17,7 @@ import errno
import os import os
from webob import Request, Response from webob import Request, Response
from swift.common.utils import split_path, get_logger from swift.common.utils import split_path, get_logger, TRUE_VALUES
from swift.common.constraints import check_mount from swift.common.constraints import check_mount
from resource import getpagesize from resource import getpagesize
from hashlib import md5 from hashlib import md5
@ -46,16 +46,41 @@ class ReconMiddleware(object):
self.devices = conf.get('devices', '/srv/node/') self.devices = conf.get('devices', '/srv/node/')
swift_dir = conf.get('swift_dir', '/etc/swift') swift_dir = conf.get('swift_dir', '/etc/swift')
self.logger = get_logger(conf, log_route='recon') self.logger = get_logger(conf, log_route='recon')
self.recon_cache_path = conf.get('recon_cache_path', \ self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift') '/var/cache/swift')
self.object_recon_cache = "%s/object.recon" % self.recon_cache_path self.object_recon_cache = os.path.join(self.recon_cache_path,
'object.recon')
self.container_recon_cache = os.path.join(self.recon_cache_path,
'container.recon')
self.account_recon_cache = os.path.join(self.recon_cache_path,
'account.recon')
self.account_ring_path = os.path.join(swift_dir, 'account.ring.gz') self.account_ring_path = os.path.join(swift_dir, 'account.ring.gz')
self.container_ring_path = os.path.join(swift_dir, 'container.ring.gz') self.container_ring_path = os.path.join(swift_dir, 'container.ring.gz')
self.object_ring_path = os.path.join(swift_dir, 'object.ring.gz') self.object_ring_path = os.path.join(swift_dir, 'object.ring.gz')
self.rings = [self.account_ring_path, self.container_ring_path, \ self.rings = [self.account_ring_path, self.container_ring_path,
self.object_ring_path] self.object_ring_path]
self.mount_check = conf.get('mount_check', 'true').lower() in \ self.mount_check = conf.get('mount_check', 'true').lower() \
('true', 't', '1', 'on', 'yes', 'y') in TRUE_VALUES
def _from_recon_cache(self, cache_keys, cache_file, openr=open):
"""retrieve values from a recon cache file
:params cache_keys: list of cache items to retrieve
:params cache_file: cache file to retrieve items from.
:params openr: open to use [for unittests]
:return: dict of cache items and their value or none if not found
"""
try:
with openr(cache_file, 'r') as f:
recondata = json.load(f)
return dict((key, recondata.get(key)) for key in cache_keys)
except IOError:
self.logger.exception(_('Error reading recon cache file'))
except ValueError:
self.logger.exception(_('Error parsing recon cache file'))
except Exception:
self.logger.exception(_('Error retrieving recon data'))
return dict((key, None) for key in cache_keys)
def get_mounted(self, openr=open): def get_mounted(self, openr=open):
"""get ALL mounted fs from /proc/mounts""" """get ALL mounted fs from /proc/mounts"""
@ -89,36 +114,73 @@ class ReconMiddleware(object):
meminfo[entry[0]] = entry[1].strip() meminfo[entry[0]] = entry[1].strip()
return meminfo return meminfo
def get_async_info(self, openr=open): def get_async_info(self):
"""get # of async pendings""" """get # of async pendings"""
asyncinfo = {} return self._from_recon_cache(['async_pending'],
with openr(self.object_recon_cache, 'r') as f: self.object_recon_cache)
recondata = json.load(f)
if 'async_pending' in recondata:
asyncinfo['async_pending'] = recondata['async_pending']
else:
self.logger.notice( \
_('NOTICE: Async pendings not in recon data.'))
asyncinfo['async_pending'] = -1
return asyncinfo
def get_replication_info(self, openr=open): def get_replication_info(self, recon_type):
"""grab last object replication time""" """get replication info"""
repinfo = {} if recon_type == 'account':
with openr(self.object_recon_cache, 'r') as f: return self._from_recon_cache(['replication_time',
recondata = json.load(f) 'replication_stats'],
if 'object_replication_time' in recondata: self.account_recon_cache)
repinfo['object_replication_time'] = \ elif recon_type == 'container':
recondata['object_replication_time'] return self._from_recon_cache(['replication_time',
else: 'replication_stats'],
self.logger.notice( \ self.container_recon_cache)
_('NOTICE: obj replication time not in recon data')) elif recon_type == 'object':
repinfo['object_replication_time'] = -1 return self._from_recon_cache(['object_replication_time'],
return repinfo self.object_recon_cache)
else:
return None
def get_device_info(self): def get_device_info(self):
"""place holder, grab dev info""" """get devices"""
return self.devices try:
return {self.devices: os.listdir(self.devices)}
except Exception:
self.logger.exception(_('Error listing devices'))
return {self.devices: None}
def get_updater_info(self, recon_type):
"""get updater info"""
if recon_type == 'container':
return self._from_recon_cache(['container_updater_sweep'],
self.container_recon_cache)
elif recon_type == 'object':
return self._from_recon_cache(['object_updater_sweep'],
self.object_recon_cache)
else:
return None
def get_expirer_info(self, recon_type):
"""get expirer info"""
if recon_type == 'object':
return self._from_recon_cache(['object_expiration_pass',
'expired_last_pass'],
self.object_recon_cache)
def get_auditor_info(self, recon_type):
"""get auditor info"""
if recon_type == 'account':
return self._from_recon_cache(['account_audits_passed',
'account_auditor_pass_completed',
'account_audits_since',
'account_audits_failed'],
self.account_recon_cache)
elif recon_type == 'container':
return self._from_recon_cache(['container_audits_passed',
'container_auditor_pass_completed',
'container_audits_since',
'container_audits_failed'],
self.container_recon_cache)
elif recon_type == 'object':
return self._from_recon_cache(['object_auditor_stats_ALL',
'object_auditor_stats_ZBF'],
self.object_recon_cache)
else:
return None
def get_unmounted(self): def get_unmounted(self):
"""list unmounted (failed?) devices""" """list unmounted (failed?) devices"""
@ -152,12 +214,18 @@ class ReconMiddleware(object):
sums = {} sums = {}
for ringfile in self.rings: for ringfile in self.rings:
md5sum = md5() md5sum = md5()
with openr(ringfile, 'rb') as f: if os.path.exists(ringfile):
block = f.read(4096) try:
while block: with openr(ringfile, 'rb') as f:
md5sum.update(block) block = f.read(4096)
block = f.read(4096) while block:
sums[ringfile] = md5sum.hexdigest() md5sum.update(block)
block = f.read(4096)
sums[ringfile] = md5sum.hexdigest()
except IOError, err:
sums[ringfile] = None
if err.errno != errno.ENOENT:
self.logger.exception(_('Error reading ringfile'))
return sums return sums
def get_quarantine_count(self): def get_quarantine_count(self):
@ -193,7 +261,7 @@ class ReconMiddleware(object):
int(tcpstats[10]) * getpagesize() int(tcpstats[10]) * getpagesize()
except IOError as e: except IOError as e:
if e.errno != errno.ENOENT: if e.errno != errno.ENOENT:
raise raise
try: try:
with openr('/proc/net/sockstat6', 'r') as proc_sockstat6: with openr('/proc/net/sockstat6', 'r') as proc_sockstat6:
for entry in proc_sockstat6: for entry in proc_sockstat6:
@ -205,54 +273,50 @@ class ReconMiddleware(object):
return sockstat return sockstat
def GET(self, req): def GET(self, req):
error = False root, rcheck, rtype = split_path(req.path, 1, 3, True)
root, type = split_path(req.path, 1, 2, False) all_rtypes = ['account', 'container', 'object']
try: if rcheck == "mem":
if type == "mem": content = self.get_mem()
content = json.dumps(self.get_mem()) elif rcheck == "load":
elif type == "load": content = self.get_load()
content = json.dumps(self.get_load(), sort_keys=True) elif rcheck == "async":
elif type == "async": content = self.get_async_info()
try: elif rcheck == 'replication' and rtype in all_rtypes:
content = json.dumps(self.get_async_info()) content = self.get_replication_info(rtype)
except IOError as e: elif rcheck == 'replication' and rtype is None:
error = True #handle old style object replication requests
content = "async - %s" % e content = self.get_replication_info('object')
elif type == "replication": elif rcheck == "devices":
try: content = self.get_device_info()
content = json.dumps(self.get_replication_info()) elif rcheck == "updater" and rtype in ['container', 'object']:
except IOError as e: content = self.get_updater_info(rtype)
error = True elif rcheck == "auditor" and rtype in all_rtypes:
content = "replication - %s" % e content = self.get_auditor_info(rtype)
elif type == "mounted": elif rcheck == "expirer" and rtype == 'object':
content = json.dumps(self.get_mounted()) content = self.get_expirer_info(rtype)
elif type == "unmounted": elif rcheck == "mounted":
content = json.dumps(self.get_unmounted()) content = self.get_mounted()
elif type == "diskusage": elif rcheck == "unmounted":
content = json.dumps(self.get_diskusage()) content = self.get_unmounted()
elif type == "ringmd5": elif rcheck == "diskusage":
content = json.dumps(self.get_ring_md5()) content = self.get_diskusage()
elif type == "quarantined": elif rcheck == "ringmd5":
content = json.dumps(self.get_quarantine_count()) content = self.get_ring_md5()
elif type == "sockstat": elif rcheck == "quarantined":
content = json.dumps(self.get_socket_info()) content = self.get_quarantine_count()
else: elif rcheck == "sockstat":
content = "Invalid path: %s" % req.path content = self.get_socket_info()
return Response(request=req, status="400 Bad Request", \
body=content, content_type="text/plain")
except ValueError as e:
error = True
content = "ValueError: %s" % e
if not error:
return Response(request=req, body=content, \
content_type="application/json")
else: else:
msg = 'CRITICAL recon - %s' % str(content) content = "Invalid path: %s" % req.path
self.logger.critical(msg) return Response(request=req, status="404 Not Found",
body = "Internal server error." body=content, content_type="text/plain")
return Response(request=req, status="500 Server Error", \ if content:
body=body, content_type="text/plain") return Response(request=req, body=json.dumps(content),
content_type="application/json")
else:
return Response(request=req, status="500 Server Error",
body="Internal server error.",
content_type="text/plain")
def __call__(self, env, start_response): def __call__(self, env, start_response):
req = Request(env) req = Request(env)

View File

@ -41,7 +41,7 @@ import glob
from urlparse import urlparse as stdlib_urlparse, ParseResult from urlparse import urlparse as stdlib_urlparse, ParseResult
import eventlet import eventlet
from eventlet import GreenPool, sleep from eventlet import GreenPool, sleep, Timeout
from eventlet.green import socket, threading from eventlet.green import socket, threading
import netifaces import netifaces
@ -1184,35 +1184,39 @@ def human_readable(value):
return '%d%si' % (round(value), suffixes[index]) return '%d%si' % (round(value), suffixes[index])
def dump_recon_cache(cache_key, cache_value, cache_file, lock_timeout=2): def dump_recon_cache(cache_dict, cache_file, logger, lock_timeout=2):
"""Update recon cache values """Update recon cache values
:param cache_key: key to update :param cache_dict: Dictionary of cache key/value pairs to write out
:param cache_value: value you want to set key too
:param cache_file: cache file to update :param cache_file: cache file to update
:param logger: the logger to use to log an encountered error
:param lock_timeout: timeout (in seconds) :param lock_timeout: timeout (in seconds)
""" """
with lock_file(cache_file, lock_timeout, unlink=False) as cf: try:
cache_entry = {} with lock_file(cache_file, lock_timeout, unlink=False) as cf:
try: cache_entry = {}
existing_entry = cf.readline()
if existing_entry:
cache_entry = json.loads(existing_entry)
except ValueError:
#file doesn't have a valid entry, we'll recreate it
pass
cache_entry[cache_key] = cache_value
try:
with NamedTemporaryFile(dir=os.path.dirname(cache_file),
delete=False) as tf:
tf.write(json.dumps(cache_entry) + '\n')
os.rename(tf.name, cache_file)
finally:
try: try:
os.unlink(tf.name) existing_entry = cf.readline()
except OSError, err: if existing_entry:
if err.errno != errno.ENOENT: cache_entry = json.loads(existing_entry)
raise except ValueError:
#file doesn't have a valid entry, we'll recreate it
pass
for cache_key, cache_value in cache_dict.items():
cache_entry[cache_key] = cache_value
try:
with NamedTemporaryFile(dir=os.path.dirname(cache_file),
delete=False) as tf:
tf.write(json.dumps(cache_entry) + '\n')
os.rename(tf.name, cache_file)
finally:
try:
os.unlink(tf.name)
except OSError, err:
if err.errno != errno.ENOENT:
raise
except (Exception, Timeout):
logger.exception(_('Exception dumping recon cache'))
def listdir(path): def listdir(path):

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
import time import time
from random import random from random import random
@ -22,7 +23,7 @@ import swift.common.db
from swift.container import server as container_server from swift.container import server as container_server
from swift.common.db import ContainerBroker from swift.common.db import ContainerBroker
from swift.common.utils import get_logger, audit_location_generator, \ from swift.common.utils import get_logger, audit_location_generator, \
TRUE_VALUES TRUE_VALUES, dump_recon_cache
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
@ -36,11 +37,13 @@ class ContainerAuditor(Daemon):
self.mount_check = conf.get('mount_check', 'true').lower() in \ self.mount_check = conf.get('mount_check', 'true').lower() in \
('true', 't', '1', 'on', 'yes', 'y') ('true', 't', '1', 'on', 'yes', 'y')
self.interval = int(conf.get('interval', 1800)) self.interval = int(conf.get('interval', 1800))
swift_dir = conf.get('swift_dir', '/etc/swift')
self.container_passes = 0 self.container_passes = 0
self.container_failures = 0 self.container_failures = 0
swift.common.db.DB_PREALLOCATION = \ swift.common.db.DB_PREALLOCATION = \
conf.get('db_preallocation', 'f').lower() in TRUE_VALUES conf.get('db_preallocation', 'f').lower() in TRUE_VALUES
self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift')
self.rcache = os.path.join(self.recon_cache_path, "container.recon")
def _one_audit_pass(self, reported): def _one_audit_pass(self, reported):
all_locs = audit_location_generator(self.devices, all_locs = audit_location_generator(self.devices,
@ -56,6 +59,12 @@ class ContainerAuditor(Daemon):
{'time': time.ctime(reported), {'time': time.ctime(reported),
'pass': self.container_passes, 'pass': self.container_passes,
'fail': self.container_failures}) 'fail': self.container_failures})
dump_recon_cache({'container_audits_since': reported,
'container_audits_passed':
self.container_passes,
'container_audits_failed':
self.container_failures},
self.rcache, self.logger)
reported = time.time() reported = time.time()
self.container_passes = 0 self.container_passes = 0
self.container_failures = 0 self.container_failures = 0
@ -78,6 +87,8 @@ class ContainerAuditor(Daemon):
time.sleep(self.interval - elapsed) time.sleep(self.interval - elapsed)
self.logger.info( self.logger.info(
_('Container audit pass completed: %.02fs'), elapsed) _('Container audit pass completed: %.02fs'), elapsed)
dump_recon_cache({'container_auditor_pass_completed': elapsed},
self.rcache, self.logger)
def run_once(self, *args, **kwargs): def run_once(self, *args, **kwargs):
"""Run the container audit once.""" """Run the container audit once."""
@ -87,6 +98,8 @@ class ContainerAuditor(Daemon):
elapsed = time.time() - begin elapsed = time.time() - begin
self.logger.info( self.logger.info(
_('Container audit "once" mode completed: %.02fs'), elapsed) _('Container audit "once" mode completed: %.02fs'), elapsed)
dump_recon_cache({'container_auditor_pass_completed': elapsed},
self.recon_container)
def container_audit(self, path): def container_audit(self, path):
""" """

View File

@ -29,7 +29,7 @@ from swift.common.bufferedhttp import http_connect
from swift.common.db import ContainerBroker from swift.common.db import ContainerBroker
from swift.common.exceptions import ConnectionTimeout from swift.common.exceptions import ConnectionTimeout
from swift.common.ring import Ring from swift.common.ring import Ring
from swift.common.utils import get_logger, TRUE_VALUES from swift.common.utils import get_logger, TRUE_VALUES, dump_recon_cache
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR
@ -59,6 +59,9 @@ class ContainerUpdater(Daemon):
self.new_account_suppressions = None self.new_account_suppressions = None
swift.common.db.DB_PREALLOCATION = \ swift.common.db.DB_PREALLOCATION = \
conf.get('db_preallocation', 'f').lower() in TRUE_VALUES conf.get('db_preallocation', 'f').lower() in TRUE_VALUES
self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift')
self.rcache = os.path.join(self.recon_cache_path, "container.recon")
def get_account_ring(self): def get_account_ring(self):
"""Get the account ring. Load it if it hasn't been yet.""" """Get the account ring. Load it if it hasn't been yet."""
@ -154,6 +157,8 @@ class ContainerUpdater(Daemon):
elapsed = time.time() - begin elapsed = time.time() - begin
self.logger.info(_('Container update sweep completed: %.02fs'), self.logger.info(_('Container update sweep completed: %.02fs'),
elapsed) elapsed)
dump_recon_cache({'container_updater_sweep': elapsed},
self.rcache, self.logger)
if elapsed < self.interval: if elapsed < self.interval:
time.sleep(self.interval - elapsed) time.sleep(self.interval - elapsed)
@ -175,6 +180,8 @@ class ContainerUpdater(Daemon):
'%(no_change)s with no changes'), '%(no_change)s with no changes'),
{'elapsed': elapsed, 'success': self.successes, {'elapsed': elapsed, 'success': self.successes,
'fail': self.failures, 'no_change': self.no_changes}) 'fail': self.failures, 'no_change': self.no_changes})
dump_recon_cache({'container_updater_sweep': elapsed},
self.rcache, self.logger)
def container_sweep(self, path): def container_sweep(self, path):
""" """

View File

@ -20,7 +20,7 @@ from eventlet import Timeout
from swift.obj import server as object_server from swift.obj import server as object_server
from swift.common.utils import get_logger, audit_location_generator, \ from swift.common.utils import get_logger, audit_location_generator, \
ratelimit_sleep, TRUE_VALUES ratelimit_sleep, TRUE_VALUES, dump_recon_cache
from swift.common.exceptions import AuditException, DiskFileError, \ from swift.common.exceptions import AuditException, DiskFileError, \
DiskFileNotExist DiskFileNotExist
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
@ -54,6 +54,9 @@ class AuditorWorker(object):
self.passes = 0 self.passes = 0
self.quarantines = 0 self.quarantines = 0
self.errors = 0 self.errors = 0
self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift')
self.rcache = os.path.join(self.recon_cache_path, "object.recon")
def audit_all_objects(self, mode='once'): def audit_all_objects(self, mode='once'):
self.logger.info(_('Begin object audit "%s" mode (%s)' % self.logger.info(_('Begin object audit "%s" mode (%s)' %
@ -63,7 +66,6 @@ class AuditorWorker(object):
self.total_files_processed = 0 self.total_files_processed = 0
total_quarantines = 0 total_quarantines = 0
total_errors = 0 total_errors = 0
files_running_time = 0
time_auditing = 0 time_auditing = 0
all_locs = audit_location_generator(self.devices, all_locs = audit_location_generator(self.devices,
object_server.DATADIR, object_server.DATADIR,
@ -93,6 +95,16 @@ class AuditorWorker(object):
'brate': self.bytes_processed / (now - reported), 'brate': self.bytes_processed / (now - reported),
'total': (now - begin), 'audit': time_auditing, 'total': (now - begin), 'audit': time_auditing,
'audit_rate': time_auditing / (now - begin)}) 'audit_rate': time_auditing / (now - begin)})
dump_recon_cache({'object_auditor_stats_%s' %
self.auditor_type: {
'errors': self.errors,
'passes': self.passes,
'quarantined': self.quarantines,
'bytes_processed':
self.bytes_processed,
'start_time': reported,
'audit_time': time_auditing}
}, self.rcache, self.logger)
reported = now reported = now
total_quarantines += self.quarantines total_quarantines += self.quarantines
total_errors += self.errors total_errors += self.errors

View File

@ -14,16 +14,15 @@
# limitations under the License. # limitations under the License.
from random import random from random import random
from sys import exc_info
from time import time from time import time
from urllib import quote from urllib import quote
from os.path import join
from eventlet import sleep, Timeout from eventlet import sleep, Timeout
from webob import Request
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
from swift.common.internal_client import InternalClient from swift.common.internal_client import InternalClient
from swift.common.utils import get_logger from swift.common.utils import get_logger, dump_recon_cache
from swift.common.http import HTTP_NOT_FOUND, HTTP_CONFLICT, \ from swift.common.http import HTTP_NOT_FOUND, HTTP_CONFLICT, \
HTTP_PRECONDITION_FAILED HTTP_PRECONDITION_FAILED
@ -55,6 +54,9 @@ class ObjectExpirer(Daemon):
self.report_interval = int(conf.get('report_interval') or 300) self.report_interval = int(conf.get('report_interval') or 300)
self.report_first_time = self.report_last_time = time() self.report_first_time = self.report_last_time = time()
self.report_objects = 0 self.report_objects = 0
self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift')
self.rcache = join(self.recon_cache_path, 'object.recon')
def report(self, final=False): def report(self, final=False):
""" """
@ -68,6 +70,9 @@ class ObjectExpirer(Daemon):
elapsed = time() - self.report_first_time elapsed = time() - self.report_first_time
self.logger.info(_('Pass completed in %ds; %d objects expired') % self.logger.info(_('Pass completed in %ds; %d objects expired') %
(elapsed, self.report_objects)) (elapsed, self.report_objects))
dump_recon_cache({'object_expiration_pass': elapsed,
'expired_last_pass': self.report_objects},
self.rcache, self.logger)
elif time() - self.report_last_time >= self.report_interval: elif time() - self.report_last_time >= self.report_interval:
elapsed = time() - self.report_first_time elapsed = time() - self.report_first_time
self.logger.info(_('Pass so far %ds; %d objects expired') % self.logger.info(_('Pass so far %ds; %d objects expired') %

View File

@ -32,8 +32,7 @@ from eventlet.support.greenlets import GreenletExit
from swift.common.ring import Ring from swift.common.ring import Ring
from swift.common.utils import whataremyips, unlink_older_than, lock_path, \ from swift.common.utils import whataremyips, unlink_older_than, lock_path, \
compute_eta, get_logger, write_pickle, renamer, dump_recon_cache, \ compute_eta, get_logger, write_pickle, renamer, dump_recon_cache
TRUE_VALUES
from swift.common.bufferedhttp import http_connect from swift.common.bufferedhttp import http_connect
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE
@ -247,11 +246,9 @@ class ObjectReplicator(Daemon):
self.rsync_io_timeout = conf.get('rsync_io_timeout', '30') self.rsync_io_timeout = conf.get('rsync_io_timeout', '30')
self.http_timeout = int(conf.get('http_timeout', 60)) self.http_timeout = int(conf.get('http_timeout', 60))
self.lockup_timeout = int(conf.get('lockup_timeout', 1800)) self.lockup_timeout = int(conf.get('lockup_timeout', 1800))
self.recon_enable = conf.get( self.recon_cache_path = conf.get('recon_cache_path',
'recon_enable', 'no').lower() in TRUE_VALUES '/var/cache/swift')
self.recon_cache_path = conf.get( self.rcache = os.path.join(self.recon_cache_path, "object.recon")
'recon_cache_path', '/var/cache/swift')
self.recon_object = os.path.join(self.recon_cache_path, "object.recon")
def _rsync(self, args): def _rsync(self, args):
""" """
@ -598,12 +595,8 @@ class ObjectReplicator(Daemon):
total = (time.time() - start) / 60 total = (time.time() - start) / 60
self.logger.info( self.logger.info(
_("Object replication complete. (%.02f minutes)"), total) _("Object replication complete. (%.02f minutes)"), total)
if self.recon_enable: dump_recon_cache({'object_replication_time': total},
try: self.rcache, self.logger)
dump_recon_cache('object_replication_time', total, \
self.recon_object)
except (Exception, Timeout):
self.logger.exception(_('Exception dumping recon cache'))
def run_forever(self, *args, **kwargs): def run_forever(self, *args, **kwargs):
self.logger.info(_("Starting object replicator in daemon mode.")) self.logger.info(_("Starting object replicator in daemon mode."))
@ -616,12 +609,8 @@ class ObjectReplicator(Daemon):
total = (time.time() - start) / 60 total = (time.time() - start) / 60
self.logger.info( self.logger.info(
_("Object replication complete. (%.02f minutes)"), total) _("Object replication complete. (%.02f minutes)"), total)
if self.recon_enable: dump_recon_cache({'object_replication_time': total},
try: self.rcache, self.logger)
dump_recon_cache('object_replication_time', total, \
self.recon_object)
except (Exception, Timeout):
self.logger.exception(_('Exception dumping recon cache'))
self.logger.debug(_('Replication sleeping for %s seconds.'), self.logger.debug(_('Replication sleeping for %s seconds.'),
self.run_pause) self.run_pause)
sleep(self.run_pause) sleep(self.run_pause)

View File

@ -25,7 +25,8 @@ from eventlet import patcher, Timeout
from swift.common.bufferedhttp import http_connect from swift.common.bufferedhttp import http_connect
from swift.common.exceptions import ConnectionTimeout from swift.common.exceptions import ConnectionTimeout
from swift.common.ring import Ring from swift.common.ring import Ring
from swift.common.utils import get_logger, renamer, write_pickle from swift.common.utils import get_logger, renamer, write_pickle, \
dump_recon_cache
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
from swift.obj.server import ASYNCDIR from swift.obj.server import ASYNCDIR
from swift.common.http import is_success, HTTP_NOT_FOUND, \ from swift.common.http import is_success, HTTP_NOT_FOUND, \
@ -50,6 +51,9 @@ class ObjectUpdater(Daemon):
self.conn_timeout = float(conf.get('conn_timeout', 0.5)) self.conn_timeout = float(conf.get('conn_timeout', 0.5))
self.successes = 0 self.successes = 0
self.failures = 0 self.failures = 0
self.recon_cache_path = conf.get('recon_cache_path',
'/var/cache/swift')
self.rcache = os.path.join(self.recon_cache_path, 'object.recon')
def get_container_ring(self): def get_container_ring(self):
"""Get the container ring. Load it, if it hasn't been yet.""" """Get the container ring. Load it, if it hasn't been yet."""
@ -97,6 +101,8 @@ class ObjectUpdater(Daemon):
elapsed = time.time() - begin elapsed = time.time() - begin
self.logger.info(_('Object update sweep completed: %.02fs'), self.logger.info(_('Object update sweep completed: %.02fs'),
elapsed) elapsed)
dump_recon_cache({'object_updater_sweep': elapsed},
self.rcache, self.logger)
if elapsed < self.interval: if elapsed < self.interval:
time.sleep(self.interval - elapsed) time.sleep(self.interval - elapsed)
@ -119,6 +125,8 @@ class ObjectUpdater(Daemon):
'%(elapsed).02fs, %(success)s successes, %(fail)s failures'), '%(elapsed).02fs, %(success)s successes, %(fail)s failures'),
{'elapsed': elapsed, 'success': self.successes, {'elapsed': elapsed, 'success': self.successes,
'fail': self.failures}) 'fail': self.failures})
dump_recon_cache({'object_updater_sweep': elapsed},
self.rcache, self.logger)
def object_sweep(self, device): def object_sweep(self, device):
""" """

View File

@ -30,6 +30,16 @@ class FakeApp(object):
def start_response(*args): def start_response(*args):
pass pass
class FakeFromCache(object):
def __init__(self, out=None):
self.fakeout = out
self.fakeout_calls = []
def fake_from_recon_cache(self, *args, **kwargs):
self.fakeout_calls.append((args, kwargs))
return self.fakeout
class OpenAndReadTester(object): class OpenAndReadTester(object):
def __init__(self, output_iter): def __init__(self, output_iter):
@ -93,12 +103,78 @@ class MockOS(object):
return stat_result(self.lstat_output_tuple) return stat_result(self.lstat_output_tuple)
class FakeRecon(object):
def __init__(self):
self.fake_replication_rtype = None
self.fake_updater_rtype = None
self.fake_auditor_rtype = None
self.fake_expirer_rtype = None
def fake_mem(self):
return {'memtest': "1"}
def fake_load(self):
return {'loadtest': "1"}
def fake_async(self):
return {'asynctest': "1"}
def fake_get_device_info(self):
return {"/srv/1/node": ["sdb1"]}
def fake_replication(self, recon_type):
self.fake_replication_rtype = recon_type
return {'replicationtest': "1"}
def fake_updater(self, recon_type):
self.fake_updater_rtype = recon_type
return {'updatertest': "1"}
def fake_auditor(self, recon_type):
self.fake_auditor_rtype = recon_type
return {'auditortest': "1"}
def fake_expirer(self, recon_type):
self.fake_expirer_rtype = recon_type
return {'expirertest': "1"}
def fake_mounted(self):
return {'mountedtest': "1"}
def fake_unmounted(self):
return {'unmountedtest': "1"}
def fake_diskusage(self):
return {'diskusagetest': "1"}
def fake_ringmd5(self):
return {'ringmd5test': "1"}
def fake_quarantined(self):
return {'quarantinedtest': "1"}
def fake_sockstat(self):
return {'sockstattest': "1"}
def nocontent(self):
return None
def raise_IOError(self, *args, **kwargs):
raise IOError
def raise_ValueError(self, *args, **kwargs):
raise ValueError
def raise_Exception(self, *args, **kwargs):
raise Exception
class TestReconSuccess(TestCase): class TestReconSuccess(TestCase):
def setUp(self): def setUp(self):
self.app = recon.ReconMiddleware(FakeApp(), {}) self.app = recon.ReconMiddleware(FakeApp(), {})
self.mockos = MockOS() self.mockos = MockOS()
self.fakecache = FakeFromCache()
self.real_listdir = os.listdir self.real_listdir = os.listdir
self.real_path_exists = os.path.exists self.real_path_exists = os.path.exists
self.real_lstat = os.lstat self.real_lstat = os.lstat
@ -107,6 +183,9 @@ class TestReconSuccess(TestCase):
os.path.exists = self.mockos.fake_path_exists os.path.exists = self.mockos.fake_path_exists
os.lstat = self.mockos.fake_lstat os.lstat = self.mockos.fake_lstat
os.statvfs = self.mockos.fake_statvfs os.statvfs = self.mockos.fake_statvfs
self.real_from_cache = self.app._from_recon_cache
self.app._from_recon_cache = self.fakecache.fake_from_recon_cache
self.frecon = FakeRecon()
def tearDown(self): def tearDown(self):
os.listdir = self.real_listdir os.listdir = self.real_listdir
@ -114,6 +193,42 @@ class TestReconSuccess(TestCase):
os.lstat = self.real_lstat os.lstat = self.real_lstat
os.statvfs = self.real_statvfs os.statvfs = self.real_statvfs
del self.mockos del self.mockos
self.app._from_recon_cache = self.real_from_cache
del self.fakecache
def test_from_recon_cache(self):
oart = OpenAndReadTester(['{"notneeded": 5, "testkey1": "canhazio"}'])
self.app._from_recon_cache = self.real_from_cache
rv = self.app._from_recon_cache(['testkey1', 'notpresentkey'],
'test.cache', openr=oart.open)
self.assertEquals(oart.read_calls, [((), {})])
self.assertEquals(oart.open_calls, [(('test.cache', 'r'), {})])
self.assertEquals(rv, {'notpresentkey': None, 'testkey1': 'canhazio'})
self.app._from_recon_cache = self.fakecache.fake_from_recon_cache
def test_from_recon_cache_ioerror(self):
oart = self.frecon.raise_IOError
self.app._from_recon_cache = self.real_from_cache
rv = self.app._from_recon_cache(['testkey1', 'notpresentkey'],
'test.cache', openr=oart)
self.assertEquals(rv, {'notpresentkey': None, 'testkey1': None})
self.app._from_recon_cache = self.fakecache.fake_from_recon_cache
def test_from_recon_cache_valueerror(self):
oart = self.frecon.raise_ValueError
self.app._from_recon_cache = self.real_from_cache
rv = self.app._from_recon_cache(['testkey1', 'notpresentkey'],
'test.cache', openr=oart)
self.assertEquals(rv, {'notpresentkey': None, 'testkey1': None})
self.app._from_recon_cache = self.fakecache.fake_from_recon_cache
def test_from_recon_cache_exception(self):
oart = self.frecon.raise_Exception
self.app._from_recon_cache = self.real_from_cache
rv = self.app._from_recon_cache(['testkey1', 'notpresentkey'],
'test.cache', openr=oart)
self.assertEquals(rv, {'notpresentkey': None, 'testkey1': None})
self.app._from_recon_cache = self.fakecache.fake_from_recon_cache
def test_get_mounted(self): def test_get_mounted(self):
mounts_content = ['rootfs / rootfs rw 0 0', mounts_content = ['rootfs / rootfs rw 0 0',
@ -255,40 +370,166 @@ class TestReconSuccess(TestCase):
self.assertEquals(rv, meminfo_resp) self.assertEquals(rv, meminfo_resp)
def test_get_async_info(self): def test_get_async_info(self):
obj_recon_content = """{"object_replication_time": 200.0, "async_pending": 5}""" from_cache_response = {'async_pending': 5}
oart = OpenAndReadTester([obj_recon_content]) self.fakecache.fakeout = from_cache_response
rv = self.app.get_async_info(openr=oart.open) rv = self.app.get_async_info()
self.assertEquals(oart.read_calls, [((), {})])
self.assertEquals(oart.open_calls, [(('/var/cache/swift/object.recon', 'r'), {})])
self.assertEquals(rv, {'async_pending': 5}) self.assertEquals(rv, {'async_pending': 5})
def test_get_async_info_empty_file(self): def test_get_replication_info_account(self):
obj_recon_content = """{"object_replication_time": 200.0}""" from_cache_response = {"replication_stats": {
oart = OpenAndReadTester([obj_recon_content]) "attempted": 1, "diff": 0,
rv = self.app.get_async_info(openr=oart.open) "diff_capped": 0, "empty": 0,
self.assertEquals(oart.read_calls, [((), {})]) "failure": 0, "hashmatch": 0,
self.assertEquals(oart.open_calls, [(('/var/cache/swift/object.recon', 'r'), {})]) "no_change": 2, "remote_merge": 0,
self.assertEquals(rv, {'async_pending': -1}) "remove": 0, "rsync": 0,
"start": 1333044050.855202,
"success": 2, "ts_repl": 0 },
"replication_time": 0.2615511417388916}
self.fakecache.fakeout = from_cache_response
rv = self.app.get_replication_info('account')
self.assertEquals(self.fakecache.fakeout_calls,
[((['replication_time', 'replication_stats'],
'/var/cache/swift/account.recon'), {})])
self.assertEquals(rv, {"replication_stats": {
"attempted": 1, "diff": 0,
"diff_capped": 0, "empty": 0,
"failure": 0, "hashmatch": 0,
"no_change": 2, "remote_merge": 0,
"remove": 0, "rsync": 0,
"start": 1333044050.855202,
"success": 2, "ts_repl": 0 },
"replication_time": 0.2615511417388916})
def test_get_replication_info(self): def test_get_replication_info_container(self):
obj_recon_content = """{"object_replication_time": 200.0, "async_pending": 5}""" from_cache_response = {"replication_time": 200.0,
oart = OpenAndReadTester([obj_recon_content]) "replication_stats": {
rv = self.app.get_replication_info(openr=oart.open) "attempted": 179, "diff": 0,
self.assertEquals(oart.read_calls, [((), {})]) "diff_capped": 0, "empty": 0,
self.assertEquals(oart.open_calls, [(('/var/cache/swift/object.recon', 'r'), {})]) "failure": 0, "hashmatch": 0,
"no_change": 358, "remote_merge": 0,
"remove": 0, "rsync": 0,
"start": 5.5, "success": 358,
"ts_repl": 0}}
self.fakecache.fakeout_calls = []
self.fakecache.fakeout = from_cache_response
rv = self.app.get_replication_info('container')
self.assertEquals(self.fakecache.fakeout_calls,
[((['replication_time', 'replication_stats'],
'/var/cache/swift/container.recon'), {})])
self.assertEquals(rv, {"replication_time": 200.0,
"replication_stats": {
"attempted": 179, "diff": 0,
"diff_capped": 0, "empty": 0,
"failure": 0, "hashmatch": 0,
"no_change": 358, "remote_merge": 0,
"remove": 0, "rsync": 0,
"start": 5.5, "success": 358,
"ts_repl": 0}})
def test_get_replication_object(self):
from_cache_response = {"object_replication_time": 200.0}
self.fakecache.fakeout_calls = []
self.fakecache.fakeout = from_cache_response
rv = self.app.get_replication_info('object')
self.assertEquals(self.fakecache.fakeout_calls,
[((['object_replication_time'],
'/var/cache/swift/object.recon'), {})])
self.assertEquals(rv, {'object_replication_time': 200.0}) self.assertEquals(rv, {'object_replication_time': 200.0})
def test_get_replication_info_empty_file(self): def test_get_updater_info_container(self):
obj_recon_content = """{"async_pending": 5}""" from_cache_response = {"container_updater_sweep": 18.476239919662476}
oart = OpenAndReadTester([obj_recon_content]) self.fakecache.fakeout_calls = []
rv = self.app.get_replication_info(openr=oart.open) self.fakecache.fakeout = from_cache_response
self.assertEquals(oart.read_calls, [((), {})]) rv = self.app.get_updater_info('container')
self.assertEquals(oart.open_calls, [(('/var/cache/swift/object.recon', 'r'), {})]) self.assertEquals(self.fakecache.fakeout_calls,
self.assertEquals(rv, {'object_replication_time': -1}) [((['container_updater_sweep'],
'/var/cache/swift/container.recon'), {})])
self.assertEquals(rv, {"container_updater_sweep": 18.476239919662476})
def test_get_device_info(self): def test_get_updater_info_object(self):
rv = self.app.get_device_info() from_cache_response = {"object_updater_sweep": 0.79848217964172363}
self.assertEquals(rv, '/srv/node/') self.fakecache.fakeout_calls = []
self.fakecache.fakeout = from_cache_response
rv = self.app.get_updater_info('object')
self.assertEquals(self.fakecache.fakeout_calls,
[((['object_updater_sweep'],
'/var/cache/swift/object.recon'), {})])
self.assertEquals(rv, {"object_updater_sweep": 0.79848217964172363})
def test_get_auditor_info_account(self):
from_cache_response = {"account_auditor_pass_completed": 0.24,
"account_audits_failed": 0,
"account_audits_passed": 6,
"account_audits_since": "1333145374.1373529"}
self.fakecache.fakeout_calls = []
self.fakecache.fakeout = from_cache_response
rv = self.app.get_auditor_info('account')
self.assertEquals(self.fakecache.fakeout_calls,
[((['account_audits_passed',
'account_auditor_pass_completed',
'account_audits_since',
'account_audits_failed'],
'/var/cache/swift/account.recon'), {})])
self.assertEquals(rv, {"account_auditor_pass_completed": 0.24,
"account_audits_failed": 0,
"account_audits_passed": 6,
"account_audits_since": "1333145374.1373529"})
def test_get_auditor_info_container(self):
from_cache_response = {"container_auditor_pass_completed": 0.24,
"container_audits_failed": 0,
"container_audits_passed": 6,
"container_audits_since": "1333145374.1373529"}
self.fakecache.fakeout_calls = []
self.fakecache.fakeout = from_cache_response
rv = self.app.get_auditor_info('container')
self.assertEquals(self.fakecache.fakeout_calls,
[((['container_audits_passed',
'container_auditor_pass_completed',
'container_audits_since',
'container_audits_failed'],
'/var/cache/swift/container.recon'), {})])
self.assertEquals(rv, {"container_auditor_pass_completed": 0.24,
"container_audits_failed": 0,
"container_audits_passed": 6,
"container_audits_since": "1333145374.1373529"})
def test_get_auditor_info_object(self):
from_cache_response = {"object_auditor_stats_ALL": {
"audit_time": 115.14418768882751,
"bytes_processed": 234660,
"completed": 115.4512460231781,
"errors": 0,
"files_processed": 2310,
"quarantined": 0 },
"object_auditor_stats_ZBF": {
"audit_time": 45.877294063568115,
"bytes_processed": 0,
"completed": 46.181446075439453,
"errors": 0,
"files_processed": 2310,
"quarantined": 0 }}
self.fakecache.fakeout_calls = []
self.fakecache.fakeout = from_cache_response
rv = self.app.get_auditor_info('object')
self.assertEquals(self.fakecache.fakeout_calls,
[((['object_auditor_stats_ALL',
'object_auditor_stats_ZBF'],
'/var/cache/swift/object.recon'), {})])
self.assertEquals(rv, {"object_auditor_stats_ALL": {
"audit_time": 115.14418768882751,
"bytes_processed": 234660,
"completed": 115.4512460231781,
"errors": 0,
"files_processed": 2310,
"quarantined": 0 },
"object_auditor_stats_ZBF": {
"audit_time": 45.877294063568115,
"bytes_processed": 0,
"completed": 46.181446075439453,
"errors": 0,
"files_processed": 2310,
"quarantined": 0 }})
def test_get_unmounted(self): def test_get_unmounted(self):
@ -319,7 +560,8 @@ class TestReconSuccess(TestCase):
self.mockos.statvfs_output=statvfs_content self.mockos.statvfs_output=statvfs_content
self.mockos.path_exists_output=True self.mockos.path_exists_output=True
rv = self.app.get_diskusage() rv = self.app.get_diskusage()
self.assertEquals(self.mockos.statvfs_calls,[(('/srv/node/canhazdrive1',), {})]) self.assertEquals(self.mockos.statvfs_calls,
[(('/srv/node/canhazdrive1',), {})])
self.assertEquals(rv, du_resp) self.assertEquals(rv, du_resp)
def test_get_diskusage_checkmount_fail(self): def test_get_diskusage_checkmount_fail(self):
@ -329,11 +571,12 @@ class TestReconSuccess(TestCase):
self.mockos.path_exists_output=False self.mockos.path_exists_output=False
rv = self.app.get_diskusage() rv = self.app.get_diskusage()
self.assertEquals(self.mockos.listdir_calls,[(('/srv/node/',), {})]) self.assertEquals(self.mockos.listdir_calls,[(('/srv/node/',), {})])
self.assertEquals(self.mockos.path_exists_calls,[(('/srv/node/canhazdrive1',), {})]) self.assertEquals(self.mockos.path_exists_calls,
[(('/srv/node/canhazdrive1',), {})])
self.assertEquals(rv, du_resp) self.assertEquals(rv, du_resp)
def test_get_quarantine_count(self): def test_get_quarantine_count(self):
#posix.lstat_result(st_mode=1, st_ino=2, st_dev=3, st_nlink=4, #posix.lstat_result(st_mode=1, st_ino=2, st_dev=3, st_nlink=4,
# st_uid=5, st_gid=6, st_size=7, st_atime=8, # st_uid=5, st_gid=6, st_size=7, st_atime=8,
# st_mtime=9, st_ctime=10) # st_mtime=9, st_ctime=10)
lstat_content = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) lstat_content = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
@ -352,63 +595,28 @@ class TestReconSuccess(TestCase):
sockstat6_content = ['TCP6: inuse 1', sockstat6_content = ['TCP6: inuse 1',
'UDP6: inuse 3', 'UDP6: inuse 3',
'UDPLITE6: inuse 0', 'UDPLITE6: inuse 0',
'RAW6: inuse 0', 'RAW6: inuse 0',
'FRAG6: inuse 0 memory 0', 'FRAG6: inuse 0 memory 0',
''] '']
oart = OpenAndReadTester(sockstat_content) oart = OpenAndReadTester(sockstat_content)
rv = self.app.get_socket_info(openr=oart.open) rv = self.app.get_socket_info(openr=oart.open)
self.assertEquals(oart.open_calls, [(('/proc/net/sockstat', 'r'), {}), self.assertEquals(oart.open_calls, [(('/proc/net/sockstat', 'r'), {}),
(('/proc/net/sockstat6', 'r'), {})]) (('/proc/net/sockstat6', 'r'), {})])
#todo verify parsed result of sockstat6
#self.assertEquals(rv, {'time_wait': 0, 'tcp_in_use': 30, 'orphan': 0, 'tcp_mem_allocated_bytes': 0})
class FakeRecon(object): class TestReconMiddleware(unittest.TestCase):
def fake_mem(self):
return {'memtest': "1"}
def fake_load(self):
return {'loadtest': "1"}
def fake_async(self):
return {'asynctest': "1"}
def fake_replication(self):
return {'replicationtest': "1"}
def fake_mounted(self):
return {'mountedtest': "1"}
def fake_unmounted(self):
return {'unmountedtest': "1"}
def fake_diskusage(self):
return {'diskusagetest': "1"}
def fake_ringmd5(self):
return {'ringmd5test': "1"}
def fake_quarantined(self):
return {'quarantinedtest': "1"}
def fake_sockstat(self):
return {'sockstattest': "1"}
def raise_IOError(self):
raise IOError
def raise_ValueError(self):
raise ValueError
class TestHealthCheck(unittest.TestCase):
def setUp(self): def setUp(self):
self.frecon = FakeRecon() self.frecon = FakeRecon()
self.app = recon.ReconMiddleware(FakeApp(), {}) self.app = recon.ReconMiddleware(FakeApp(), {'object_recon': "true"})
#self.app.object_recon = True
self.app.get_mem = self.frecon.fake_mem self.app.get_mem = self.frecon.fake_mem
self.app.get_load = self.frecon.fake_load self.app.get_load = self.frecon.fake_load
self.app.get_async_info = self.frecon.fake_async self.app.get_async_info = self.frecon.fake_async
self.app.get_device_info = self.frecon.fake_get_device_info
self.app.get_replication_info = self.frecon.fake_replication self.app.get_replication_info = self.frecon.fake_replication
self.app.get_auditor_info = self.frecon.fake_auditor
self.app.get_updater_info = self.frecon.fake_updater
self.app.get_expirer_info = self.frecon.fake_expirer
self.app.get_mounted = self.frecon.fake_mounted self.app.get_mounted = self.frecon.fake_mounted
self.app.get_unmounted = self.frecon.fake_unmounted self.app.get_unmounted = self.frecon.fake_unmounted
self.app.get_diskusage = self.frecon.fake_diskusage self.app.get_diskusage = self.frecon.fake_diskusage
@ -434,75 +642,185 @@ class TestHealthCheck(unittest.TestCase):
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_async_resp) self.assertEquals(resp, get_async_resp)
def test_recon_get_async_ioerror(self): def test_get_device_info(self):
orig = self.app.get_async_info get_device_resp = ['{"/srv/1/node": ["sdb1"]}']
self.app.get_async_info = self.frecon.raise_IOError req = Request.blank('/recon/devices',
req = Request.blank('/recon/async', environ={'REQUEST_METHOD': 'GET'}) environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.app.get_async_info = orig self.assertEquals(resp, get_device_resp)
self.assertEquals(resp, ['Internal server error.'])
def test_recon_get_replication(self): def test_recon_get_replication_notype(self):
get_replication_resp = ['{"replicationtest": "1"}'] get_replication_resp = ['{"replicationtest": "1"}']
req = Request.blank('/recon/replication', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/recon/replication',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_replication_resp) self.assertEquals(resp, get_replication_resp)
self.assertEquals(self.frecon.fake_replication_rtype, 'object')
self.frecon.fake_replication_rtype = None
def test_recon_get_replication_ioerror(self): def test_recon_get_replication_all(self):
orig = self.app.get_replication_info get_replication_resp = ['{"replicationtest": "1"}']
self.app.get_replication_info = self.frecon.raise_IOError #test account
req = Request.blank('/recon/replication', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/recon/replication/account',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.app.get_async_info = orig self.assertEquals(resp, get_replication_resp)
self.assertEquals(resp, ['Internal server error.']) self.assertEquals(self.frecon.fake_replication_rtype, 'account')
self.frecon.fake_replication_rtype = None
#test container
req = Request.blank('/recon/replication/container',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_replication_resp)
self.assertEquals(self.frecon.fake_replication_rtype, 'container')
self.frecon.fake_replication_rtype = None
#test object
req = Request.blank('/recon/replication/object',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_replication_resp)
self.assertEquals(self.frecon.fake_replication_rtype, 'object')
self.frecon.fake_replication_rtype = None
def test_recon_get_auditor_invalid(self):
get_auditor_resp = ['Invalid path: /recon/auditor/invalid']
req = Request.blank('/recon/auditor/invalid',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_auditor_resp)
def test_recon_get_auditor_notype(self):
get_auditor_resp = ['Invalid path: /recon/auditor']
req = Request.blank('/recon/auditor',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_auditor_resp)
def test_recon_get_auditor_all(self):
get_auditor_resp = ['{"auditortest": "1"}']
req = Request.blank('/recon/auditor/account',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_auditor_resp)
self.assertEquals(self.frecon.fake_auditor_rtype, 'account')
self.frecon.fake_auditor_rtype = None
req = Request.blank('/recon/auditor/container',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_auditor_resp)
self.assertEquals(self.frecon.fake_auditor_rtype, 'container')
self.frecon.fake_auditor_rtype = None
req = Request.blank('/recon/auditor/object',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_auditor_resp)
self.assertEquals(self.frecon.fake_auditor_rtype, 'object')
self.frecon.fake_auditor_rtype = None
def test_recon_get_updater_invalid(self):
get_updater_resp = ['Invalid path: /recon/updater/invalid']
req = Request.blank('/recon/updater/invalid',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_updater_resp)
def test_recon_get_updater_notype(self):
get_updater_resp = ['Invalid path: /recon/updater']
req = Request.blank('/recon/updater',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_updater_resp)
def test_recon_get_updater(self):
get_updater_resp = ['{"updatertest": "1"}']
req = Request.blank('/recon/updater/container',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(self.frecon.fake_updater_rtype, 'container')
self.frecon.fake_updater_rtype = None
self.assertEquals(resp, get_updater_resp)
req = Request.blank('/recon/updater/object',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_updater_resp)
self.assertEquals(self.frecon.fake_updater_rtype, 'object')
self.frecon.fake_updater_rtype = None
def test_recon_get_expirer_invalid(self):
get_updater_resp = ['Invalid path: /recon/expirer/invalid']
req = Request.blank('/recon/expirer/invalid',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_updater_resp)
def test_recon_get_expirer_notype(self):
get_updater_resp = ['Invalid path: /recon/expirer']
req = Request.blank('/recon/expirer',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_updater_resp)
def test_recon_get_expirer_object(self):
get_expirer_resp = ['{"expirertest": "1"}']
req = Request.blank('/recon/expirer/object',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_expirer_resp)
self.assertEquals(self.frecon.fake_expirer_rtype, 'object')
self.frecon.fake_updater_rtype = None
def test_recon_get_mounted(self): def test_recon_get_mounted(self):
get_mounted_resp = ['{"mountedtest": "1"}'] get_mounted_resp = ['{"mountedtest": "1"}']
req = Request.blank('/recon/mounted', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/recon/mounted',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_mounted_resp) self.assertEquals(resp, get_mounted_resp)
def test_recon_get_unmounted(self): def test_recon_get_unmounted(self):
get_unmounted_resp = ['{"unmountedtest": "1"}'] get_unmounted_resp = ['{"unmountedtest": "1"}']
req = Request.blank('/recon/unmounted', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/recon/unmounted',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_unmounted_resp) self.assertEquals(resp, get_unmounted_resp)
def test_recon_get_diskusage(self): def test_recon_get_diskusage(self):
get_diskusage_resp = ['{"diskusagetest": "1"}'] get_diskusage_resp = ['{"diskusagetest": "1"}']
req = Request.blank('/recon/diskusage', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/recon/diskusage',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_diskusage_resp) self.assertEquals(resp, get_diskusage_resp)
def test_recon_get_ringmd5(self): def test_recon_get_ringmd5(self):
get_ringmd5_resp = ['{"ringmd5test": "1"}'] get_ringmd5_resp = ['{"ringmd5test": "1"}']
req = Request.blank('/recon/ringmd5', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/recon/ringmd5',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_ringmd5_resp) self.assertEquals(resp, get_ringmd5_resp)
def test_recon_get_quarantined(self): def test_recon_get_quarantined(self):
get_quarantined_resp = ['{"quarantinedtest": "1"}'] get_quarantined_resp = ['{"quarantinedtest": "1"}']
req = Request.blank('/recon/quarantined', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/recon/quarantined',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_quarantined_resp) self.assertEquals(resp, get_quarantined_resp)
def test_recon_get_sockstat(self): def test_recon_get_sockstat(self):
get_sockstat_resp = ['{"sockstattest": "1"}'] get_sockstat_resp = ['{"sockstattest": "1"}']
req = Request.blank('/recon/sockstat', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/recon/sockstat',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.assertEquals(resp, get_sockstat_resp) self.assertEquals(resp, get_sockstat_resp)
def test_recon_invalid_path(self): def test_recon_invalid_path(self):
req = Request.blank('/recon/invalid', environ={'REQUEST_METHOD': 'GET'}) req = Request.blank('/recon/invalid',
environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.assertEquals(resp, ['Invalid path: /recon/invalid']) self.assertEquals(resp, ['Invalid path: /recon/invalid'])
def test_recon_failed_json_dumps(self): def test_no_content(self):
orig = self.app.get_replication_info self.app.get_load = self.frecon.nocontent
self.app.get_replication_info = self.frecon.raise_ValueError req = Request.blank('/recon/load', environ={'REQUEST_METHOD': 'GET'})
req = Request.blank('/recon/replication', environ={'REQUEST_METHOD': 'GET'})
resp = self.app(req.environ, start_response) resp = self.app(req.environ, start_response)
self.app.get_async_info = orig
self.assertEquals(resp, ['Internal server error.']) self.assertEquals(resp, ['Internal server error.'])
def test_recon_pass(self): def test_recon_pass(self):

View File

@ -96,7 +96,6 @@ class TestObjectExpirer(TestCase):
x.logger = FakeLogger() x.logger = FakeLogger()
x.swift = InternalClient() x.swift = InternalClient()
x.run_once() x.run_once()
self.assertEquals(x.logger.log_dict['exception'], [])
self.assertEquals( self.assertEquals(
x.logger.log_dict['info'], x.logger.log_dict['info'],
[(('Pass beginning; 1 possible containers; ' [(('Pass beginning; 1 possible containers; '
@ -121,7 +120,9 @@ class TestObjectExpirer(TestCase):
x.logger = FakeLogger() x.logger = FakeLogger()
x.swift = InternalClient([{'name': str(int(time() + 86400))}]) x.swift = InternalClient([{'name': str(int(time() + 86400))}])
x.run_once() x.run_once()
self.assertEquals(x.logger.log_dict['exception'], []) for exccall in x.logger.log_dict['exception']:
self.assertTrue(
'This should not have been called' not in exccall[0][0])
self.assertEquals( self.assertEquals(
x.logger.log_dict['info'], x.logger.log_dict['info'],
[(('Pass beginning; 1 possible containers; ' [(('Pass beginning; 1 possible containers; '
@ -163,7 +164,9 @@ class TestObjectExpirer(TestCase):
x.swift = InternalClient([{'name': str(int(time() - 86400))}], x.swift = InternalClient([{'name': str(int(time() - 86400))}],
[{'name': '%d-actual-obj' % int(time() + 86400)}]) [{'name': '%d-actual-obj' % int(time() + 86400)}])
x.run_once() x.run_once()
self.assertEquals(x.logger.log_dict['exception'], []) for exccall in x.logger.log_dict['exception']:
self.assertTrue(
'This should not have been called' not in exccall[0][0])
self.assertEquals(x.logger.log_dict['info'], self.assertEquals(x.logger.log_dict['info'],
[(('Pass beginning; 1 possible containers; ' [(('Pass beginning; 1 possible containers; '
'2 possible objects',), {}), '2 possible objects',), {}),
@ -177,10 +180,13 @@ class TestObjectExpirer(TestCase):
[{'name': '%d-actual-obj' % ts}]) [{'name': '%d-actual-obj' % ts}])
x.delete_actual_object = should_not_be_called x.delete_actual_object = should_not_be_called
x.run_once() x.run_once()
self.assertEquals(x.logger.log_dict['exception'], excswhiledeleting = []
[(('Exception while deleting object %d %d-actual-obj ' for exccall in x.logger.log_dict['exception']:
'This should not have been called' % (ts, ts),), {}, if exccall[0][0].startswith('Exception while deleting '):
'This should not have been called')]) excswhiledeleting.append(exccall[0][0])
self.assertEquals(excswhiledeleting,
['Exception while deleting object %d %d-actual-obj '
'This should not have been called' % (ts, ts)])
def test_failed_delete_keeps_entry(self): def test_failed_delete_keeps_entry(self):
class InternalClient(object): class InternalClient(object):
@ -217,10 +223,13 @@ class TestObjectExpirer(TestCase):
x.swift = InternalClient([{'name': str(int(time() - 86400))}], x.swift = InternalClient([{'name': str(int(time() - 86400))}],
[{'name': '%d-actual-obj' % ts}]) [{'name': '%d-actual-obj' % ts}])
x.run_once() x.run_once()
self.assertEquals(x.logger.log_dict['exception'], excswhiledeleting = []
[(('Exception while deleting object %d %d-actual-obj ' for exccall in x.logger.log_dict['exception']:
'failed to delete actual object' % (ts, ts),), {}, if exccall[0][0].startswith('Exception while deleting '):
'failed to delete actual object')]) excswhiledeleting.append(exccall[0][0])
self.assertEquals(excswhiledeleting,
['Exception while deleting object %d %d-actual-obj '
'failed to delete actual object' % (ts, ts)])
self.assertEquals(x.logger.log_dict['info'], self.assertEquals(x.logger.log_dict['info'],
[(('Pass beginning; 1 possible containers; ' [(('Pass beginning; 1 possible containers; '
'2 possible objects',), {}), '2 possible objects',), {}),
@ -234,10 +243,13 @@ class TestObjectExpirer(TestCase):
x.swift = InternalClient([{'name': str(int(time() - 86400))}], x.swift = InternalClient([{'name': str(int(time() - 86400))}],
[{'name': '%d-actual-obj' % ts}]) [{'name': '%d-actual-obj' % ts}])
x.run_once() x.run_once()
self.assertEquals(x.logger.log_dict['exception'], excswhiledeleting = []
[(('Exception while deleting object %d %d-actual-obj This should ' for exccall in x.logger.log_dict['exception']:
'not have been called' % (ts, ts),), {}, if exccall[0][0].startswith('Exception while deleting '):
'This should not have been called')]) excswhiledeleting.append(exccall[0][0])
self.assertEquals(excswhiledeleting,
['Exception while deleting object %d %d-actual-obj This should '
'not have been called' % (ts, ts)])
def test_success_gets_counted(self): def test_success_gets_counted(self):
class InternalClient(object): class InternalClient(object):
@ -268,7 +280,6 @@ class TestObjectExpirer(TestCase):
[{'name': '%d-actual-obj' % int(time() - 86400)}]) [{'name': '%d-actual-obj' % int(time() - 86400)}])
x.run_once() x.run_once()
self.assertEquals(x.report_objects, 1) self.assertEquals(x.report_objects, 1)
self.assertEquals(x.logger.log_dict['exception'], [])
self.assertEquals(x.logger.log_dict['info'], self.assertEquals(x.logger.log_dict['info'],
[(('Pass beginning; 1 possible containers; ' [(('Pass beginning; 1 possible containers; '
'2 possible objects',), {}), '2 possible objects',), {}),
@ -317,25 +328,23 @@ class TestObjectExpirer(TestCase):
x.swift = InternalClient(containers, objects) x.swift = InternalClient(containers, objects)
x.delete_actual_object = fail_delete_actual_object x.delete_actual_object = fail_delete_actual_object
x.run_once() x.run_once()
self.assertEquals(x.logger.log_dict['exception'], [ excswhiledeleting = []
(('Exception while deleting object %d %d-actual-obj failed to ' for exccall in x.logger.log_dict['exception']:
'delete actual object' % (cts, ots),), {}, if exccall[0][0].startswith('Exception while deleting '):
'failed to delete actual object'), excswhiledeleting.append(exccall[0][0])
(('Exception while deleting object %d %d-next-obj failed to ' self.assertEquals(excswhiledeleting, [
'delete actual object' % (cts, ots),), {}, 'Exception while deleting object %d %d-actual-obj failed to '
'failed to delete actual object'), 'delete actual object' % (cts, ots),
(('Exception while deleting container %d failed to delete ' 'Exception while deleting object %d %d-next-obj failed to '
'container' % (cts,),), {}, 'delete actual object' % (cts, ots),
'failed to delete container'), 'Exception while deleting container %d failed to delete '
(('Exception while deleting object %d %d-actual-obj failed to ' 'container' % (cts,),
'delete actual object' % (cts + 1, ots),), {}, 'Exception while deleting object %d %d-actual-obj failed to '
'failed to delete actual object'), 'delete actual object' % (cts + 1, ots),
(('Exception while deleting object %d %d-next-obj failed to ' 'Exception while deleting object %d %d-next-obj failed to '
'delete actual object' % (cts + 1, ots),), {}, 'delete actual object' % (cts + 1, ots),
'failed to delete actual object'), 'Exception while deleting container %d failed to delete '
(('Exception while deleting container %d failed to delete ' 'container' % (cts + 1,)])
'container' % (cts + 1,),), {},
'failed to delete container')])
self.assertEquals(x.logger.log_dict['info'], self.assertEquals(x.logger.log_dict['info'],
[(('Pass beginning; 1 possible containers; ' [(('Pass beginning; 1 possible containers; '
'2 possible objects',), {}), '2 possible objects',), {}),