From c18a4e4f43b6c4a07e64006b2edc65824fb9321e Mon Sep 17 00:00:00 2001 From: Florian Hines Date: Sat, 25 Feb 2012 20:33:15 +0000 Subject: [PATCH] swift-recon cli cleanup Refactored swift-recon completely. It's broken out into Classes and no longer uses globals. In addition, I pulled out the previous individual scout_XXXX functions that where getting added for each check. All the check's now use the same method from the Scout class to obtain stats and telemetry. Change-Id: I512ab50f29e1ef4d10bd3adbf1cac642308e7cf1 --- bin/swift-recon | 792 ++++++++++++++++++++++++++---------------------- 1 file changed, 424 insertions(+), 368 deletions(-) diff --git a/bin/swift-recon b/bin/swift-recon index 1e139b725c..f94654f951 100755 --- a/bin/swift-recon +++ b/bin/swift-recon @@ -6,7 +6,11 @@ from eventlet.green import urllib2 from swift.common.ring import Ring -import simplejson as json +from urlparse import urlparse +try: + import simplejson as json +except ImportError: + import json from hashlib import md5 import datetime import eventlet @@ -14,401 +18,453 @@ import optparse import sys import os -VERBOSE = False -SUPPRESS_ERRORS = False -TIMEOUT = 5 + +class Scout(object): + """ + Obtain swift recon information + """ + + def __init__(self, recon_type, verbose=False, suppress_errors=False, + timeout=5): + recon_uri = ["ringmd5", "async", "replication", "load", "diskusage", + "unmounted", "quarantined", "sockstat"] + if recon_type not in recon_uri: + raise Exception("Invalid scout type requested") + else: + self.recon_type = recon_type + self.verbose = verbose + self.suppress_errors = suppress_errors + self.timeout = timeout + + def scout_host(self, base_url, recon_type): + """ + Perform the actual HTTP request to obtain swift recon telemtry. + + :param base_url: the base url of the host you wish to check. str of the + format 'http://127.0.0.1:6000/recon/' + :param recon_type: the swift recon check to request. + :returns: tuple of (recon url used, response body, and status) + """ + url = base_url + recon_type + try: + body = urllib2.urlopen(url, timeout=self.timeout).read() + content = json.loads(body) + if self.verbose: + print "-> %s: %s" % (url, content) + status = 200 + except urllib2.HTTPError as err: + if not self.suppress_errors or self.verbose: + print "-> %s: %s" % (url, err) + content = err + status = err.code + except urllib2.URLError as err: + if not self.suppress_errors or self.verbose: + print "-> %s: %s" % (url, err) + content = err + status = -1 + return url, content, status + + def scout(self, host): + """ + Obtain telemetry from a host running the swift recon middleware. + + :param host: host to check + :returns: tuple of (recon url used, response body, and status) + """ + base_url = "http://%s:%s/recon/" % (host[0], host[1]) + url, content, status = self.scout_host(base_url, self.recon_type) + return url, content, status -def get_devices(zone_filter, ring_file): - ring_data = Ring(ring_file) - if zone_filter: - ips = set((n['ip'], n['port']) for n in ring_data.devs if n \ - if n['zone'] == zone_filter) - else: - ips = set((n['ip'], n['port']) for n in ring_data.devs if n) - return ips +class SwiftRecon(object): + """ + Retrieve and report cluster info from hosts running recon middleware. + """ + def __init__(self): + self.verbose = False + self.suppress_errors = False + self.timeout = 5 + self.pool_size = 30 + self.pool = eventlet.GreenPool(self.pool_size) -def scout(base_url, recon_type): - global VERBOSE, SUPPRESS_ERRORS - url = base_url + recon_type - try: - body = urllib2.urlopen(url, timeout=TIMEOUT).read() - content = json.loads(body) - if VERBOSE: - print "-> %s: %s" % (url, content) - status = 200 - except urllib2.HTTPError as e: - if not SUPPRESS_ERRORS or VERBOSE: - print "-> %s: %s" % (url, e) - content = e - status = e.code - except urllib2.URLError as e: - if not SUPPRESS_ERRORS or VERBOSE: - print "-> %s: %s" % (url, e) - content = e - status = -1 - return url, content, status + def get_devices(self, zone_filter, ring_file): + """ + Get a list of hosts in the ring + :param zone_filter: Only list zones matching given filter + :param ring_file: Ring file to obtain hosts from + :returns: a set of tuples containing the ip and port of hosts + """ + ring_data = Ring(ring_file) + if zone_filter: + ips = set((n['ip'], n['port']) for n in ring_data.devs if n \ + if n['zone'] == zone_filter) + else: + ips = set((n['ip'], n['port']) for n in ring_data.devs if n) + return ips -def scout_md5(host): - base_url = "http://%s:%s/recon/" % (host[0], host[1]) - url, content, status = scout(base_url, "ringmd5") - return url, content, status + def get_ringmd5(self, hosts, ringfile): + """ + Compare ring md5sum's with those on remote host - -def scout_async(host): - base_url = "http://%s:%s/recon/" % (host[0], host[1]) - url, content, status = scout(base_url, "async") - return url, content, status - - -def scout_replication(host): - base_url = "http://%s:%s/recon/" % (host[0], host[1]) - url, content, status = scout(base_url, "replication") - return url, content, status - - -def scout_load(host): - base_url = "http://%s:%s/recon/" % (host[0], host[1]) - url, content, status = scout(base_url, "load") - return url, content, status - - -def scout_du(host): - base_url = "http://%s:%s/recon/" % (host[0], host[1]) - url, content, status = scout(base_url, "diskusage") - return url, content, status - - -def scout_umount(host): - base_url = "http://%s:%s/recon/" % (host[0], host[1]) - url, content, status = scout(base_url, "unmounted") - return url, content, status - - -def scout_quarantine(host): - base_url = "http://%s:%s/recon/" % (host[0], host[1]) - url, content, status = scout(base_url, "quarantined") - return url, content, status - - -def scout_sockstat(host): - base_url = "http://%s:%s/recon/" % (host[0], host[1]) - url, content, status = scout(base_url, "sockstat") - return url, content, status - - -def get_ringmd5(hosts, ringfile): - stats = {} - matches = 0 - errors = 0 - md5sum = md5() - with open(ringfile, 'rb') as f: - block = f.read(4096) - while block: - md5sum.update(block) + :param hosts: set of hosts to check. in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) + :param ringfile: The local ring file to compare the md5sum with. + """ + stats = {} + matches = 0 + errors = 0 + md5sum = md5() + with open(ringfile, 'rb') as f: block = f.read(4096) - ring_sum = md5sum.hexdigest() - pool = eventlet.GreenPool(20) - now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - print "[%s] Checking ring md5sum's on %s hosts..." % (now, len(hosts)) - if VERBOSE: - print "-> On disk md5sum: %s" % ring_sum - for url, response, status in pool.imap(scout_md5, hosts): - if status == 200: - #fixme - need to grab from config - stats[url] = response[ringfile] - if response[ringfile] != ring_sum: - ringsmatch = False - print "!! %s (%s) doesn't match on disk md5sum" % \ - (url, response[ringfile]) + while block: + md5sum.update(block) + block = f.read(4096) + ring_sum = md5sum.hexdigest() + recon = Scout("ringmd5", self.verbose, self.suppress_errors, + self.timeout) + now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "[%s] Checking ring md5sum's on %s hosts..." % (now, len(hosts)) + if self.verbose: + print "-> On disk md5sum: %s" % ring_sum + for url, response, status in self.pool.imap(recon.scout, hosts): + if status == 200: + stats[url] = response[ringfile] + if response[ringfile] != ring_sum: + print "!! %s (%s) doesn't match on disk md5sum" % \ + (url, response[ringfile]) + else: + matches = matches + 1 + if self.verbose: + print "-> %s matches." % url else: - matches = matches + 1 - if VERBOSE: - print "-> %s matches." % url - else: - errors = errors + 1 - print "%s/%s hosts matched, %s error[s] while checking hosts." % \ - (matches, len(hosts), errors) - print "=" * 79 + errors = errors + 1 + print "%s/%s hosts matched, %s error[s] while checking hosts." % \ + (matches, len(hosts), errors) + print "=" * 79 + def async_check(self, hosts): + """ + Obtain and print async pending statistics -def async_check(hosts): - stats = {} - pool = eventlet.GreenPool(20) - now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - print "[%s] Checking async pendings on %s hosts..." % (now, len(hosts)) - for url, response, status in pool.imap(scout_async, hosts): - if status == 200: - stats[url] = response['async_pending'] - if len(stats) > 0: - low = min(stats.values()) - high = max(stats.values()) - total = sum(stats.values()) - average = total / len(stats) - print "Async stats: low: %d, high: %d, avg: %d, total: %d" % (low, - high, average, total) - else: - print "Error: No hosts available or returned valid information." - print "=" * 79 - - -def umount_check(hosts): - stats = {} - pool = eventlet.GreenPool(20) - now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - print "[%s] Getting unmounted drives from %s hosts..." % (now, len(hosts)) - for url, response, status in pool.imap(scout_umount, hosts): - if status == 200: - for i in response: - stats[url] = i['device'] - for host in stats: - print "Not mounted: %s on %s" % (stats[host], host) - print "=" * 79 - - -def replication_check(hosts): - stats = {} - pool = eventlet.GreenPool(20) - now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - print "[%s] Checking replication times on %s hosts..." % (now, len(hosts)) - for url, response, status in pool.imap(scout_replication, hosts): - if status == 200: - stats[url] = response['object_replication_time'] - if len(stats) > 0: - low = min(stats.values()) - high = max(stats.values()) - total = sum(stats.values()) - average = total / len(stats) - print "[Replication Times] shortest: %s, longest: %s, avg: %s" % \ - (low, high, average) - else: - print "Error: No hosts available or returned valid information." - print "=" * 79 - - -def load_check(hosts): - load1 = {} - load5 = {} - load15 = {} - pool = eventlet.GreenPool(20) - now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - print "[%s] Checking load avg's on %s hosts..." % (now, len(hosts)) - for url, response, status in pool.imap(scout_load, hosts): - if status == 200: - load1[url] = response['1m'] - load5[url] = response['5m'] - load15[url] = response['15m'] - stats = {"1m": load1, "5m": load5, "15m": load15} - for item in stats: - if len(stats[item]) > 0: - low = min(stats[item].values()) - high = max(stats[item].values()) - total = sum(stats[item].values()) - average = total / len(stats[item]) - print "[%s load average] lowest: %s, highest: %s, avg: %s" % \ - (item, low, high, average) + :param hosts: set of hosts to check. in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) + """ + stats = {} + recon = Scout("async", self.verbose, self.suppress_errors, + self.timeout) + now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "[%s] Checking async pendings on %s hosts..." % (now, len(hosts)) + for url, response, status in self.pool.imap(recon.scout, hosts): + if status == 200: + stats[url] = response['async_pending'] + if len(stats) > 0: + low = min(stats.values()) + high = max(stats.values()) + total = sum(stats.values()) + average = total / len(stats) + print "Async stats: low: %d, high: %d, avg: %d, total: %d" % (low, + high, average, total) else: print "Error: No hosts available or returned valid information." - print "=" * 79 + print "=" * 79 + def umount_check(self, hosts): + """ + Check for and print unmounted drives -def quarantine_check(hosts): - objq = {} - conq = {} - acctq = {} - pool = eventlet.GreenPool(20) - now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - print "[%s] Checking quarantine dirs on %s hosts..." % (now, len(hosts)) - for url, response, status in pool.imap(scout_quarantine, hosts): - if status == 200: - objq[url] = response['objects'] - conq[url] = response['containers'] - acctq[url] = response['accounts'] - stats = {"objects": objq, "containers": conq, "accounts": acctq} - for item in stats: - if len(stats[item]) > 0: - low = min(stats[item].values()) - high = max(stats[item].values()) - total = sum(stats[item].values()) - average = total / len(stats[item]) - print "[Quarantined %s] low: %d, high: %d, avg: %d, total: %d" % \ - (item, low, high, average, total) + :param hosts: set of hosts to check. in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) + """ + stats = {} + recon = Scout("unmounted", self.verbose, self.suppress_errors, + self.timeout) + now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "[%s] Getting unmounted drives from %s hosts..." % \ + (now, len(hosts)) + for url, response, status in self.pool.imap(recon.scout, hosts): + if status == 200: + for i in response: + stats[url] = i['device'] + for host in stats: + node = urlparse(host).netloc + print "Not mounted: %s on %s" % (stats[host], node) + print "=" * 79 + + def replication_check(self, hosts): + """ + Obtain and print replication statistics + + :param hosts: set of hosts to check. in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) + """ + stats = {} + recon = Scout("replication", self.verbose, self.suppress_errors, + self.timeout) + now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "[%s] Checking replication times on %s hosts..." % \ + (now, len(hosts)) + for url, response, status in self.pool.imap(recon.scout, hosts): + if status == 200: + stats[url] = response['object_replication_time'] + if len(stats) > 0: + low = min(stats.values()) + high = max(stats.values()) + total = sum(stats.values()) + average = total / len(stats) + print "[Replication Times] shortest: %s, longest: %s, avg: %s" % \ + (low, high, average) else: print "Error: No hosts available or returned valid information." - print "=" * 79 + print "=" * 79 + def load_check(self, hosts): + """ + Obtain and print load average statistics -def socket_usage(hosts): - inuse4 = {} - mem = {} - inuse6 = {} - timewait = {} - orphan = {} - pool = eventlet.GreenPool(20) - now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - print "[%s] Checking socket usage on %s hosts..." % (now, len(hosts)) - for url, response, status in pool.imap(scout_sockstat, hosts): - if status == 200: - inuse4[url] = response['tcp_in_use'] - mem[url] = response['tcp_mem_allocated_bytes'] - inuse6[url] = response['tcp6_in_use'] - timewait[url] = response['time_wait'] - orphan[url] = response['orphan'] - stats = {"tcp_in_use": inuse4, "tcp_mem_allocated_bytes": mem, \ - "tcp6_in_use": inuse6, "time_wait": timewait, "orphan": orphan} - for item in stats: - if len(stats[item]) > 0: - low = min(stats[item].values()) - high = max(stats[item].values()) - total = sum(stats[item].values()) - average = total / len(stats[item]) - print "[%s] low: %d, high: %d, avg: %d, total: %d" % \ - (item, low, high, average, total) + :param hosts: set of hosts to check. in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) + """ + load1 = {} + load5 = {} + load15 = {} + recon = Scout("load", self.verbose, self.suppress_errors, + self.timeout) + now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "[%s] Checking load avg's on %s hosts..." % (now, len(hosts)) + for url, response, status in self.pool.imap(recon.scout, hosts): + if status == 200: + load1[url] = response['1m'] + load5[url] = response['5m'] + load15[url] = response['15m'] + stats = {"1m": load1, "5m": load5, "15m": load15} + for item in stats: + if len(stats[item]) > 0: + low = min(stats[item].values()) + high = max(stats[item].values()) + total = sum(stats[item].values()) + average = total / len(stats[item]) + print "[%s load average] lowest: %s, highest: %s, avg: %s" % \ + (item, low, high, average) + else: + print "Error: No hosts available or returned valid info." + print "=" * 79 + + def quarantine_check(self, hosts): + """ + Obtain and print quarantine statistics + + :param hosts: set of hosts to check. in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) + """ + objq = {} + conq = {} + acctq = {} + recon = Scout("quarantined", self.verbose, self.suppress_errors, + self.timeout) + now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "[%s] Checking quarantine on %s hosts..." % (now, len(hosts)) + for url, response, status in self.pool.imap(recon.scout, hosts): + if status == 200: + objq[url] = response['objects'] + conq[url] = response['containers'] + acctq[url] = response['accounts'] + stats = {"objects": objq, "containers": conq, "accounts": acctq} + for item in stats: + if len(stats[item]) > 0: + low = min(stats[item].values()) + high = max(stats[item].values()) + total = sum(stats[item].values()) + average = total / len(stats[item]) + print ("[Quarantined %s] low: %d, high: %d, avg: %d, total: %d" + % (item, low, high, average, total)) + else: + print "Error: No hosts available or returned valid info." + print "=" * 79 + + def socket_usage(self, hosts): + """ + Obtain and print /proc/net/sockstat statistics + + :param hosts: set of hosts to check. in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) + """ + inuse4 = {} + mem = {} + inuse6 = {} + timewait = {} + orphan = {} + recon = Scout("sockstat", self.verbose, self.suppress_errors, + self.timeout) + now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "[%s] Checking socket usage on %s hosts..." % (now, len(hosts)) + for url, response, status in self.pool.imap(recon.scout, hosts): + if status == 200: + inuse4[url] = response['tcp_in_use'] + mem[url] = response['tcp_mem_allocated_bytes'] + inuse6[url] = response['tcp6_in_use'] + timewait[url] = response['time_wait'] + orphan[url] = response['orphan'] + stats = {"tcp_in_use": inuse4, "tcp_mem_allocated_bytes": mem, + "tcp6_in_use": inuse6, "time_wait": timewait, + "orphan": orphan} + for item in stats: + if len(stats[item]) > 0: + low = min(stats[item].values()) + high = max(stats[item].values()) + total = sum(stats[item].values()) + average = total / len(stats[item]) + print "[%s] low: %d, high: %d, avg: %d, total: %d" % \ + (item, low, high, average, total) + else: + print "Error: No hosts or info available." + print "=" * 79 + + def disk_usage(self, hosts): + """ + Obtain and print disk usage statistics + + :param hosts: set of hosts to check. in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) + """ + stats = {} + highs = [] + lows = [] + averages = [] + percents = {} + recon = Scout("diskusage", self.verbose, self.suppress_errors, + self.timeout) + now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "[%s] Checking disk usage on %s hosts..." % (now, len(hosts)) + for url, response, status in self.pool.imap(recon.scout, hosts): + if status == 200: + hostusage = [] + for entry in response: + if entry['mounted']: + used = float(entry['used']) / float(entry['size']) \ + * 100.0 + hostusage.append(round(used, 2)) + stats[url] = hostusage + + for url in stats: + if len(stats[url]) > 0: + #get per host hi/los for another day + low = min(stats[url]) + high = max(stats[url]) + total = sum(stats[url]) + average = total / len(stats[url]) + highs.append(high) + lows.append(low) + averages.append(average) + for percent in stats[url]: + percents[int(percent)] = percents.get(int(percent), 0) + 1 + else: + print "-> %s: Error. No drive info available." % url + + if len(lows) > 0: + low = min(lows) + high = max(highs) + average = sum(averages) / len(averages) + #dist graph shamelessly stolen from https://github.com/gholt/tcod + print "Distribution Graph:" + mul = 69.0 / max(percents.values()) + for percent in sorted(percents): + print '% 3d%%%5d %s' % (percent, percents[percent], \ + '*' * int(percents[percent] * mul)) + print "Disk usage: lowest: %s%%, highest: %s%%, avg: %s%%" % \ + (low, high, average) else: - print "Error: No hosts or info available." - print "=" * 79 + print "Error: No hosts available or returned valid information." + print "=" * 79 + + def main(self): + """ + Retrieve and report cluster info from hosts running recon middleware. + """ + print "=" * 79 + usage = ''' + usage: %prog [-v] [--suppress] [-a] [-r] [-u] [-d] [-l] [--objmd5] + ''' + args = optparse.OptionParser(usage) + args.add_option('--verbose', '-v', action="store_true", + help="Print verbose info") + args.add_option('--suppress', action="store_true", + help="Suppress most connection related errors") + args.add_option('--async', '-a', action="store_true", + help="Get async stats") + args.add_option('--replication', '-r', action="store_true", + help="Get replication stats") + args.add_option('--unmounted', '-u', action="store_true", + help="Check cluster for unmounted devices") + args.add_option('--diskusage', '-d', action="store_true", + help="Get disk usage stats") + args.add_option('--loadstats', '-l', action="store_true", + help="Get cluster load average stats") + args.add_option('--quarantined', '-q', action="store_true", + help="Get cluster quarantine stats") + args.add_option('--objmd5', action="store_true", + help="Get md5sums of object.ring.gz and compare to local copy") + args.add_option('--sockstat', action="store_true", + help="Get cluster socket usage stats") + args.add_option('--all', action="store_true", + help="Perform all checks. Equal to -arudlq --objmd5 --sockstat") + args.add_option('--zone', '-z', type="int", + help="Only query servers in specified zone") + args.add_option('--timeout', '-t', type="int", metavar="SECONDS", + help="Time to wait for a response from a server", default=5) + args.add_option('--swiftdir', default="/etc/swift", + help="Default = /etc/swift") + options, arguments = args.parse_args() -def disk_usage(hosts): - stats = {} - highs = [] - lows = [] - averages = [] - percents = {} - pool = eventlet.GreenPool(20) - now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - print "[%s] Checking disk usage on %s hosts..." % (now, len(hosts)) - for url, response, status in pool.imap(scout_du, hosts): - if status == 200: - hostusage = [] - for entry in response: - if entry['mounted']: - used = float(entry['used']) / float(entry['size']) * 100.0 - hostusage.append(round(used, 2)) - stats[url] = hostusage + if len(sys.argv) <= 1: + args.print_help() + sys.exit(0) - for url in stats: - if len(stats[url]) > 0: - #get per host hi/los for another day - low = min(stats[url]) - high = max(stats[url]) - total = sum(stats[url]) - average = total / len(stats[url]) - highs.append(high) - lows.append(low) - averages.append(average) - for percent in stats[url]: - percents[int(percent)] = percents.get(int(percent), 0) + 1 + swift_dir = options.swiftdir + obj_ring = os.path.join(swift_dir, 'object.ring.gz') + + self.verbose = options.verbose + self.suppress_errors = options.suppress + self.timeout = options.timeout + + if options.zone: + hosts = self.get_devices(options.zone, obj_ring) else: - print "-> %s: Error. No drive info available." % url + hosts = self.get_devices(None, obj_ring) - if len(lows) > 0: - low = min(lows) - high = max(highs) - average = sum(averages) / len(averages) - #distrib graph shamelessly stolen from https://github.com/gholt/tcod - print "Distribution Graph:" - mul = 69.0 / max(percents.values()) - for percent in sorted(percents): - print '% 3d%% % 4d %s' % (percent, percents[percent], \ - '*' * int(percents[percent] * mul)) - - print "Disk usage: lowest: %s%%, highest: %s%%, avg: %s%%" % \ - (low, high, average) - else: - print "Error: No hosts available or returned valid information." - print "=" * 79 - - -def main(): - global VERBOSE, SUPPRESS_ERRORS, TIMEOUT, swift_dir, pool - print "=" * 79 - usage = ''' - usage: %prog [-v] [--suppress] [-a] [-r] [-u] [-d] [-l] [--objmd5] - ''' - args = optparse.OptionParser(usage) - args.add_option('--verbose', '-v', action="store_true", - help="Print verbose info") - args.add_option('--suppress', action="store_true", - help="Suppress most connection related errors") - args.add_option('--async', '-a', action="store_true", - help="Get async stats") - args.add_option('--replication', '-r', action="store_true", - help="Get replication stats") - args.add_option('--unmounted', '-u', action="store_true", - help="Check cluster for unmounted devices") - args.add_option('--diskusage', '-d', action="store_true", - help="Get disk usage stats") - args.add_option('--loadstats', '-l', action="store_true", - help="Get cluster load average stats") - args.add_option('--quarantined', '-q', action="store_true", - help="Get cluster quarantine stats") - args.add_option('--objmd5', action="store_true", - help="Get md5sums of object.ring.gz and compare to local copy") - args.add_option('--sockstat', action="store_true", - help="Get cluster socket usage stats") - args.add_option('--all', action="store_true", - help="Perform all checks. Equivalent to -arudlq --objmd5 --sockstat") - args.add_option('--zone', '-z', type="int", - help="Only query servers in specified zone") - args.add_option('--timeout', '-t', type="int", metavar="SECONDS", - help="Time to wait for a response from a server") - - args.add_option('--swiftdir', default="/etc/swift", - help="Default = /etc/swift") - options, arguments = args.parse_args() - - if len(sys.argv) <= 1: - args.print_help() - - swift_dir = options.swiftdir - obj_ring = os.path.join(swift_dir, 'object.ring.gz') - con_ring = os.path.join(swift_dir, 'container.ring.gz') - acct_ring = os.path.join(swift_dir, 'account.ring.gz') - - VERBOSE = options.verbose - SUPPRESS_ERRORS = options.suppress - - if options.zone: - hosts = get_devices(options.zone, obj_ring) - else: - hosts = get_devices(None, obj_ring) - - if options.timeout: - TIMEOUT = options.timeout - - if options.all: - async_check(hosts) - umount_check(hosts) - replication_check(hosts) - load_check(hosts) - disk_usage(hosts) - get_ringmd5(hosts, obj_ring) - quarantine_check(hosts) - socket_usage(hosts) - else: - if options.async: - async_check(hosts) - if options.unmounted: - umount_check(hosts) - if options.replication: - replication_check(hosts) - if options.loadstats: - load_check(hosts) - if options.diskusage: - disk_usage(hosts) - if options.objmd5: - get_ringmd5(hosts, obj_ring) - if options.quarantined: - quarantine_check(hosts) - if options.sockstat: - socket_usage(hosts) + if options.all: + self.async_check(hosts) + self.umount_check(hosts) + self.replication_check(hosts) + self.load_check(hosts) + self.disk_usage(hosts) + self.get_ringmd5(hosts, obj_ring) + self.quarantine_check(hosts) + self.socket_usage(hosts) + else: + if options.async: + self.async_check(hosts) + if options.unmounted: + self.umount_check(hosts) + if options.replication: + self.replication_check(hosts) + if options.loadstats: + self.load_check(hosts) + if options.diskusage: + self.disk_usage(hosts) + if options.objmd5: + self.get_ringmd5(hosts, obj_ring) + if options.quarantined: + self.quarantine_check(hosts) + if options.sockstat: + self.socket_usage(hosts) if __name__ == '__main__': try: - main() + reconnoiter = SwiftRecon() + reconnoiter.main() except KeyboardInterrupt: print '\n'