swift-recon cli cleanup
Refactored swift-recon completely. It's now broken out into classes and no longer uses globals. In addition, I pulled out the previous individual scout_XXXX functions that were getting added for each check. All the checks now use the same method from the Scout class to obtain stats and telemetry. Change-Id: I512ab50f29e1ef4d10bd3adbf1cac642308e7cf1
This commit is contained in:
parent
a6567e60ab
commit
c18a4e4f43
792
bin/swift-recon
792
bin/swift-recon
@ -6,7 +6,11 @@
|
|||||||
|
|
||||||
from eventlet.green import urllib2
|
from eventlet.green import urllib2
|
||||||
from swift.common.ring import Ring
|
from swift.common.ring import Ring
|
||||||
import simplejson as json
|
from urlparse import urlparse
|
||||||
|
try:
|
||||||
|
import simplejson as json
|
||||||
|
except ImportError:
|
||||||
|
import json
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
import datetime
|
import datetime
|
||||||
import eventlet
|
import eventlet
|
||||||
@ -14,401 +18,453 @@ import optparse
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|
||||||
VERBOSE = False
|
|
||||||
SUPPRESS_ERRORS = False
|
class Scout(object):
|
||||||
TIMEOUT = 5
|
"""
|
||||||
|
Obtain swift recon information
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, recon_type, verbose=False, suppress_errors=False,
|
||||||
|
timeout=5):
|
||||||
|
recon_uri = ["ringmd5", "async", "replication", "load", "diskusage",
|
||||||
|
"unmounted", "quarantined", "sockstat"]
|
||||||
|
if recon_type not in recon_uri:
|
||||||
|
raise Exception("Invalid scout type requested")
|
||||||
|
else:
|
||||||
|
self.recon_type = recon_type
|
||||||
|
self.verbose = verbose
|
||||||
|
self.suppress_errors = suppress_errors
|
||||||
|
self.timeout = timeout
|
||||||
|
|
||||||
|
def scout_host(self, base_url, recon_type):
|
||||||
|
"""
|
||||||
|
Perform the actual HTTP request to obtain swift recon telemtry.
|
||||||
|
|
||||||
|
:param base_url: the base url of the host you wish to check. str of the
|
||||||
|
format 'http://127.0.0.1:6000/recon/'
|
||||||
|
:param recon_type: the swift recon check to request.
|
||||||
|
:returns: tuple of (recon url used, response body, and status)
|
||||||
|
"""
|
||||||
|
url = base_url + recon_type
|
||||||
|
try:
|
||||||
|
body = urllib2.urlopen(url, timeout=self.timeout).read()
|
||||||
|
content = json.loads(body)
|
||||||
|
if self.verbose:
|
||||||
|
print "-> %s: %s" % (url, content)
|
||||||
|
status = 200
|
||||||
|
except urllib2.HTTPError as err:
|
||||||
|
if not self.suppress_errors or self.verbose:
|
||||||
|
print "-> %s: %s" % (url, err)
|
||||||
|
content = err
|
||||||
|
status = err.code
|
||||||
|
except urllib2.URLError as err:
|
||||||
|
if not self.suppress_errors or self.verbose:
|
||||||
|
print "-> %s: %s" % (url, err)
|
||||||
|
content = err
|
||||||
|
status = -1
|
||||||
|
return url, content, status
|
||||||
|
|
||||||
|
def scout(self, host):
|
||||||
|
"""
|
||||||
|
Obtain telemetry from a host running the swift recon middleware.
|
||||||
|
|
||||||
|
:param host: host to check
|
||||||
|
:returns: tuple of (recon url used, response body, and status)
|
||||||
|
"""
|
||||||
|
base_url = "http://%s:%s/recon/" % (host[0], host[1])
|
||||||
|
url, content, status = self.scout_host(base_url, self.recon_type)
|
||||||
|
return url, content, status
|
||||||
|
|
||||||
|
|
||||||
def get_devices(zone_filter, ring_file):
|
class SwiftRecon(object):
|
||||||
ring_data = Ring(ring_file)
|
"""
|
||||||
if zone_filter:
|
Retrieve and report cluster info from hosts running recon middleware.
|
||||||
ips = set((n['ip'], n['port']) for n in ring_data.devs if n \
|
"""
|
||||||
if n['zone'] == zone_filter)
|
|
||||||
else:
|
|
||||||
ips = set((n['ip'], n['port']) for n in ring_data.devs if n)
|
|
||||||
return ips
|
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.verbose = False
|
||||||
|
self.suppress_errors = False
|
||||||
|
self.timeout = 5
|
||||||
|
self.pool_size = 30
|
||||||
|
self.pool = eventlet.GreenPool(self.pool_size)
|
||||||
|
|
||||||
def scout(base_url, recon_type):
|
def get_devices(self, zone_filter, ring_file):
|
||||||
global VERBOSE, SUPPRESS_ERRORS
|
"""
|
||||||
url = base_url + recon_type
|
Get a list of hosts in the ring
|
||||||
try:
|
|
||||||
body = urllib2.urlopen(url, timeout=TIMEOUT).read()
|
|
||||||
content = json.loads(body)
|
|
||||||
if VERBOSE:
|
|
||||||
print "-> %s: %s" % (url, content)
|
|
||||||
status = 200
|
|
||||||
except urllib2.HTTPError as e:
|
|
||||||
if not SUPPRESS_ERRORS or VERBOSE:
|
|
||||||
print "-> %s: %s" % (url, e)
|
|
||||||
content = e
|
|
||||||
status = e.code
|
|
||||||
except urllib2.URLError as e:
|
|
||||||
if not SUPPRESS_ERRORS or VERBOSE:
|
|
||||||
print "-> %s: %s" % (url, e)
|
|
||||||
content = e
|
|
||||||
status = -1
|
|
||||||
return url, content, status
|
|
||||||
|
|
||||||
|
:param zone_filter: Only list zones matching given filter
|
||||||
|
:param ring_file: Ring file to obtain hosts from
|
||||||
|
:returns: a set of tuples containing the ip and port of hosts
|
||||||
|
"""
|
||||||
|
ring_data = Ring(ring_file)
|
||||||
|
if zone_filter:
|
||||||
|
ips = set((n['ip'], n['port']) for n in ring_data.devs if n \
|
||||||
|
if n['zone'] == zone_filter)
|
||||||
|
else:
|
||||||
|
ips = set((n['ip'], n['port']) for n in ring_data.devs if n)
|
||||||
|
return ips
|
||||||
|
|
||||||
def scout_md5(host):
|
def get_ringmd5(self, hosts, ringfile):
|
||||||
base_url = "http://%s:%s/recon/" % (host[0], host[1])
|
"""
|
||||||
url, content, status = scout(base_url, "ringmd5")
|
Compare ring md5sum's with those on remote host
|
||||||
return url, content, status
|
|
||||||
|
|
||||||
|
:param hosts: set of hosts to check. in the format of:
|
||||||
def scout_async(host):
|
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
|
||||||
base_url = "http://%s:%s/recon/" % (host[0], host[1])
|
:param ringfile: The local ring file to compare the md5sum with.
|
||||||
url, content, status = scout(base_url, "async")
|
"""
|
||||||
return url, content, status
|
stats = {}
|
||||||
|
matches = 0
|
||||||
|
errors = 0
|
||||||
def scout_replication(host):
|
md5sum = md5()
|
||||||
base_url = "http://%s:%s/recon/" % (host[0], host[1])
|
with open(ringfile, 'rb') as f:
|
||||||
url, content, status = scout(base_url, "replication")
|
|
||||||
return url, content, status
|
|
||||||
|
|
||||||
|
|
||||||
def scout_load(host):
|
|
||||||
base_url = "http://%s:%s/recon/" % (host[0], host[1])
|
|
||||||
url, content, status = scout(base_url, "load")
|
|
||||||
return url, content, status
|
|
||||||
|
|
||||||
|
|
||||||
def scout_du(host):
|
|
||||||
base_url = "http://%s:%s/recon/" % (host[0], host[1])
|
|
||||||
url, content, status = scout(base_url, "diskusage")
|
|
||||||
return url, content, status
|
|
||||||
|
|
||||||
|
|
||||||
def scout_umount(host):
|
|
||||||
base_url = "http://%s:%s/recon/" % (host[0], host[1])
|
|
||||||
url, content, status = scout(base_url, "unmounted")
|
|
||||||
return url, content, status
|
|
||||||
|
|
||||||
|
|
||||||
def scout_quarantine(host):
|
|
||||||
base_url = "http://%s:%s/recon/" % (host[0], host[1])
|
|
||||||
url, content, status = scout(base_url, "quarantined")
|
|
||||||
return url, content, status
|
|
||||||
|
|
||||||
|
|
||||||
def scout_sockstat(host):
|
|
||||||
base_url = "http://%s:%s/recon/" % (host[0], host[1])
|
|
||||||
url, content, status = scout(base_url, "sockstat")
|
|
||||||
return url, content, status
|
|
||||||
|
|
||||||
|
|
||||||
def get_ringmd5(hosts, ringfile):
|
|
||||||
stats = {}
|
|
||||||
matches = 0
|
|
||||||
errors = 0
|
|
||||||
md5sum = md5()
|
|
||||||
with open(ringfile, 'rb') as f:
|
|
||||||
block = f.read(4096)
|
|
||||||
while block:
|
|
||||||
md5sum.update(block)
|
|
||||||
block = f.read(4096)
|
block = f.read(4096)
|
||||||
ring_sum = md5sum.hexdigest()
|
while block:
|
||||||
pool = eventlet.GreenPool(20)
|
md5sum.update(block)
|
||||||
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
block = f.read(4096)
|
||||||
print "[%s] Checking ring md5sum's on %s hosts..." % (now, len(hosts))
|
ring_sum = md5sum.hexdigest()
|
||||||
if VERBOSE:
|
recon = Scout("ringmd5", self.verbose, self.suppress_errors,
|
||||||
print "-> On disk md5sum: %s" % ring_sum
|
self.timeout)
|
||||||
for url, response, status in pool.imap(scout_md5, hosts):
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
if status == 200:
|
print "[%s] Checking ring md5sum's on %s hosts..." % (now, len(hosts))
|
||||||
#fixme - need to grab from config
|
if self.verbose:
|
||||||
stats[url] = response[ringfile]
|
print "-> On disk md5sum: %s" % ring_sum
|
||||||
if response[ringfile] != ring_sum:
|
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||||
ringsmatch = False
|
if status == 200:
|
||||||
print "!! %s (%s) doesn't match on disk md5sum" % \
|
stats[url] = response[ringfile]
|
||||||
(url, response[ringfile])
|
if response[ringfile] != ring_sum:
|
||||||
|
print "!! %s (%s) doesn't match on disk md5sum" % \
|
||||||
|
(url, response[ringfile])
|
||||||
|
else:
|
||||||
|
matches = matches + 1
|
||||||
|
if self.verbose:
|
||||||
|
print "-> %s matches." % url
|
||||||
else:
|
else:
|
||||||
matches = matches + 1
|
errors = errors + 1
|
||||||
if VERBOSE:
|
print "%s/%s hosts matched, %s error[s] while checking hosts." % \
|
||||||
print "-> %s matches." % url
|
(matches, len(hosts), errors)
|
||||||
else:
|
print "=" * 79
|
||||||
errors = errors + 1
|
|
||||||
print "%s/%s hosts matched, %s error[s] while checking hosts." % \
|
|
||||||
(matches, len(hosts), errors)
|
|
||||||
print "=" * 79
|
|
||||||
|
|
||||||
|
def async_check(self, hosts):
|
||||||
|
"""
|
||||||
|
Obtain and print async pending statistics
|
||||||
|
|
||||||
def async_check(hosts):
|
:param hosts: set of hosts to check. in the format of:
|
||||||
stats = {}
|
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
|
||||||
pool = eventlet.GreenPool(20)
|
"""
|
||||||
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
stats = {}
|
||||||
print "[%s] Checking async pendings on %s hosts..." % (now, len(hosts))
|
recon = Scout("async", self.verbose, self.suppress_errors,
|
||||||
for url, response, status in pool.imap(scout_async, hosts):
|
self.timeout)
|
||||||
if status == 200:
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
stats[url] = response['async_pending']
|
print "[%s] Checking async pendings on %s hosts..." % (now, len(hosts))
|
||||||
if len(stats) > 0:
|
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||||
low = min(stats.values())
|
if status == 200:
|
||||||
high = max(stats.values())
|
stats[url] = response['async_pending']
|
||||||
total = sum(stats.values())
|
if len(stats) > 0:
|
||||||
average = total / len(stats)
|
low = min(stats.values())
|
||||||
print "Async stats: low: %d, high: %d, avg: %d, total: %d" % (low,
|
high = max(stats.values())
|
||||||
high, average, total)
|
total = sum(stats.values())
|
||||||
else:
|
average = total / len(stats)
|
||||||
print "Error: No hosts available or returned valid information."
|
print "Async stats: low: %d, high: %d, avg: %d, total: %d" % (low,
|
||||||
print "=" * 79
|
high, average, total)
|
||||||
|
|
||||||
|
|
||||||
def umount_check(hosts):
|
|
||||||
stats = {}
|
|
||||||
pool = eventlet.GreenPool(20)
|
|
||||||
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
||||||
print "[%s] Getting unmounted drives from %s hosts..." % (now, len(hosts))
|
|
||||||
for url, response, status in pool.imap(scout_umount, hosts):
|
|
||||||
if status == 200:
|
|
||||||
for i in response:
|
|
||||||
stats[url] = i['device']
|
|
||||||
for host in stats:
|
|
||||||
print "Not mounted: %s on %s" % (stats[host], host)
|
|
||||||
print "=" * 79
|
|
||||||
|
|
||||||
|
|
||||||
def replication_check(hosts):
|
|
||||||
stats = {}
|
|
||||||
pool = eventlet.GreenPool(20)
|
|
||||||
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
||||||
print "[%s] Checking replication times on %s hosts..." % (now, len(hosts))
|
|
||||||
for url, response, status in pool.imap(scout_replication, hosts):
|
|
||||||
if status == 200:
|
|
||||||
stats[url] = response['object_replication_time']
|
|
||||||
if len(stats) > 0:
|
|
||||||
low = min(stats.values())
|
|
||||||
high = max(stats.values())
|
|
||||||
total = sum(stats.values())
|
|
||||||
average = total / len(stats)
|
|
||||||
print "[Replication Times] shortest: %s, longest: %s, avg: %s" % \
|
|
||||||
(low, high, average)
|
|
||||||
else:
|
|
||||||
print "Error: No hosts available or returned valid information."
|
|
||||||
print "=" * 79
|
|
||||||
|
|
||||||
|
|
||||||
def load_check(hosts):
|
|
||||||
load1 = {}
|
|
||||||
load5 = {}
|
|
||||||
load15 = {}
|
|
||||||
pool = eventlet.GreenPool(20)
|
|
||||||
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
||||||
print "[%s] Checking load avg's on %s hosts..." % (now, len(hosts))
|
|
||||||
for url, response, status in pool.imap(scout_load, hosts):
|
|
||||||
if status == 200:
|
|
||||||
load1[url] = response['1m']
|
|
||||||
load5[url] = response['5m']
|
|
||||||
load15[url] = response['15m']
|
|
||||||
stats = {"1m": load1, "5m": load5, "15m": load15}
|
|
||||||
for item in stats:
|
|
||||||
if len(stats[item]) > 0:
|
|
||||||
low = min(stats[item].values())
|
|
||||||
high = max(stats[item].values())
|
|
||||||
total = sum(stats[item].values())
|
|
||||||
average = total / len(stats[item])
|
|
||||||
print "[%s load average] lowest: %s, highest: %s, avg: %s" % \
|
|
||||||
(item, low, high, average)
|
|
||||||
else:
|
else:
|
||||||
print "Error: No hosts available or returned valid information."
|
print "Error: No hosts available or returned valid information."
|
||||||
print "=" * 79
|
print "=" * 79
|
||||||
|
|
||||||
|
def umount_check(self, hosts):
|
||||||
|
"""
|
||||||
|
Check for and print unmounted drives
|
||||||
|
|
||||||
def quarantine_check(hosts):
|
:param hosts: set of hosts to check. in the format of:
|
||||||
objq = {}
|
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
|
||||||
conq = {}
|
"""
|
||||||
acctq = {}
|
stats = {}
|
||||||
pool = eventlet.GreenPool(20)
|
recon = Scout("unmounted", self.verbose, self.suppress_errors,
|
||||||
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
self.timeout)
|
||||||
print "[%s] Checking quarantine dirs on %s hosts..." % (now, len(hosts))
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
for url, response, status in pool.imap(scout_quarantine, hosts):
|
print "[%s] Getting unmounted drives from %s hosts..." % \
|
||||||
if status == 200:
|
(now, len(hosts))
|
||||||
objq[url] = response['objects']
|
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||||
conq[url] = response['containers']
|
if status == 200:
|
||||||
acctq[url] = response['accounts']
|
for i in response:
|
||||||
stats = {"objects": objq, "containers": conq, "accounts": acctq}
|
stats[url] = i['device']
|
||||||
for item in stats:
|
for host in stats:
|
||||||
if len(stats[item]) > 0:
|
node = urlparse(host).netloc
|
||||||
low = min(stats[item].values())
|
print "Not mounted: %s on %s" % (stats[host], node)
|
||||||
high = max(stats[item].values())
|
print "=" * 79
|
||||||
total = sum(stats[item].values())
|
|
||||||
average = total / len(stats[item])
|
def replication_check(self, hosts):
|
||||||
print "[Quarantined %s] low: %d, high: %d, avg: %d, total: %d" % \
|
"""
|
||||||
(item, low, high, average, total)
|
Obtain and print replication statistics
|
||||||
|
|
||||||
|
:param hosts: set of hosts to check. in the format of:
|
||||||
|
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
|
||||||
|
"""
|
||||||
|
stats = {}
|
||||||
|
recon = Scout("replication", self.verbose, self.suppress_errors,
|
||||||
|
self.timeout)
|
||||||
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
print "[%s] Checking replication times on %s hosts..." % \
|
||||||
|
(now, len(hosts))
|
||||||
|
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||||
|
if status == 200:
|
||||||
|
stats[url] = response['object_replication_time']
|
||||||
|
if len(stats) > 0:
|
||||||
|
low = min(stats.values())
|
||||||
|
high = max(stats.values())
|
||||||
|
total = sum(stats.values())
|
||||||
|
average = total / len(stats)
|
||||||
|
print "[Replication Times] shortest: %s, longest: %s, avg: %s" % \
|
||||||
|
(low, high, average)
|
||||||
else:
|
else:
|
||||||
print "Error: No hosts available or returned valid information."
|
print "Error: No hosts available or returned valid information."
|
||||||
print "=" * 79
|
print "=" * 79
|
||||||
|
|
||||||
|
def load_check(self, hosts):
|
||||||
|
"""
|
||||||
|
Obtain and print load average statistics
|
||||||
|
|
||||||
def socket_usage(hosts):
|
:param hosts: set of hosts to check. in the format of:
|
||||||
inuse4 = {}
|
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
|
||||||
mem = {}
|
"""
|
||||||
inuse6 = {}
|
load1 = {}
|
||||||
timewait = {}
|
load5 = {}
|
||||||
orphan = {}
|
load15 = {}
|
||||||
pool = eventlet.GreenPool(20)
|
recon = Scout("load", self.verbose, self.suppress_errors,
|
||||||
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
self.timeout)
|
||||||
print "[%s] Checking socket usage on %s hosts..." % (now, len(hosts))
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
for url, response, status in pool.imap(scout_sockstat, hosts):
|
print "[%s] Checking load avg's on %s hosts..." % (now, len(hosts))
|
||||||
if status == 200:
|
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||||
inuse4[url] = response['tcp_in_use']
|
if status == 200:
|
||||||
mem[url] = response['tcp_mem_allocated_bytes']
|
load1[url] = response['1m']
|
||||||
inuse6[url] = response['tcp6_in_use']
|
load5[url] = response['5m']
|
||||||
timewait[url] = response['time_wait']
|
load15[url] = response['15m']
|
||||||
orphan[url] = response['orphan']
|
stats = {"1m": load1, "5m": load5, "15m": load15}
|
||||||
stats = {"tcp_in_use": inuse4, "tcp_mem_allocated_bytes": mem, \
|
for item in stats:
|
||||||
"tcp6_in_use": inuse6, "time_wait": timewait, "orphan": orphan}
|
if len(stats[item]) > 0:
|
||||||
for item in stats:
|
low = min(stats[item].values())
|
||||||
if len(stats[item]) > 0:
|
high = max(stats[item].values())
|
||||||
low = min(stats[item].values())
|
total = sum(stats[item].values())
|
||||||
high = max(stats[item].values())
|
average = total / len(stats[item])
|
||||||
total = sum(stats[item].values())
|
print "[%s load average] lowest: %s, highest: %s, avg: %s" % \
|
||||||
average = total / len(stats[item])
|
(item, low, high, average)
|
||||||
print "[%s] low: %d, high: %d, avg: %d, total: %d" % \
|
else:
|
||||||
(item, low, high, average, total)
|
print "Error: No hosts available or returned valid info."
|
||||||
|
print "=" * 79
|
||||||
|
|
||||||
|
def quarantine_check(self, hosts):
|
||||||
|
"""
|
||||||
|
Obtain and print quarantine statistics
|
||||||
|
|
||||||
|
:param hosts: set of hosts to check. in the format of:
|
||||||
|
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
|
||||||
|
"""
|
||||||
|
objq = {}
|
||||||
|
conq = {}
|
||||||
|
acctq = {}
|
||||||
|
recon = Scout("quarantined", self.verbose, self.suppress_errors,
|
||||||
|
self.timeout)
|
||||||
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
print "[%s] Checking quarantine on %s hosts..." % (now, len(hosts))
|
||||||
|
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||||
|
if status == 200:
|
||||||
|
objq[url] = response['objects']
|
||||||
|
conq[url] = response['containers']
|
||||||
|
acctq[url] = response['accounts']
|
||||||
|
stats = {"objects": objq, "containers": conq, "accounts": acctq}
|
||||||
|
for item in stats:
|
||||||
|
if len(stats[item]) > 0:
|
||||||
|
low = min(stats[item].values())
|
||||||
|
high = max(stats[item].values())
|
||||||
|
total = sum(stats[item].values())
|
||||||
|
average = total / len(stats[item])
|
||||||
|
print ("[Quarantined %s] low: %d, high: %d, avg: %d, total: %d"
|
||||||
|
% (item, low, high, average, total))
|
||||||
|
else:
|
||||||
|
print "Error: No hosts available or returned valid info."
|
||||||
|
print "=" * 79
|
||||||
|
|
||||||
|
def socket_usage(self, hosts):
|
||||||
|
"""
|
||||||
|
Obtain and print /proc/net/sockstat statistics
|
||||||
|
|
||||||
|
:param hosts: set of hosts to check. in the format of:
|
||||||
|
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
|
||||||
|
"""
|
||||||
|
inuse4 = {}
|
||||||
|
mem = {}
|
||||||
|
inuse6 = {}
|
||||||
|
timewait = {}
|
||||||
|
orphan = {}
|
||||||
|
recon = Scout("sockstat", self.verbose, self.suppress_errors,
|
||||||
|
self.timeout)
|
||||||
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
print "[%s] Checking socket usage on %s hosts..." % (now, len(hosts))
|
||||||
|
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||||
|
if status == 200:
|
||||||
|
inuse4[url] = response['tcp_in_use']
|
||||||
|
mem[url] = response['tcp_mem_allocated_bytes']
|
||||||
|
inuse6[url] = response['tcp6_in_use']
|
||||||
|
timewait[url] = response['time_wait']
|
||||||
|
orphan[url] = response['orphan']
|
||||||
|
stats = {"tcp_in_use": inuse4, "tcp_mem_allocated_bytes": mem,
|
||||||
|
"tcp6_in_use": inuse6, "time_wait": timewait,
|
||||||
|
"orphan": orphan}
|
||||||
|
for item in stats:
|
||||||
|
if len(stats[item]) > 0:
|
||||||
|
low = min(stats[item].values())
|
||||||
|
high = max(stats[item].values())
|
||||||
|
total = sum(stats[item].values())
|
||||||
|
average = total / len(stats[item])
|
||||||
|
print "[%s] low: %d, high: %d, avg: %d, total: %d" % \
|
||||||
|
(item, low, high, average, total)
|
||||||
|
else:
|
||||||
|
print "Error: No hosts or info available."
|
||||||
|
print "=" * 79
|
||||||
|
|
||||||
|
def disk_usage(self, hosts):
|
||||||
|
"""
|
||||||
|
Obtain and print disk usage statistics
|
||||||
|
|
||||||
|
:param hosts: set of hosts to check. in the format of:
|
||||||
|
set([('127.0.0.1', 6020), ('127.0.0.2', 6030)])
|
||||||
|
"""
|
||||||
|
stats = {}
|
||||||
|
highs = []
|
||||||
|
lows = []
|
||||||
|
averages = []
|
||||||
|
percents = {}
|
||||||
|
recon = Scout("diskusage", self.verbose, self.suppress_errors,
|
||||||
|
self.timeout)
|
||||||
|
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
print "[%s] Checking disk usage on %s hosts..." % (now, len(hosts))
|
||||||
|
for url, response, status in self.pool.imap(recon.scout, hosts):
|
||||||
|
if status == 200:
|
||||||
|
hostusage = []
|
||||||
|
for entry in response:
|
||||||
|
if entry['mounted']:
|
||||||
|
used = float(entry['used']) / float(entry['size']) \
|
||||||
|
* 100.0
|
||||||
|
hostusage.append(round(used, 2))
|
||||||
|
stats[url] = hostusage
|
||||||
|
|
||||||
|
for url in stats:
|
||||||
|
if len(stats[url]) > 0:
|
||||||
|
#get per host hi/los for another day
|
||||||
|
low = min(stats[url])
|
||||||
|
high = max(stats[url])
|
||||||
|
total = sum(stats[url])
|
||||||
|
average = total / len(stats[url])
|
||||||
|
highs.append(high)
|
||||||
|
lows.append(low)
|
||||||
|
averages.append(average)
|
||||||
|
for percent in stats[url]:
|
||||||
|
percents[int(percent)] = percents.get(int(percent), 0) + 1
|
||||||
|
else:
|
||||||
|
print "-> %s: Error. No drive info available." % url
|
||||||
|
|
||||||
|
if len(lows) > 0:
|
||||||
|
low = min(lows)
|
||||||
|
high = max(highs)
|
||||||
|
average = sum(averages) / len(averages)
|
||||||
|
#dist graph shamelessly stolen from https://github.com/gholt/tcod
|
||||||
|
print "Distribution Graph:"
|
||||||
|
mul = 69.0 / max(percents.values())
|
||||||
|
for percent in sorted(percents):
|
||||||
|
print '% 3d%%%5d %s' % (percent, percents[percent], \
|
||||||
|
'*' * int(percents[percent] * mul))
|
||||||
|
print "Disk usage: lowest: %s%%, highest: %s%%, avg: %s%%" % \
|
||||||
|
(low, high, average)
|
||||||
else:
|
else:
|
||||||
print "Error: No hosts or info available."
|
print "Error: No hosts available or returned valid information."
|
||||||
print "=" * 79
|
print "=" * 79
|
||||||
|
|
||||||
|
def main(self):
|
||||||
|
"""
|
||||||
|
Retrieve and report cluster info from hosts running recon middleware.
|
||||||
|
"""
|
||||||
|
print "=" * 79
|
||||||
|
usage = '''
|
||||||
|
usage: %prog [-v] [--suppress] [-a] [-r] [-u] [-d] [-l] [--objmd5]
|
||||||
|
'''
|
||||||
|
args = optparse.OptionParser(usage)
|
||||||
|
args.add_option('--verbose', '-v', action="store_true",
|
||||||
|
help="Print verbose info")
|
||||||
|
args.add_option('--suppress', action="store_true",
|
||||||
|
help="Suppress most connection related errors")
|
||||||
|
args.add_option('--async', '-a', action="store_true",
|
||||||
|
help="Get async stats")
|
||||||
|
args.add_option('--replication', '-r', action="store_true",
|
||||||
|
help="Get replication stats")
|
||||||
|
args.add_option('--unmounted', '-u', action="store_true",
|
||||||
|
help="Check cluster for unmounted devices")
|
||||||
|
args.add_option('--diskusage', '-d', action="store_true",
|
||||||
|
help="Get disk usage stats")
|
||||||
|
args.add_option('--loadstats', '-l', action="store_true",
|
||||||
|
help="Get cluster load average stats")
|
||||||
|
args.add_option('--quarantined', '-q', action="store_true",
|
||||||
|
help="Get cluster quarantine stats")
|
||||||
|
args.add_option('--objmd5', action="store_true",
|
||||||
|
help="Get md5sums of object.ring.gz and compare to local copy")
|
||||||
|
args.add_option('--sockstat', action="store_true",
|
||||||
|
help="Get cluster socket usage stats")
|
||||||
|
args.add_option('--all', action="store_true",
|
||||||
|
help="Perform all checks. Equal to -arudlq --objmd5 --sockstat")
|
||||||
|
args.add_option('--zone', '-z', type="int",
|
||||||
|
help="Only query servers in specified zone")
|
||||||
|
args.add_option('--timeout', '-t', type="int", metavar="SECONDS",
|
||||||
|
help="Time to wait for a response from a server", default=5)
|
||||||
|
args.add_option('--swiftdir', default="/etc/swift",
|
||||||
|
help="Default = /etc/swift")
|
||||||
|
options, arguments = args.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def disk_usage(hosts):
|
if len(sys.argv) <= 1:
|
||||||
stats = {}
|
args.print_help()
|
||||||
highs = []
|
sys.exit(0)
|
||||||
lows = []
|
|
||||||
averages = []
|
|
||||||
percents = {}
|
|
||||||
pool = eventlet.GreenPool(20)
|
|
||||||
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
||||||
print "[%s] Checking disk usage on %s hosts..." % (now, len(hosts))
|
|
||||||
for url, response, status in pool.imap(scout_du, hosts):
|
|
||||||
if status == 200:
|
|
||||||
hostusage = []
|
|
||||||
for entry in response:
|
|
||||||
if entry['mounted']:
|
|
||||||
used = float(entry['used']) / float(entry['size']) * 100.0
|
|
||||||
hostusage.append(round(used, 2))
|
|
||||||
stats[url] = hostusage
|
|
||||||
|
|
||||||
for url in stats:
|
swift_dir = options.swiftdir
|
||||||
if len(stats[url]) > 0:
|
obj_ring = os.path.join(swift_dir, 'object.ring.gz')
|
||||||
#get per host hi/los for another day
|
|
||||||
low = min(stats[url])
|
self.verbose = options.verbose
|
||||||
high = max(stats[url])
|
self.suppress_errors = options.suppress
|
||||||
total = sum(stats[url])
|
self.timeout = options.timeout
|
||||||
average = total / len(stats[url])
|
|
||||||
highs.append(high)
|
if options.zone:
|
||||||
lows.append(low)
|
hosts = self.get_devices(options.zone, obj_ring)
|
||||||
averages.append(average)
|
|
||||||
for percent in stats[url]:
|
|
||||||
percents[int(percent)] = percents.get(int(percent), 0) + 1
|
|
||||||
else:
|
else:
|
||||||
print "-> %s: Error. No drive info available." % url
|
hosts = self.get_devices(None, obj_ring)
|
||||||
|
|
||||||
if len(lows) > 0:
|
if options.all:
|
||||||
low = min(lows)
|
self.async_check(hosts)
|
||||||
high = max(highs)
|
self.umount_check(hosts)
|
||||||
average = sum(averages) / len(averages)
|
self.replication_check(hosts)
|
||||||
#distrib graph shamelessly stolen from https://github.com/gholt/tcod
|
self.load_check(hosts)
|
||||||
print "Distribution Graph:"
|
self.disk_usage(hosts)
|
||||||
mul = 69.0 / max(percents.values())
|
self.get_ringmd5(hosts, obj_ring)
|
||||||
for percent in sorted(percents):
|
self.quarantine_check(hosts)
|
||||||
print '% 3d%% % 4d %s' % (percent, percents[percent], \
|
self.socket_usage(hosts)
|
||||||
'*' * int(percents[percent] * mul))
|
else:
|
||||||
|
if options.async:
|
||||||
print "Disk usage: lowest: %s%%, highest: %s%%, avg: %s%%" % \
|
self.async_check(hosts)
|
||||||
(low, high, average)
|
if options.unmounted:
|
||||||
else:
|
self.umount_check(hosts)
|
||||||
print "Error: No hosts available or returned valid information."
|
if options.replication:
|
||||||
print "=" * 79
|
self.replication_check(hosts)
|
||||||
|
if options.loadstats:
|
||||||
|
self.load_check(hosts)
|
||||||
def main():
|
if options.diskusage:
|
||||||
global VERBOSE, SUPPRESS_ERRORS, TIMEOUT, swift_dir, pool
|
self.disk_usage(hosts)
|
||||||
print "=" * 79
|
if options.objmd5:
|
||||||
usage = '''
|
self.get_ringmd5(hosts, obj_ring)
|
||||||
usage: %prog [-v] [--suppress] [-a] [-r] [-u] [-d] [-l] [--objmd5]
|
if options.quarantined:
|
||||||
'''
|
self.quarantine_check(hosts)
|
||||||
args = optparse.OptionParser(usage)
|
if options.sockstat:
|
||||||
args.add_option('--verbose', '-v', action="store_true",
|
self.socket_usage(hosts)
|
||||||
help="Print verbose info")
|
|
||||||
args.add_option('--suppress', action="store_true",
|
|
||||||
help="Suppress most connection related errors")
|
|
||||||
args.add_option('--async', '-a', action="store_true",
|
|
||||||
help="Get async stats")
|
|
||||||
args.add_option('--replication', '-r', action="store_true",
|
|
||||||
help="Get replication stats")
|
|
||||||
args.add_option('--unmounted', '-u', action="store_true",
|
|
||||||
help="Check cluster for unmounted devices")
|
|
||||||
args.add_option('--diskusage', '-d', action="store_true",
|
|
||||||
help="Get disk usage stats")
|
|
||||||
args.add_option('--loadstats', '-l', action="store_true",
|
|
||||||
help="Get cluster load average stats")
|
|
||||||
args.add_option('--quarantined', '-q', action="store_true",
|
|
||||||
help="Get cluster quarantine stats")
|
|
||||||
args.add_option('--objmd5', action="store_true",
|
|
||||||
help="Get md5sums of object.ring.gz and compare to local copy")
|
|
||||||
args.add_option('--sockstat', action="store_true",
|
|
||||||
help="Get cluster socket usage stats")
|
|
||||||
args.add_option('--all', action="store_true",
|
|
||||||
help="Perform all checks. Equivalent to -arudlq --objmd5 --sockstat")
|
|
||||||
args.add_option('--zone', '-z', type="int",
|
|
||||||
help="Only query servers in specified zone")
|
|
||||||
args.add_option('--timeout', '-t', type="int", metavar="SECONDS",
|
|
||||||
help="Time to wait for a response from a server")
|
|
||||||
|
|
||||||
args.add_option('--swiftdir', default="/etc/swift",
|
|
||||||
help="Default = /etc/swift")
|
|
||||||
options, arguments = args.parse_args()
|
|
||||||
|
|
||||||
if len(sys.argv) <= 1:
|
|
||||||
args.print_help()
|
|
||||||
|
|
||||||
swift_dir = options.swiftdir
|
|
||||||
obj_ring = os.path.join(swift_dir, 'object.ring.gz')
|
|
||||||
con_ring = os.path.join(swift_dir, 'container.ring.gz')
|
|
||||||
acct_ring = os.path.join(swift_dir, 'account.ring.gz')
|
|
||||||
|
|
||||||
VERBOSE = options.verbose
|
|
||||||
SUPPRESS_ERRORS = options.suppress
|
|
||||||
|
|
||||||
if options.zone:
|
|
||||||
hosts = get_devices(options.zone, obj_ring)
|
|
||||||
else:
|
|
||||||
hosts = get_devices(None, obj_ring)
|
|
||||||
|
|
||||||
if options.timeout:
|
|
||||||
TIMEOUT = options.timeout
|
|
||||||
|
|
||||||
if options.all:
|
|
||||||
async_check(hosts)
|
|
||||||
umount_check(hosts)
|
|
||||||
replication_check(hosts)
|
|
||||||
load_check(hosts)
|
|
||||||
disk_usage(hosts)
|
|
||||||
get_ringmd5(hosts, obj_ring)
|
|
||||||
quarantine_check(hosts)
|
|
||||||
socket_usage(hosts)
|
|
||||||
else:
|
|
||||||
if options.async:
|
|
||||||
async_check(hosts)
|
|
||||||
if options.unmounted:
|
|
||||||
umount_check(hosts)
|
|
||||||
if options.replication:
|
|
||||||
replication_check(hosts)
|
|
||||||
if options.loadstats:
|
|
||||||
load_check(hosts)
|
|
||||||
if options.diskusage:
|
|
||||||
disk_usage(hosts)
|
|
||||||
if options.objmd5:
|
|
||||||
get_ringmd5(hosts, obj_ring)
|
|
||||||
if options.quarantined:
|
|
||||||
quarantine_check(hosts)
|
|
||||||
if options.sockstat:
|
|
||||||
socket_usage(hosts)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
try:
|
try:
|
||||||
main()
|
reconnoiter = SwiftRecon()
|
||||||
|
reconnoiter.main()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print '\n'
|
print '\n'
|
||||||
|
Loading…
Reference in New Issue
Block a user