diff --git a/bin/swift-recon b/bin/swift-recon index b5c1cc96d4..eae582e545 100755 --- a/bin/swift-recon +++ b/bin/swift-recon @@ -92,6 +92,12 @@ def scout_quarantine(host): return url, content, status +def scout_sockstat(host): + base_url = "http://%s:%s/recon/" % (host[0], host[1]) + url, content, status = scout(base_url, "sockstat") + return url, content, status + + def get_ringmd5(hosts, ringfile): stats = {} matches = 0 @@ -233,6 +239,37 @@ def quarantine_check(hosts): print "=" * 79 +def socket_usage(hosts): + inuse4 = {} + mem = {} + inuse6 = {} + timewait = {} + orphan = {} + pool = eventlet.GreenPool(20) + now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print "[%s] Checking socket usage on %s hosts..." % (now, len(hosts)) + for url, response, status in pool.imap(scout_sockstat, hosts): + if status == 200: + inuse4[url] = response['tcp_in_use'] + mem[url] = response['tcp_mem_allocated_bytes'] + inuse6[url] = response['tcp6_in_use'] + timewait[url] = response['time_wait'] + orphan[url] = response['orphan'] + stats = {"tcp_in_use": inuse4, "tcp_mem_allocated_bytes": mem, \ + "tcp6_in_use": inuse6, "time_wait": timewait, "orphan": orphan} + for item in stats: + if len(stats[item]) > 0: + low = min(stats[item].values()) + high = max(stats[item].values()) + total = sum(stats[item].values()) + average = total / len(stats[item]) + print "[%s] low: %d, high: %d, avg: %d, total: %d" % \ + (item, low, high, average, total) + else: + print "Error: No hosts or info available." + print "=" * 79 + + def disk_usage(hosts): stats = {} highs = [] @@ -309,10 +346,13 @@ def main(): help="Get cluster quarantine stats") args.add_option('--objmd5', action="store_true", help="Get md5sums of object.ring.gz and compare to local copy") + args.add_option('--sockstat', action="store_true", + help="Get cluster socket usage stats") args.add_option('--all', action="store_true", - help="Perform all checks. Equivelent to -arudlq --objmd5") + help="Perform all checks. 
Equivalent to -arudlq --objmd5 --sockstat") args.add_option('--zone', '-z', type="int", help="Only query servers in specified zone") + args.add_option('--swiftdir', default="/etc/swift", help="Default = /etc/swift") options, arguments = args.parse_args() @@ -341,6 +381,7 @@ def main(): disk_usage(hosts) get_ringmd5(hosts, obj_ring) quarantine_check(hosts) + socket_usage(hosts) else: if options.async: async_check(hosts) @@ -356,6 +397,8 @@ def main(): get_ringmd5(hosts, obj_ring) if options.quarantined: quarantine_check(hosts) + if options.sockstat: + socket_usage(hosts) if __name__ == '__main__': diff --git a/doc/source/admin_guide.rst b/doc/source/admin_guide.rst index 77eb267b0c..c8d903dfaf 100644 --- a/doc/source/admin_guide.rst +++ b/doc/source/admin_guide.rst @@ -280,6 +280,7 @@ Request URI Description /recon/diskusage returns disk utilization for storage devices /recon/ringmd5 returns object/container/account ring md5sums /recon/quarantined returns # of quarantined objects/accounts/containers +/recon/sockstat returns consumable info from /proc/net/sockstat|6 ================== ==================================================== This information can also be queried via the swift-recon command line utility:: @@ -302,7 +303,9 @@ This information can also be queried via the swift-recon command line utility:: -q, --quarantined Get cluster quarantine stats --objmd5 Get md5sums of object.ring.gz and compare to local copy + --sockstat Get cluster socket usage stats --all Perform all checks. 
Equivalent to -arudlq --objmd5 + --sockstat -z ZONE, --zone=ZONE Only query servers in specified zone --swiftdir=SWIFTDIR Default = /etc/swift diff --git a/swift/common/middleware/recon.py b/swift/common/middleware/recon.py index 438e1e8c36..6ee276d654 100644 --- a/swift/common/middleware/recon.py +++ b/swift/common/middleware/recon.py @@ -16,6 +16,7 @@ from webob import Request, Response from swift.common.utils import split_path, cache_from_env, get_logger from swift.common.constraints import check_mount +from resource import getpagesize from hashlib import md5 try: import simplejson as json @@ -170,6 +171,37 @@ class ReconMiddleware(object): qcounts[qtype] += linkcount - 2 return qcounts + def get_socket_info(self): + """ + get info from /proc/net/sockstat and sockstat6 + + Note: The mem value is actually kernel pages, but we return bytes + allocated based on the systems page size. + """ + sockstat = {} + try: + with open('/proc/net/sockstat') as proc_sockstat: + for entry in proc_sockstat: + if entry.startswith("TCP: inuse"): + tcpstats = entry.split() + sockstat['tcp_in_use'] = int(tcpstats[2]) + sockstat['orphan'] = int(tcpstats[4]) + sockstat['time_wait'] = int(tcpstats[6]) + sockstat['tcp_mem_allocated_bytes'] = \ + int(tcpstats[10]) * getpagesize() + except IOError as e: + if e.errno != errno.ENOENT: + raise + try: + with open('/proc/net/sockstat6') as proc_sockstat6: + for entry in proc_sockstat6: + if entry.startswith("TCP6: inuse"): + sockstat['tcp6_in_use'] = int(entry.split()[2]) + except IOError as e: + if e.errno != errno.ENOENT: + raise + return sockstat + def GET(self, req): error = False root, type = split_path(req.path, 1, 2, False) @@ -204,6 +236,8 @@ class ReconMiddleware(object): content = json.dumps(self.get_ring_md5()) elif type == "quarantined": content = json.dumps(self.get_quarantine_count()) + elif type == "sockstat": + content = json.dumps(self.get_socket_info()) else: content = "Invalid path: %s" % req.path return 
Response(request=req, status="400 Bad Request", \