From 60cd36e5de8186c27d1f415a17c45722ef7cc495 Mon Sep 17 00:00:00 2001
From: Ian Wienand
Date: Thu, 31 May 2018 19:40:44 +1000
Subject: [PATCH] Initial framework

---
 .gitignore           |   4 +
 pyafsmon/pyafsmon.py | 281 ++++++++++++++++++++++++++++++++++++++++++-
 requirements.txt     |   1 +
 sample.cfg           |   4 +
 4 files changed, 289 insertions(+), 1 deletion(-)
 create mode 100644 sample.cfg

diff --git a/.gitignore b/.gitignore
index 894a44c..f3b8eee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+*~
+*#
+.stestr/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/pyafsmon/pyafsmon.py b/pyafsmon/pyafsmon.py
index a2aca8a..5f501d7 100644
--- a/pyafsmon/pyafsmon.py
+++ b/pyafsmon/pyafsmon.py
@@ -13,9 +13,263 @@
 # under the License.
 
 import argparse
+import configparser
+import collections
+import logging
+import io
 import sys
+import subprocess
+import re
+
+from datetime import datetime
+from enum import Enum
+from prettytable import PrettyTable
+
+config = None
+
+
+def _run(cmd):
+    '''Run a command and return its output as text
+
+    Args:
+        cmd (:obj:`list`): the command and its arguments
+
+    Returns:
+        str: combined stdout and stderr, decoded as ASCII
+
+    Raises:
+        subprocess.CalledProcessError: if the command exits non-zero
+    '''
+    logging.debug("Running: %s" % cmd)
+    return subprocess.check_output(
+        cmd, stderr=subprocess.STDOUT).decode('ascii')
+
+
+#
+# Fileserver
+#
+
+class FileServerStatus(Enum):
+    '''State of a fileserver as determined from ``bos status``'''
+    NORMAL = 0
+    TEMPORARILY_DISABLED = 1
+    DISABLED = 2
+    UNKNOWN = 3
+    NO_CONNECTION = 4
+
+
+Partition = collections.namedtuple(
+    'Partition', 'partition, used, free, total, percent_used')
+
+Volume = collections.namedtuple(
+    'Volume', 'volume, id, perms, used, quota, percent_used')
+
+
+class FileServerStats:
+    '''AFS fileserver status
+
+    Note most attributes are only set if ``status`` is NORMAL
+
+    Attributes:
+        status (FileServerStatus): enum of possible status
+        timestamp (:obj:`datetime.datetime`): time statistics retrieved
+        restart (:obj:`datetime.datetime`): time of last restart
+        uptime (:obj:`datetime.timedelta`): current uptime
+        partitions (:obj:`list`): list of :obj:`Partition` tuples for each
+                                  partition on the server
+        calls_waiting (:obj:`int`): number of calls waiting for a thread
+        idle_threads (:obj:`int`): number of currently idle threads
+        volumes (:obj:`list`): list of :obj:`Volume` tuples for each
+                               volume present on the server
+        table (:obj:`PrettyTable`): a printable PrettyTable representation
+    '''
+
+    def __init__(self, hostname):
+        self.hostname = hostname
+
+    def _get_volumes(self):
+        '''Fill ``volumes`` from ``vos listvol -long``
+
+        Entries that cannot be parsed are logged and skipped.
+        '''
+        output = _run(["vos", "listvol", "-long", "-server", self.hostname])
+
+        # Each volume is reported as a header line containing "On-line"
+        # followed by further lines of detail, including its MaxQuota.
+        lines = io.StringIO(output)
+        for line in iter(lines.readline, ''):
+            if "On-line" not in line:
+                continue
+            # the header plus the 8 lines of status that follow it
+            chunk = line + ''.join(lines.readline() for _ in range(8))
+            # todo: there's a bunch more we could extract...
+            m = re.search(
+                r'^(?P<volume>[^\s]+)\s+(?P<id>\d+)\s(?P<perms>R[OW])\s+'
+                r'(?P<used>\d+) K', chunk)
+            q = re.search(r'MaxQuota\s+(?P<quota>\d+) K', chunk)
+            if not m or not q:
+                logging.debug("Skipping unparsed volume: %s" % line.strip())
+                continue
+            used = int(m['used'])
+            quota = int(q['quota'])
+            # MaxQuota 0 means "no limit" in OpenAFS; avoid dividing by it
+            percent_used = round(used / quota * 100, 2) if quota else 0.0
+            self.volumes.append(Volume(
+                m['volume'], m['id'], m['perms'], used, quota, percent_used))
+
+    def _get_calls_waiting(self):
+        '''Fill ``calls_waiting`` and ``idle_threads`` from ``rxdebug``'''
+        output = _run(
+            ["rxdebug", self.hostname, "7000", "-rxstats", "-noconns"])
+
+        for line in output.split('\n'):
+            m = re.search(r'(?P<waiting>\d+) calls waiting for a thread', line)
+            if m:
+                self.calls_waiting = int(m['waiting'])
+            m = re.search(r'(?P<idle>\d+) threads are idle', line)
+            if m:
+                self.idle_threads = int(m['idle'])
+
+    def _get_partition_stats(self):
+        '''Fill ``partitions`` from ``vos partinfo``'''
+        output = _run(["vos", "partinfo", self.hostname, "-noauth"])
+
+        for line in output.split('\n'):
+            m = re.search(
+                r'Free space on partition '
+                r'/vicep(?P<partition>[a-z][a-z]?): '
+                r'(?P<free>\d+) K blocks out of total (?P<total>\d+)', line)
+            if not m:
+                continue
+            part = 'vicep%s' % m['partition']
+            free = int(m['free'])
+            total = int(m['total'])
+            used = total - free
+            # guard the division in case a partition reports 0 total
+            percent_used = round(used / total * 100, 2) if total else 0.0
+            self.partitions.append(
+                Partition(part, used, free, total, percent_used))
+
+    def _get_fs_stats(self):
+        '''Set ``status`` (and ``restart``/``uptime``) from ``bos status``'''
+        try:
+            output = _run(
+                ["bos", "status", self.hostname, "-long", "-noauth"])
+        except subprocess.CalledProcessError:
+            logging.debug(" ... failed!")
+            self.status = FileServerStatus.NO_CONNECTION
+            return
+
+        # "temporarily disabled" is tested before plain "disabled"
+        # because the latter pattern is a substring of the former.
+        if re.search('currently running normally', output):
+            self.status = FileServerStatus.NORMAL
+            # \s+ rather than a single space so a space-padded day of
+            # the month (e.g. "Jun  1") still matches.
+            m = re.search(
+                r'last started at '
+                r'(?P<date>\w+\s+\w+\s+\w+\s+\d+:\d+:\d+\s+\d+)', output)
+            # leave restart/uptime as None if the date is not reported
+            if m:
+                self.restart = datetime.strptime(
+                    m['date'], '%a %b %d %H:%M:%S %Y')
+                self.uptime = self.timestamp - self.restart
+        elif re.search('temporarily disabled, currently shutdown', output):
+            self.status = FileServerStatus.TEMPORARILY_DISABLED
+        elif re.search('disabled, currently shutdown', output):
+            self.status = FileServerStatus.DISABLED
+        else:
+            logging.debug(output)
+            self.status = FileServerStatus.UNKNOWN
+
+    def get_stats(self):
+        '''Get the complete stats set for the fileserver'''
+        self.timestamp = datetime.now()
+
+        self.restart = None
+        self.uptime = None
+        self.partitions = []
+        self.volumes = []
+        self.calls_waiting = None
+        self.idle_threads = None
+
+        self._get_fs_stats()
+        if self.status == FileServerStatus.NORMAL:
+            self._get_partition_stats()
+            self._get_calls_waiting()
+            self._get_volumes()
+
+        self.table = PrettyTable()
+        self.table.field_names = ["Metric", "Value"]
+        self.table.align["Metric"] = "l"
+        self.table.align["Value"] = "l"
+        self.table.add_row(["Hostname", self.hostname])
+        self.table.add_row(["Timestamp", self.timestamp])
+        self.table.add_row(["Status", self.status])
+        self.table.add_row(["Uptime", self.uptime])
+        self.table.add_row(["Last Restart", self.restart])
+        self.table.add_row(["Calls Waiting", self.calls_waiting])
+        self.table.add_row(["Idle Threads", self.idle_threads])
+        for p in self.partitions:
+            n = "/%s" % p.partition
+            self.table.add_row(["%s used" % n, p.used])
+            self.table.add_row(["%s free" % n, p.free])
+            self.table.add_row(["%s total" % n, p.total])
+            self.table.add_row(["%s %%used" % n,
+                                "%s%%" % p.percent_used])
+        for v in self.volumes:
+            if v.perms == 'RW':
+                n = v.volume
+                self.table.add_row(["%s used" % n, v.used])
+                self.table.add_row(["%s quota" % n, v.quota])
+                self.table.add_row(["%s %%used" % n,
+                                    "%s%%" % v.percent_used])
+
+    def __str__(self):
+        return str(self.table)
+
+
+#
+# Volume
+#
+
+def get_fs_addresses(cell):
+    '''Get the fileservers associated with a cell
+
+    Returns:
+        list: the stripped, non-blank lines of ``vos listaddrs`` output;
+              empty if the command fails
+    '''
+    try:
+        output = _run(["vos", "listaddrs", "-noauth", "-cell", cell])
+    except subprocess.CalledProcessError:
+        logging.debug(" ... failed!")
+        return []
+
+    # strip each entry so stray whitespace never ends up in a hostname
+    return [line.strip() for line in output.split('\n') if line.strip()]
+
+
+def get_volumes(cell):
+    '''Get the volumes in a cell
+
+    Returns:
+        list: volume names from ``vos listvldb``; empty if the
+              command fails
+    '''
+    try:
+        output = _run(["vos", "listvldb", "-quiet", "-noauth",
+                       "-noresolve", "-nosort", "-cell", cell])
+    except subprocess.CalledProcessError:
+        logging.debug(" ... failed!")
+        return []
+
+    # details about the volumes are inset, so just look for non-blank lines
+    return [line.strip() for line in output.split('\n')
+            if line and not line.startswith(' ')]
 
 
 def main(args=None):
+    global config
     if args is None:
         args = sys.argv[1:]
 
@@ -23,6 +277,31 @@ def main(args=None):
     parser = argparse.ArgumentParser(
         description='An AFS monitoring tool')
 
-    opts = parser.parse_args(args)
+    parser.add_argument("config", help="Path to config file")
+    parser.add_argument("-d", '--debug', action="store_true")
+
+    args = parser.parse_args(args)
+
+    if args.debug:
+        logging.basicConfig(level=logging.DEBUG)
+        logging.debug("Debugging enabled")
+
+    config = configparser.RawConfigParser()
+    config.read(args.config)
+
+    cell = config.get('main', 'cell').strip()
+
+#    volumes = get_volumes(cell)
+#    logging.debug(volumes)
+
+    fileservers = get_fs_addresses(cell)
+    print(fileservers)
+
+    for fileserver in fileservers:
+        logging.debug("Finding stats for: %s" % fileserver)
+
+        fs = FileServerStats(fileserver)
+        fs.get_stats()
+        print(fs)
 
     sys.exit(0)
diff --git a/requirements.txt b/requirements.txt
index 7acf1b0..2d41b89 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@
 
 pbr!=2.1.0,>=2.0.0 # Apache-2.0
 Babel!=2.4.0,>=2.3.4 # BSD
+PrettyTable<0.8 # BSD
diff --git a/sample.cfg b/sample.cfg
new file mode 100644
index 0000000..8701eb4
--- /dev/null
+++ b/sample.cfg
@@ -0,0 +1,4 @@
+[main]
+cell = openstack.org
+
+