Initial framework
This commit is contained in:
parent
04b37aa1b7
commit
60cd36e5de
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,3 +1,7 @@
|
||||
*~
|
||||
*#
|
||||
.stestr/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
@ -13,9 +13,244 @@
|
||||
# under the License.
|
||||
|
||||
import argparse
|
||||
import configparser
|
||||
import collections
|
||||
import logging
|
||||
import io
|
||||
import sys
|
||||
import subprocess
|
||||
import re
|
||||
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from prettytable import PrettyTable
|
||||
|
||||
config = None
|
||||
|
||||
|
||||
#
|
||||
# Fileserver
|
||||
#
|
||||
|
||||
class FileServerStatus(Enum):
|
||||
NORMAL = 0
|
||||
TEMPORARY_DISABLED = 1
|
||||
DISABLED = 2
|
||||
UNKNOWN = 3
|
||||
NO_CONNECTION = 4
|
||||
|
||||
Partition = collections.namedtuple(
|
||||
'Partition', 'partition, used, free, total, percent_used')
|
||||
|
||||
Volume = collections.namedtuple(
|
||||
'Voume', 'volume, id, perms, used, quota, percent_used')
|
||||
|
||||
class FileServerStats:
|
||||
'''AFS fileserver status
|
||||
|
||||
Note most attributes are only set if ``status`` is NORMAL
|
||||
|
||||
Attributes:
|
||||
status (FileServerStatus): enum of possible status
|
||||
timestamp(:obj:`datetime.datetime`): time statistics retrieved
|
||||
restart (:obj:`datetime.datetime`): time of last restart
|
||||
uptime (:obj:`datetime.timedelta`): current uptime
|
||||
partitions (:obj:`list`): list of :obj:`Partition` tuples for each
|
||||
partition on the server
|
||||
calls_waiting (:obj:`int`): number of calls waiting for a thread
|
||||
idle_threads (:obj:`int`): number of currently idle threads
|
||||
volumes (:obj:`list`): list of :obj:`Volume` tuples for each
|
||||
volume present on the server
|
||||
table (:obj:`PrettyTable`): a printable PrettyTable representation
|
||||
'''
|
||||
|
||||
def _get_volumes(self):
|
||||
cmd = ["vos", "listvol", "-long", "-server", self.hostname]
|
||||
logging.debug("Running: %s" % cmd)
|
||||
output = subprocess.check_output(
|
||||
cmd, stderr=subprocess.STDOUT).decode('ascii')
|
||||
|
||||
# Read the output into chunks where each chunk is the info for
|
||||
# one volume.
|
||||
chunks = []
|
||||
lines = io.StringIO(output)
|
||||
while True:
|
||||
line = lines.readline()
|
||||
if not line:
|
||||
break
|
||||
chunk = ''
|
||||
if "On-line" in line: # chunks start with this
|
||||
chunk += line
|
||||
# read in the next 9 lines of status
|
||||
for i in range(8):
|
||||
chunk += lines.readline()
|
||||
# convert it to a Volume()
|
||||
# todo: there's a bunch more we could extract...
|
||||
m = re.search(
|
||||
'^(?P<volume>[^\s]+)\s+(?P<id>\d+)\s(?P<perms>R[OW])\s+(?P<used>\d+) K',
|
||||
chunk)
|
||||
q = re.search('MaxQuota\s+(?P<quota>\d+) K', chunk)
|
||||
used = int(m['used'])
|
||||
quota = int(q['quota'])
|
||||
percent_used = round(float(used) / float(quota) * 100, 2)
|
||||
self.volumes.append(Volume(
|
||||
m['volume'], m['id'], m['perms'], used, quota, percent_used))
|
||||
|
||||
|
||||
def _get_calls_waiting(self):
|
||||
cmd = ["rxdebug", self.hostname, "7000", "-rxstats", "-noconns"]
|
||||
logging.debug("Running: %s" % cmd)
|
||||
output = subprocess.check_output(
|
||||
cmd, stderr=subprocess.STDOUT).decode('ascii')
|
||||
|
||||
for line in output.split('\n'):
|
||||
m = re.search('(?P<waiting>\d+) calls waiting for a thread', line)
|
||||
if m:
|
||||
self.calls_waiting = int(m['waiting'])
|
||||
m = re.search('(?P<idle>\d+) threads are idle', line)
|
||||
if m:
|
||||
self.idle_threads = int(m['idle'])
|
||||
|
||||
def _get_partition_stats(self):
|
||||
cmd = ["vos", "partinfo", self.hostname, "-noauth"]
|
||||
logging.debug("Running: %s" % cmd)
|
||||
output = subprocess.check_output(
|
||||
cmd, stderr=subprocess.STDOUT).decode('ascii')
|
||||
|
||||
for line in output.split('\n'):
|
||||
m = re.search(
|
||||
'Free space on partition '
|
||||
'/vicep(?P<partition>[a-z][a-z]?): '
|
||||
'(?P<free>\d+) K blocks out of total (?P<total>\d+)', line)
|
||||
if m:
|
||||
part = 'vicep%s' % m['partition']
|
||||
# (used, free, total, %age)
|
||||
used = int(m['total']) - int(m['free'])
|
||||
self.partitions.append(
|
||||
Partition(part, used, int(m['free']), int(m['total']),
|
||||
round(float(used) / float(m['total']) * 100, 2)))
|
||||
|
||||
def _get_fs_stats(self):
|
||||
cmd = ["bos", "status", self.hostname, "-long", "-noauth"]
|
||||
logging.debug("Running: %s" % cmd)
|
||||
try:
|
||||
output = subprocess.check_output(
|
||||
cmd, stderr=subprocess.STDOUT).decode('ascii')
|
||||
except subprocess.CalledProcessError:
|
||||
logging.debug(" ... failed!")
|
||||
self.status = FileServerStatus.NO_CONNECTION
|
||||
return
|
||||
|
||||
if re.search('currently running normally', output):
|
||||
self.status = FileServerStatus.NORMAL
|
||||
m = re.search(
|
||||
r'last started at (?P<date>\w+ \w+ \w+ \d+:\d+:\d+ \d+)',
|
||||
output)
|
||||
self.restart = datetime.strptime(m['date'], '%a %b %d %H:%M:%S %Y')
|
||||
self.uptime = self.timestamp - self.restart
|
||||
|
||||
elif re.search('temporarily disabled, currently shutdown', output):
|
||||
self.status = FileServerStatus.TEMPORARILY_DISABLED
|
||||
elif re.search('disabled, currently shutdown', output):
|
||||
self.status = FileServerStatus.DISABLED
|
||||
else:
|
||||
logging.debug(output)
|
||||
self.status = FileServerStatus.UNKNOWN
|
||||
|
||||
def get_stats(self):
|
||||
'''Get the complete stats set for the fileserver'''
|
||||
self.timestamp = datetime.now()
|
||||
|
||||
self.restart = None
|
||||
self.uptime = None
|
||||
self.partitions = []
|
||||
self.volumes = []
|
||||
self.calls_waiting = None
|
||||
self.idle_threads = None
|
||||
|
||||
self._get_fs_stats()
|
||||
if self.status == FileServerStatus.NORMAL:
|
||||
self._get_partition_stats()
|
||||
self._get_calls_waiting()
|
||||
self._get_volumes()
|
||||
|
||||
self.table = PrettyTable()
|
||||
self.table.field_names = ["Metric", "Value"]
|
||||
self.table.align["Metric"] = "l"
|
||||
self.table.align["Value"] = "l"
|
||||
self.table.add_row(["Hostname", self.hostname])
|
||||
self.table.add_row(["Timestamp", self.timestamp])
|
||||
self.table.add_row(["Status", self.status])
|
||||
self.table.add_row(["Uptime", self.uptime])
|
||||
self.table.add_row(["Last Restart", self.restart])
|
||||
self.table.add_row(["Calls Waiting", self.calls_waiting])
|
||||
self.table.add_row(["Idle Threads", self.idle_threads])
|
||||
for p in self.partitions:
|
||||
n = "/%s" % p.partition
|
||||
self.table.add_row(["%s used" % n, p.used])
|
||||
self.table.add_row(["%s free" % n, p.free])
|
||||
self.table.add_row(["%s total" % n, p.total])
|
||||
self.table.add_row(["%s %%used" % n,
|
||||
"%s%%" % p.percent_used])
|
||||
for v in self.volumes:
|
||||
if v.perms == 'RW':
|
||||
n = v.volume
|
||||
self.table.add_row(["%s used" % n, v.used])
|
||||
self.table.add_row(["%s quota" % n, v.quota])
|
||||
self.table.add_row(["%s %%used" % n,
|
||||
"%s%%" % v.percent_used])
|
||||
|
||||
def __str__(self):
|
||||
return str(self.table)
|
||||
|
||||
def __init__(self, hostname):
|
||||
self.hostname = hostname
|
||||
|
||||
#
|
||||
# Volume
|
||||
#
|
||||
|
||||
def get_fs_addresses(cell):
|
||||
'''Get the fileservers associated with a cell'''
|
||||
fs = []
|
||||
cmd = ["vos", "listaddrs", "-noauth", "-cell", cell]
|
||||
logging.debug("Running: %s" % cmd)
|
||||
try:
|
||||
output = subprocess.check_output(
|
||||
cmd, stderr=subprocess.STDOUT).decode('ascii')
|
||||
except subprocess.CalledProcessError:
|
||||
logging.debug(" ... failed!")
|
||||
return []
|
||||
|
||||
for line in output.split('\n'):
|
||||
if line.strip():
|
||||
fs.append(line)
|
||||
|
||||
return fs
|
||||
|
||||
def get_volumes(cell):
|
||||
'''Get the volumes in a cell'''
|
||||
volumes = []
|
||||
cmd = ["vos", "listvldb", "-quiet", "-noauth",
|
||||
"-noresolve", "-nosort", "-cell", cell]
|
||||
logging.debug("Running: %s" % cmd)
|
||||
try:
|
||||
output = subprocess.check_output(
|
||||
cmd, stderr=subprocess.STDOUT).decode('ascii')
|
||||
except subprocess.CalledProcessError:
|
||||
logging.debug(" ... failed!")
|
||||
return []
|
||||
|
||||
# details about the volumes are inset, so just look for non-blank lines
|
||||
for line in output.split('\n'):
|
||||
if line and not line.startswith(' '):
|
||||
volumes.append(line.strip())
|
||||
|
||||
return volumes
|
||||
|
||||
|
||||
def main(args=None):
|
||||
global config
|
||||
|
||||
if args is None:
|
||||
args = sys.argv[1:]
|
||||
@ -23,6 +258,31 @@ def main(args=None):
|
||||
parser = argparse.ArgumentParser(
|
||||
description='An AFS monitoring tool')
|
||||
|
||||
opts = parser.parse_args(args)
|
||||
parser.add_argument("config", help="Path to config file")
|
||||
parser.add_argument("-d", '--debug', action="store_true")
|
||||
|
||||
args = parser.parse_args(args)
|
||||
|
||||
if args.debug:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logging.debug("Debugging enabled")
|
||||
|
||||
config = configparser.RawConfigParser()
|
||||
config.read(args.config)
|
||||
|
||||
cell = config.get('main', 'cell').strip()
|
||||
|
||||
# volumes = get_volumes(cell)
|
||||
# logging.debug(volumes)
|
||||
|
||||
fileservers = get_fs_addresses(cell)
|
||||
print(fileservers)
|
||||
|
||||
for fileserver in fileservers:
|
||||
logging.debug("Finding stats for: %s" % fileserver)
|
||||
|
||||
fs = FileServerStats(fileserver)
|
||||
fs.get_stats()
|
||||
print(fs)
|
||||
|
||||
sys.exit(0)
|
||||
|
@ -4,3 +4,4 @@
|
||||
|
||||
pbr!=2.1.0,>=2.0.0 # Apache-2.0
|
||||
Babel!=2.4.0,>=2.3.4 # BSD
|
||||
PrettyTable<0.8 # BSD
|
||||
|
4
sample.cfg
Normal file
4
sample.cfg
Normal file
@ -0,0 +1,4 @@
|
||||
[main]
|
||||
cell = openstack.org
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user