a0e09c0da4
This commit adds some basic functionality to rpc_maas_tool.py to nuke old alarms that should no longer exist. These include alarms containing: WSREP_CLUSTER_SIZE, WSREP_LOCAL_STATE_COMMENT, disk_free_alarm, and mem_alarm. We limit deletion to alarms that also include rabbit_mq_container or galera_container. We also delete any checks that were created without the filesystem patch included. Note that remove-defunct-checks should not be run against an icehouse installation, as the change to the check name was not backported there. Closes-Bug: #1416476 Change-Id: I3528f5712d5a2d6a15e718516f0dab9ea2c68c27
170 lines
5.3 KiB
Python
Executable File
170 lines
5.3 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
from rackspace_monitoring.drivers.rackspace import RackspaceMonitoringValidationError
|
|
from rackspace_monitoring.providers import get_driver
|
|
from rackspace_monitoring.types import Provider
|
|
|
|
import ConfigParser
|
|
import argparse
|
|
import re
|
|
import sys
|
|
|
|
|
|
def main(args):
    """Build a MaaS client and run the sub-command named by ``args.command``.

    Configuration is read from ``/root/.raxrc``.  When no client can be
    created the process prints a message and exits with status 1.  A command
    that has no handler is ignored.
    """
    config = ConfigParser.RawConfigParser()
    config.read('/root/.raxrc')

    conn = _get_conn(config, get_driver(Provider.RACKSPACE))
    if conn is None:
        print("Unable to get a client to MaaS, exiting")
        sys.exit(1)

    # Map each command-line verb onto the function implementing it.
    handlers = {
        'alarms': alarms,
        'check': check,
        'delete': delete,
        'remove-defunct-checks': remove_defunct_checks,
        'remove-defunct-alarms': remove_defunct_alarms,
    }
    handler = handlers.get(args.command)
    if handler is not None:
        handler(args, conn)
|
|
|
|
|
|
def alarms(args, conn):
    """Print the label of every alarm, grouped under its entity.

    Entities come from ``_get_entities(args, conn)``.  An entity for which
    ``conn.list_alarms`` returns nothing truthy produces no output at all.
    """
    for entity in _get_entities(args, conn):
        # Named differently from the function so the module-level ``alarms``
        # is not shadowed inside its own body.
        entity_alarms = conn.list_alarms(entity)
        if not entity_alarms:
            continue

        print('Entity %s (%s):' % (entity.id, entity.label))
        for alarm in entity_alarms:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(' - %s' % alarm.label)
|
|
|
|
|
|
def check(args, conn):
    """Test every existing check on each entity and report the failures.

    For each entity returned by ``_get_entities(args, conn)`` every check is
    run through ``conn.test_existing_check``.  A check is reported when the
    first result has ``available`` set to ``False`` or a ``status`` other
    than ``'okay'``.  The "Entity ..." header is printed at most once per
    entity, before its first reported problem.

    If testing a check raises ``RackspaceMonitoringValidationError`` the
    error is printed and the remaining checks of that entity are skipped.
    """
    for entity in _get_entities(args, conn):
        header_printed = False

        for chk in conn.list_checks(entity):
            try:
                result = conn.test_existing_check(chk)
            except RackspaceMonitoringValidationError as e:
                # Avoid repeating the header when an earlier check on this
                # entity has already been reported.
                if not header_printed:
                    print('Entity %s (%s):' % (entity.id, entity.label))
                print(' - %s' % e)
                break

            available = result[0]['available']
            status = result[0]['status']

            if available is not False and status == 'okay':
                continue

            if not header_printed:
                print('Entity %s (%s):' % (entity.id, entity.label))
                header_printed = True

            if available is False:
                print(' - Check %s (%s) did not run correctly' %
                      (chk.id, chk.label))
            else:
                # Reaching here means the check ran but status != 'okay'.
                print(" - Check %s (%s) ran correctly but returned a "
                      "'%s' status" % (chk.id, chk.label, status))
|
|
|
|
|
|
def delete(args, conn):
    """Delete every check on the selected entities and print how many.

    Unless ``args.force`` is set, the user must type ``from orbit`` at an
    interactive prompt; any other answer returns without deleting anything.
    Entities are selected by ``_get_entities(args, conn)``.
    """
    count = 0

    if args.force is False:
        print("*** Proceeding WILL delete ALL your checks (and data) ****")
        # raw_input: this script targets Python 2 (it imports ConfigParser).
        if raw_input("Type 'from orbit' to continue: ") != 'from orbit':
            return

    for entity in _get_entities(args, conn):
        for chk in conn.list_checks(entity):
            conn.delete_check(chk)
            count += 1

    print("Number of checks deleted: %s" % count)
|
|
|
|
|
|
def remove_defunct_checks(args, conn):
    """Delete checks whose label starts with ``filesystem--`` and count them.

    Only entities returned by ``_get_entities(args, conn)`` are examined.
    The number of deleted checks is printed at the end.

    NOTE: the change that introduced this command warns against running it
    on an icehouse installation, where the check-name change was not
    backported.
    """
    # Compiled once; match() anchors at the start of the label.
    defunct_label = re.compile('filesystem--.*')
    check_count = 0

    for entity in _get_entities(args, conn):
        for chk in conn.list_checks(entity):
            if defunct_label.match(chk.label):
                conn.delete_check(chk)
                check_count += 1

    print("Number of checks deleted: %s" % check_count)
|
|
|
|
|
|
def remove_defunct_alarms(args, conn):
    """Delete alarms that match a known defunct name/container pair.

    An alarm is defunct when its label starts with ``<alarm name>--`` and
    later contains the container name that alarm name is listed under in
    ``defunct_alarms``.  Each matching alarm is deleted exactly once, even
    if its label satisfies more than one pattern.  Only entities returned by
    ``_get_entities(args, conn)`` are examined, and the number of deleted
    alarms is printed at the end.
    """
    defunct_alarms = {'rabbit_mq_container': ['disk_free_alarm', 'mem_alarm'],
                      'galera_container': ['WSREP_CLUSTER_SIZE',
                                           'WSREP_LOCAL_STATE_COMMENT']}

    # Build the patterns once instead of re-formatting them for every alarm.
    patterns = [re.compile('%s--.*%s' % (defunct_alarm, container))
                for container, names in defunct_alarms.items()
                for defunct_alarm in names]

    alarm_count = 0
    for entity in _get_entities(args, conn):
        for alarm in conn.list_alarms(entity):
            # any() stops at the first hit, so one alarm can never be
            # deleted (or counted) more than once.
            if any(pattern.match(alarm.label) for pattern in patterns):
                conn.delete_alarm(alarm)
                alarm_count += 1

    print("Number of alarms deleted: %s" % alarm_count)
|
|
|
|
|
|
def _get_conn(config, driver):
|
|
conn = None
|
|
|
|
if config.has_section('credentials'):
|
|
try:
|
|
user = config.get('credentials', 'username')
|
|
api_key = config.get('credentials', 'api_key')
|
|
except Exception as e:
|
|
print e
|
|
else:
|
|
conn = driver(user, api_key)
|
|
if not conn and config.has_section('api'):
|
|
try:
|
|
url = config.get('api', 'url')
|
|
token = config.get('api', 'token')
|
|
except Exception as e:
|
|
print e
|
|
else:
|
|
conn = driver(None, None, ex_force_base_url=url,
|
|
ex_force_auth_token=token)
|
|
|
|
return conn
|
|
|
|
|
|
def _get_entities(args, conn):
|
|
entities = []
|
|
|
|
for entity in conn.list_entities():
|
|
if args.prefix is None or args.prefix in entity.label:
|
|
entities.append(entity)
|
|
|
|
return entities
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser(description='Test MaaS checks')
    parser.add_argument('command',
                        type=str,
                        choices=['alarms', 'check', 'delete',
                                 'remove-defunct-checks',
                                 'remove-defunct-alarms'],
                        help='Command to execute')
    # The original statement ended in a stray comma, which wrapped the call
    # in a discarded one-element tuple.
    parser.add_argument('--force',
                        action="store_true",
                        help='Do stuff irrespective of consequence')
    parser.add_argument('--prefix',
                        type=str,
                        help='Limit testing to checks on entities labelled w/ '
                             'this prefix',
                        default=None)
    args = parser.parse_args()

    main(args)
|