From 514f542e4d31478d4294c0d6e13777a9ff56cf7c Mon Sep 17 00:00:00 2001 From: Craig Bryant Date: Tue, 30 Sep 2014 11:34:45 -0600 Subject: [PATCH] Change smoke tests to work with Alarmed Metric changes Use Alarm Definitions and wait for Threshold Engine to create the Alarm. Improved the purpose comment Change-Id: Id6635197cbb2960540e510dd82b85633c8a2afba --- tests/cli_wrapper.py | 39 +++++++++++++-------------- tests/smoke.py | 64 +++++++++++++++++++++++++++++++++----------- 2 files changed, 67 insertions(+), 36 deletions(-) diff --git a/tests/cli_wrapper.py b/tests/cli_wrapper.py index 98bffeb..d5f4a2f 100644 --- a/tests/cli_wrapper.py +++ b/tests/cli_wrapper.py @@ -16,15 +16,15 @@ def find_obj_for_name(object_json, name): return None -def find_alarm_by_name(alarm_name): - alarm_json = run_mon_cli(['alarm-list']) - return find_obj_for_name(alarm_json, alarm_name) +def find_alarm_definition_by_name(name): + alarm_json = run_mon_cli(['alarm-definition-list']) + return find_obj_for_name(alarm_json, name) -def delete_alarm_if_exists(alarm_name): - alarm_json = find_alarm_by_name(alarm_name) +def delete_alarm_definition_if_exists(name): + alarm_json = find_alarm_definition_by_name(name) if alarm_json: - run_mon_cli(['alarm-delete', alarm_json['id']], useJson=False) + run_mon_cli(['alarm-definition-delete', alarm_json['id']], useJson=False) def delete_notification_if_exists(notification_name): @@ -64,24 +64,24 @@ def get_alarm_state(alarm_id): return result_json['state'] -def patch_alarm(alarm_id, what, value): - result_json = run_mon_cli(['alarm-patch', what, value, alarm_id]) - return result_json - - def change_alarm_state(alarm_id, new_state): print('Changing Alarm state to %s' % new_state) - result_json = patch_alarm(alarm_id, '--state', new_state) + result_json = run_mon_cli(['alarm-patch', alarm_id, new_state]) if result_json['state'] != new_state: print('Alarm patch failed, expected state of %s but was %s' % (result_json['state'], new_state), file=sys.stderr) return 1 -def create_alarm(name, expression, description=None, ok_notif_id=None, - alarm_notif_id=None, - undetermined_notif_id=None): - args = ['alarm-create'] +def find_alarms_for_definition(alarm_definition_id): + result_json = run_mon_cli(['alarm-list', "--alarm-definition", alarm_definition_id]) + return [alarm['id'] for alarm in result_json] + + +def create_alarm_definition(name, expression, description=None, ok_notif_id=None, + alarm_notif_id=None, + undetermined_notif_id=None): + args = ['alarm-definition-create'] add_argument_if_given(args, '--description', description) add_argument_if_given(args, '--alarm-actions', alarm_notif_id) add_argument_if_given(args, '--ok-actions', ok_notif_id) @@ -89,15 +89,14 @@ def create_alarm(name, expression, description=None, ok_notif_id=None, undetermined_notif_id) args.append(name) args.append(expression) - print('Creating alarm') + print('Creating alarm definition') result_json = run_mon_cli(args) # Parse out id - alarm_id = result_json['id'] - return alarm_id + return result_json['id'] def add_argument_if_given(args, arg, value): if value is not None: args.append(arg) - args.append(value) \ No newline at end of file + args.append(value) diff --git a/tests/smoke.py b/tests/smoke.py index 0e64da8..a04afb2 100755 --- a/tests/smoke.py +++ b/tests/smoke.py @@ -1,16 +1,25 @@ #!/usr/bin/env python # """smoke - Runs a smoke test of the monitoring installation on mini-mon by ensuring - metrics are flowing and creating a new notification, alarm and that the - Threshold Engine changes the state of the alarm. This requires the mon - CLI and must be run on either the mini-mon VM for the single VM mode or + Runs a smoke test of the monitoring installation on mini-mon to ensure + the components (other than the UI) are functioning. The code tests these + components: + 1. Agent - ensures metrics are being sent to API + 2. API - ensures alarm definitions can created, listed, etc. Ensure + metrics and alarms can be queried + 3. CLI - used to talk to the API + 4. Persister - ensures metrics and alarm history has been persisted + in database because API can query them + 5. Threshold Engine - ensures alarms are created and change state + 6. Notification Engine - ensures email notifications are sent to the + local system + This must be run on either the mini-mon VM for the single VM mode or on the kafka VM in the multi VM mode. - Get it by following the instructions on - https://wiki.hpcloud.net/display/iaas/Monitoring+CLI. TODO: - 1. Add check of notification history when that is implemented + 1. Add more logic to give ideas of why a particular step failed, for + example, alarm did not get created because metrics weren't being + received """ from __future__ import print_function @@ -38,8 +47,8 @@ def get_metrics(name, dimensions, since): dimensions_arg, name, since]) -def cleanup(notification_name, alarm_name): - cli_wrapper.delete_alarm_if_exists(alarm_name) +def cleanup(notification_name, alarm_definition_name): + cli_wrapper.delete_alarm_definition_if_exists(alarm_definition_name) cli_wrapper.delete_notification_if_exists(notification_name) @@ -96,6 +105,23 @@ def ensure_at_least(actual, desired): time.sleep(desired - actual) +def wait_for_alarm_creation(alarm_definition_id): + print('Waiting for alarm to be created for Alarm Definition %s' % alarm_definition_id) + for x in range(0, 30): + time.sleep(1) + alarms = cli_wrapper.find_alarms_for_definition(alarm_definition_id) + if len(alarms) == 1: + print('Alarm was created in %d seconds' % x) + return alarms[0] + elif len(alarms) > 1: + print('%d Alarms were created. Only expected 1' % len(alarms), + file=sys.stderr) + return None + print('Alarm was not created for Alarm Definition %s in %d seconds' % (alarm_definition_id, x), + file=sys.stderr) + return None + + def main(): if not utils.ensure_has_notification_engine(): return 1 @@ -107,10 +133,10 @@ def main(): notification_name = 'Monasca Smoke Test' notification_email_addr = 'root@' + mail_host - alarm_name = 'high cpu and load' + alarm_definition_name = 'high cpu and load' metric_name = 'cpu.load_avg_1_min' metric_dimensions = {'hostname': metric_host} - cleanup(notification_name, alarm_name) + cleanup(notification_name, alarm_definition_name) # Query how many metrics there are for the Alarm hour_ago = datetime.datetime.now() - datetime.timedelta(hours=1) @@ -131,11 +157,17 @@ def main(): 'max(cpu.load_avg_1_min{hostname=' + metric_host + '}) > 0' description = 'System CPU Utilization exceeds 1% and ' + \ 'Load exceeds 3 per measurement period' - alarm_id = cli_wrapper.create_alarm(alarm_name, expression, - description=description, - ok_notif_id=notification_id, - alarm_notif_id=notification_id, - undetermined_notif_id=notification_id) + alarm_definition_id = cli_wrapper.create_alarm_definition(alarm_definition_name, expression, + description=description, + ok_notif_id=notification_id, + alarm_notif_id=notification_id, + undetermined_notif_id=notification_id) + + # Wait for an alarm to be created + alarm_id = wait_for_alarm_creation(alarm_definition_id) + if alarm_id is None: + return 1 + # Ensure it is created in the right state initial_state = 'UNDETERMINED' if not utils.check_alarm_state(alarm_id, initial_state):