Change smoke tests to work with Alarmed Metric changes

Use Alarm Definitions and wait for Threshold Engine to create the
Alarm. Improved the purpose comment

Change-Id: Id6635197cbb2960540e510dd82b85633c8a2afba
This commit is contained in:
Craig Bryant 2014-09-30 11:34:45 -06:00
parent 22e0a1e48e
commit 514f542e4d
2 changed files with 67 additions and 36 deletions

View File

@ -16,15 +16,15 @@ def find_obj_for_name(object_json, name):
return None return None
def find_alarm_by_name(alarm_name): def find_alarm_definition_by_name(name):
alarm_json = run_mon_cli(['alarm-list']) alarm_json = run_mon_cli(['alarm-definition-list'])
return find_obj_for_name(alarm_json, alarm_name) return find_obj_for_name(alarm_json, name)
def delete_alarm_if_exists(alarm_name): def delete_alarm_definition_if_exists(name):
alarm_json = find_alarm_by_name(alarm_name) alarm_json = find_alarm_definition_by_name(name)
if alarm_json: if alarm_json:
run_mon_cli(['alarm-delete', alarm_json['id']], useJson=False) run_mon_cli(['alarm-definition-delete', alarm_json['id']], useJson=False)
def delete_notification_if_exists(notification_name): def delete_notification_if_exists(notification_name):
@ -64,24 +64,24 @@ def get_alarm_state(alarm_id):
return result_json['state'] return result_json['state']
def patch_alarm(alarm_id, what, value):
result_json = run_mon_cli(['alarm-patch', what, value, alarm_id])
return result_json
def change_alarm_state(alarm_id, new_state): def change_alarm_state(alarm_id, new_state):
print('Changing Alarm state to %s' % new_state) print('Changing Alarm state to %s' % new_state)
result_json = patch_alarm(alarm_id, '--state', new_state) result_json = run_mon_cli(['alarm-patch', alarm_id, new_state])
if result_json['state'] != new_state: if result_json['state'] != new_state:
print('Alarm patch failed, expected state of %s but was %s' % print('Alarm patch failed, expected state of %s but was %s' %
(result_json['state'], new_state), file=sys.stderr) (result_json['state'], new_state), file=sys.stderr)
return 1 return 1
def create_alarm(name, expression, description=None, ok_notif_id=None, def find_alarms_for_definition(alarm_definition_id):
alarm_notif_id=None, result_json = run_mon_cli(['alarm-list', "--alarm-definition", alarm_definition_id])
undetermined_notif_id=None): return [alarm['id'] for alarm in result_json]
args = ['alarm-create']
def create_alarm_definition(name, expression, description=None, ok_notif_id=None,
alarm_notif_id=None,
undetermined_notif_id=None):
args = ['alarm-definition-create']
add_argument_if_given(args, '--description', description) add_argument_if_given(args, '--description', description)
add_argument_if_given(args, '--alarm-actions', alarm_notif_id) add_argument_if_given(args, '--alarm-actions', alarm_notif_id)
add_argument_if_given(args, '--ok-actions', ok_notif_id) add_argument_if_given(args, '--ok-actions', ok_notif_id)
@ -89,15 +89,14 @@ def create_alarm(name, expression, description=None, ok_notif_id=None,
undetermined_notif_id) undetermined_notif_id)
args.append(name) args.append(name)
args.append(expression) args.append(expression)
print('Creating alarm') print('Creating alarm definition')
result_json = run_mon_cli(args) result_json = run_mon_cli(args)
# Parse out id # Parse out id
alarm_id = result_json['id'] return result_json['id']
return alarm_id
def add_argument_if_given(args, arg, value): def add_argument_if_given(args, arg, value):
if value is not None: if value is not None:
args.append(arg) args.append(arg)
args.append(value) args.append(value)

View File

@ -1,16 +1,25 @@
#!/usr/bin/env python #!/usr/bin/env python
# #
"""smoke """smoke
Runs a smoke test of the monitoring installation on mini-mon by ensuring Runs a smoke test of the monitoring installation on mini-mon to ensure
metrics are flowing and creating a new notification, alarm and that the the components (other than the UI) are functioning. The code tests these
Threshold Engine changes the state of the alarm. This requires the mon components:
CLI and must be run on either the mini-mon VM for the single VM mode or 1. Agent - ensures metrics are being sent to API
2. API - ensures alarm definitions can be created, listed, etc. Ensures
metrics and alarms can be queried
3. CLI - used to talk to the API
4. Persister - ensures metrics and alarm history have been persisted
in the database because the API can query them
5. Threshold Engine - ensures alarms are created and change state
6. Notification Engine - ensures email notifications are sent to the
local system
This must be run on either the mini-mon VM for the single VM mode or
on the kafka VM in the multi VM mode. on the kafka VM in the multi VM mode.
Get it by following the instructions on
https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
TODO: TODO:
1. Add check of notification history when that is implemented 1. Add more logic to give ideas of why a particular step failed, for
example, alarm did not get created because metrics weren't being
received
""" """
from __future__ import print_function from __future__ import print_function
@ -38,8 +47,8 @@ def get_metrics(name, dimensions, since):
dimensions_arg, name, since]) dimensions_arg, name, since])
def cleanup(notification_name, alarm_name): def cleanup(notification_name, alarm_definition_name):
cli_wrapper.delete_alarm_if_exists(alarm_name) cli_wrapper.delete_alarm_definition_if_exists(alarm_definition_name)
cli_wrapper.delete_notification_if_exists(notification_name) cli_wrapper.delete_notification_if_exists(notification_name)
@ -96,6 +105,23 @@ def ensure_at_least(actual, desired):
time.sleep(desired - actual) time.sleep(desired - actual)
def wait_for_alarm_creation(alarm_definition_id, timeout=30):
    """Poll once per second until one alarm exists for an Alarm Definition.

    Alarms are not created directly by this test; after the Alarm
    Definition is created the test waits for an alarm to show up for it.

    Args:
        alarm_definition_id: id of the Alarm Definition to look up alarms
            for.
        timeout: maximum number of one-second polls before giving up.

    Returns:
        The id of the single alarm found, or None if more than one alarm
        was found or none appeared within ``timeout`` seconds. Failures
        are reported on stderr.
    """
    print('Waiting for alarm to be created for Alarm Definition %s' %
          alarm_definition_id)
    # Count from 1 so the reported figure matches the seconds actually
    # slept (the sleep happens before each poll).
    for elapsed in range(1, timeout + 1):
        time.sleep(1)
        alarms = cli_wrapper.find_alarms_for_definition(alarm_definition_id)
        if len(alarms) == 1:
            print('Alarm was created in %d seconds' % elapsed)
            return alarms[0]
        if len(alarms) > 1:
            print('%d Alarms were created. Only expected 1' % len(alarms),
                  file=sys.stderr)
            return None
    # Report the full wait time rather than the last loop index.
    print('Alarm was not created for Alarm Definition %s in %d seconds' %
          (alarm_definition_id, timeout), file=sys.stderr)
    return None
def main(): def main():
if not utils.ensure_has_notification_engine(): if not utils.ensure_has_notification_engine():
return 1 return 1
@ -107,10 +133,10 @@ def main():
notification_name = 'Monasca Smoke Test' notification_name = 'Monasca Smoke Test'
notification_email_addr = 'root@' + mail_host notification_email_addr = 'root@' + mail_host
alarm_name = 'high cpu and load' alarm_definition_name = 'high cpu and load'
metric_name = 'cpu.load_avg_1_min' metric_name = 'cpu.load_avg_1_min'
metric_dimensions = {'hostname': metric_host} metric_dimensions = {'hostname': metric_host}
cleanup(notification_name, alarm_name) cleanup(notification_name, alarm_definition_name)
# Query how many metrics there are for the Alarm # Query how many metrics there are for the Alarm
hour_ago = datetime.datetime.now() - datetime.timedelta(hours=1) hour_ago = datetime.datetime.now() - datetime.timedelta(hours=1)
@ -131,11 +157,17 @@ def main():
'max(cpu.load_avg_1_min{hostname=' + metric_host + '}) > 0' 'max(cpu.load_avg_1_min{hostname=' + metric_host + '}) > 0'
description = 'System CPU Utilization exceeds 1% and ' + \ description = 'System CPU Utilization exceeds 1% and ' + \
'Load exceeds 3 per measurement period' 'Load exceeds 3 per measurement period'
alarm_id = cli_wrapper.create_alarm(alarm_name, expression, alarm_definition_id = cli_wrapper.create_alarm_definition(alarm_definition_name, expression,
description=description, description=description,
ok_notif_id=notification_id, ok_notif_id=notification_id,
alarm_notif_id=notification_id, alarm_notif_id=notification_id,
undetermined_notif_id=notification_id) undetermined_notif_id=notification_id)
# Wait for an alarm to be created
alarm_id = wait_for_alarm_creation(alarm_definition_id)
if alarm_id is None:
return 1
# Ensure it is created in the right state # Ensure it is created in the right state
initial_state = 'UNDETERMINED' initial_state = 'UNDETERMINED'
if not utils.check_alarm_state(alarm_id, initial_state): if not utils.check_alarm_state(alarm_id, initial_state):