Change smoke tests to work with Alarmed Metric changes
Use Alarm Definitions and wait for the Threshold Engine to create the Alarm. Improved the purpose comment. Change-Id: Id6635197cbb2960540e510dd82b85633c8a2afba
This commit is contained in:
parent
22e0a1e48e
commit
514f542e4d
@ -16,15 +16,15 @@ def find_obj_for_name(object_json, name):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def find_alarm_by_name(alarm_name):
|
def find_alarm_definition_by_name(name):
|
||||||
alarm_json = run_mon_cli(['alarm-list'])
|
alarm_json = run_mon_cli(['alarm-definition-list'])
|
||||||
return find_obj_for_name(alarm_json, alarm_name)
|
return find_obj_for_name(alarm_json, name)
|
||||||
|
|
||||||
|
|
||||||
def delete_alarm_if_exists(alarm_name):
|
def delete_alarm_definition_if_exists(name):
|
||||||
alarm_json = find_alarm_by_name(alarm_name)
|
alarm_json = find_alarm_definition_by_name(name)
|
||||||
if alarm_json:
|
if alarm_json:
|
||||||
run_mon_cli(['alarm-delete', alarm_json['id']], useJson=False)
|
run_mon_cli(['alarm-definition-delete', alarm_json['id']], useJson=False)
|
||||||
|
|
||||||
|
|
||||||
def delete_notification_if_exists(notification_name):
|
def delete_notification_if_exists(notification_name):
|
||||||
@ -64,24 +64,24 @@ def get_alarm_state(alarm_id):
|
|||||||
return result_json['state']
|
return result_json['state']
|
||||||
|
|
||||||
|
|
||||||
def patch_alarm(alarm_id, what, value):
|
|
||||||
result_json = run_mon_cli(['alarm-patch', what, value, alarm_id])
|
|
||||||
return result_json
|
|
||||||
|
|
||||||
|
|
||||||
def change_alarm_state(alarm_id, new_state):
|
def change_alarm_state(alarm_id, new_state):
|
||||||
print('Changing Alarm state to %s' % new_state)
|
print('Changing Alarm state to %s' % new_state)
|
||||||
result_json = patch_alarm(alarm_id, '--state', new_state)
|
result_json = run_mon_cli(['alarm-patch', alarm_id, new_state])
|
||||||
if result_json['state'] != new_state:
|
if result_json['state'] != new_state:
|
||||||
print('Alarm patch failed, expected state of %s but was %s' %
|
print('Alarm patch failed, expected state of %s but was %s' %
|
||||||
(result_json['state'], new_state), file=sys.stderr)
|
(result_json['state'], new_state), file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
def create_alarm(name, expression, description=None, ok_notif_id=None,
|
def find_alarms_for_definition(alarm_definition_id):
|
||||||
alarm_notif_id=None,
|
result_json = run_mon_cli(['alarm-list', "--alarm-definition", alarm_definition_id])
|
||||||
undetermined_notif_id=None):
|
return [alarm['id'] for alarm in result_json]
|
||||||
args = ['alarm-create']
|
|
||||||
|
|
||||||
|
def create_alarm_definition(name, expression, description=None, ok_notif_id=None,
|
||||||
|
alarm_notif_id=None,
|
||||||
|
undetermined_notif_id=None):
|
||||||
|
args = ['alarm-definition-create']
|
||||||
add_argument_if_given(args, '--description', description)
|
add_argument_if_given(args, '--description', description)
|
||||||
add_argument_if_given(args, '--alarm-actions', alarm_notif_id)
|
add_argument_if_given(args, '--alarm-actions', alarm_notif_id)
|
||||||
add_argument_if_given(args, '--ok-actions', ok_notif_id)
|
add_argument_if_given(args, '--ok-actions', ok_notif_id)
|
||||||
@ -89,15 +89,14 @@ def create_alarm(name, expression, description=None, ok_notif_id=None,
|
|||||||
undetermined_notif_id)
|
undetermined_notif_id)
|
||||||
args.append(name)
|
args.append(name)
|
||||||
args.append(expression)
|
args.append(expression)
|
||||||
print('Creating alarm')
|
print('Creating alarm definition')
|
||||||
result_json = run_mon_cli(args)
|
result_json = run_mon_cli(args)
|
||||||
|
|
||||||
# Parse out id
|
# Parse out id
|
||||||
alarm_id = result_json['id']
|
return result_json['id']
|
||||||
return alarm_id
|
|
||||||
|
|
||||||
|
|
||||||
def add_argument_if_given(args, arg, value):
|
def add_argument_if_given(args, arg, value):
|
||||||
if value is not None:
|
if value is not None:
|
||||||
args.append(arg)
|
args.append(arg)
|
||||||
args.append(value)
|
args.append(value)
|
||||||
|
@ -1,16 +1,25 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
#
|
#
|
||||||
"""smoke
|
"""smoke
|
||||||
Runs a smoke test of the monitoring installation on mini-mon by ensuring
|
Runs a smoke test of the monitoring installation on mini-mon to ensure
|
||||||
metrics are flowing and creating a new notification, alarm and that the
|
the components (other than the UI) are functioning. The code tests these
|
||||||
Threshold Engine changes the state of the alarm. This requires the mon
|
components:
|
||||||
CLI and must be run on either the mini-mon VM for the single VM mode or
|
1. Agent - ensures metrics are being sent to API
|
||||||
|
2. API - ensures alarm definitions can be created, listed, etc. Ensures
|
||||||
|
metrics and alarms can be queried
|
||||||
|
3. CLI - used to talk to the API
|
||||||
|
4. Persister - ensures metrics and alarm history have been persisted
|
||||||
|
in the database because the API can query them
|
||||||
|
5. Threshold Engine - ensures alarms are created and change state
|
||||||
|
6. Notification Engine - ensures email notifications are sent to the
|
||||||
|
local system
|
||||||
|
This must be run on either the mini-mon VM for the single VM mode or
|
||||||
on the kafka VM in the multi VM mode.
|
on the kafka VM in the multi VM mode.
|
||||||
Get it by following the instructions on
|
|
||||||
https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
|
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
1. Add check of notification history when that is implemented
|
1. Add more logic to give ideas of why a particular step failed, for
|
||||||
|
example, alarm did not get created because metrics weren't being
|
||||||
|
received
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
@ -38,8 +47,8 @@ def get_metrics(name, dimensions, since):
|
|||||||
dimensions_arg, name, since])
|
dimensions_arg, name, since])
|
||||||
|
|
||||||
|
|
||||||
def cleanup(notification_name, alarm_name):
|
def cleanup(notification_name, alarm_definition_name):
|
||||||
cli_wrapper.delete_alarm_if_exists(alarm_name)
|
cli_wrapper.delete_alarm_definition_if_exists(alarm_definition_name)
|
||||||
cli_wrapper.delete_notification_if_exists(notification_name)
|
cli_wrapper.delete_notification_if_exists(notification_name)
|
||||||
|
|
||||||
|
|
||||||
@ -96,6 +105,23 @@ def ensure_at_least(actual, desired):
|
|||||||
time.sleep(desired - actual)
|
time.sleep(desired - actual)
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_alarm_creation(alarm_definition_id):
|
||||||
|
print('Waiting for alarm to be created for Alarm Definition %s' % alarm_definition_id)
|
||||||
|
for x in range(0, 30):
|
||||||
|
time.sleep(1)
|
||||||
|
alarms = cli_wrapper.find_alarms_for_definition(alarm_definition_id)
|
||||||
|
if len(alarms) == 1:
|
||||||
|
print('Alarm was created in %d seconds' % x)
|
||||||
|
return alarms[0]
|
||||||
|
elif len(alarms) > 1:
|
||||||
|
print('%d Alarms were created. Only expected 1' % len(alarms),
|
||||||
|
file=sys.stderr)
|
||||||
|
return None
|
||||||
|
print('Alarm was not created for Alarm Definition %s in %d seconds' % (alarm_definition_id, x),
|
||||||
|
file=sys.stderr)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if not utils.ensure_has_notification_engine():
|
if not utils.ensure_has_notification_engine():
|
||||||
return 1
|
return 1
|
||||||
@ -107,10 +133,10 @@ def main():
|
|||||||
|
|
||||||
notification_name = 'Monasca Smoke Test'
|
notification_name = 'Monasca Smoke Test'
|
||||||
notification_email_addr = 'root@' + mail_host
|
notification_email_addr = 'root@' + mail_host
|
||||||
alarm_name = 'high cpu and load'
|
alarm_definition_name = 'high cpu and load'
|
||||||
metric_name = 'cpu.load_avg_1_min'
|
metric_name = 'cpu.load_avg_1_min'
|
||||||
metric_dimensions = {'hostname': metric_host}
|
metric_dimensions = {'hostname': metric_host}
|
||||||
cleanup(notification_name, alarm_name)
|
cleanup(notification_name, alarm_definition_name)
|
||||||
|
|
||||||
# Query how many metrics there are for the Alarm
|
# Query how many metrics there are for the Alarm
|
||||||
hour_ago = datetime.datetime.now() - datetime.timedelta(hours=1)
|
hour_ago = datetime.datetime.now() - datetime.timedelta(hours=1)
|
||||||
@ -131,11 +157,17 @@ def main():
|
|||||||
'max(cpu.load_avg_1_min{hostname=' + metric_host + '}) > 0'
|
'max(cpu.load_avg_1_min{hostname=' + metric_host + '}) > 0'
|
||||||
description = 'System CPU Utilization exceeds 1% and ' + \
|
description = 'System CPU Utilization exceeds 1% and ' + \
|
||||||
'Load exceeds 3 per measurement period'
|
'Load exceeds 3 per measurement period'
|
||||||
alarm_id = cli_wrapper.create_alarm(alarm_name, expression,
|
alarm_definition_id = cli_wrapper.create_alarm_definition(alarm_definition_name, expression,
|
||||||
description=description,
|
description=description,
|
||||||
ok_notif_id=notification_id,
|
ok_notif_id=notification_id,
|
||||||
alarm_notif_id=notification_id,
|
alarm_notif_id=notification_id,
|
||||||
undetermined_notif_id=notification_id)
|
undetermined_notif_id=notification_id)
|
||||||
|
|
||||||
|
# Wait for an alarm to be created
|
||||||
|
alarm_id = wait_for_alarm_creation(alarm_definition_id)
|
||||||
|
if alarm_id is None:
|
||||||
|
return 1
|
||||||
|
|
||||||
# Ensure it is created in the right state
|
# Ensure it is created in the right state
|
||||||
initial_state = 'UNDETERMINED'
|
initial_state = 'UNDETERMINED'
|
||||||
if not utils.check_alarm_state(alarm_id, initial_state):
|
if not utils.check_alarm_state(alarm_id, initial_state):
|
||||||
|
Loading…
Reference in New Issue
Block a user