Merge branch 'master' of github.com:hpcloud-mon/mon-vagrant
This commit is contained in:
commit
25e15e67fc
98
tests/measurement_test.py
Normal file
98
tests/measurement_test.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
"""measurements
|
||||||
|
"""
|
||||||
|
from __future__ import print_function
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import pytz
|
||||||
|
from datetime import datetime
|
||||||
|
from monclient import client
|
||||||
|
import monclient.exc as exc
|
||||||
|
|
||||||
|
mon_client = None
|
||||||
|
|
||||||
|
def call_mon_api(method, fields):
    """Invoke a monclient API method and return its response.

    :param method: client callable to invoke, e.g.
        ``mon_client.metrics.create``
    :param fields: dict expanded into keyword arguments for ``method``
    :returns: whatever ``method`` returns
    :raises SystemExit: with status 1 when ``method`` raises
        ``exc.HTTPException``; the error's code and message are printed
        first
    """
    try:
        return method(**fields)
    except exc.HTTPException as he:
        # Report on stderr like every other failure message in this
        # script, so stdout only carries progress output.
        print(he.code, file=sys.stderr)
        print(he.message, file=sys.stderr)
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def create_timestamp(seconds):
    """Format an epoch time as a UTC timestamp string.

    :param seconds: seconds since the epoch
    :returns: the time rendered with the ``%Y-%m-%dT%H:%M:%S%z`` layout
    """
    naive_utc = datetime.utcfromtimestamp(seconds)
    # Attach the UTC zone so that %z has an offset to render.
    aware_utc = pytz.utc.localize(naive_utc)
    return aware_utc.strftime("%Y-%m-%dT%H:%M:%S%z")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Send measurements for one metric and verify the API returns them.

    Usage: ``measurement_test.py metric_name count``.

    Posts ``count`` measurements whose values are ``0 .. count-1``, then
    polls the measurement list (up to 30 attempts, one second apart) until
    at least ``count`` measurements are returned, and finally compares each
    returned value with the expected one.

    :returns: 0 on success; 1 on a usage error, when no or the wrong number
        of measurements come back, or when any value does not match
    """
    usage = 'usage: %s metric_name count' % sys.argv[0]
    # Both positional arguments are required; validate them before any API
    # traffic so a bad command line fails fast with the usage text.
    if len(sys.argv) < 3:
        print(usage, file=sys.stderr)
        return 1
    metric_name = sys.argv[1]
    try:
        num_metrics_to_send = int(sys.argv[2])
    except ValueError:
        print(usage, file=sys.stderr)
        return 1

    api_version = '2_0'
    endpoint = 'http://192.168.10.4:8080/v2.0'
    kwargs = {
        'token': '82510970543135'
    }
    mon_client = client.Client(api_version, endpoint, **kwargs)

    metric_start_time = time.time()
    dimensions = {'Test_Send': 'Number_1'}  # Should be arg
    start_time = time.time()
    fields = {'name': metric_name}
    fields['dimensions'] = dimensions
    for val in range(0, num_metrics_to_send):
        fields['value'] = str(val)
        fields['timestamp'] = time.time()
        call_mon_api(mon_client.metrics.create, fields)

    print("Took %d seconds to send %d measurements" %
          ((time.time() - start_time), num_metrics_to_send))
    metric_end_time = time.time()
    # API requires end time to be greater than start time
    if (metric_end_time - metric_start_time) < 1:
        metric_end_time = metric_start_time + 1
    fields = {'name': metric_name}
    fields['dimensions'] = dimensions
    fields['start_time'] = create_timestamp(metric_start_time)
    fields['end_time'] = create_timestamp(metric_end_time)

    # Poll until everything that was sent is visible through the API.
    for i in range(0, 30):
        result = call_mon_api(mon_client.metrics.list_measurements, fields)
        if len(result) > 0:
            measurements = result[0]['measurements']
            if len(measurements) >= num_metrics_to_send:
                break
            print('Found %d of %d metrics so far' %
                  (len(measurements), num_metrics_to_send))
        time.sleep(1)

    if len(result) == 0:
        print('Did not receive any metrics in %d seconds' % i,
              file=sys.stderr)
        return 1

    if len(measurements) != num_metrics_to_send:
        print('Expected %d measurements but found %d' %
              (num_metrics_to_send, len(measurements)), file=sys.stderr)
        return 1
    print('Took %d seconds for metrics to fully arrive' % i)

    # The check walks the list expecting the highest value first, i.e. the
    # measurements are compared in reverse order of sending.
    result = 0
    for index, value in enumerate(measurements):
        expected = num_metrics_to_send - 1 - index
        if value[2] != expected:
            print('Expected %d but found %d for %d' %
                  (expected, value[2], index), file=sys.stderr)
            # A mismatch must fail the run, not just be logged.
            result = 1
    return result
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
118
tests/notification_cycleTest.py
Normal file
118
tests/notification_cycleTest.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
"""Notification Engine Test
|
||||||
|
Cycle the state of an Alarm the given number of times
|
||||||
|
"""
|
||||||
|
from __future__ import print_function
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
from monclient import client
|
||||||
|
import monclient.exc as exc
|
||||||
|
|
||||||
|
mon_client = None
|
||||||
|
|
||||||
|
def call_mon_api(method, fields):
    """Invoke a monclient API method and return its response.

    :param method: client callable to invoke, e.g.
        ``mon_client.alarms.patch``
    :param fields: dict expanded into keyword arguments for ``method``
    :returns: whatever ``method`` returns
    :raises SystemExit: with status 1 when ``method`` raises
        ``exc.HTTPException``; the error's code and message are printed
        first
    """
    try:
        return method(**fields)
    except exc.HTTPException as he:
        # Report on stderr like every other failure message in this
        # script, so stdout only carries progress output.
        print(he.code, file=sys.stderr)
        print(he.message, file=sys.stderr)
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def find_alarm_id():
    """Return the id of the first alarm the API lists.

    :returns: the ``'id'`` of the first listed alarm, or None (after
        printing a hint to stderr) when the list is empty
    """
    alarms = call_mon_api(mon_client.alarms.list, {})
    if len(alarms) > 0:
        return alarms[0]['id']
    print('No existing alarms, create one and rerun test', file=sys.stderr)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_alarm_state(alarm_id):
    """Fetch the alarm with the given id and return its ``'state'`` field."""
    query = {'alarm_id': alarm_id}
    alarm = call_mon_api(mon_client.alarms.get, query)
    return alarm['state']
|
||||||
|
|
||||||
|
|
||||||
|
def find_notifications(alarm_id):
    """Return the states of the notifications mailed to root for an alarm.

    Runs ``sudo grep <alarm_id> /var/mail/root``, parses every matching
    line as JSON and collects its ``'state'`` value.

    :param alarm_id: alarm id to search the mailbox for
    :returns: list of state values in mailbox order; empty when no line
        matches
    :raises SystemExit: with status 1 when the command fails for any reason
        other than "no match"
    """
    args = ['sudo', 'grep', alarm_id, '/var/mail/root']
    try:
        stdout = subprocess.check_output(args)
    except subprocess.CalledProcessError as e:
        # grep exits with status 1 when no line was selected. That is the
        # normal situation for an alarm that has not sent any mail yet, so
        # it must yield an empty list rather than abort the whole test.
        # NOTE(review): sudo can also exit 1 for its own failures; such a
        # case now surfaces later as "notifications missing" - confirm
        # that is acceptable.
        if e.returncode == 1:
            return []
        print(e, file=sys.stderr)
        sys.exit(1)
    return [json.loads(line)['state'] for line in stdout.splitlines()]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Cycle an alarm's state and check a notification arrives per change.

    Usage: ``notification_cycleTest.py count [alarm-id]``.

    Flips the alarm between 'OK' and 'ALARM' ``count * 2`` times through
    the API, confirming each change with a read-back, then polls the root
    mailbox (up to 30 attempts, one second apart) until that many new
    notifications for the alarm have shown up.  When no alarm id is given
    the first alarm listed by the API is used.

    :returns: 0 on success; 1 on a usage error, when no alarm exists, when
        a state change does not take effect, or when notifications are
        missing
    """
    global mon_client

    usage = 'usage: %s count [alarm-id]' % sys.argv[0]
    if len(sys.argv) == 1:
        print(usage, file=sys.stderr)
        return 1
    # Validate the count before any API traffic so a bad command line
    # fails fast with the usage text instead of a traceback.
    try:
        num_cycles = int(sys.argv[1])
    except ValueError:
        print(usage, file=sys.stderr)
        return 1

    api_version = '2_0'
    endpoint = 'http://192.168.10.4:8080/v2.0'
    kwargs = {
        'token': '82510970543135'
    }
    # The helper functions read the module-level client.
    mon_client = client.Client(api_version, endpoint, **kwargs)

    if len(sys.argv) > 2:
        alarm_id = sys.argv[2]
    else:
        alarm_id = find_alarm_id()
        if alarm_id is None:
            return 1

    start_time = time.time()
    state = get_alarm_state(alarm_id)
    fields = {'alarm_id': alarm_id}

    # Baseline: notifications already in the mailbox before this run.
    existing_count = len(find_notifications(alarm_id))
    notifications_sent = num_cycles * 2
    for _ in range(0, notifications_sent):
        state = 'ALARM' if state == 'OK' else 'OK'
        fields['state'] = state
        call_mon_api(mon_client.alarms.patch, fields)
        new_state = get_alarm_state(alarm_id)
        if new_state != state:
            print('Expected new state %s but found %s' %
                  (state, new_state), file=sys.stderr)
            return 1

    print("Took %d seconds to send %d alarm state changes" %
          ((time.time() - start_time), num_cycles * 2))

    for i in range(0, 30):
        notifications_found = len(find_notifications(alarm_id)) - existing_count
        if notifications_found >= notifications_sent:
            break
        print('Found %d of %d expected notifications so far' %
              (notifications_found, notifications_sent))
        time.sleep(1)

    if notifications_found < notifications_sent:
        print('Expected %d notifications but found %d' %
              (notifications_sent, notifications_found), file=sys.stderr)
        return 1

    print('Took %d seconds for notifications to fully arrive' % i)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
252
tests/smoke.py
252
tests/smoke.py
@ -1,16 +1,20 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
#
|
#
|
||||||
"""smoke
|
"""smoke
|
||||||
Runs a smoke test of the jahmon installation on mini-mon by ensuring metrics are flowing and creating a new
|
Runs a smoke test of the jahmon installation on mini-mon by ensuring
|
||||||
notification, alarm and that the Threshold Engine changes the state of the alarm.
|
metrics are flowing and creating a new notification, alarm and that the
|
||||||
This requires the mon CLI and must be run on one of the mini-mon VMs. Tested running on kafka VM.
|
Threshold Engine changes the state of the alarm. This requires the mon
|
||||||
Get it by following the instructions on https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
|
CLI and must be run on one of the mini-mon VMs. Tested running on kafka VM.
|
||||||
If you want to see the notification, you must install postfix on the kakfa VM, configure it to be local, and
|
Get it by following the instructions on
|
||||||
modify /etc/mon/notification.yaml to use localhost for the email server, then restart
|
https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
|
||||||
|
If you want to see the notification, you must install postfix on the kafka
|
||||||
|
VM, configure it to be local, and modify /etc/mon/notification.yaml to use
|
||||||
|
localhost for the email server, then restart
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
1. Add check of notification history when that is implemented
|
1. Add check of notification history when that is implemented
|
||||||
2. Add check of mail getting to root when postfix is added mini-mon. This script will have to run on the kafka VM
|
2. Add check of mail getting to root when postfix is added mini-mon.
|
||||||
|
This script will have to run on the kafka VM
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
@ -25,92 +29,120 @@ import time
|
|||||||
# export OS_NO_CLIENT_AUTH=1
|
# export OS_NO_CLIENT_AUTH=1
|
||||||
# export MON_API_URL=http://192.168.10.4:8080/v2.0/
|
# export MON_API_URL=http://192.168.10.4:8080/v2.0/
|
||||||
|
|
||||||
os.environ["OS_AUTH_TOKEN"] = "82510970543135"
|
os.environ['OS_AUTH_TOKEN'] = '82510970543135'
|
||||||
os.environ["OS_NO_CLIENT_AUTH"] = "1"
|
os.environ['OS_NO_CLIENT_AUTH'] = '1'
|
||||||
os.environ["MON_API_URL"] = "http://192.168.10.4:8080/v2.0/"
|
os.environ['MON_API_URL'] = 'http://192.168.10.4:8080/v2.0/'
|
||||||
|
|
||||||
|
|
||||||
|
def change_alarm_state(alarm_id, new_state):
    """Patch an alarm to ``new_state`` through the mon CLI.

    :param alarm_id: id of the alarm to patch
    :param new_state: state to set, passed to ``alarm-patch --state``
    :returns: 1 (after printing to stderr) when the state in the CLI's
        reply differs from ``new_state``; None otherwise
    """
    print('Changing Alarm state to %s' % new_state)
    result_json = run_mon_cli(['alarm-patch', '--state', new_state, alarm_id])
    if result_json['state'] != new_state:
        # Requested state fills the "expected" slot, the state the CLI
        # reported fills the "but was" slot.
        print('Alarm patch failed, expected state of %s but was %s' %
              (new_state, result_json['state']), file=sys.stderr)
        return 1
|
||||||
|
|
||||||
|
|
||||||
def get_alarm_state(alarm_id):
|
def get_alarm_state(alarm_id):
|
||||||
stdout = run_mon_cli(["mon", "--json", "alarm-show", alarm_id])
|
result_json = run_mon_cli(['alarm-show', alarm_id])
|
||||||
response_json = json.loads(stdout)
|
return result_json['state']
|
||||||
return response_json['state']
|
|
||||||
|
|
||||||
def check_alarm_history(alarm_id):
|
|
||||||
|
def check_alarm_history(alarm_id, states):
|
||||||
|
transitions = len(states) - 1
|
||||||
print('Checking Alarm History')
|
print('Checking Alarm History')
|
||||||
# Make take a little bit of time for Alarm history to flow all the way through
|
# May take some time for Alarm history to flow all the way through
|
||||||
for x in range(0, 10):
|
for _ in range(0, 10):
|
||||||
stdout = run_mon_cli(["mon", "--json", "alarm-history", alarm_id])
|
result_json = run_mon_cli(['alarm-history', alarm_id])
|
||||||
response_json = json.loads(stdout)
|
if len(result_json) >= transitions:
|
||||||
if len(response_json) > 0:
|
|
||||||
break
|
break
|
||||||
time.sleep(4)
|
time.sleep(4)
|
||||||
|
|
||||||
result = True
|
result = True
|
||||||
if not check_expected(1, len(response_json), 'number of history entries'):
|
if not check_expected(transitions, len(result_json),
|
||||||
|
'number of history entries'):
|
||||||
return False
|
return False
|
||||||
alarm_json = response_json[0]
|
result_json.sort(key=lambda x: x['timestamp'])
|
||||||
if not check_expected('UNDETERMINED', alarm_json['old_state'], 'old_state'):
|
for i in range(0, transitions):
|
||||||
result = False
|
old_state = states[i]
|
||||||
if not check_expected('ALARM', alarm_json['new_state'], 'new_state'):
|
new_state = states[i+1]
|
||||||
result = False
|
alarm_json = result_json[i]
|
||||||
if not check_expected(alarm_id, alarm_json['alarm_id'], 'alarm_id'):
|
if not check_expected(old_state, alarm_json['old_state'], 'old_state'):
|
||||||
result = False
|
result = False
|
||||||
|
if not check_expected(new_state, alarm_json['new_state'], 'new_state'):
|
||||||
|
result = False
|
||||||
|
if not check_expected(alarm_id, alarm_json['alarm_id'], 'alarm_id'):
|
||||||
|
result = False
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
print("Alarm History is OK")
|
print('Alarm History is OK')
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def check_expected(expected, actual, what):
|
def check_expected(expected, actual, what):
|
||||||
if (expected == actual):
|
if (expected == actual):
|
||||||
return True
|
return True
|
||||||
print("Incorrect value for alarm history " + what + " expected '" + str(expected) + "' but was '" + str(actual) + "'", file=sys.stderr)
|
print("Incorrect value for alarm history %s expected '%s' but was '%s'" %
|
||||||
|
(what, str(expected), str(actual)), file=sys.stderr)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def create_alarm(name, expression, notification_method_id, description=None):
|
def create_alarm(name, expression, notification_method_id, description=None):
|
||||||
args = ["mon", "alarm-create"]
|
args = ['alarm-create']
|
||||||
if (description):
|
if (description):
|
||||||
args.append("--description")
|
args.append('--description')
|
||||||
args.append(description)
|
args.append(description)
|
||||||
args.append("--alarm-actions")
|
args.append('--alarm-actions')
|
||||||
args.append(notification_method_id)
|
args.append(notification_method_id)
|
||||||
args.append("--ok-actions")
|
args.append('--ok-actions')
|
||||||
args.append(notification_method_id)
|
args.append(notification_method_id)
|
||||||
args.append("--undetermined-actions")
|
args.append('--undetermined-actions')
|
||||||
args.append(notification_method_id)
|
args.append(notification_method_id)
|
||||||
args.append(name)
|
args.append(name)
|
||||||
args.append(expression)
|
args.append(expression)
|
||||||
print("Creating alarm")
|
print('Creating alarm')
|
||||||
stdout = run_mon_cli(args)
|
result_json = run_mon_cli(args)
|
||||||
response_json = json.loads(stdout)
|
|
||||||
|
|
||||||
# Parse out id
|
# Parse out id
|
||||||
alarm_id = response_json['id']
|
alarm_id = result_json['id']
|
||||||
return alarm_id
|
return alarm_id
|
||||||
|
|
||||||
|
|
||||||
def get_metrics(name, dimensions):
|
def get_metrics(name, dimensions):
|
||||||
print("Getting metrics for " + name + str(dimensions))
|
print('Getting metrics for %s ' % (name + str(dimensions)))
|
||||||
dimensions_arg = ""
|
dimensions_arg = ''
|
||||||
for key, value in dimensions.iteritems():
|
for key, value in dimensions.iteritems():
|
||||||
if dimensions_arg != "":
|
if dimensions_arg != '':
|
||||||
dimensions_arg = dimensions_arg + ","
|
dimensions_arg = dimensions_arg + ','
|
||||||
dimensions_arg = dimensions_arg + key + "=" + value
|
dimensions_arg = dimensions_arg + key + '=' + value
|
||||||
stdout = run_mon_cli(["mon", "--json", "measurement-list", "--dimensions", dimensions_arg, name, "00"])
|
return run_mon_cli(['measurement-list', '--dimensions',
|
||||||
return json.loads(stdout)
|
dimensions_arg, name, '00'])
|
||||||
|
|
||||||
def run_mon_cli(args):
|
|
||||||
|
def run_mon_cli(args, useJson=True):
|
||||||
|
if useJson:
|
||||||
|
args.insert(0, '--json')
|
||||||
|
args.insert(0, 'mon')
|
||||||
try:
|
try:
|
||||||
stdout = subprocess.check_output(args)
|
stdout = subprocess.check_output(args)
|
||||||
return stdout
|
if useJson:
|
||||||
|
return json.loads(stdout)
|
||||||
|
else:
|
||||||
|
return stdout
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
print(e, file=sys.stderr)
|
print(e, file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def create_notification(notification_name, notification_email_addr):
|
def create_notification(notification_name, notification_email_addr):
|
||||||
print("Creating notification")
|
print('Creating notification')
|
||||||
stdout = run_mon_cli(["mon", "notification-create", notification_name, "EMAIL", notification_email_addr])
|
result_json = run_mon_cli(['notification-create', notification_name,
|
||||||
response_json = json.loads(stdout)
|
'EMAIL', notification_email_addr])
|
||||||
|
|
||||||
# Parse out id
|
# Parse out id
|
||||||
notification_method_id = response_json['id']
|
notification_method_id = result_json['id']
|
||||||
return notification_method_id
|
return notification_method_id
|
||||||
|
|
||||||
|
|
||||||
def find_id_for_name(object_json, name):
|
def find_id_for_name(object_json, name):
|
||||||
for obj in object_json:
|
for obj in object_json:
|
||||||
@ -119,74 +151,112 @@ def find_id_for_name(object_json, name):
|
|||||||
return obj['id']
|
return obj['id']
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def cleanup(notification_name, alarm_name):
|
def cleanup(notification_name, alarm_name):
|
||||||
# Delete our alarm if it already exists
|
# Delete our alarm if it already exists
|
||||||
alarm_json = json.loads(run_mon_cli(["mon", "--json", "alarm-list"]))
|
alarm_json = run_mon_cli(['alarm-list'])
|
||||||
alarm_id = find_id_for_name(alarm_json, alarm_name)
|
alarm_id = find_id_for_name(alarm_json, alarm_name)
|
||||||
if alarm_id:
|
if alarm_id:
|
||||||
run_mon_cli(["mon", "alarm-delete", alarm_id])
|
run_mon_cli(['alarm-delete', alarm_id], useJson=False)
|
||||||
# Delete our notification if it already exists
|
# Delete our notification if it already exists
|
||||||
notification_json = json.loads(run_mon_cli(["mon", "--json", "notification-list"]))
|
notification_json = run_mon_cli(['notification-list'])
|
||||||
notification_id = find_id_for_name(notification_json, notification_name)
|
notification_id = find_id_for_name(notification_json, notification_name)
|
||||||
if notification_id:
|
if notification_id:
|
||||||
run_mon_cli(["mon", "notification-delete", notification_id])
|
run_mon_cli(['notification-delete', notification_id], useJson=False)
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_alarm_state_change(alarm_id, old_state):
    """Poll an alarm once a second until it leaves ``old_state``.

    :param alarm_id: id of the alarm to watch
    :param old_state: state the alarm is expected to move away from
    :returns: the first state observed that differs from ``old_state``
    :raises SystemExit: with status 1 when the state is still
        ``old_state`` after 250 polls
    """
    print('Waiting for alarm to change state from %s' % old_state)
    # Count from 1: each poll happens after a one-second sleep, so on the
    # n-th poll n seconds of sleeping have passed, not n - 1.
    for elapsed in range(1, 251):
        time.sleep(1)
        state = get_alarm_state(alarm_id)
        if state != old_state:
            print('Alarm state changed to %s in %d seconds' %
                  (state, elapsed))
            return state
    print('State never changed from %s in %d seconds' % (old_state, elapsed),
          file=sys.stderr)
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
notification_name = "Jahmon Smoke Test"
|
notification_name = 'Jahmon Smoke Test'
|
||||||
notification_email_addr = "root@kafka"
|
notification_email_addr = 'root@kafka'
|
||||||
alarm_name = "high cpu and load"
|
alarm_name = 'high cpu and load'
|
||||||
metric_name = "cpu_system_perc"
|
metric_name = 'cpu_system_perc'
|
||||||
metric_dimensions = {"hostname":"thresh"}
|
metric_dimensions = {'hostname': 'thresh'}
|
||||||
cleanup(notification_name, alarm_name)
|
cleanup(notification_name, alarm_name)
|
||||||
|
|
||||||
# Query how many metrics there are for the Alarm
|
# Query how many metrics there are for the Alarm
|
||||||
metric_json = get_metrics(metric_name, metric_dimensions)
|
metric_json = get_metrics(metric_name, metric_dimensions)
|
||||||
if len(metric_json) == 0:
|
if len(metric_json) == 0:
|
||||||
print("No measurements received for metric " + metric_name + str(metric_dimensions), file=sys.stderr)
|
print('No measurements received for metric %s ' %
|
||||||
sys.exit(1)
|
(metric_name + str(metric_dimensions)), file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
initial_num_metrics = len(metric_json[0]['measurements'])
|
initial_num_metrics = len(metric_json[0]['measurements'])
|
||||||
|
|
||||||
# Create Notification through CLI
|
# Create Notification through CLI
|
||||||
notification_method_id = create_notification(notification_name, notification_email_addr)
|
notification_method_id = create_notification(notification_name,
|
||||||
|
notification_email_addr)
|
||||||
# Create Alarm through CLI
|
# Create Alarm through CLI
|
||||||
alarm_id = create_alarm(alarm_name, "max(cpu_system_perc) > 1 and max(load_avg_1_min{hostname=thresh}) > 3", notification_method_id, "System CPU Utilization exceeds 1% and Load exeeds 3 per measurement period")
|
expression = 'max(cpu_system_perc) > 1 and ' + \
|
||||||
|
'max(load_avg_1_min{hostname=thresh}) > 1'
|
||||||
|
description = 'System CPU Utilization exceeds 1% and ' + \
|
||||||
|
'Load exceeds 3 per measurement period'
|
||||||
|
alarm_id = create_alarm(alarm_name, expression, notification_method_id,
|
||||||
|
description)
|
||||||
state = get_alarm_state(alarm_id)
|
state = get_alarm_state(alarm_id)
|
||||||
# Ensure it is created in the right state
|
# Ensure it is created in the right state
|
||||||
if state != 'UNDETERMINED':
|
if state != 'UNDETERMINED':
|
||||||
print("Wrong initial alarm state, expected UNDETERMINED but was " + state)
|
print('Wrong initial alarm state, expected UNDETERMINED but is %s' %
|
||||||
sys.exit(1)
|
state)
|
||||||
# Wait for it to
|
return 1
|
||||||
print("Waiting for alarm to change state")
|
|
||||||
change_time = 0
|
state = wait_for_alarm_state_change(alarm_id, 'UNDETERMINED')
|
||||||
for x in range(0, 250):
|
|
||||||
time.sleep(1)
|
|
||||||
state = get_alarm_state(alarm_id)
|
|
||||||
if state != 'UNDETERMINED':
|
|
||||||
print("Alarm state changed in " + str(x) + " seconds")
|
|
||||||
change_time = x
|
|
||||||
break
|
|
||||||
|
|
||||||
if state != 'ALARM':
|
if state != 'ALARM':
|
||||||
print("Wrong final state, expected ALARM but was " + state, file=sys.stderr)
|
print('Wrong final state, expected ALARM but was %s' % state,
|
||||||
sys.exit(1)
|
file=sys.stderr)
|
||||||
print("Final state of alarm was " + state)
|
return 1
|
||||||
# If the alarm changes state too fast, then there isn't time for the new metric to arrive.
|
|
||||||
# Unlikely, but it has been seen
|
state_changes = ['UNDETERMINED', 'ALARM']
|
||||||
|
new_state = 'OK'
|
||||||
|
state_changes.append(new_state)
|
||||||
|
change_alarm_state(alarm_id, new_state)
|
||||||
|
# There is a bug in the API which allows this to work. Soon that
|
||||||
|
# will be fixed and this will fail
|
||||||
|
if len(sys.argv) > 1:
|
||||||
|
final_state = 'ALARM'
|
||||||
|
state_changes.append(final_state)
|
||||||
|
|
||||||
|
state = wait_for_alarm_state_change(alarm_id, new_state)
|
||||||
|
|
||||||
|
if state != final_state:
|
||||||
|
print('Wrong final state, expected %s but was %s' %
|
||||||
|
(final_state, state), file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
# If the alarm changes state too fast, then there isn't time for the new
|
||||||
|
# metric to arrive. Unlikely, but it has been seen
|
||||||
|
change_time = time.time() - start_time
|
||||||
if change_time < 30:
|
if change_time < 30:
|
||||||
time.sleep(30 - change_time)
|
time.sleep(30 - change_time)
|
||||||
change_time = 30
|
change_time = 30
|
||||||
metric_json = get_metrics(metric_name, metric_dimensions)
|
metric_json = get_metrics(metric_name, metric_dimensions)
|
||||||
final_num_metrics = len(metric_json[0]['measurements'])
|
final_num_metrics = len(metric_json[0]['measurements'])
|
||||||
if final_num_metrics <= initial_num_metrics:
|
if final_num_metrics <= initial_num_metrics:
|
||||||
print("No new metrics received", file=sys.stderr)
|
print('No new metrics received', file=sys.stderr)
|
||||||
sys.exit(1)
|
return 1
|
||||||
print("Received " + str(final_num_metrics - initial_num_metrics) + " metrics in " + str(change_time) + " seconds")
|
print('Received %d metrics in %d seconds' %
|
||||||
if not check_alarm_history(alarm_id):
|
((final_num_metrics - initial_num_metrics), change_time))
|
||||||
sys.exit(1)
|
if not check_alarm_history(alarm_id, state_changes):
|
||||||
|
return 1
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user