#!/usr/bin/env python
#
"""smoke
    Runs a smoke test of the jahmon installation on mini-mon by ensuring metrics are flowing and creating a new
    notification, alarm and that the Threshold Engine changes the state of the alarm.
    This requires the mon CLI and must be run on one of the mini-mon VMs. Tested running on kafka VM.
    Get it by following the instructions on https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
    If you want to see the notification, you must install postfix on the kafka VM, configure it to be local, and
    modify /etc/mon/notification.yaml to use localhost for the email server, then restart
    (presumably the notification service; the original note does not say which one).

    TODO:
        1. Add check of alarm history when that works
        2. Add check of notification history when that is implemented
        3. Add check of mail getting to root when postfix is added mini-mon. This script will have to run on the kafka VM
"""

from __future__ import print_function
import json
import sys
import os
import subprocess
import time


# The mon CLI reads its configuration from the environment. The assignments
# below are the in-process equivalent of running:
#   export OS_AUTH_TOKEN=82510970543135
#   export OS_NO_CLIENT_AUTH=1
#   export MON_API_URL=http://192.168.10.4:8080/v2.0/

os.environ["OS_AUTH_TOKEN"] = "82510970543135"
os.environ["OS_NO_CLIENT_AUTH"] = "1"
os.environ["MON_API_URL"] = "http://192.168.10.4:8080/v2.0/"


def get_alarm_state(alarm_id):
    """Return the 'state' field reported by `mon alarm-show` for alarm_id."""
    alarm = json.loads(run_mon_cli(["mon", "--json", "alarm-show", alarm_id]))
    return alarm['state']


def create_alarm(name, expression, notification_method_id, description=None):
    """Create an alarm through the mon CLI and return its id.

    The same notification method is attached to the alarm, ok and
    undetermined actions. The description is only passed when it is truthy.
    """
    args = ["mon", "alarm-create"]
    if description:
        args.extend(["--description", description])
    args.extend([
        "--alarm-actions", notification_method_id,
        "--ok-actions", notification_method_id,
        "--undetermined-actions", notification_method_id,
        name,
        expression,
    ])

    print("Creating alarm")
    response_json = json.loads(run_mon_cli(args))

    # Parse out id
    return response_json['id']


def format_dimensions(dimensions):
    """Render a dimensions dict as the "key=value,key=value" CLI argument.

    Uses dict.items(), which exists on both Python 2 and Python 3
    (dict.iteritems() is Python 2 only). An empty dict yields "".
    """
    return ",".join(key + "=" + value for key, value in dimensions.items())


def count_measurements(metric_json):
    """Return the number of measurements in the first metric series.

    Returns 0 when the measurement list response is empty, so that a host
    which has not reported yet does not abort the test with an IndexError.
    """
    if not metric_json:
        return 0
    return len(metric_json[0].get('measurements', []))


def get_metrics(name, dimensions):
    """Return the parsed JSON output of `mon measurement-list` for a metric."""
    print("Getting metrics for " + name)
    # NOTE(review): the trailing "00" is presumably the start-time argument of
    # measurement-list -- confirm against the mon CLI usage.
    stdout = run_mon_cli(["mon", "--json", "measurement-list", "--dimensions",
                          format_dimensions(dimensions), name, "00"])
    return json.loads(stdout)


def run_mon_cli(args):
    """Run the given command line and return what it wrote to stdout.

    On failure the error is printed to stderr and the script exits with
    status 1. This covers both a non-zero exit status from the command and
    the executable not being runnable at all (e.g. mon is not installed).
    """
    try:
        return subprocess.check_output(args)
    except subprocess.CalledProcessError as e:
        print(e, file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # Raised when the executable itself cannot be started.
        print("Unable to run " + args[0] + ": " + str(e), file=sys.stderr)
        sys.exit(1)


def create_notification(notification_name, notification_email_addr):
    """Create an EMAIL notification method through the mon CLI; return its id."""
    print("Creating notification")
    stdout = run_mon_cli(["mon", "notification-create", notification_name,
                          "EMAIL", notification_email_addr])

    # Parse out id
    return json.loads(stdout)['id']


def find_id_for_name(object_json, name):
    """Return the 'id' of the first object whose 'name' equals name, else None."""
    return next((obj['id'] for obj in object_json if obj['name'] == name), None)


def cleanup(notification_name, alarm_name):
    """Delete the alarm and notification left over from a previous run, if any."""
    # Delete our alarm if it already exists
    alarms = json.loads(run_mon_cli(["mon", "--json", "alarm-list"]))
    alarm_id = find_id_for_name(alarms, alarm_name)
    if alarm_id:
        run_mon_cli(["mon", "alarm-delete", alarm_id])

    # Delete our notification if it already exists
    notifications = json.loads(run_mon_cli(["mon", "--json", "notification-list"]))
    notification_id = find_id_for_name(notifications, notification_name)
    if notification_id:
        run_mon_cli(["mon", "notification-delete", notification_id])


def main():
    """Run the smoke test.

    Returns 0 on success. Any failed check prints a message to stderr and
    exits the process with status 1.
    """
    notification_name = "Jahmon Smoke Test"
    notification_email_addr = "root@kafka"
    alarm_name = "high cpu and load"
    metric_name = "cpu_user_perc"
    metric_dimensions = {"hostname": "thresh"}
    alarm_expression = "max(cpu_user_perc{hostname=thresh}) > 1 and max(load_avg_1_min{hostname=thresh}) > 6"
    alarm_description = "CPU Utilization exceeds 1% and Load exeeds 6 per measurement period"
    max_wait_secs = 250
    min_metric_wait_secs = 30

    cleanup(notification_name, alarm_name)

    # Query how many metrics there are for the Alarm
    initial_num_metrics = count_measurements(get_metrics(metric_name, metric_dimensions))

    # Create Notification through CLI
    notification_method_id = create_notification(notification_name, notification_email_addr)
    # Create Alarm through CLI
    alarm_id = create_alarm(alarm_name, alarm_expression, notification_method_id, alarm_description)

    # Ensure it is created in the right state
    state = get_alarm_state(alarm_id)
    if state != 'UNDETERMINED':
        print("Wrong initial alarm state, expected UNDETERMINED but was " + state, file=sys.stderr)
        sys.exit(1)

    # Wait for it to change state, polling once per second
    print("Waiting for alarm to change state")
    change_time = 0
    for elapsed in range(max_wait_secs):
        time.sleep(1)
        state = get_alarm_state(alarm_id)
        if state != 'UNDETERMINED':
            print("Alarm state changed in " + str(elapsed) + " seconds")
            change_time = elapsed
            break

    # Also reached with state still UNDETERMINED when the wait above timed out
    if state != 'ALARM':
        print("Wrong final state, expected ALARM but was " + state, file=sys.stderr)
        sys.exit(1)
    print("Final state of alarm was " + state)

    # If the alarm changes state too fast, then there isn't time for the new metric to arrive.
    # Unlikely, but it has been seen
    if change_time < min_metric_wait_secs:
        time.sleep(min_metric_wait_secs - change_time)
        change_time = min_metric_wait_secs

    final_num_metrics = count_measurements(get_metrics(metric_name, metric_dimensions))
    if final_num_metrics <= initial_num_metrics:
        print("No new metrics received", file=sys.stderr)
        sys.exit(1)
    print("Received " + str(final_num_metrics - initial_num_metrics) + " metrics in " + str(change_time) + " seconds")
    return 0


if __name__ == "__main__":
    sys.exit(main())