monasca-vagrant/tests/smoke.py

#!/usr/bin/env python
#
"""smoke
    Runs a smoke test of the jahmon installation on mini-mon by ensuring metrics are flowing and creating a new
    notification, alarm and that the Threshold Engine changes the state of the alarm.
    This requires the mon CLI and must be run on one of the mini-mon VMs. Tested running on kafka VM.
    Get it by following the instructions on https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
    If you want to see the notification, you must install postfix on the kakfa VM, configure it to be local, and
    modify /etc/mon/notification.yaml to use localhost for the email server, then restart

    TODO:
        1. Add check of alarm history when that works
        2. Add check of notification history when that is implemented
        3. Add check of mail getting to root when postfix is added mini-mon. This script will have to run on the kafka VM
"""

from __future__ import print_function
import json
import sys
import os
import subprocess
import time


# export OS_AUTH_TOKEN=82510970543135
# export OS_NO_CLIENT_AUTH=1
# export MON_API_URL=http://192.168.10.4:8080/v2.0/

os.environ["OS_AUTH_TOKEN"] = "82510970543135"
os.environ["OS_NO_CLIENT_AUTH"] = "1"
os.environ["MON_API_URL"] = "http://192.168.10.4:8080/v2.0/"

def get_alarm_state(alarm_id):
    stdout = run_mon_cli(["mon", "--json", "alarm-show", alarm_id])
    response_json = json.loads(stdout)
    return response_json['state']

def create_alarm(name, expression, notification_method_id, description=None):
    args = ["mon", "alarm-create"]
    if (description):
            args.append("--description")
            args.append(description)
    args.append("--alarm-actions")
    args.append(notification_method_id)
    args.append("--ok-actions")
    args.append(notification_method_id)
    args.append("--undetermined-actions")
    args.append(notification_method_id)
    args.append(name)
    args.append(expression)
    print("Creating alarm")
    stdout = run_mon_cli(args)
    response_json = json.loads(stdout)

    # Parse out id
    alarm_id = response_json['id']
    return alarm_id

def get_metrics(name, dimensions):
    print("Getting metrics for " + name)
    dimensions_arg = ""
    for key, value in dimensions.iteritems():
        if dimensions_arg != "":
            dimensions_arg = dimensions_arg + ","
        dimensions_arg = dimensions_arg + key + "=" + value
    stdout = run_mon_cli(["mon", "--json", "measurement-list", "--dimensions", dimensions_arg, name, "00"])
    return json.loads(stdout)

def run_mon_cli(args):
    try:
        stdout = subprocess.check_output(args)
        return stdout
    except subprocess.CalledProcessError as e:
        print(e, file=sys.stderr)
        sys.exit(1)

def create_notification(notification_name, notification_email_addr):
    print("Creating notification")
    stdout = run_mon_cli(["mon", "notification-create", notification_name, "EMAIL", notification_email_addr])
    response_json = json.loads(stdout)

    # Parse out id
    notification_method_id = response_json['id']
    return notification_method_id

def find_id_for_name(object_json, name):
    for obj in object_json:
        this_name = obj['name']
        if name == this_name:
            return obj['id']
    return None

def cleanup(notification_name, alarm_name):
    # Delete our alarm if it already exists
    alarm_json = json.loads(run_mon_cli(["mon", "--json", "alarm-list"]))
    alarm_id = find_id_for_name(alarm_json, alarm_name)
    if alarm_id:
        run_mon_cli(["mon", "alarm-delete", alarm_id])
    # Delete our notification if it already exists
    notification_json = json.loads(run_mon_cli(["mon", "--json", "notification-list"]))
    notification_id = find_id_for_name(notification_json, notification_name)
    if notification_id:
        run_mon_cli(["mon", "notification-delete", notification_id])

def main():
    notification_name = "Jahmon Smoke Test"
    notification_email_addr = "root@kafka"
    alarm_name = "high cpu and load"
    metric_name = "cpu_user_perc"
    metric_dimensions = {"hostname":"thresh"}
    cleanup(notification_name, alarm_name)

    # Query how many metrics there are for the Alarm
    metric_json = get_metrics(metric_name, metric_dimensions)
    initial_num_metrics = len(metric_json[0]['measurements'])

    # Create Notification through CLI
    notification_method_id = create_notification(notification_name, notification_email_addr)
    # Create Alarm through CLI
    alarm_id = create_alarm(alarm_name, "max(cpu_user_perc{hostname=thresh}) > 1 and max(load_avg_1_min{hostname=thresh}) > 6", notification_method_id, "CPU Utilization exceeds 1% and Load exeeds 6 per measurement period")
    state = get_alarm_state(alarm_id)
    # Ensure it is created in the right state
    if state != 'UNDETERMINED':
        print("Wrong initial alarm state, expected UNDETERMINED but was " + state)
        sys.exit(1)
    # Wait for it to
    print("Waiting for alarm to change state")
    change_time = 0
    for x in range(0, 250):
        time.sleep(1)
        state = get_alarm_state(alarm_id)
        if state != 'UNDETERMINED':
            print("Alarm state changed in " + str(x) + " seconds")
            change_time = x
            break

    if state != 'ALARM':
        print("Wrong initial final state, expected ALARM but was " + state, file=sys.stderr)
        sys.exit(1)
    print("Final state of alarm was " + state)
    # If the alarm changes state too fast, then there isn't time for the new metric to arrive.
    # Unlikely, but it has been seen
    if change_time < 30:
        time.sleep(30 - change_time)
        change_time = 30
    metric_json = get_metrics(metric_name, metric_dimensions)
    final_num_metrics = len(metric_json[0]['measurements'])
    if final_num_metrics <= initial_num_metrics:
        print("No new metrics received", file=sys.stderr)
        sys.exit(1)
    print("Received " + str(final_num_metrics - initial_num_metrics) + " metrics in " + str(change_time) + " seconds")
    return 0


if __name__ == "__main__":
    sys.exit(main())