Merge branch 'master' of github.com:hpcloud-mon/mon-vagrant

2014-05-13 15:19:47 -06:00 · 2014-05-13 15:19:47 -06:00 · 25e15e67fc
commit 25e15e67fc
parent a71e05204c 31280d5730
3 changed files with 377 additions and 91 deletions
--- a/tests/measurement_test.py
+++ b/tests/measurement_test.py
@ -0,0 +1,98 @@
+#!/usr/bin/env python
+#
+"""measurements
+"""
+from __future__ import print_function
+import sys
+import time
+import pytz
+from datetime import datetime
+from monclient import client
+import monclient.exc as exc
+
+mon_client = None
+
+def call_mon_api(method, fields):
+    """Invoke a mon client API method with ``fields`` as keyword arguments.
+
+    Returns whatever ``method`` returns.  If the call raises
+    ``exc.HTTPException``, the exception's code and message are printed and
+    the script exits with status 1.
+    """
+    try:
+        return method(**fields)
+    except exc.HTTPException as http_error:
+        print(http_error.code)
+        print(http_error.message)
+        sys.exit(1)
+
+
+def create_timestamp(seconds):
+    """Format epoch ``seconds`` as a UTC timestamp string.
+
+    The output layout is ``%Y-%m-%dT%H:%M:%S%z``.
+    """
+    naive_utc = datetime.utcfromtimestamp(seconds)
+    aware_utc = pytz.utc.localize(naive_utc)
+    return aware_utc.strftime("%Y-%m-%dT%H:%M:%S%z")
+
+
+def main():
+    """Send measurements for a metric and verify they can be read back.
+
+    Usage: ``measurement_test.py metric_name count``.  Posts ``count``
+    measurements with values 0..count-1, then polls ``list_measurements`` up
+    to 30 times (one second apart) until all of them are visible, and finally
+    checks the returned values.
+
+    Returns 0 on success and 1 on a usage error, when measurements are
+    missing, or when a returned value does not match what was sent.
+    """
+    # Both metric_name and count are required.
+    if len(sys.argv) < 3:
+        print('usage: %s metric_name count' % sys.argv[0], file=sys.stderr)
+        return 1
+
+    api_version = '2_0'
+    endpoint = 'http://192.168.10.4:8080/v2.0'
+    kwargs = {
+              'token': '82510970543135'
+    }
+    # Local to this function; the module-level mon_client is not rebound.
+    mon_client = client.Client(api_version, endpoint, **kwargs)
+
+    metric_start_time = time.time()
+    metric_name = sys.argv[1]
+    num_metrics_to_send = int(sys.argv[2])
+    dimensions = {'Test_Send':'Number_1'} # Should be arg
+    start_time = time.time()
+    fields = {'name':metric_name}
+    fields['dimensions'] = dimensions
+    for val in range(0, num_metrics_to_send):
+        fields['value'] = str(val)
+        fields['timestamp'] = time.time()
+        call_mon_api(mon_client.metrics.create, fields)
+        # time.sleep(1)
+
+    print("Took %d seconds to send %d measurements" %
+          ((time.time() - start_time), num_metrics_to_send))
+    metric_end_time = time.time()
+    # API requires end time to be greater than start time
+    if (metric_end_time - metric_start_time) < 1:
+        metric_end_time = metric_start_time + 1
+    start_timestamp = create_timestamp(metric_start_time)
+    end_timestamp = create_timestamp(metric_end_time)
+    fields = {'name':metric_name}
+    fields['dimensions'] = dimensions
+    fields['start_time'] = start_timestamp
+    fields['end_time'] = end_timestamp
+
+    # Poll until every measurement that was sent is visible through the API.
+    measurements = []
+    for i in range(0, 30):
+        result = call_mon_api(mon_client.metrics.list_measurements, fields)
+        if len(result) > 0:
+            measurements = result[0]['measurements']
+            if len(measurements) >= num_metrics_to_send:
+                break
+            print('Found %d of %d metrics so far' %
+                  (len(measurements), num_metrics_to_send))
+        time.sleep(1)
+
+    if len(result) == 0:
+        print('Did not receive any metrics in %d seconds' % i, file=sys.stderr)
+        return 1
+
+    if len(measurements) != num_metrics_to_send:
+        print('Expected %d measurements but found %d' %
+              (num_metrics_to_send, len(measurements)), file=sys.stderr)
+        return 1
+    print('Took %d seconds for metrics to fully arrive' % i)
+
+    # The check expects the values in descending order, starting at count - 1.
+    result = 0
+    for index, value in enumerate(measurements):
+        expected = num_metrics_to_send - 1 - index
+        if value[2] != expected:
+            print('Expected %d but found %d for %d' %
+                  (expected, value[2], index), file=sys.stderr)
+            # Report the mismatch through the exit status as well.
+            result = 1
+    return result
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/tests/notification_cycleTest.py
+++ b/tests/notification_cycleTest.py
@ -0,0 +1,118 @@
+#!/usr/bin/env python
+#
+"""Notification Engine Test
+    Cycle the state of an Alarm the given number of times
+"""
+from __future__ import print_function
+import sys
+import time
+import json
+import subprocess
+from monclient import client
+import monclient.exc as exc
+
+mon_client = None
+
+def call_mon_api(method, fields):
+    """Call ``method(**fields)`` and return its response.
+
+    On ``exc.HTTPException`` the error code and message are printed and the
+    script terminates with exit status 1.
+    """
+    try:
+        return method(**fields)
+    except exc.HTTPException as http_error:
+        print(http_error.code)
+        print(http_error.message)
+        sys.exit(1)
+
+
+def find_alarm_id():
+    """Return the id of the first alarm listed by the API.
+
+    Prints a hint to stderr and returns None when no alarms exist.
+    """
+    alarms = call_mon_api(mon_client.alarms.list, {})
+    if len(alarms) > 0:
+        return alarms[0]['id']
+    print('No existing alarms, create one and rerun test', file=sys.stderr)
+    return None
+
+
+def get_alarm_state(alarm_id):
+    """Fetch the alarm identified by ``alarm_id`` and return its 'state'."""
+    query = {'alarm_id': alarm_id}
+    alarm = call_mon_api(mon_client.alarms.get, query)
+    return alarm['state']
+
+
+def find_notifications(alarm_id):
+    """Return the 'state' of each notification for ``alarm_id`` in root's mail.
+
+    Runs ``sudo grep <alarm_id> /var/mail/root`` and parses every matching
+    line as JSON.
+
+    Returns an empty list when grep reports no matching lines.  Any other
+    failure of the command is printed to stderr and ends the script with
+    exit status 1.
+    """
+    args = ['sudo', 'grep', alarm_id, '/var/mail/root']
+    try:
+        stdout = subprocess.check_output(args)
+    except subprocess.CalledProcessError as e:
+        # grep exits with status 1 when nothing matched.  That only means no
+        # notification has been delivered for this alarm yet, which is a
+        # valid starting point for the test rather than a fatal error.
+        if e.returncode == 1:
+            return []
+        print(e, file=sys.stderr)
+        sys.exit(1)
+    return [json.loads(line)['state'] for line in stdout.splitlines()]
+
+
+def main():
+    """Cycle an alarm between OK and ALARM and verify notifications arrive.
+
+    Usage: ``notification_cycleTest.py count [alarm-id]``.  When no alarm id
+    is given, the first alarm returned by the API is used.  Each cycle is two
+    state changes, so ``count * 2`` notifications are expected on top of the
+    ones already present before the test started.
+
+    Returns 0 on success and 1 on a usage error, when no alarm is available,
+    when a state change did not take effect, or when too few notifications
+    show up within 30 polls (one second apart).
+    """
+    if len(sys.argv) == 1:
+        print('usage: %s count [alarm-id]' % sys.argv[0], file=sys.stderr)
+        return 1
+
+    api_version = '2_0'
+    endpoint = 'http://192.168.10.4:8080/v2.0'
+    kwargs = {
+              'token': '82510970543135'
+    }
+    # The helper functions read the module-level client.
+    global mon_client
+    mon_client = client.Client(api_version, endpoint, **kwargs)
+
+    num_cycles = int(sys.argv[1])
+    if len(sys.argv) > 2:
+        alarm_id = sys.argv[2]
+    else:
+        alarm_id = find_alarm_id()
+        if alarm_id is None:
+            return 1
+
+    start_time = time.time()
+    state = get_alarm_state(alarm_id)
+    fields = {'alarm_id':alarm_id}
+
+    # Baseline so only notifications produced by this run are counted.
+    existing_notifications = find_notifications(alarm_id)
+    notifications_sent = num_cycles * 2
+    for _ in range(0, notifications_sent):
+        if state == 'OK':
+            state = 'ALARM'
+        else:
+            state = 'OK'
+        fields['state'] = state
+        call_mon_api(mon_client.alarms.patch, fields)
+        new_state = get_alarm_state(alarm_id)
+        if new_state != state:
+            print('Expected new state %s but found %s' %
+                  (state, new_state), file=sys.stderr)
+            return 1
+        # time.sleep(1)
+
+    print("Took %d seconds to send %d alarm state changes" %
+          ((time.time() - start_time), notifications_sent))
+
+    notifications_found = 0
+    for i in range(0, 30):
+        all_notifications = find_notifications(alarm_id)
+        notifications_found = (len(all_notifications) -
+                               len(existing_notifications))
+        if notifications_found >= notifications_sent:
+            break
+        print('Found %d of %d expected notifications so far' %
+              (notifications_found, notifications_sent))
+        time.sleep(1)
+
+    if notifications_found < notifications_sent:
+        print('Expected %d notifications but found %d' %
+              (notifications_sent, notifications_found), file=sys.stderr)
+        return 1
+
+    print('Took %d seconds for notifications to fully arrive' % i)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/tests/smoke.py
+++ b/tests/smoke.py
@ -1,16 +1,20 @@
 #!/usr/bin/env python
 #
 """smoke
-    Runs a smoke test of the jahmon installation on mini-mon by ensuring metrics are flowing and creating a new
-    notification, alarm and that the Threshold Engine changes the state of the alarm.
-    This requires the mon CLI and must be run on one of the mini-mon VMs. Tested running on kafka VM.
-    Get it by following the instructions on https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
-    If you want to see the notification, you must install postfix on the kakfa VM, configure it to be local, and
-    modify /etc/mon/notification.yaml to use localhost for the email server, then restart
+    Runs a smoke test of the jahmon installation on mini-mon by ensuring
+    metrics are flowing and creating a new notification, alarm and that the
+    Threshold Engine changes the state of the alarm.  This requires the mon
+    CLI and must be run on one of the mini-mon VMs. Tested running on kafka VM.
+    Get it by following the instructions on
+    https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
+    If you want to see the notification, you must install postfix on the kafka
+    VM, configure it to be local, and modify /etc/mon/notification.yaml to use
+    localhost for the email server, then restart

    TODO:
        1. Add check of notification history when that is implemented
-        2. Add check of mail getting to root when postfix is added mini-mon. This script will have to run on the kafka VM
+        2. Add check of mail getting to root when postfix is added to mini-mon.
+           This script will have to run on the kafka VM
 """

 from __future__ import print_function
@ -25,93 +29,121 @@ import time
 # export OS_NO_CLIENT_AUTH=1
 # export MON_API_URL=http://192.168.10.4:8080/v2.0/

-os.environ["OS_AUTH_TOKEN"] = "82510970543135"
-os.environ["OS_NO_CLIENT_AUTH"] = "1"
-os.environ["MON_API_URL"] = "http://192.168.10.4:8080/v2.0/"
+os.environ['OS_AUTH_TOKEN'] = '82510970543135'
+os.environ['OS_NO_CLIENT_AUTH'] = '1'
+os.environ['MON_API_URL'] = 'http://192.168.10.4:8080/v2.0/'
+
+
+def change_alarm_state(alarm_id, new_state):
+    """Patch the alarm to ``new_state`` through the mon CLI.
+
+    Returns 1, after printing an error to stderr, when the state reported by
+    the CLI differs from ``new_state``; otherwise returns None.
+    """
+    print('Changing Alarm state to %s' % new_state)
+    result_json = run_mon_cli(['alarm-patch', '--state', new_state, alarm_id])
+    if result_json['state'] != new_state:
+        # Requested state goes first, then the state the CLI reported.
+        print('Alarm patch failed, expected state of %s but was %s' %
+              (new_state, result_json['state']), file=sys.stderr)
+        return 1
+

 def get_alarm_state(alarm_id):
-    stdout = run_mon_cli(["mon", "--json", "alarm-show", alarm_id])
-    response_json = json.loads(stdout)
-    return response_json['state']
+    result_json = run_mon_cli(['alarm-show', alarm_id])
+    return result_json['state']

-def check_alarm_history(alarm_id):
+
+def check_alarm_history(alarm_id, states):
+    transitions = len(states) - 1
    print('Checking Alarm History')
-    # Make take a little bit of time for Alarm history to flow all the way through
-    for x in range(0, 10):
-        stdout = run_mon_cli(["mon", "--json", "alarm-history", alarm_id])
-        response_json = json.loads(stdout)
-        if len(response_json) > 0:
+    # May take some time for Alarm history to flow all the way through
+    for _ in range(0, 10):
+        result_json = run_mon_cli(['alarm-history', alarm_id])
+        if len(result_json) >= transitions:
            break
        time.sleep(4)

    result = True
-    if not check_expected(1, len(response_json), 'number of history entries'):
+    if not check_expected(transitions, len(result_json),
+                          'number of history entries'):
        return False
-    alarm_json = response_json[0]
-    if not check_expected('UNDETERMINED', alarm_json['old_state'], 'old_state'):
+    result_json.sort(key=lambda x: x['timestamp'])
+    for i in range(0, transitions):
+        old_state = states[i]
+        new_state = states[i+1]
+        alarm_json = result_json[i]
+        if not check_expected(old_state, alarm_json['old_state'], 'old_state'):
            result = False
-    if not check_expected('ALARM', alarm_json['new_state'], 'new_state'):
+        if not check_expected(new_state, alarm_json['new_state'], 'new_state'):
            result = False
        if not check_expected(alarm_id, alarm_json['alarm_id'], 'alarm_id'):
            result = False
+
    if result:
-        print("Alarm History is OK")
+        print('Alarm History is OK')
    return result

+
 def check_expected(expected, actual, what):
    if (expected == actual):
        return True
-    print("Incorrect value for alarm history " + what + " expected '" + str(expected) + "' but was '" + str(actual) + "'", file=sys.stderr)
+    print("Incorrect value for alarm history %s expected '%s' but was '%s'" %
+          (what, str(expected), str(actual)), file=sys.stderr)
    return False

+
 def create_alarm(name, expression, notification_method_id, description=None):
-    args = ["mon", "alarm-create"]
+    args = ['alarm-create']
    if (description):
-            args.append("--description")
+            args.append('--description')
            args.append(description)
-    args.append("--alarm-actions")
+    args.append('--alarm-actions')
    args.append(notification_method_id)
-    args.append("--ok-actions")
+    args.append('--ok-actions')
    args.append(notification_method_id)
-    args.append("--undetermined-actions")
+    args.append('--undetermined-actions')
    args.append(notification_method_id)
    args.append(name)
    args.append(expression)
-    print("Creating alarm")
-    stdout = run_mon_cli(args)
-    response_json = json.loads(stdout)
+    print('Creating alarm')
+    result_json = run_mon_cli(args)

    # Parse out id
-    alarm_id = response_json['id']
+    alarm_id = result_json['id']
    return alarm_id

-def get_metrics(name, dimensions):
-    print("Getting metrics for " + name + str(dimensions))
-    dimensions_arg = ""
-    for key, value in dimensions.iteritems():
-        if dimensions_arg != "":
-            dimensions_arg = dimensions_arg + ","
-        dimensions_arg = dimensions_arg + key + "=" + value
-    stdout = run_mon_cli(["mon", "--json", "measurement-list", "--dimensions", dimensions_arg, name, "00"])
-    return json.loads(stdout)

-def run_mon_cli(args):
+def get_metrics(name, dimensions):
+    """Return the mon CLI measurement-list result for metric ``name``.
+
+    ``dimensions`` is a dict; its items are passed to ``--dimensions`` as
+    comma-separated ``key=value`` pairs.
+    """
+    print('Getting metrics for %s ' % (name + str(dimensions)))
+    pairs = [key + '=' + value for key, value in dimensions.iteritems()]
+    dimensions_arg = ','.join(pairs)
+    cli_args = ['measurement-list', '--dimensions', dimensions_arg, name, '00']
+    return run_mon_cli(cli_args)
+
+
+def run_mon_cli(args, useJson=True):
+    if useJson:
+        args.insert(0, '--json')
+    args.insert(0, 'mon')
    try:
        stdout = subprocess.check_output(args)
+        if useJson:
+            return json.loads(stdout)
+        else:
            return stdout
    except subprocess.CalledProcessError as e:
        print(e, file=sys.stderr)
        sys.exit(1)

+
 def create_notification(notification_name, notification_email_addr):
-    print("Creating notification")
-    stdout = run_mon_cli(["mon", "notification-create", notification_name, "EMAIL", notification_email_addr])
-    response_json = json.loads(stdout)
+    print('Creating notification')
+    result_json = run_mon_cli(['notification-create', notification_name,
+                         'EMAIL', notification_email_addr])

    # Parse out id
-    notification_method_id = response_json['id']
+    notification_method_id = result_json['id']
    return notification_method_id

+
 def find_id_for_name(object_json, name):
    for obj in object_json:
        this_name = obj['name']
@ -119,71 +151,109 @@ def find_id_for_name(object_json, name):
            return obj['id']
    return None

+
 def cleanup(notification_name, alarm_name):
    # Delete our alarm if it already exists
-    alarm_json = json.loads(run_mon_cli(["mon", "--json", "alarm-list"]))
+    alarm_json = run_mon_cli(['alarm-list'])
    alarm_id = find_id_for_name(alarm_json, alarm_name)
    if alarm_id:
-        run_mon_cli(["mon", "alarm-delete", alarm_id])
+        run_mon_cli(['alarm-delete', alarm_id], useJson=False)
    # Delete our notification if it already exists
-    notification_json = json.loads(run_mon_cli(["mon", "--json", "notification-list"]))
+    notification_json = run_mon_cli(['notification-list'])
    notification_id = find_id_for_name(notification_json, notification_name)
    if notification_id:
-        run_mon_cli(["mon", "notification-delete", notification_id])
+        run_mon_cli(['notification-delete', notification_id], useJson=False)
+
+
+def wait_for_alarm_state_change(alarm_id, old_state):
+    """Poll the alarm until its state differs from ``old_state``.
+
+    Sleeps one second before each of up to 250 polls and returns the new
+    state as soon as it changes.  If it never changes, an error is printed to
+    stderr and the script exits with status 1.
+    """
+    print('Waiting for alarm to change state from %s' % old_state)
+    for attempt in range(0, 250):
+        time.sleep(1)
+        current_state = get_alarm_state(alarm_id)
+        if current_state == old_state:
+            continue
+        print('Alarm state changed to %s in %d seconds' %
+              (current_state, attempt))
+        return current_state
+    print('State never changed from %s in %d seconds' % (old_state, attempt),
+          file=sys.stderr)
+    sys.exit(1)
+

 def main():
-    notification_name = "Jahmon Smoke Test"
-    notification_email_addr = "root@kafka"
-    alarm_name = "high cpu and load"
-    metric_name = "cpu_system_perc"
-    metric_dimensions = {"hostname":"thresh"}
+    notification_name = 'Jahmon Smoke Test'
+    notification_email_addr = 'root@kafka'
+    alarm_name = 'high cpu and load'
+    metric_name = 'cpu_system_perc'
+    metric_dimensions = {'hostname': 'thresh'}
    cleanup(notification_name, alarm_name)

    # Query how many metrics there are for the Alarm
    metric_json = get_metrics(metric_name, metric_dimensions)
    if len(metric_json) == 0:
-        print("No measurements received for metric " + metric_name + str(metric_dimensions), file=sys.stderr)
-        sys.exit(1)
+        print('No measurements received for metric %s ' %
+              (metric_name + str(metric_dimensions)), file=sys.stderr)
+        return 1
+
+    start_time = time.time()

    initial_num_metrics = len(metric_json[0]['measurements'])

    # Create Notification through CLI
-    notification_method_id = create_notification(notification_name, notification_email_addr)
+    notification_method_id = create_notification(notification_name,
+                                                 notification_email_addr)
    # Create Alarm through CLI
-    alarm_id = create_alarm(alarm_name, "max(cpu_system_perc) > 1 and max(load_avg_1_min{hostname=thresh}) > 3", notification_method_id, "System CPU Utilization exceeds 1% and Load exeeds 3 per measurement period")
+    expression = 'max(cpu_system_perc) > 1 and ' + \
+                 'max(load_avg_1_min{hostname=thresh}) > 1'
+    description = 'System CPU Utilization exceeds 1% and ' + \
+                  'Load exceeds 3 per measurement period'
+    alarm_id = create_alarm(alarm_name, expression, notification_method_id,
+                            description)
    state = get_alarm_state(alarm_id)
    # Ensure it is created in the right state
    if state != 'UNDETERMINED':
-        print("Wrong initial alarm state, expected UNDETERMINED but was " + state)
-        sys.exit(1)
-    # Wait for it to 
-    print("Waiting for alarm to change state")
-    change_time = 0
-    for x in range(0, 250):
-        time.sleep(1)
-        state = get_alarm_state(alarm_id)
-        if state != 'UNDETERMINED':
-            print("Alarm state changed in " + str(x) + " seconds")
-            change_time = x
-            break
+        print('Wrong initial alarm state, expected UNDETERMINED but is %s' %
+              state)
+        return 1
+
+    state = wait_for_alarm_state_change(alarm_id, 'UNDETERMINED')

    if state != 'ALARM':
-        print("Wrong final state, expected ALARM but was " + state, file=sys.stderr)
-        sys.exit(1)
-    print("Final state of alarm was " + state)
-    # If the alarm changes state too fast, then there isn't time for the new metric to arrive.
-    # Unlikely, but it has been seen
+        print('Wrong final state, expected ALARM but was %s' % state,
+              file=sys.stderr)
+        return 1
+
+    state_changes = ['UNDETERMINED', 'ALARM']
+    new_state = 'OK'
+    state_changes.append(new_state)
+    change_alarm_state(alarm_id, new_state)
+    # There is a bug in the API which allows this to work. Soon that
+    # will be fixed and this will fail
+    if len(sys.argv) > 1:
+        final_state = 'ALARM'
+        state_changes.append(final_state)
+
+        state = wait_for_alarm_state_change(alarm_id, new_state)
+
+        if state != final_state:
+            print('Wrong final state, expected %s but was %s' %
+                  (final_state, state), file=sys.stderr)
+            return 1
+
+    # If the alarm changes state too fast, then there isn't time for the new
+    # metric to arrive. Unlikely, but it has been seen
+    change_time = time.time() - start_time
    if change_time < 30:
        time.sleep(30 - change_time)
        change_time = 30
    metric_json = get_metrics(metric_name, metric_dimensions)
    final_num_metrics = len(metric_json[0]['measurements'])
    if final_num_metrics <= initial_num_metrics:
-        print("No new metrics received", file=sys.stderr)
-        sys.exit(1)
-    print("Received " + str(final_num_metrics - initial_num_metrics) + " metrics in " + str(change_time) + " seconds")
-    if not check_alarm_history(alarm_id):
-        sys.exit(1)
+        print('No new metrics received', file=sys.stderr)
+        return 1
+    print('Received %d metrics in %d seconds' %
+          ((final_num_metrics - initial_num_metrics),  change_time))
+    if not check_alarm_history(alarm_id, state_changes):
+        return 1

    return 0