Merge branch 'master' of github.com:hpcloud-mon/mon-vagrant

2014-05-13 15:19:47 -06:00 · 2014-05-13 15:19:47 -06:00 · 25e15e67fc
commit 25e15e67fc
parent a71e05204c 31280d5730
3 changed files with 377 additions and 91 deletions
--- a/tests/measurement_test.py
+++ b/tests/measurement_test.py
@ -0,0 +1,98 @@
 #!/usr/bin/env python
 #
 """measurements
 """
 from __future__ import print_function
 import sys
 import time
 import pytz
 from datetime import datetime
 from monclient import client
 import monclient.exc as exc
 mon_client = None
 def call_mon_api(method, fields):
    """Invoke a mon client method with ``fields`` expanded as keyword args.

    Returns whatever ``method`` returns.  If the call raises
    exc.HTTPException, its code and message are printed and the process
    exits with status 1.
    """
    try:
        response = method(**fields)
    except exc.HTTPException as http_error:
        print(http_error.code)
        print(http_error.message)
        sys.exit(1)
    return response
 def create_timestamp(seconds):
    """Format epoch ``seconds`` as a UTC string: YYYY-MM-DDTHH:MM:SS+zzzz."""
    naive_utc = datetime.utcfromtimestamp(seconds)
    aware_utc = pytz.utc.localize(naive_utc)
    return aware_utc.strftime("%Y-%m-%dT%H:%M:%S%z")
 def main():
    """Send measurements for one metric, then verify they all arrive.

    Command line arguments: metric_name count.  Posts ``count``
    measurements whose values are 0..count-1, polls list_measurements
    once a second (up to 30 times) until at least ``count`` measurements
    are returned, then checks the returned values.

    Returns 0 on success and 1 on a usage error or any failed check.
    """
    # Both metric_name and count are required
    if len(sys.argv) < 3:
        print('usage: %s metric_name count' % sys.argv[0], file=sys.stderr)
        return 1
    metric_name = sys.argv[1]
    try:
        num_metrics_to_send = int(sys.argv[2])
    except ValueError:
        print('usage: %s metric_name count' % sys.argv[0], file=sys.stderr)
        return 1

    api_version = '2_0'
    endpoint = 'http://192.168.10.4:8080/v2.0'
    kwargs = {
        'token': '82510970543135'
    }
    mon_client = client.Client(api_version, endpoint, **kwargs)

    metric_start_time = time.time()
    dimensions = {'Test_Send': 'Number_1'}  # Should be arg
    start_time = time.time()
    fields = {'name': metric_name, 'dimensions': dimensions}
    for val in range(0, num_metrics_to_send):
        fields['value'] = str(val)
        fields['timestamp'] = time.time()
        call_mon_api(mon_client.metrics.create, fields)
    print("Took %d seconds to send %d measurements" %
          ((time.time() - start_time), num_metrics_to_send))

    metric_end_time = time.time()
    # API requires end time to be greater than start time
    if (metric_end_time - metric_start_time) < 1:
        metric_end_time = metric_start_time + 1
    fields = {'name': metric_name,
              'dimensions': dimensions,
              'start_time': create_timestamp(metric_start_time),
              'end_time': create_timestamp(metric_end_time)}

    # Poll until every measurement sent above can be read back
    for i in range(0, 30):
        result = call_mon_api(mon_client.metrics.list_measurements, fields)
        if len(result) > 0:
            measurements = result[0]['measurements']
            if len(measurements) >= num_metrics_to_send:
                break
            print('Found %d of %d metrics so far' %
                  (len(measurements), num_metrics_to_send))
        time.sleep(1)

    if len(result) == 0:
        print('Did not receive any metrics in %d seconds' % i,
              file=sys.stderr)
        return 1
    if len(measurements) != num_metrics_to_send:
        print('Expected %d measurements but found %d' %
              (num_metrics_to_send, len(measurements)), file=sys.stderr)
        return 1
    print('Took %d seconds for metrics to fully arrive' % i)

    # Element 2 of each measurement is checked against a value counting
    # down from count - 1, i.e. the values are expected in descending order
    expected = num_metrics_to_send - 1
    result = 0
    for index in range(0, num_metrics_to_send):
        value = measurements[index]
        if value[2] != expected:
            print('Expected %d but found %d for %d' %
                  (expected, value[2], index), file=sys.stderr)
            # A wrong value must fail the test, not just be reported
            result = 1
        expected = expected - 1
    return result
 if __name__ == "__main__":
    sys.exit(main())
--- a/tests/notification_cycleTest.py
+++ b/tests/notification_cycleTest.py
@ -0,0 +1,118 @@
 #!/usr/bin/env python
 #
 """Notification Engine Test
    Cycle the state of an Alarm the given number of times
 """
 from __future__ import print_function
 import sys
 import time
 import json
 import subprocess
 from monclient import client
 import monclient.exc as exc
 mon_client = None
 def call_mon_api(method, fields):
    """Invoke a mon client method with ``fields`` expanded as keyword args.

    Returns whatever ``method`` returns.  If the call raises
    exc.HTTPException, its code and message are printed and the process
    exits with status 1.
    """
    try:
        response = method(**fields)
    except exc.HTTPException as http_error:
        print(http_error.code)
        print(http_error.message)
        sys.exit(1)
    return response
 def find_alarm_id():
    """Return the id of the first alarm listed by the API.

    Prints a hint to stderr and returns None when no alarms are listed.
    """
    alarms = call_mon_api(mon_client.alarms.list, {})
    if len(alarms) > 0:
        return alarms[0]['id']
    print('No existing alarms, create one and rerun test', file=sys.stderr)
    return None
 def get_alarm_state(alarm_id):
    """Fetch the alarm with the given id and return its 'state' field."""
    query = {'alarm_id': alarm_id}
    alarm = call_mon_api(mon_client.alarms.get, query)
    return alarm['state']
 def find_notifications(alarm_id):
    """Return the states of the notifications found for ``alarm_id``.

    Runs ``sudo grep <alarm_id> /var/mail/root``; every matching line is
    parsed as JSON and its 'state' value is collected, in file order.
    Returns an empty list when no line matches.  Any other grep failure
    is printed to stderr and the process exits with status 1.
    """
    args = ['sudo', 'grep', alarm_id, '/var/mail/root']
    try:
        stdout = subprocess.check_output(args)
    except subprocess.CalledProcessError as e:
        # grep exits with 1 when nothing matched; that only means no
        # notification has been delivered yet, it is not an error
        if e.returncode == 1:
            return []
        print(e, file=sys.stderr)
        sys.exit(1)
    return [json.loads(line)['state'] for line in stdout.splitlines()]
 def main():
    """Cycle an alarm between OK and ALARM and check notifications arrive.

    Command line arguments: count [alarm-id].  When no alarm id is given
    the first alarm returned by the API is used.  The alarm state is
    patched ``count * 2`` times, then the mail file is polled once a
    second (up to 30 times) until that many new notifications show up.

    Returns 0 on success and 1 on a usage error or any failed check.
    """
    if len(sys.argv) == 1:
        print('usage: %s count [alarm-id]' % sys.argv[0], file=sys.stderr)
        return 1

    api_version = '2_0'
    endpoint = 'http://192.168.10.4:8080/v2.0'
    kwargs = {
        'token': '82510970543135'
    }
    # The helper functions reach the client through the module global
    global mon_client
    mon_client = client.Client(api_version, endpoint, **kwargs)

    num_cycles = int(sys.argv[1])
    if len(sys.argv) > 2:
        alarm_id = sys.argv[2]
    else:
        alarm_id = find_alarm_id()
        if alarm_id is None:
            return 1

    start_time = time.time()
    initial_state = get_alarm_state(alarm_id)
    state = initial_state
    fields = {'alarm_id': alarm_id}
    # Notifications already in the mail file must not be counted as new
    existing_notifications = find_notifications(alarm_id)
    notifications_sent = num_cycles * 2
    for _ in range(0, notifications_sent):
        if state == 'OK':
            state = 'ALARM'
        else:
            state = 'OK'
        fields['state'] = state
        call_mon_api(mon_client.alarms.patch, fields)
        new_state = get_alarm_state(alarm_id)
        if new_state != state:
            print('Expected new state %s but found %s' %
                  (state, new_state), file=sys.stderr)
            return 1
    print("Took %d seconds to send %d alarm state changes" %
          ((time.time() - start_time), notifications_sent))

    for i in range(0, 30):
        all_notifications = find_notifications(alarm_id)
        notifications_found = (len(all_notifications) -
                               len(existing_notifications))
        if notifications_found >= notifications_sent:
            break
        print('Found %d of %d expected notifications so far' %
              (notifications_found, notifications_sent))
        time.sleep(1)

    if notifications_found < notifications_sent:
        print('Expected %d notifications but found %d' %
              (notifications_sent, notifications_found), file=sys.stderr)
        return 1
    print('Took %d seconds for notifications to fully arrive' % i)
    return 0
 if __name__ == "__main__":
    sys.exit(main())
--- a/tests/smoke.py
+++ b/tests/smoke.py
@ -1,16 +1,20 @@
 #!/usr/bin/env python
 #
 """smoke
-    Runs a smoke test of the jahmon installation on mini-mon by ensuring metrics are flowing and creating a new
+    Runs a smoke test of the jahmon installation on mini-mon by ensuring
-    notification, alarm and that the Threshold Engine changes the state of the alarm.
+    metrics are flowing and creating a new notification, alarm and that the
-    This requires the mon CLI and must be run on one of the mini-mon VMs. Tested running on kafka VM.
+    Threshold Engine changes the state of the alarm.  This requires the mon
-    Get it by following the instructions on https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
+    CLI and must be run on one of the mini-mon VMs. Tested running on kafka VM.
-    If you want to see the notification, you must install postfix on the kakfa VM, configure it to be local, and
+    Get it by following the instructions on
-    modify /etc/mon/notification.yaml to use localhost for the email server, then restart
+    https://wiki.hpcloud.net/display/iaas/Monitoring+CLI.
    If you want to see the notification, you must install postfix on the kafka
    VM, configure it to be local, and modify /etc/mon/notification.yaml to use
    localhost for the email server, then restart
    TODO:
        1. Add check of notification history when that is implemented
-        2. Add check of mail getting to root when postfix is added mini-mon. This script will have to run on the kafka VM
+        2. Add check of mail getting to root when postfix is added mini-mon.
           This script will have to run on the kafka VM
 """
 from __future__ import print_function
@ -25,92 +29,120 @@ import time
 # export OS_NO_CLIENT_AUTH=1
 # export MON_API_URL=http://192.168.10.4:8080/v2.0/
-os.environ["OS_AUTH_TOKEN"] = "82510970543135"
+os.environ['OS_AUTH_TOKEN'] = '82510970543135'
-os.environ["OS_NO_CLIENT_AUTH"] = "1"
+os.environ['OS_NO_CLIENT_AUTH'] = '1'
-os.environ["MON_API_URL"] = "http://192.168.10.4:8080/v2.0/"
+os.environ['MON_API_URL'] = 'http://192.168.10.4:8080/v2.0/'
 def change_alarm_state(alarm_id, new_state):
    """Patch the alarm to ``new_state`` through the mon CLI.

    Returns 1, after reporting on stderr, when the state in the CLI
    response is not ``new_state``.  Otherwise falls off the end and
    returns None.
    """
    print('Changing Alarm state to %s' % new_state)
    result_json = run_mon_cli(['alarm-patch', '--state', new_state, alarm_id])
    actual_state = result_json['state']
    if actual_state != new_state:
        # Requested state goes first (expected), reported state second
        print('Alarm patch failed, expected state of %s but was %s' %
              (new_state, actual_state), file=sys.stderr)
        return 1
 def get_alarm_state(alarm_id):
-    stdout = run_mon_cli(["mon", "--json", "alarm-show", alarm_id])
+    result_json = run_mon_cli(['alarm-show', alarm_id])
-    response_json = json.loads(stdout)
+    return result_json['state']
    return response_json['state']
-def check_alarm_history(alarm_id):
+
 def check_alarm_history(alarm_id, states):
    transitions = len(states) - 1
    print('Checking Alarm History')
-    # Make take a little bit of time for Alarm history to flow all the way through
+    # May take some time for Alarm history to flow all the way through
-    for x in range(0, 10):
+    for _ in range(0, 10):
-        stdout = run_mon_cli(["mon", "--json", "alarm-history", alarm_id])
+        result_json = run_mon_cli(['alarm-history', alarm_id])
-        response_json = json.loads(stdout)
+        if len(result_json) >= transitions:
        if len(response_json) > 0:
            break
        time.sleep(4)
    result = True
-    if not check_expected(1, len(response_json), 'number of history entries'):
+    if not check_expected(transitions, len(result_json),
                          'number of history entries'):
        return False
-    alarm_json = response_json[0]
+    result_json.sort(key=lambda x: x['timestamp'])
-    if not check_expected('UNDETERMINED', alarm_json['old_state'], 'old_state'):
+    for i in range(0, transitions):
-        result = False
+        old_state = states[i]
-    if not check_expected('ALARM', alarm_json['new_state'], 'new_state'):
+        new_state = states[i+1]
-        result = False
+        alarm_json = result_json[i]
-    if not check_expected(alarm_id, alarm_json['alarm_id'], 'alarm_id'):
+        if not check_expected(old_state, alarm_json['old_state'], 'old_state'):
-        result = False
+            result = False
        if not check_expected(new_state, alarm_json['new_state'], 'new_state'):
            result = False
        if not check_expected(alarm_id, alarm_json['alarm_id'], 'alarm_id'):
            result = False
    if result:
-        print("Alarm History is OK")
+        print('Alarm History is OK')
    return result
 def check_expected(expected, actual, what):
    if (expected == actual):
        return True
-    print("Incorrect value for alarm history " + what + " expected '" + str(expected) + "' but was '" + str(actual) + "'", file=sys.stderr)
+    print("Incorrect value for alarm history %s expected '%s' but was '%s'" %
          (what, str(expected), str(actual)), file=sys.stderr)
    return False
 def create_alarm(name, expression, notification_method_id, description=None):
-    args = ["mon", "alarm-create"]
+    args = ['alarm-create']
    if (description):
-            args.append("--description")
+            args.append('--description')
            args.append(description)
-    args.append("--alarm-actions")
+    args.append('--alarm-actions')
    args.append(notification_method_id)
-    args.append("--ok-actions")
+    args.append('--ok-actions')
    args.append(notification_method_id)
-    args.append("--undetermined-actions")
+    args.append('--undetermined-actions')
    args.append(notification_method_id)
    args.append(name)
    args.append(expression)
-    print("Creating alarm")
+    print('Creating alarm')
-    stdout = run_mon_cli(args)
+    result_json = run_mon_cli(args)
    response_json = json.loads(stdout)
    # Parse out id
-    alarm_id = response_json['id']
+    alarm_id = result_json['id']
-    return alarm_id 
+    return alarm_id
 def get_metrics(name, dimensions):
-    print("Getting metrics for " + name + str(dimensions))
+    print('Getting metrics for %s ' % (name + str(dimensions)))
-    dimensions_arg = ""
+    dimensions_arg = ''
    for key, value in dimensions.iteritems():
-        if dimensions_arg != "":
+        if dimensions_arg != '':
-            dimensions_arg = dimensions_arg + ","
+            dimensions_arg = dimensions_arg + ','
-        dimensions_arg = dimensions_arg + key + "=" + value
+        dimensions_arg = dimensions_arg + key + '=' + value
-    stdout = run_mon_cli(["mon", "--json", "measurement-list", "--dimensions", dimensions_arg, name, "00"])
+    return run_mon_cli(['measurement-list', '--dimensions',
-    return json.loads(stdout)
+                          dimensions_arg, name, '00'])
-def run_mon_cli(args):
+
 def run_mon_cli(args, useJson=True):
    if useJson:
        args.insert(0, '--json')
    args.insert(0, 'mon')
    try:
        stdout = subprocess.check_output(args)
-        return stdout
+        if useJson:
            return json.loads(stdout)
        else:
            return stdout
    except subprocess.CalledProcessError as e:
        print(e, file=sys.stderr)
        sys.exit(1)
 def create_notification(notification_name, notification_email_addr):
-    print("Creating notification")
+    print('Creating notification')
-    stdout = run_mon_cli(["mon", "notification-create", notification_name, "EMAIL", notification_email_addr])
+    result_json = run_mon_cli(['notification-create', notification_name,
-    response_json = json.loads(stdout)
+                         'EMAIL', notification_email_addr])
    # Parse out id
-    notification_method_id = response_json['id']
+    notification_method_id = result_json['id']
-    return notification_method_id 
+    return notification_method_id
 def find_id_for_name(object_json, name):
    for obj in object_json:
@ -119,74 +151,112 @@ def find_id_for_name(object_json, name):
            return obj['id']
    return None
 def cleanup(notification_name, alarm_name):
    # Delete our alarm if it already exists
-    alarm_json = json.loads(run_mon_cli(["mon", "--json", "alarm-list"]))
+    alarm_json = run_mon_cli(['alarm-list'])
    alarm_id = find_id_for_name(alarm_json, alarm_name)
    if alarm_id:
-        run_mon_cli(["mon", "alarm-delete", alarm_id])
+        run_mon_cli(['alarm-delete', alarm_id], useJson=False)
    # Delete our notification if it already exists
-    notification_json = json.loads(run_mon_cli(["mon", "--json", "notification-list"]))
+    notification_json = run_mon_cli(['notification-list'])
    notification_id = find_id_for_name(notification_json, notification_name)
    if notification_id:
-        run_mon_cli(["mon", "notification-delete", notification_id])
+        run_mon_cli(['notification-delete', notification_id], useJson=False)
 def wait_for_alarm_state_change(alarm_id, old_state):
    """Poll the alarm once a second until its state differs from old_state.

    Returns the new state as soon as a change is seen.  After 250 polls
    without a change, reports on stderr and exits with status 1.
    """
    print('Waiting for alarm to change state from %s' % old_state)
    max_polls = 250
    for elapsed in range(max_polls):
        time.sleep(1)
        current = get_alarm_state(alarm_id)
        if current == old_state:
            continue
        print('Alarm state changed to %s in %d seconds' % (current, elapsed))
        return current
    # Loop ran out without seeing a different state
    print('State never changed from %s in %d seconds' % (old_state, elapsed),
          file=sys.stderr)
    sys.exit(1)
 def main():
-    notification_name = "Jahmon Smoke Test"
+    notification_name = 'Jahmon Smoke Test'
-    notification_email_addr = "root@kafka"
+    notification_email_addr = 'root@kafka'
-    alarm_name = "high cpu and load"
+    alarm_name = 'high cpu and load'
-    metric_name = "cpu_system_perc"
+    metric_name = 'cpu_system_perc'
-    metric_dimensions = {"hostname":"thresh"}
+    metric_dimensions = {'hostname': 'thresh'}
    cleanup(notification_name, alarm_name)
    # Query how many metrics there are for the Alarm
    metric_json = get_metrics(metric_name, metric_dimensions)
    if len(metric_json) == 0:
-        print("No measurements received for metric " + metric_name + str(metric_dimensions), file=sys.stderr)
+        print('No measurements received for metric %s ' %
-        sys.exit(1)
+              (metric_name + str(metric_dimensions)), file=sys.stderr)
        return 1
    start_time = time.time()
    initial_num_metrics = len(metric_json[0]['measurements'])
-    # Create Notification through CLI 
+    # Create Notification through CLI
-    notification_method_id = create_notification(notification_name, notification_email_addr)
+    notification_method_id = create_notification(notification_name,
                                                 notification_email_addr)
    # Create Alarm through CLI
-    alarm_id = create_alarm(alarm_name, "max(cpu_system_perc) > 1 and max(load_avg_1_min{hostname=thresh}) > 3", notification_method_id, "System CPU Utilization exceeds 1% and Load exeeds 3 per measurement period")
+    expression = 'max(cpu_system_perc) > 1 and ' + \
                 'max(load_avg_1_min{hostname=thresh}) > 1'
    description = 'System CPU Utilization exceeds 1% and ' + \
                  'Load exceeds 3 per measurement period'
    alarm_id = create_alarm(alarm_name, expression, notification_method_id,
                            description)
    state = get_alarm_state(alarm_id)
    # Ensure it is created in the right state
    if state != 'UNDETERMINED':
-        print("Wrong initial alarm state, expected UNDETERMINED but was " + state)
+        print('Wrong initial alarm state, expected UNDETERMINED but is %s' %
-        sys.exit(1)
+              state)
-    # Wait for it to 
+        return 1
-    print("Waiting for alarm to change state")
+
-    change_time = 0
+    state = wait_for_alarm_state_change(alarm_id, 'UNDETERMINED')
    for x in range(0, 250):
        time.sleep(1)
        state = get_alarm_state(alarm_id)
        if state != 'UNDETERMINED':
            print("Alarm state changed in " + str(x) + " seconds")
            change_time = x
            break
    if state != 'ALARM':
-        print("Wrong final state, expected ALARM but was " + state, file=sys.stderr)
+        print('Wrong final state, expected ALARM but was %s' % state,
-        sys.exit(1)
+              file=sys.stderr)
-    print("Final state of alarm was " + state)
+        return 1
-    # If the alarm changes state too fast, then there isn't time for the new metric to arrive.
+
-    # Unlikely, but it has been seen
+    state_changes = ['UNDETERMINED', 'ALARM']
    new_state = 'OK'
    state_changes.append(new_state)
    change_alarm_state(alarm_id, new_state)
    # There is a bug in the API which allows this to work. Soon that
    # will be fixed and this will fail
    if len(sys.argv) > 1:
        final_state = 'ALARM'
        state_changes.append(final_state)
        state = wait_for_alarm_state_change(alarm_id, new_state)
        if state != final_state:
            print('Wrong final state, expected %s but was %s' %
                  (final_state, state), file=sys.stderr)
            return 1
    # If the alarm changes state too fast, then there isn't time for the new
    # metric to arrive. Unlikely, but it has been seen
    change_time = time.time() - start_time
    if change_time < 30:
        time.sleep(30 - change_time)
        change_time = 30
    metric_json = get_metrics(metric_name, metric_dimensions)
    final_num_metrics = len(metric_json[0]['measurements'])
    if final_num_metrics <= initial_num_metrics:
-        print("No new metrics received", file=sys.stderr)
+        print('No new metrics received', file=sys.stderr)
-        sys.exit(1)
+        return 1
-    print("Received " + str(final_num_metrics - initial_num_metrics) + " metrics in " + str(change_time) + " seconds")
+    print('Received %d metrics in %d seconds' %
-    if not check_alarm_history(alarm_id):
+          ((final_num_metrics - initial_num_metrics),  change_time))
-        sys.exit(1)
+    if not check_alarm_history(alarm_id, state_changes):
        return 1
    return 0
-    
+
-    
+
 if __name__ == "__main__":
    sys.exit(main())