Add new graph type to show run_time variance by metadata

This commit adds a new graph type to subunit2sql-graph, run_time_meta, which
is used to display a scatter matrix of all the aggregate run times for
all runs grouped by a metadata key value. Note that, depending on the DB
size, the plot generation can be fairly CPU- and memory-intensive.

Change-Id: I369717c84df0217a9a0cf17b7f9e975e580ce5a7
This commit is contained in:
Matthew Treinish 2015-08-07 13:33:04 -04:00
parent c37eb3d97d
commit 80fe27bb97
No known key found for this signature in database
GPG Key ID: FD12A0F214C9E177
4 changed files with 104 additions and 1 deletions

View File

@ -23,6 +23,7 @@ import subunit2sql.analysis.dailycount
import subunit2sql.analysis.failures
import subunit2sql.analysis.run_failure_rate
import subunit2sql.analysis.run_time
import subunit2sql.analysis.run_time_meta
from subunit2sql import shell
CONF = cfg.CONF
@ -56,7 +57,7 @@ def add_command_parsers(subparsers):
graph_commands = {}
# Put commands from in-tree commands on init list
for command in ['failures', 'run_time', 'agg_count', 'dailycount',
'run_failure_rate']:
'run_failure_rate', 'run_time_meta']:
graph_commands[command] = getattr(subunit2sql.analysis, command)
# Load any installed out of tree commands on the init list

View File

@ -0,0 +1,60 @@
# Copyright 2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import matplotlib
import matplotlib.pyplot as plt
from oslo_config import cfg
import pandas as pd
from subunit2sql.db import api
CONF = cfg.CONF
matplotlib.style.use('ggplot')
def set_cli_opts(parser):
    """Register the run_time_meta command's CLI arguments on *parser*.

    :param parser: An argparse parser (or subparser) to add arguments to
    """
    help_text = "The run_metadata key to group the runs by"
    parser.add_argument('metadata_key', help=help_text)
def generate_series():
    """Generate a box plot of aggregate run times grouped by a metadata key.

    Pulls the run_time of every successful run between CONF.start_date and
    CONF.stop_date (when set), groups the values by the CLI-provided
    run_metadata key, and writes a box plot to CONF.output.
    """
    session = api.get_session()
    # CLI dates arrive as 'YYYY-MM-DD' strings; the DB layer expects
    # datetime objects (or None to leave the range unbounded)
    if CONF.start_date:
        start_date = datetime.datetime.strptime(CONF.start_date, '%Y-%m-%d')
    else:
        start_date = None
    if CONF.stop_date:
        stop_date = datetime.datetime.strptime(CONF.stop_date, '%Y-%m-%d')
    else:
        stop_date = None
    run_times = api.get_run_times_grouped_by_run_metadata_key(
        CONF.command.metadata_key, start_date=start_date,
        stop_date=stop_date, session=session)
    # NOTE: dict.iteritems() is Python 2 only; items() works on both 2 and 3.
    # Wrapping each group in a Series lets pandas pad unequal-length groups
    # with NaN instead of raising.
    df = pd.DataFrame({k: pd.Series(v) for k, v in run_times.items()})
    if not CONF.title:
        title = "Run aggregate run time grouped by metadata"
    else:
        title = CONF.title
    # NOTE(mtreinish): Decrease label font size for the worst case where we
    # have tons of groups
    matplotlib.rcParams['xtick.labelsize'] = '3'
    plt.figure()
    plt.title(title)
    df.plot(kind='box', rot=90)
    plt.ylabel('Time (sec.)')
    plt.tight_layout()
    plt.savefig(CONF.output, dpi=900)

View File

@ -995,6 +995,39 @@ def get_ids_for_all_tests(session=None):
return db_utils.model_query(models.Test, session).value(models.Test.id)
def get_run_times_grouped_by_run_metadata_key(key, start_date=None,
                                              stop_date=None, session=None):
    """Return the aggregate run times for all runs grouped by a metadata key

    Only successful runs (zero failures and at least one pass) are included.

    :param str key: The run_metadata key to use for grouping runs
    :param datetime start_date: Optional, when provided only runs at or after
                                this date are included
    :param datetime stop_date: Optional, when provided only runs at or before
                               this date are included
    :param session: Optional session object if one isn't provided a new session
                    will be acquired for the duration of this operation
    :return: A dictionary where keys are the value of the provided metadata key
             and the values are a list of run_times for successful runs with
             that metadata value
    :rtype: dict
    """
    session = session or get_session()
    run_times_query = db_utils.model_query(models.Run, session).filter(
        models.Run.fails == 0, models.Run.passes > 0).join(
            models.RunMetadata,
            models.Run.id == models.RunMetadata.run_id).filter(
                models.RunMetadata.key == key)
    run_times_query = _filter_runs_by_date(run_times_query, start_date,
                                           stop_date)
    run_times = run_times_query.values(models.Run.run_at, models.Run.run_time,
                                       models.RunMetadata.value)
    result = {}
    # Each row is (run_at, run_time, metadata_value); run_at is unused here.
    # setdefault avoids the double lookup and the truthiness pitfall of
    # result.get(), which would mishandle a falsy (e.g. empty) existing list.
    for _run_at, run_time, meta_value in run_times:
        result.setdefault(meta_value, []).append(run_time)
    return result
def get_test_counts_in_date_range(test_id, start_date=None, stop_date=None,
session=None):
"""Return the number of successes, failures, and skips for a single test.

View File

@ -394,6 +394,15 @@ class TestDatabaseAPI(base.TestCase):
result.keys())])
self.assertEqual(5, result[list(result.keys())[0]])
def test_get_run_times_grouped_by_run_metadata_key(self):
    # Two successful runs, each tagged with a distinct value for the same
    # metadata key, should come back as two single-element groups.
    first_run = api.create_run(run_time=2.2, passes=2)
    second_run = api.create_run(run_time=3.5, passes=3)
    api.add_run_metadata({'key': 'value_a'}, first_run.id)
    api.add_run_metadata({'key': 'value_b'}, second_run.id)
    grouped = api.get_run_times_grouped_by_run_metadata_key('key')
    self.assertEqual({'value_a': [2.2], 'value_b': [3.5]}, grouped)
def test_get_test_run_dict_by_run_meta_key_value(self):
timestamp_a = datetime.datetime.utcnow()
timestamp_b = timestamp_a + datetime.timedelta(minutes=2)