Add new graph type to show run_time variance by metadata

This commit adds a new graph command to subunit2sql-graph, run_time_meta,
which displays a box-and-whisker plot of the aggregate run times of all
successful runs, grouped by the values of a run_metadata key. Note that,
depending on the size of the database, generating the plot can be fairly
CPU- and memory-intensive.

Change-Id: I369717c84df0217a9a0cf17b7f9e975e580ce5a7
Matthew Treinish 2015-08-07 13:33:04 -04:00
parent c37eb3d97d
commit 80fe27bb97
4 changed files with 104 additions and 1 deletion
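
For orientation, here is a minimal standalone sketch of the data flow the new command implements. This is not the committed code (that is the new run_time_meta module in the diff below), and the metadata values and timings are made up for illustration:

import matplotlib
matplotlib.use('Agg')  # render to a file; no display needed
import matplotlib.pyplot as plt
import pandas as pd

# Hypothetical aggregate run times (seconds), keyed by the value of a
# run_metadata key, e.g. a 'build_queue' key with values 'gate'/'check'
run_times = {
    'gate': [2310.2, 2401.7, 2288.9, 2512.0],
    'check': [2722.4, 2650.1],
}

# Wrap each list in a Series so groups of different sizes can share a frame
df = pd.DataFrame({k: pd.Series(v) for k, v in run_times.items()})
df.plot(kind='box', rot=90)  # one box of run times per metadata value
plt.ylabel('Time (sec.)')
plt.tight_layout()
plt.savefig('run_time_meta.png', dpi=300)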


@@ -23,6 +23,7 @@ import subunit2sql.analysis.dailycount
 import subunit2sql.analysis.failures
 import subunit2sql.analysis.run_failure_rate
 import subunit2sql.analysis.run_time
+import subunit2sql.analysis.run_time_meta
 from subunit2sql import shell

 CONF = cfg.CONF
@@ -56,7 +57,7 @@ def add_command_parsers(subparsers):
     graph_commands = {}
     # Put commands from in-tree commands on init list
     for command in ['failures', 'run_time', 'agg_count', 'dailycount',
-                    'run_failure_rate']:
+                    'run_failure_rate', 'run_time_meta']:
         graph_commands[command] = getattr(subunit2sql.analysis, command)

     # Load any installed out of tree commands on the init list


@@ -0,0 +1,60 @@
+# Copyright 2015 Hewlett-Packard Development Company, L.P.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import datetime
+
+import matplotlib
+import matplotlib.pyplot as plt
+from oslo_config import cfg
+import pandas as pd
+
+from subunit2sql.db import api
+
+CONF = cfg.CONF
+matplotlib.style.use('ggplot')
+
+
+def set_cli_opts(parser):
+    parser.add_argument('metadata_key',
+                        help="The run_metadata key to group the runs by")
+
+
+def generate_series():
+    session = api.get_session()
+    if CONF.start_date:
+        start_date = datetime.datetime.strptime(CONF.start_date, '%Y-%m-%d')
+    else:
+        start_date = None
+    if CONF.stop_date:
+        stop_date = datetime.datetime.strptime(CONF.stop_date, '%Y-%m-%d')
+    else:
+        stop_date = None
+    run_times = api.get_run_times_grouped_by_run_metadata_key(
+        CONF.command.metadata_key, start_date=start_date,
+        stop_date=stop_date, session=session)
+    df = pd.DataFrame(dict(
+        [(k, pd.Series(v)) for k, v in run_times.iteritems()]))
+    if not CONF.title:
+        title = "Run aggregate run time grouped by metadata"
+    else:
+        title = CONF.title
+    # NOTE(mtreinish): Decrease label font size for the worst case where we
+    # have tons of groups
+    matplotlib.rcParams['xtick.labelsize'] = '3'
+    plt.figure()
+    plt.title(title)
+    df.plot(kind='box', rot=90)
+    plt.ylabel('Time (sec.)')
+    plt.tight_layout()
+    plt.savefig(CONF.output, dpi=900)
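
A note on the DataFrame construction in generate_series(): metadata groups will rarely contain the same number of runs, and building a DataFrame straight from a dict of unequal-length lists raises a ValueError. Converting each list to a pd.Series first (as the code above does, using the Python 2 iteritems() spelling of items()) aligns the columns on the index and pads the shorter ones with NaN, which the box plot statistics then ignore. A small illustration with made-up numbers:

import pandas as pd

groups = {'value_a': [1.0, 2.0, 3.0], 'value_b': [4.0]}

# pd.DataFrame(groups) would fail because the lists differ in length;
# per-column Series are padded with NaN instead.
df = pd.DataFrame({k: pd.Series(v) for k, v in groups.items()})
print(df)
#    value_a  value_b
# 0      1.0      4.0
# 1      2.0      NaN
# 2      3.0      NaN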


@@ -995,6 +995,39 @@ def get_ids_for_all_tests(session=None):
     return db_utils.model_query(models.Test, session).value(models.Test.id)


+def get_run_times_grouped_by_run_metadata_key(key, start_date=None,
+                                               stop_date=None, session=None):
+    """Return the aggregate run times for all runs grouped by a metadata key
+
+    :param key: The run_metadata key to use for grouping runs
+    :param start_date: Optional start date to filter the runs on
+    :param stop_date: Optional stop date to filter the runs on
+    :param session: Optional session object; if one isn't provided a new
+                    session will be acquired for the duration of this
+                    operation
+    :return: A dictionary where the keys are the values of the provided
+             metadata key and the values are lists of run_times for
+             successful runs with that metadata value
+    :rtype: dict
+    """
+    session = session or get_session()
+    run_times_query = db_utils.model_query(models.Run, session).filter(
+        models.Run.fails == 0, models.Run.passes > 0).join(
+            models.RunMetadata,
+            models.Run.id == models.RunMetadata.run_id).filter(
+                models.RunMetadata.key == key)
+    run_times_query = _filter_runs_by_date(run_times_query, start_date,
+                                           stop_date)
+    run_times = run_times_query.values(models.Run.run_at, models.Run.run_time,
+                                       models.RunMetadata.value)
+    result = {}
+    for run in run_times:
+        if result.get(run[2]):
+            result[run[2]].append(run[1])
+        else:
+            result[run[2]] = [run[1]]
+    return result
+
+
 def get_test_counts_in_date_range(test_id, start_date=None, stop_date=None,
                                   session=None):
     """Return the number of successes, failures, and skips for a single test.


@@ -394,6 +394,15 @@ class TestDatabaseAPI(base.TestCase):
                                       result.keys())])
         self.assertEqual(5, result[list(result.keys())[0]])

+    def test_get_run_times_grouped_by_run_metadata_key(self):
+        run_a = api.create_run(run_time=2.2, passes=2)
+        run_b = api.create_run(run_time=3.5, passes=3)
+        api.add_run_metadata({'key': 'value_a'}, run_a.id)
+        api.add_run_metadata({'key': 'value_b'}, run_b.id)
+        res = api.get_run_times_grouped_by_run_metadata_key('key')
+        expected_res = {'value_a': [2.2], 'value_b': [3.5]}
+        self.assertEqual(expected_res, res)
+
     def test_get_test_run_dict_by_run_meta_key_value(self):
         timestamp_a = datetime.datetime.utcnow()
         timestamp_b = timestamp_a + datetime.timedelta(minutes=2)