Add new graph type to show run_time variance by metadata

This commit adds a new graph type to subunit2sql-graph, run_time_meta, which
is used to display a scatter matrix of all the aggregate run times for
all runs grouped by a metadata key value. Note that, depending on the DB
size, the plot generation can be fairly CPU- and memory-intensive.

Change-Id: I369717c84df0217a9a0cf17b7f9e975e580ce5a7
This commit is contained in:
Matthew Treinish 2015-08-07 13:33:04 -04:00
parent c37eb3d97d
commit 80fe27bb97
No known key found for this signature in database
GPG Key ID: FD12A0F214C9E177
4 changed files with 104 additions and 1 deletions

View File

@ -23,6 +23,7 @@ import subunit2sql.analysis.dailycount
import subunit2sql.analysis.failures
import subunit2sql.analysis.run_failure_rate
import subunit2sql.analysis.run_time
import subunit2sql.analysis.run_time_meta
from subunit2sql import shell
CONF = cfg.CONF
@ -56,7 +57,7 @@ def add_command_parsers(subparsers):
graph_commands = {}
# Put commands from in-tree commands on init list
for command in ['failures', 'run_time', 'agg_count', 'dailycount',
'run_failure_rate']:
'run_failure_rate', 'run_time_meta']:
graph_commands[command] = getattr(subunit2sql.analysis, command)
# Load any installed out of tree commands on the init list

View File

@ -0,0 +1,60 @@
# Copyright 2015 Hewlett-Packard Development Company, L.P.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import matplotlib
import matplotlib.pyplot as plt
from oslo_config import cfg
import pandas as pd
from subunit2sql.db import api
CONF = cfg.CONF
matplotlib.style.use('ggplot')
def set_cli_opts(parser):
    """Register the run_time_meta command's CLI arguments on *parser*.

    :param parser: An argparse parser (or subparser) to add arguments to
    """
    help_text = "The run_metadata key to group the runs by"
    parser.add_argument('metadata_key', help=help_text)
def generate_series():
    """Generate a box plot of aggregate run times grouped by a metadata key.

    Pulls the run_time of every successful run between CONF.start_date and
    CONF.stop_date (when set), groups the values by the CLI-provided
    run_metadata key, and writes a box plot to CONF.output.
    """
    session = api.get_session()
    # CLI dates arrive as 'YYYY-MM-DD' strings; the DB layer expects
    # datetime objects (or None to leave the range unbounded)
    if CONF.start_date:
        start_date = datetime.datetime.strptime(CONF.start_date, '%Y-%m-%d')
    else:
        start_date = None
    if CONF.stop_date:
        stop_date = datetime.datetime.strptime(CONF.stop_date, '%Y-%m-%d')
    else:
        stop_date = None
    run_times = api.get_run_times_grouped_by_run_metadata_key(
        CONF.command.metadata_key, start_date=start_date,
        stop_date=stop_date, session=session)
    # NOTE: dict.iteritems() is Python 2 only; items() works on both 2 and 3.
    # Wrapping each group in a Series lets pandas pad unequal-length groups
    # with NaN instead of raising.
    df = pd.DataFrame({k: pd.Series(v) for k, v in run_times.items()})
    if not CONF.title:
        title = "Run aggregate run time grouped by metadata"
    else:
        title = CONF.title
    # NOTE(mtreinish): Decrease label font size for the worst case where we
    # have tons of groups
    matplotlib.rcParams['xtick.labelsize'] = '3'
    plt.figure()
    plt.title(title)
    df.plot(kind='box', rot=90)
    plt.ylabel('Time (sec.)')
    plt.tight_layout()
    plt.savefig(CONF.output, dpi=900)

View File

@ -995,6 +995,39 @@ def get_ids_for_all_tests(session=None):
return db_utils.model_query(models.Test, session).value(models.Test.id)
def get_run_times_grouped_by_run_metadata_key(key, start_date=None,
                                              stop_date=None, session=None):
    """Return the aggregate run times for all runs grouped by a metadata key

    Only successful runs (zero failures and at least one pass) are included.

    :param str key: The run_metadata key to use for grouping runs
    :param datetime start_date: Optional, when provided only runs at or after
                                this date are included
    :param datetime stop_date: Optional, when provided only runs at or before
                               this date are included
    :param session: Optional session object if one isn't provided a new session
                    will be acquired for the duration of this operation
    :return: A dictionary where keys are the value of the provided metadata key
             and the values are a list of run_times for successful runs with
             that metadata value
    :rtype: dict
    """
    session = session or get_session()
    run_times_query = db_utils.model_query(models.Run, session).filter(
        models.Run.fails == 0, models.Run.passes > 0).join(
            models.RunMetadata,
            models.Run.id == models.RunMetadata.run_id).filter(
                models.RunMetadata.key == key)
    run_times_query = _filter_runs_by_date(run_times_query, start_date,
                                           stop_date)
    run_times = run_times_query.values(models.Run.run_at, models.Run.run_time,
                                       models.RunMetadata.value)
    result = {}
    # Each row is (run_at, run_time, metadata_value); run_at is unused here.
    # setdefault avoids the double lookup and the truthiness pitfall of
    # result.get(), which would mishandle a falsy (e.g. empty) existing list.
    for _run_at, run_time, meta_value in run_times:
        result.setdefault(meta_value, []).append(run_time)
    return result
def get_test_counts_in_date_range(test_id, start_date=None, stop_date=None,
session=None):
"""Return the number of successes, failures, and skips for a single test.

View File

@ -394,6 +394,15 @@ class TestDatabaseAPI(base.TestCase):
result.keys())])
self.assertEqual(5, result[list(result.keys())[0]])
def test_get_run_times_grouped_by_run_metadata_key(self):
    # Two successful runs, each tagged with a distinct value for the same
    # metadata key, should come back as two single-element groups.
    first_run = api.create_run(run_time=2.2, passes=2)
    second_run = api.create_run(run_time=3.5, passes=3)
    api.add_run_metadata({'key': 'value_a'}, first_run.id)
    api.add_run_metadata({'key': 'value_b'}, second_run.id)
    grouped = api.get_run_times_grouped_by_run_metadata_key('key')
    self.assertEqual({'value_a': [2.2], 'value_b': [3.5]}, grouped)
def test_get_test_run_dict_by_run_meta_key_value(self):
timestamp_a = datetime.datetime.utcnow()
timestamp_b = timestamp_a + datetime.timedelta(minutes=2)