From 9501108af46b65e246d4d9b84edfdd7bc6a825b6 Mon Sep 17 00:00:00 2001
From: Sean Dague
Date: Tue, 17 Dec 2013 10:48:49 -0500
Subject: [PATCH] add support for classification rate

this adds a bit to the end of the report for the classification
rate that we currently have, as well as the jobs with the most
still unclassified errors.

Change-Id: Ia1426bef2d788f8b2c14e5a1402a5ac01a3561f7
---
Reviewer notes (this area is ignored by git am):
 - whitespace was rebuilt from a line-collapsed copy of this patch; check
   the context/removed lines against the base file before applying.
 - added lines were revised in review, so the post-image blob id on the
   "index" line no longer matches the resulting file.

 elastic_recheck/cmd/check_success.py | 91 ++++++++++++++++++++++++++--
 1 file changed, 88 insertions(+), 3 deletions(-)

diff --git a/elastic_recheck/cmd/check_success.py b/elastic_recheck/cmd/check_success.py
index 8610d207..b4e6bb0c 100755
--- a/elastic_recheck/cmd/check_success.py
+++ b/elastic_recheck/cmd/check_success.py
@@ -15,7 +15,9 @@
 # under the License.
 
 import argparse
+import operator
 import os
+import re
 
 from launchpadlib import launchpad
 
@@ -33,25 +35,106 @@ def get_options():
     parser.add_argument('--lp', '-l', help="Query Launchpad",
                         type=bool,
                         default=False)
+    parser.add_argument('--rate', '-r', help="Classification rate",
+                        type=bool,
+                        default=True)
     return parser.parse_args()
 
 
+def all_fails(classifier):
+    """Find all the fails in the integrated gate.
+
+    This attempts to find all the build jobs in the integrated gate
+    so we can figure out how good we are doing on total classification.
+
+    Returns a dict keyed by "%s.%s" % (build, build_name) with every
+    value set to False (i.e. not yet classified).
+    """
+    fails = {}
+    query = ('filename:"console.html" '
+             'AND message:"Finished: FAILURE" '
+             'AND build_queue:"gate"')
+    results = classifier.hits_by_query(query, size=30000)
+    facets = er_results.FacetSet()
+    facets.detect_facets(results, ["build_uuid"])
+    for build in facets:
+        for result in facets[build]:
+            # not perfect, but basically an attempt to show the integrated
+            # gate. Would be nice if there was a zuul attr for this in es.
+            if re.search("(^openstack/|devstack|grenade)", result.project):
+                fails["%s.%s" % (build, result.build_name)] = False
+    return fails
+
+
+def classifying_rate(classifier, data):
+    """Builds and prints the classification rate.
+
+    It's important to know how good a job we are doing, so this
+    tool runs through all the failures we've got and builds the
+    classification rate. For every failure in the gate queue did
+    we find a match for it.
+
+    data is the dict built by collect_metrics(); only the 'failed_jobs'
+    list of each entry is used here.
+    """
+    fails = all_fails(classifier)
+    # Jobs matched by a bug query but not returned by all_fails() get
+    # added here as well, so they count towards the total too.
+    for bugnum in data:
+        for job in data[bugnum]['failed_jobs']:
+            fails[job] = True
+
+    total = len(fails)
+    if total == 0:
+        # Nothing failed, so there is no rate to compute (and dividing
+        # by zero below would blow up the whole report).
+        print("Classification percentage: no failures found")
+        return
+
+    bad_jobs = {}
+    count = 0
+    for f in fails:
+        if fails[f] is True:
+            count += 1
+        else:
+            job = f.split('.', 1)[1]
+            bad_jobs[job] = bad_jobs.get(job, 0) + 1
+
+    print("Classification percentage: %2.2f%%" %
+          ((float(count) / float(total)) * 100.0))
+    sort = sorted(
+        bad_jobs.items(),
+        key=operator.itemgetter(1),
+        reverse=True)
+    print("Job fails with most unclassified errors")
+    for s in sort:
+        print(" %3s : %s" % (s[1], s[0]))
+
+
 def collect_metrics(classifier):
     data = {}
     for q in classifier.queries:
         results = classifier.hits_by_query(q['query'], size=30000)
         facets = er_results.FacetSet()
-        facets.detect_facets(results, ["build_status", "build_uuid"])
+        facets.detect_facets(
+            results,
+            ["build_status", "build_uuid"])
 
         num_fails = 0
+        # same "%s.%s" % (build, build_name) key format as all_fails()
+        failed_jobs = []
         if "FAILURE" in facets:
             num_fails = len(facets["FAILURE"])
+            for build in facets["FAILURE"]:
+                for result in facets["FAILURE"][build]:
+                    failed_jobs.append("%s.%s" % (build, result.build_name))
 
         data[q['bug']] = {
             'fails': num_fails,
             'hits': facets,
-            'query': q['query']
-            }
+            'query': q['query'],
+            'failed_jobs': failed_jobs
+        }
 
     return data
 
 
@@ -90,6 +173,8 @@ def main():
     classifier = er.Classifier(opts.dir)
     data = collect_metrics(classifier)
     print_metrics(data, with_lp=opts.lp)
+    if opts.rate:
+        classifying_rate(classifier, data)
 
 
 if __name__ == "__main__":