Add support for classification rate

This adds a section to the end of the report for the classification
rate that we currently have, as well as the jobs with the most
still-unclassified errors.

Change-Id: Ia1426bef2d788f8b2c14e5a1402a5ac01a3561f7
Sean Dague 2013-12-17 10:48:49 -05:00
parent 37a9e6fa82
commit 9501108af4

@@ -15,7 +15,9 @@
# under the License.
import argparse
import operator
import os
import re
from launchpadlib import launchpad
@@ -33,25 +35,95 @@ def get_options():
    parser.add_argument('--lp', '-l', help="Query Launchpad",
                        type=bool,
                        default=False)
    parser.add_argument('--rate', '-r', help="Classification rate",
                        type=bool,
                        default=True)
    return parser.parse_args()
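
Note that argparse's type=bool simply runs bool() on the raw option string, so any
non-empty value (including "False") parses as True; like the existing --lp option,
--rate is effectively only disabled by an empty string or a changed default. A
minimal standalone sketch of that behavior (not part of this change):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--rate', '-r', type=bool, default=True)

    # bool("False") is True, so this does not actually turn the option off.
    print(parser.parse_args(['--rate', 'False']).rate)  # True
    # Only a falsy (empty) string turns it off.
    print(parser.parse_args(['--rate', '']).rate)        # False
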
def all_fails(classifier):
    """Find all the fails in the integrated gate.

    This attempts to find all the build jobs in the integrated gate
    so we can figure out how well we are doing on total classification.
    """
    all_fails = {}
    query = ('filename:"console.html" '
             'AND message:"Finished: FAILURE" '
             'AND build_queue:"gate"')
    results = classifier.hits_by_query(query, size=30000)
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_uuid"])
    for build in facets:
        for result in facets[build]:
            # not perfect, but basically an attempt to show the integrated
            # gate. Would be nice if there was a zuul attr for this in es.
            if re.search("(^openstack/|devstack|grenade)", result.project):
                all_fails["%s.%s" % (build, result.build_name)] = False
    return all_fails
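
For reference, all_fails() returns a dict keyed by "<build_uuid>.<build_name>",
with every value starting at False until classifying_rate() marks it as matched
by a bug query. A sketch of the shape, with purely illustrative placeholder keys:

    # Illustrative placeholders only; real keys come from elastic-search results.
    fails = {
        "4f6a1b9c.gate-tempest-dsvm-full": False,
        "8d2e07a3.gate-grenade-dsvm": False,
    }
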
def classifying_rate(classifier, data):
    """Build and print the classification rate.

    It's important to know how good a job we are doing, so this
    tool runs through all the failures we've got and builds the
    classification rate: for every failure in the gate queue, did
    we find a match for it?
    """
    fails = all_fails(classifier)
    for bugnum in data:
        bug = data[bugnum]
        for job in bug['failed_jobs']:
            fails[job] = True

    total = len(fails.keys())
    bad_jobs = {}
    count = 0
    for f in fails:
        if fails[f] is True:
            count += 1
        else:
            build, job = f.split('.', 1)
            if job in bad_jobs:
                bad_jobs[job] += 1
            else:
                bad_jobs[job] = 1

    print("Classification percentage: %2.2f%%" %
          ((float(count) / float(total)) * 100.0))
    sort = sorted(
        bad_jobs.iteritems(),
        key=operator.itemgetter(1),
        reverse=True)
    print("Job fails with most unclassified errors")
    for s in sort:
        print(" %3s : %s" % (s[1], s[0]))
def collect_metrics(classifier):
    data = {}
    for q in classifier.queries:
        results = classifier.hits_by_query(q['query'], size=30000)
        facets = er_results.FacetSet()
        facets.detect_facets(
            results,
            ["build_status", "build_uuid"])

        num_fails = 0
        failed_jobs = []
        if "FAILURE" in facets:
            num_fails = len(facets["FAILURE"])
            for build in facets["FAILURE"]:
                for result in facets["FAILURE"][build]:
                    failed_jobs.append("%s.%s" % (build, result.build_name))

        data[q['bug']] = {
            'fails': num_fails,
            'hits': facets,
            'query': q['query'],
            'failed_jobs': failed_jobs
        }
    return data
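
classifying_rate() only relies on the 'failed_jobs' entries built here; a sketch
of what a single collect_metrics() entry looks like, with placeholder values
throughout (the bug number, counts, and query string below are invented for
illustration):

    data = {
        '123456': {                        # Launchpad bug number from the query
            'fails': 2,                    # FAILURE builds hit by the query
            'hits': '<FacetSet placeholder>',  # really the FacetSet for the query
            'query': 'message:"example failure signature"',  # really q['query']
            'failed_jobs': ["uuid1.gate-tempest-dsvm-full",
                            "uuid2.gate-grenade-dsvm"],
        },
    }
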
@@ -90,6 +162,8 @@ def main():
    classifier = er.Classifier(opts.dir)
    data = collect_metrics(classifier)
    print_metrics(data, with_lp=opts.lp)
    if opts.rate:
        classifying_rate(classifier, data)


if __name__ == "__main__":