Add support for classification rate

This adds a section to the end of the report for the classification
rate that we currently have, as well as the jobs with the most
still-unclassified errors.

Change-Id: Ia1426bef2d788f8b2c14e5a1402a5ac01a3561f7
Sean Dague 2013-12-17 10:48:49 -05:00
parent 37a9e6fa82
commit 9501108af4

@@ -15,7 +15,9 @@
# under the License.
import argparse
import operator
import os
import re
from launchpadlib import launchpad
@@ -33,25 +35,95 @@ def get_options():
    parser.add_argument('--lp', '-l', help="Query Launchpad",
                        type=bool,
                        default=False)
    parser.add_argument('--rate', '-r', help="Classification rate",
                        type=bool,
                        default=True)
    return parser.parse_args()
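
Note that argparse's type=bool simply runs bool() on the raw option string, so any
non-empty value (including "False") parses as True; like the existing --lp option,
--rate is effectively only disabled by an empty string or a changed default. A
minimal standalone sketch of that behavior (not part of this change):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--rate', '-r', type=bool, default=True)

    # bool("False") is True, so this does not actually turn the option off.
    print(parser.parse_args(['--rate', 'False']).rate)  # True
    # Only a falsy (empty) string turns it off.
    print(parser.parse_args(['--rate', '']).rate)        # False
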
def all_fails(classifier):
    """Find all the fails in the integrated gate.

    This attempts to find all the build jobs in the integrated gate
    so we can figure out how well we are doing on total classification.
    """
    all_fails = {}
    query = ('filename:"console.html" '
             'AND message:"Finished: FAILURE" '
             'AND build_queue:"gate"')
    results = classifier.hits_by_query(query, size=30000)
    facets = er_results.FacetSet()
    facets.detect_facets(results, ["build_uuid"])
    for build in facets:
        for result in facets[build]:
            # not perfect, but basically an attempt to show the integrated
            # gate. Would be nice if there was a zuul attr for this in es.
            if re.search("(^openstack/|devstack|grenade)", result.project):
                all_fails["%s.%s" % (build, result.build_name)] = False
    return all_fails
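
For reference, all_fails() returns a dict keyed by "<build_uuid>.<build_name>",
with every value starting at False until classifying_rate() marks it as matched
by a bug query. A sketch of the shape, with purely illustrative placeholder keys:

    # Illustrative placeholders only; real keys come from elastic-search results.
    fails = {
        "4f6a1b9c.gate-tempest-dsvm-full": False,
        "8d2e07a3.gate-grenade-dsvm": False,
    }
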
def classifying_rate(classifier, data):
    """Build and print the classification rate.

    It's important to know how good a job we are doing, so this
    tool runs through all the failures we've got and builds the
    classification rate: for every failure in the gate queue, did
    we find a match for it?
    """
    fails = all_fails(classifier)
    for bugnum in data:
        bug = data[bugnum]
        for job in bug['failed_jobs']:
            fails[job] = True

    total = len(fails.keys())
    bad_jobs = {}
    count = 0
    for f in fails:
        if fails[f] is True:
            count += 1
        else:
            build, job = f.split('.', 1)
            if job in bad_jobs:
                bad_jobs[job] += 1
            else:
                bad_jobs[job] = 1

    print("Classification percentage: %2.2f%%" %
          ((float(count) / float(total)) * 100.0))
    sort = sorted(
        bad_jobs.iteritems(),
        key=operator.itemgetter(1),
        reverse=True)
    print("Job fails with most unclassified errors")
    for s in sort:
        print(" %3s : %s" % (s[1], s[0]))
def collect_metrics(classifier):
    data = {}
    for q in classifier.queries:
        results = classifier.hits_by_query(q['query'], size=30000)
        facets = er_results.FacetSet()
        facets.detect_facets(
            results,
            ["build_status", "build_uuid"])

        num_fails = 0
        failed_jobs = []
        if "FAILURE" in facets:
            num_fails = len(facets["FAILURE"])
            for build in facets["FAILURE"]:
                for result in facets["FAILURE"][build]:
                    failed_jobs.append("%s.%s" % (build, result.build_name))

        data[q['bug']] = {
            'fails': num_fails,
            'hits': facets,
            'query': q['query'],
            'failed_jobs': failed_jobs
        }
    return data
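
classifying_rate() only relies on the 'failed_jobs' entries built here; a sketch
of what a single collect_metrics() entry looks like, with placeholder values
throughout (the bug number, counts, and query string below are invented for
illustration):

    data = {
        '123456': {                        # Launchpad bug number from the query
            'fails': 2,                    # FAILURE builds hit by the query
            'hits': '<FacetSet placeholder>',  # really the FacetSet for the query
            'query': 'message:"example failure signature"',  # really q['query']
            'failed_jobs': ["uuid1.gate-tempest-dsvm-full",
                            "uuid2.gate-grenade-dsvm"],
        },
    }
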
@@ -90,6 +162,8 @@ def main():
    classifier = er.Classifier(opts.dir)
    data = collect_metrics(classifier)
    print_metrics(data, with_lp=opts.lp)
    if opts.rate:
        classifying_rate(classifier, data)


if __name__ == "__main__":