Merge "Add script to tabulate scores"
This commit is contained in:
commit
8e22286c26
193
working_materials/tabulate_scores.py
Executable file
193
working_materials/tabulate_scores.py
Executable file
@ -0,0 +1,193 @@
#!/usr/bin/env python
#
# Copyright 2015 VMware, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import argparse
import json
import re
import textwrap
# Help-output formatter that keeps the manual line breaks in the
# parser description (RawDescriptionHelpFormatter) while still
# appending each argument's default value to its help text
# (ArgumentDefaultsHelpFormatter).
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
                      argparse.RawDescriptionHelpFormatter):
    pass
# Set up command line arguments.
parser = argparse.ArgumentParser(
    description=textwrap.dedent("""\
        Tabulate capability scores and write them to files.

        This utility script tabulates scores from a DefCore scoring
        worksheet based on the weights from a given Guideline JSON file.
        It writes the scores in three formats:

        1.) A text file that is identical to the source scoring
            worksheet, but with an added column for the total score
            for each capability.
        2.) A CSV file with each capability's individual Criteria scores
            as well as the total. The first line of the file will be
            the plain-English Criteria names as parsed from the Guideline
            json file.
        3.) A simple "capability-name: total-score" output to stdout.
            This is primarily useful for getting quick feedback on
            the effect of changing scores.
        """),
    add_help=True,
    formatter_class=CustomFormatter)

# (flags, keyword arguments) describing every command line option.
_ARGUMENTS = (
    (('-j', '--json-file'),
     dict(default='../2015.next.json',
          dest='json_file_name',
          help='Path to the Guideline JSON file to read weights and '
               'names from.')),
    (('-s', '--score-file'),
     dict(default='scoring.txt',
          dest='score_file_name',
          help='File to read capabilities scores from.')),
    (('-t', '--text-outfile'),
     dict(dest='text_outfile_name',
          help='File to write scores in text format to instead of the '
               'input file.')),
    (('-c', '--csv-outfile'),
     dict(default='tabulated_scores.csv',
          dest='csv_outfile_name',
          help='File to write scores in CSV format to.')),
)
for _flags, _kwargs in _ARGUMENTS:
    parser.add_argument(*_flags, **_kwargs)

args = parser.parse_args()

# Without an explicit text output file, overwrite the scoring file
# we read the scores from.
args.text_outfile_name = args.text_outfile_name or args.score_file_name
# Folks have also requested a CSV output that can be imported to
# a spreadsheet program. Get that ready too.
csv_outfile = open(args.csv_outfile_name, 'w')

# We need to know what the weights assigned to each Criteria are
# in order to do scoring. Read them from a Guideline JSON file.
# The with-statement closes the file for us, so no explicit
# close() call is needed.
with open(args.json_file_name) as json_file:
    # json.load() reads and parses the file in one step.
    json_data = json.load(json_file)

criteria = json_data['criteria']

# Non-Admin doesn't appear in the scores because it's not
# an official criteria...rather it's something we use in scoring
# to remind ourselves when a non-admin API is being studied.
criteria['Non-Admin'] = {'name': 'Non-Admin'}
# Now we're ready to parse scores from the scoring file.
# We'll buffer these in memory so we can write back to
# the same file we read them from if we're so inclined.
buffer = []
with open(args.score_file_name) as filehandle:
    # The line format we're expecting here is:
    #
    # capability-name: [1,1,1] [1,1,1] [1,1,1] [1,1,1] [1] [100]
    #
    # Where the values inside the brackets can be zero, one, or a
    # question mark. The final column is one that will be
    # overwritten by this script and represents the total score
    # for the capability. If present already, it's ignored.
    # (Raw strings keep regex escapes like \S from being treated
    # as invalid string escapes on newer Python versions.)
    pattern = re.compile(r'((\S+):\s+((\[\S,\S,\S\] ){4}\[\S\]))')

    # The scores in the tuples have the following meanings, in
    # the order they appear in the scoring files.
    scorenames = ('deployed', 'tools', 'clients',
                  'future', 'complete', 'stable',
                  'discover', 'doc', 'sticky',
                  'foundation', 'atomic', 'proximity',
                  'Non-Admin')

    # Write column headers to the CSV file using full names.
    csv_outfile.write("Capability,")
    for scorename in scorenames:
        csv_outfile.write("%s," % (criteria[scorename]['name']))
    csv_outfile.write("Total\n")

    # Parse each line in the file and find scores.
    for line in filehandle:
        # Is this a scoring line? If so grab raw scores.
        raw = pattern.match(line)
        if raw is None:
            # Not a line with a score, so just write it as-is.
            buffer.append(line)
        else:
            # Grab the capability name and write it to the CSV file.
            cap_name = raw.group(2)
            csv_outfile.write("%s," % cap_name)

            # Grok the scores into a dict keyed by criteria name:
            # strip the brackets, commas and spaces so only the
            # thirteen single-character scores remain.
            scores = re.sub(r'[\[\], ]', '', raw.group(3))
            score_hash = dict(zip(scorenames, list(scores)))

            # Now tabulate scores for this capability. Scores will
            # be negative if scoring isn't yet complete (e.g. it
            # has '?' or another character that isn't 0 or 1 as
            # its score for any criteria).
            total = 0

            # We also need to denote whether the scoring is complete.
            # If we find capability scores that are not 0 or 1, we'll
            # set this flag so we remember to negate the final score.
            complete = 1

            # If an API is admin-only, it's vetoed and its total stays 0.
            # Only tabulate scores for non-admin APIs. Compare as a
            # string rather than via int(): the field may legitimately
            # be '?', which would make int() raise ValueError.
            if score_hash['Non-Admin'] == '1':
                for scorename in scorenames:
                    csv_outfile.write("%s," % score_hash[scorename])

                    # If the scorename is non-admin, skip it as this
                    # doesn't affect the scoring total; it merely
                    # indicates whether the API in question is admin-only
                    # and therefore not scorable.
                    if scorename == 'Non-Admin':
                        continue

                    # If the score is a digit, add it in to the total.
                    if re.match(r'\d', score_hash[scorename]):
                        total += (int(score_hash[scorename]) *
                                  int(criteria[scorename]['weight']))

                    # If the score isn't a digit, we're not done scoring
                    # this criteria yet. Denote that by making the
                    # final score negative.
                    else:
                        complete = -1

                # The total now becomes negative if scoring
                # wasn't complete.
                total = total * complete

            # Now write the total score to a couple of places.
            # Put it in the tabulated file.
            buffer.append("%s [%d]\n" % (raw.group(1), total))

            # Put it in the CSV for easy spreadsheet import.
            csv_outfile.write("%s\n" % (total))

            # And stdout is useful for folks who are experimenting with
            # the effect of changing a score. print() with a single
            # argument behaves identically under Python 2 and 3.
            print("%s: %d" % (cap_name, total))
# Now we can write the text output file. The with-statement closes
# it when the block exits; no explicit close() is needed.
with open(args.text_outfile_name, 'w') as outfile:
    outfile.writelines(buffer)

# All scores have been written, so close the CSV file explicitly to
# make sure its buffers are flushed (it was opened without a
# with-statement because it is written to throughout the script).
csv_outfile.close()

print("\n\nText output has been written to %s" % args.text_outfile_name)
print("CSV output has been written to %s" % args.csv_outfile_name)
Loading…
Reference in New Issue
Block a user