Add script to tabulate scores
During last week's DefCore sprint in Austin, we decided to move to a plaintext file format for our scoring working document. Some participants expressed a desire for a script that automatically tabulates total scores for capabilities scored in the new simple text format, and that can also export them to a CSV file for those who prefer to visualize the data with spreadsheet software.

This patch adds a utility script to do just that. It reads a DefCore Guideline JSON file to collect Criteria names and weights, and a scoring file in the new simple text format to pull the individual Criteria scores for each Capability listed. Using the weights and the Criteria scores, it then computes a total score for each Capability.

The script creates two output files: a text document that mirrors the scoring file but with an additional column for the total score, and a CSV file complete with column headers. The total scores are also printed to stdout. The source and destination files are configurable via command line arguments, with reasonable defaults so the script can be run with no arguments at all. The script also includes help output, printed by running it with the standard -h or --help arguments.

Change-Id: I7539728f1e907bebc28609977251c9f580400499
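At bottom, the tabulation described above is a weighted sum: each Criteria score (0 or 1) is multiplied by that Criteria's weight from the Guideline JSON, and the total is negated when any score is still a '?' so unfinished rows stand out. A minimal sketch of that arithmetic, using a few of the script's criteria names with made-up weights rather than values from a real Guideline file:

    # Hypothetical weights; the real script reads these from a
    # Guideline JSON file such as 2015.next.json.
    weights = {'deployed': 8, 'tools': 4, 'clients': 4}
    scores = {'deployed': '1', 'tools': '0', 'clients': '?'}

    total = 0
    complete = 1
    for name, score in scores.items():
        if score.isdigit():
            total += int(score) * weights[name]
        else:
            complete = -1  # a '?' means scoring isn't finished yet

    total *= complete  # a negative total flags an incomplete row
    print("some-capability: %d" % total)  # -> some-capability: -8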
parent 19ac374a52
commit 898974c114
working_materials/tabulate_scores.py (new executable file, 193 lines)
@@ -0,0 +1,193 @@
#!/usr/bin/env python
#
# Copyright 2015 VMware, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import argparse
import json
import re
import textwrap


# A custom class to preserve formatting in the help output
# description and also show default arguments.
class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
                      argparse.RawDescriptionHelpFormatter):
    pass


# Set up command line arguments.
parser = argparse.ArgumentParser(
    description=textwrap.dedent("""\
        Tabulate capability scores and write them to files.

        This utility script tabulates scores from a DefCore scoring
        worksheet based on the weights from a given Guideline JSON file.
        It writes the scores in three formats:

        1.) A text file that is identical to the source scoring
            worksheet, but with an added column for the total score
            for each capability.
        2.) A CSV file with each capability's individual Criteria scores
            as well as the total.  The first line of the file will be
            the plain-English Criteria names as parsed from the Guideline
            JSON file.
        3.) A simple "capability-name: total-score" output to stdout.
            This is primarily useful for getting quick feedback on
            the effect of changing scores.
        """),
    add_help=True,
    formatter_class=CustomFormatter)
parser.add_argument(
    '-j', '--json-file',
    default='../2015.next.json',
    dest='json_file_name',
    help='Path to the Guideline JSON file to read weights and names from.')
parser.add_argument(
    '-s', '--score-file',
    default='scoring.txt',
    dest='score_file_name',
    help='File to read capability scores from.')
parser.add_argument(
    '-t', '--text-outfile',
    dest='text_outfile_name',
    help='File to write scores in text format to instead of the input file.')
parser.add_argument(
    '-c', '--csv-outfile',
    default='tabulated_scores.csv',
    dest='csv_outfile_name',
    help='File to write scores in CSV format to.')
args = parser.parse_args()
args.text_outfile_name = args.text_outfile_name or args.score_file_name

# Folks have also requested a CSV output that can be imported into
# a spreadsheet program.  Get that ready too.
csv_outfile = open(args.csv_outfile_name, 'w')

# We need to know the weight assigned to each criterion in order
# to do scoring.  Read the weights from a Guideline JSON file.
with open(args.json_file_name) as json_file:
    json_data = json.loads(json_file.read())
criteria = json_data['criteria']

# Non-Admin doesn't appear in the Guideline because it's not an
# official criterion; rather, it's something we use in scoring to
# remind ourselves when a non-admin API is being studied.
criteria['Non-Admin'] = {'name': 'Non-Admin'}

# Now we're ready to parse scores from the scoring file.
# We'll buffer these in memory so we can write back to
# the same file we read them from if we're so inclined.
buffer = []
with open(args.score_file_name) as filehandle:

    # The line format we're expecting here is:
    #
    # capability-name: [1,1,1] [1,1,1] [1,1,1] [1,1,1] [1] [100]
    #
    # where the values inside the brackets can be zero, one, or a
    # question mark.  The final column represents the total score
    # for the capability and will be overwritten by this script;
    # if present already, it's ignored.
    pattern = re.compile(r'((\S+):\s+((\[\S,\S,\S\] ){4}\[\S\]))')
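    # Capture groups used later on:
    #   group(1) -- everything up through the final single-score bracket,
    #               reused verbatim when the line is rewritten with a new
    #               total column appended
    #   group(2) -- the capability name
    #   group(3) -- the five bracketed score groups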

    # The scores in the tuples have the following meanings, in
    # the order they appear in the scoring files.
    scorenames = ('deployed', 'tools', 'clients',
                  'future', 'complete', 'stable',
                  'discover', 'doc', 'sticky',
                  'foundation', 'atomic', 'proximity',
                  'Non-Admin')

    # Write column headers to the CSV file using full names.
    csv_outfile.write("Capability,")
    for scorename in scorenames:
        csv_outfile.write("%s," % (criteria[scorename]['name']))
    csv_outfile.write("Total\n")

    # Parse each line in the file and find scores.
    for line in filehandle:

        # Is this a scoring line?  If so, grab the raw scores.
        raw = pattern.match(line)
        if raw is None:
            # Not a line with a score, so just write it as-is.
            buffer.append(line)
        else:
            # Grab the capability name.
            cap_name = raw.group(2)

            # Write it to the CSV file.
            csv_outfile.write("%s," % cap_name)

            # Grok the scores into a dict keyed by criterion name.
            scores = re.sub(r'[\[\]\, ]', '', raw.group(3))
            score_hash = dict(zip(scorenames, list(scores)))
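            # For example, "[1,1,1] [1,1,1] [1,1,1] [1,1,1] [1]" collapses
            # to "1111111111111", thirteen characters that zip up against
            # the thirteen criteria names above.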

            # Now tabulate the score for this capability.  The total
            # will be negative if scoring isn't yet complete (i.e. it
            # has '?' or another character that isn't 0 or 1 as its
            # score for any criterion).
            total = 0

            # We also need to denote whether the scoring is complete.
            # If we find capability scores that are not 0 or 1, we'll
            # set this flag so we remember to negate the final score.
            complete = 1

            # If an API is admin-only, it's vetoed and its score stays 0.
            # Only tabulate scores for non-admin APIs.
            if score_hash['Non-Admin'] == '1':
                for scorename in scorenames:
                    csv_outfile.write("%s," % score_hash[scorename])

                    # If the scorename is Non-Admin, skip it as this
                    # doesn't affect the scoring total; it merely
                    # indicates whether the API in question is admin-only
                    # and therefore not scorable.
                    if scorename == 'Non-Admin':
                        continue

                    # If the score is a digit, add it in to the total.
                    if re.match(r'\d', score_hash[scorename]):
                        total += (int(score_hash[scorename]) *
                                  int(criteria[scorename]['weight']))

                    # If the score isn't a digit, we're not done scoring
                    # this criterion yet.  Denote that by making the
                    # final score negative.
                    else:
                        complete = -1

            # The total now becomes negative if scoring
            # wasn't complete.
            total = total * complete
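            # For example, a raw total of 8 with one criterion still
            # marked '?' is reported as -8.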

            # Now write the total score to a couple of places.
            # Put it in the tabulated text file.
            buffer.append("%s [%d]\n" % (raw.group(1), total))

            # Put it in the CSV for easy spreadsheet import.
            csv_outfile.write("%s\n" % (total))

            # And stdout is useful for folks who are experimenting with
            # the effect of changing a score.
            print("%s: %d" % (cap_name, total))

# All scores are parsed; close out the CSV file.
csv_outfile.close()

# Now we can write the text output file.
with open(args.text_outfile_name, 'w') as outfile:
    for line in buffer:
        outfile.write(line)

print("\n\nText output has been written to %s" % args.text_outfile_name)
print("CSV output has been written to %s" % args.csv_outfile_name)