Implemented mailing list processing

* The 'emails' metric is introduced, backed by a new 'email' record type

Implements blueprint mailing-list-analysis

Change-Id: Ie3e2215aa661a4fd8035de037ce0ff1e6ab64d4e
Ilya Shakhat 2013-09-12 19:39:04 +04:00
parent 3e29cb9ea2
commit 79172191e8
17 changed files with 528 additions and 112 deletions
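
In outline, the change pulls mailman text archives through the new mls module, turns every message into an 'email' record, and surfaces those records in the dashboard as the 'emails' metric. A rough, simplified sketch of the flow (names are taken from the diff below; runtime_storage_inst and record_processor_inst are assumed to exist as in main.py):

    # mailing-list archives -> email records -> runtime storage -> dashboard
    for mail_list in runtime_storage_inst.get_by_key('mail_lists') or []:
        mail_iterator = _record_typer(mls.log(mail_list, runtime_storage_inst),
                                      'email')
        runtime_storage_inst.set_records(
            record_processor_inst.process(mail_iterator))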


@ -103,14 +103,8 @@ class CachedMemoryStorage(MemoryStorage):
def get_record_ids(self):
return self.records.keys()
def get_commit_ids(self):
return self.record_types_index['commit']
def get_review_ids(self):
return self.record_types_index['review']
def get_mark_ids(self):
return self.record_types_index['mark']
def get_record_ids_by_type(self, record_type):
return self.record_types_index.get(record_type, set())
def get_records(self, record_ids):
for i in record_ids:
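
The per-type getters (get_commit_ids, get_review_ids, get_mark_ids) give way to a single lookup keyed by record type, so the new 'email' records need no dedicated accessor. A minimal standalone sketch of the idea, not the project's code; the indexing step is assumed:

    class TypeIndexedStorage(object):
        def __init__(self):
            self.records = {}             # record_id -> record
            self.record_types_index = {}  # record_type -> set of record ids

        def add(self, record):
            self.records[record['record_id']] = record
            self.record_types_index.setdefault(
                record['record_type'], set()).add(record['record_id'])

        def get_record_ids_by_type(self, record_type):
            # one accessor now serves 'commit', 'review', 'mark' and 'email'
            return self.record_types_index.get(record_type, set())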


@ -44,6 +44,8 @@
}
{% if metric == 'marks' %}
$("#review_activity_template").tmpl(data["activity"]).appendTo("#activity_container");
{% elif metric == 'emails' %}
$("#email_activity_template").tmpl(data["activity"]).appendTo("#activity_container");
{% else %}
$("#commit_activity_template").tmpl(data["activity"]).appendTo("#activity_container");
{% endif %}
@ -128,6 +130,30 @@
{% endraw %}
</script>
<script id="email_activity_template" type="text/x-jquery-tmpl">
{% raw %}
<div style="margin-bottom: 1em;">
<div style='float: left; '><img src="${gravatar}" style="width: 32px; height: 32px;"></div>
<div style="margin-left: 40px;">
<div style="font-weight: bold;">{%html author_link %} ({%html company_link %})</div>
<div style="font-weight: bold;">${date_str} to <a href="https://launchpad.net/${module}">${module}</a></div>
</div>
<div style="margin-left: 40px;">
<div style='font-weight: bold;'>
{%if email_link != "" %}
<a href='${email_link}'>
{%/if%}
${subject}
{%if email_link != "" %}
</a>
{%/if%}
</div>
</div>
</div>
{% endraw %}
</script>
<script id="user_profile_template" type="text/x-jquery-tmpl">
{% raw %}
<div>
@ -165,6 +191,7 @@
{%each(i,rec) bugs %}
<li>
<a href="https://bugs.launchpad.net/bugs/${rec.id}">${rec.id}</a>
<small>${rec.module}</small>
</li>
{%/each%}
</ol>


@ -48,6 +48,14 @@ METRIC_LABELS = {
'loc': 'Lines of code',
'commits': 'Commits',
'marks': 'Reviews',
'emails': 'Emails',
}
METRIC_TO_RECORD_TYPE = {
'loc': 'commit',
'commits': 'commit',
'marks': 'mark',
'emails': 'email'
}
DEFAULT_RECORDS_LIMIT = 10
@ -130,9 +138,9 @@ def init_releases(vault):
def init_project_types(vault):
runtime_storage_inst = vault['runtime_storage']
project_type_options = {}
project_type_group_index = {'all': set()}
project_type_group_index = {'all': set(['unknown'])}
for repo in runtime_storage_inst.get_by_key('repos') or []:
for repo in utils.load_repos(runtime_storage_inst):
project_type = repo['project_type'].lower()
project_group = None
if ('project_group' in repo) and (repo['project_group']):
@ -331,13 +339,10 @@ def record_filter(ignore=None, use_default=True):
c.lower() for c in param))
if 'metric' not in ignore:
param = get_parameter(kwargs, 'metric')
if 'reviews' in param:
record_ids &= memory_storage.get_review_ids()
elif 'marks' in param:
record_ids &= memory_storage.get_mark_ids()
elif ('loc' in param) or ('commits' in param):
record_ids &= memory_storage.get_commit_ids()
metrics = get_parameter(kwargs, 'metric')
for metric in metrics:
record_ids &= memory_storage.get_record_ids_by_type(
METRIC_TO_RECORD_TYPE[metric])
kwargs['records'] = memory_storage.get_records(record_ids)
return f(*args, **kwargs)
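
record_filter no longer special-cases each metric name: the metric is mapped to a record type and the candidate ids are intersected with the matching index. A hedged sketch of that dispatch (filter_by_metric is a hypothetical helper; memory_storage is assumed to expose get_record_ids_by_type as shown earlier):

    METRIC_TO_RECORD_TYPE = {
        'loc': 'commit',
        'commits': 'commit',
        'marks': 'mark',
        'emails': 'email',
    }

    def filter_by_metric(memory_storage, record_ids, metrics):
        for metric in metrics:
            record_ids &= memory_storage.get_record_ids_by_type(
                METRIC_TO_RECORD_TYPE[metric])
        return record_ids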
@ -352,7 +357,7 @@ def aggregate_filter():
@functools.wraps(f)
def aggregate_filter_decorated_function(*args, **kwargs):
def commit_filter(result, record, param_id):
def incremental_filter(result, record, param_id):
result[record[param_id]]['metric'] += 1
def loc_filter(result, record, param_id):
@ -391,20 +396,18 @@ def aggregate_filter():
metric_param = (flask.request.args.get('metric') or
get_default('metric'))
metric = metric_param.lower()
aggregate_filter = None
if metric == 'commits':
metric_filter = commit_filter
elif metric == 'loc':
metric_filter = loc_filter
elif metric == 'marks':
metric_filter = mark_filter
aggregate_filter = mark_finalize
else:
metric_to_filters_map = {
'commits': (incremental_filter, None),
'loc': (loc_filter, None),
'marks': (mark_filter, mark_finalize),
'emails': (incremental_filter, None),
}
if metric not in metric_to_filters_map:
raise Exception('Invalid metric %s' % metric)
kwargs['metric_filter'] = metric_filter
kwargs['finalize_handler'] = aggregate_filter
kwargs['metric_filter'] = metric_to_filters_map[metric][0]
kwargs['finalize_handler'] = metric_to_filters_map[metric][1]
return f(*args, **kwargs)
return aggregate_filter_decorated_function
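
incremental_filter (the renamed commit_filter) counts one per record, which is all the 'emails' metric needs; only 'loc' and 'marks' keep specialised handlers. A small self-contained illustration of how it accumulates per-key totals (the defaultdict result structure is an assumption based on the surrounding code):

    import collections

    def incremental_filter(result, record, param_id):
        result[record[param_id]]['metric'] += 1

    result = collections.defaultdict(lambda: {'metric': 0})
    for record in ({'company_name': 'Acme'}, {'company_name': 'Acme'},
                   {'company_name': 'Initech'}):
        incremental_filter(result, record, 'company_name')
    # dict(result) -> {'Acme': {'metric': 2}, 'Initech': {'metric': 1}}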
@ -548,57 +551,35 @@ def page_not_found(e):
def contribution_details(records):
blueprints_map = {}
bugs_map = {}
companies_map = {}
commits = []
marks = dict((m, 0) for m in [-2, -1, 0, 1, 2])
commit_count = 0
loc = 0
for record in records:
if record['record_type'] == 'commit':
loc += record['loc']
commit = record.copy()
commit['branches'] = ','.join(commit['branches'])
commits.append(commit)
blueprint = commit['blueprint_id']
if blueprint:
if blueprint in blueprints_map:
blueprints_map[blueprint].append(commit)
else:
blueprints_map[blueprint] = [commit]
if 'blueprint_id' in record:
for bp in record['blueprint_id']:
blueprints_map[bp] = record
if 'bug_id' in record:
for bug in record['bug_id']:
bugs_map[bug] = record
bug = commit['bug_id']
if bug:
if bug in bugs_map:
bugs_map[bug].append(commit)
else:
bugs_map[bug] = [commit]
company = record['company_name']
if company:
if company in companies_map:
companies_map[company]['loc'] += record['loc']
companies_map[company]['commits'] += 1
else:
companies_map[company] = {'loc': record['loc'],
'commits': 1}
elif record['record_type'] == 'mark':
if record['record_type'] == 'mark':
marks[int(record['value'])] += 1
elif record['record_type'] == 'commit':
commit_count += 1
loc += record['loc']
blueprints = sorted([{'id': key,
'module': value[0]['module'],
'records': value}
blueprints = sorted([{'id': key, 'module': value['module']}
for key, value in blueprints_map.iteritems()],
key=lambda x: x['id'])
bugs = sorted([{'id': key, 'records': value}
bugs = sorted([{'id': key, 'module': value['module']}
for key, value in bugs_map.iteritems()],
key=lambda x: int(x['id']))
commits.sort(key=lambda x: x['date'], reverse=True)
result = {
'blueprints': blueprints,
'bugs': bugs,
'commit_count': len(commits),
'companies': companies_map,
'commit_count': commit_count,
'loc': loc,
'marks': marks,
}
@ -701,6 +682,11 @@ def get_activity_json(records):
'company': ''})
_extend_record(review)
result.append(review)
elif record['record_type'] == 'email':
email = record.copy()
_extend_record(email)
email['email_link'] = email.get('email_link') or ''
result.append(email)
result.sort(key=lambda x: x['date'], reverse=True)
return result[start_record:start_record + page_size]
@ -865,10 +851,10 @@ def timeline(records, **kwargs):
week_stat_commits_hl = dict((c, 0) for c in weeks)
param = get_parameter(kwargs, 'metric')
if ('reviews' in param) or ('marks' in param):
handler = lambda record: 0
else:
if ('commits' in param) or ('loc' in param):
handler = lambda record: record['loc']
else:
handler = lambda record: 0
# fill stats with the data
for record in records:


@ -3717,6 +3717,7 @@
],
"uri": "git://github.com/openstack/nova.git",
"module": "nova",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3746,6 +3747,7 @@
],
"uri": "git://github.com/openstack/keystone.git",
"module": "keystone",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3770,6 +3772,7 @@
],
"uri": "git://github.com/openstack/cinder.git",
"module": "cinder",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3799,6 +3802,7 @@
],
"uri": "git://github.com/openstack/glance.git",
"module": "glance",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3828,6 +3832,7 @@
],
"uri": "git://github.com/openstack/neutron.git",
"module": "neutron",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3857,6 +3862,7 @@
],
"uri": "git://github.com/openstack/horizon.git",
"module": "horizon",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3886,6 +3892,7 @@
],
"uri": "git://github.com/openstack/swift.git",
"module": "swift",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3910,6 +3917,7 @@
],
"uri": "git://github.com/openstack/python-keystoneclient.git",
"module": "python-keystoneclient",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3934,6 +3942,7 @@
],
"uri": "git://github.com/openstack/python-novaclient.git",
"module": "python-novaclient",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3953,6 +3962,7 @@
],
"uri": "git://github.com/openstack/python-cinderclient.git",
"module": "python-cinderclient",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3972,6 +3982,7 @@
],
"uri": "git://github.com/openstack/python-glanceclient.git",
"module": "python-glanceclient",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -3996,6 +4007,7 @@
],
"uri": "git://github.com/openstack/python-neutronclient.git",
"module": "python-neutronclient",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4015,6 +4027,7 @@
],
"uri": "git://github.com/openstack/python-swiftclient.git",
"module": "python-swiftclient",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4034,6 +4047,7 @@
],
"uri": "git://github.com/openstack/heat.git",
"module": "heat",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4053,6 +4067,7 @@
],
"uri": "git://github.com/openstack/python-heatclient.git",
"module": "python-heatclient",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4072,6 +4087,7 @@
],
"uri": "git://github.com/openstack/ceilometer.git",
"module": "ceilometer",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4086,6 +4102,7 @@
],
"uri": "git://github.com/openstack/python-ceilometerclient.git",
"module": "python-ceilometerclient",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4100,6 +4117,7 @@
],
"uri": "git://github.com/openstack/oslo-incubator.git",
"module": "oslo-incubator",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4114,6 +4132,7 @@
],
"uri": "git://github.com/openstack/oslo.config.git",
"module": "oslo.config",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4128,6 +4147,7 @@
],
"uri": "git://github.com/openstack/compute-api.git",
"module": "compute-api",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4142,6 +4162,7 @@
],
"uri": "git://github.com/openstack/identity-api.git",
"module": "identity-api",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4156,6 +4177,7 @@
],
"uri": "git://github.com/openstack/image-api.git",
"module": "image-api",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4170,6 +4192,7 @@
],
"uri": "git://github.com/openstack/netconn-api.git",
"module": "netconn-api",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4184,6 +4207,7 @@
],
"uri": "git://github.com/openstack/object-api.git",
"module": "object-api",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4198,6 +4222,7 @@
],
"uri": "git://github.com/openstack/volume-api.git",
"module": "volume-api",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4222,6 +4247,7 @@
],
"uri": "git://github.com/openstack/openstack-manuals.git",
"module": "openstack-manuals",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4236,6 +4262,7 @@
],
"uri": "git://github.com/openstack/api-site.git",
"module": "api-site",
"organization": "openstack",
"project_type": "openstack"
},
{
@ -4243,6 +4270,7 @@
"branches": ["master"],
"uri": "git://github.com/openstack/trove.git",
"project_type": "openstack",
"organization": "openstack",
"project_group": "incubation"
},
{
@ -4250,6 +4278,7 @@
"branches": ["master"],
"uri": "git://github.com/openstack/trove-integration.git",
"project_type": "openstack",
"organization": "openstack",
"project_group": "incubation"
},
{
@ -4257,6 +4286,7 @@
"branches": ["master"],
"uri": "git://github.com/openstack/python-troveclient.git",
"project_type": "openstack",
"organization": "openstack",
"project_group": "incubation"
},
{
@ -4264,6 +4294,7 @@
"branches": ["master"],
"uri": "git://github.com/openstack/ironic.git",
"project_type": "openstack",
"organization": "openstack",
"project_group": "incubation"
}
],
@ -4348,5 +4379,7 @@
"release_name": "Havana",
"end_date": "2013-Oct-17"
}
]
],
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"]
}


@ -74,6 +74,9 @@
"project_group": {
"type": "string"
},
"organization": {
"type": "string"
},
"module": {
"type": "string"
},
@ -102,7 +105,7 @@
}
}
},
"required": ["uri", "project_type", "module", "branches"],
"required": ["uri", "project_type", "module", "branches", "organization"],
"additionalProperties": false
}
},
@ -162,6 +165,12 @@
"required": ["module_group_name", "modules"],
"additionalProperties": false
}
},
"mail_lists": {
"type": "array",
"items": {
"type": "string"
}
}
}
}


@ -138,6 +138,8 @@
"release_name": "Havana",
"end_date": "2013-Oct-17"
}
]
],
"mail_lists": ["http://lists.openstack.org/pipermail/openstack-dev/"]
}


@ -47,7 +47,7 @@ def _retrieve_project_list(runtime_storage_inst, project_sources):
LOG.info('Retrieving project list from GitHub')
repo_index = {}
stored_repos = runtime_storage_inst.get_by_key('repos')
stored_repos = utils.load_repos(runtime_storage_inst)
for repo in stored_repos:
repo_index[repo['uri']] = repo
@ -105,20 +105,17 @@ def _process_companies(runtime_storage_inst, companies):
KEYS = {
'users': _process_users,
'repos': None,
'releases': None,
'companies': _process_companies,
'module_groups': None,
}
def _update_default_data(runtime_storage_inst, default_data):
LOG.debug('Update runtime storage with default data')
for key, processor in KEYS.iteritems():
if processor:
processor(runtime_storage_inst, default_data[key])
for key, value in default_data.iteritems():
if key in KEYS:
KEYS[key](runtime_storage_inst, value)
else:
runtime_storage_inst.set_by_key(key, default_data[key])
runtime_storage_inst.set_by_key(key, value)
def process(runtime_storage_inst, default_data, sources_root, force_update):
@ -133,7 +130,7 @@ def process(runtime_storage_inst, default_data, sources_root, force_update):
LOG.debug('Gather release index for all repos')
release_index = {}
for repo in runtime_storage_inst.get_by_key('repos'):
for repo in utils.load_repos(runtime_storage_inst):
vcs_inst = vcs.get_vcs(repo, sources_root)
release_index.update(vcs_inst.get_release_index())
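
With the processor table trimmed to the keys that actually need transformation, every other top-level key of default_data, including the new mail_lists list, is stored verbatim and picked up later by update_records(). A toy sketch of the dispatch (the processor function here is a stand-in for the real ones):

    def _process_users(storage, users):        # stand-in
        storage.set_by_key('users', users)

    KEYS = {'users': _process_users}

    def _update_default_data(storage, default_data):
        for key, value in default_data.items():
            if key in KEYS:
                KEYS[key](storage, value)
            else:
                # e.g. 'repos', 'releases', 'mail_lists' land here unchanged
                storage.set_by_key(key, value)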


@ -22,6 +22,7 @@ from psutil import _error
from stackalytics.openstack.common import log as logging
from stackalytics.processor import config
from stackalytics.processor import default_data_processor
from stackalytics.processor import mls
from stackalytics.processor import rcs
from stackalytics.processor import record_processor
from stackalytics.processor import runtime_storage
@ -73,7 +74,7 @@ def _record_typer(record_iterator, record_type):
yield record
def process_repo(repo, runtime_storage, record_processor_inst):
def process_repo(repo, runtime_storage_inst, record_processor_inst):
uri = repo['uri']
LOG.debug('Processing repo uri %s' % uri)
@ -88,40 +89,54 @@ def process_repo(repo, runtime_storage, record_processor_inst):
LOG.debug('Processing repo %s, branch %s', uri, branch)
vcs_key = 'vcs:' + str(urllib.quote_plus(uri) + ':' + branch)
last_id = runtime_storage.get_by_key(vcs_key)
last_id = runtime_storage_inst.get_by_key(vcs_key)
commit_iterator = vcs_inst.log(branch, last_id)
commit_iterator_typed = _record_typer(commit_iterator, 'commit')
processed_commit_iterator = record_processor_inst.process(
commit_iterator_typed)
runtime_storage.set_records(processed_commit_iterator, _merge_commits)
runtime_storage_inst.set_records(
processed_commit_iterator, _merge_commits)
last_id = vcs_inst.get_last_id(branch)
runtime_storage.set_by_key(vcs_key, last_id)
runtime_storage_inst.set_by_key(vcs_key, last_id)
LOG.debug('Processing reviews for repo %s, branch %s', uri, branch)
rcs_key = 'rcs:' + str(urllib.quote_plus(uri) + ':' + branch)
last_id = runtime_storage.get_by_key(rcs_key)
last_id = runtime_storage_inst.get_by_key(rcs_key)
review_iterator = rcs_inst.log(branch, last_id)
review_iterator_typed = _record_typer(review_iterator, 'review')
processed_review_iterator = record_processor_inst.process(
review_iterator_typed)
runtime_storage.set_records(processed_review_iterator)
runtime_storage_inst.set_records(processed_review_iterator)
last_id = rcs_inst.get_last_id(branch)
runtime_storage.set_by_key(rcs_key, last_id)
runtime_storage_inst.set_by_key(rcs_key, last_id)
def update_repos(runtime_storage_inst):
repos = runtime_storage_inst.get_by_key('repos')
def process_mail_list(uri, runtime_storage_inst, record_processor_inst):
mail_iterator = mls.log(uri, runtime_storage_inst)
mail_iterator_typed = _record_typer(mail_iterator, 'email')
processed_mail_iterator = record_processor_inst.process(
mail_iterator_typed)
runtime_storage_inst.set_records(processed_mail_iterator)
def update_records(runtime_storage_inst):
repos = utils.load_repos(runtime_storage_inst)
record_processor_inst = record_processor.RecordProcessor(
runtime_storage_inst)
for repo in repos:
process_repo(repo, runtime_storage_inst, record_processor_inst)
mail_lists = runtime_storage_inst.get_by_key('mail_lists') or []
for mail_list in mail_lists:
process_mail_list(mail_list, runtime_storage_inst,
record_processor_inst)
record_processor_inst.finalize()
@ -165,7 +180,7 @@ def main():
update_pids(runtime_storage_inst)
update_repos(runtime_storage_inst)
update_records(runtime_storage_inst)
apply_corrections(cfg.CONF.corrections_uri, runtime_storage_inst)


@ -0,0 +1,139 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gzip
import httplib
import StringIO
from email import utils as email_utils
import re
import time
import urllib
import urlparse

from stackalytics.openstack.common import log as logging
from stackalytics.processor import utils

LOG = logging.getLogger(__name__)

EMAIL_HEADER_PATTERN = ('From \S+(?: at \S+)?\s+'
                        '\w{3}\s+\w{3}\s+\d{1,2}\s+\d{2}:\d{2}(?::\d{2})?'
                        '(?:\s+\S+)?\s+\d{4}.*?\n')

MAIL_BOX_PATTERN = re.compile(
    '^' + EMAIL_HEADER_PATTERN +
    'From: (\S+(?: at \S+))(?:\W+(\w+(?:\s\w+)*))?.*?\n'
    'Date: (.*?)\n'
    'Subject: (.*?)(?=\n\S+:)'
    '.*?Message-ID: (\S+)\n'
    '\n(.*?)\n'
    '(?=' + EMAIL_HEADER_PATTERN + 'From: )',
    flags=re.MULTILINE | re.DOTALL)

MESSAGE_PATTERNS = {
    'bug_id': re.compile(r'https://bugs.launchpad.net/bugs/(?P<id>\d+)',
                         re.IGNORECASE),
    'blueprint_id': re.compile(r'https://blueprints.launchpad.net/'
                               r'(?P<module>[^\/]+)/\+spec/(?P<id>[a-z0-9-]+)',
                               re.IGNORECASE),
}

TRAILING_RECORD = ('From ishakhat at mirantis.com Tue Sep 17 07:30:43 2013'
                   'From: ')


def _read_uri(uri):
    try:
        fd = urllib.urlopen(uri)
        raw = fd.read()
        fd.close()
        return raw
    except Exception as e:
        LOG.warn('Error while reading uri: %s' % e)


def _get_mail_archive_links(uri):
    content = _read_uri(uri)
    links = set(re.findall(r'\shref\s*=\s*[\'"]([^\'"]*\.txt\.gz)', content,
                           flags=re.IGNORECASE))
    return [urlparse.urljoin(uri, link) for link in links]


def _link_content_changed(link, runtime_storage_inst):
    LOG.debug('Check changes for mail archive located at uri: %s', link)
    parsed_uri = urlparse.urlparse(link)
    conn = httplib.HTTPConnection(parsed_uri.netloc)
    conn.request('HEAD', parsed_uri.path)
    res = conn.getresponse()
    last_modified = res.getheader('last-modified')
    if last_modified != runtime_storage_inst.get_by_key('mail_link:' + link):
        LOG.debug('Mail archive changed, last modified at: %s', last_modified)
        runtime_storage_inst.set_by_key('mail_link:' + link, last_modified)
        return True
    return False


def _retrieve_mails(uri):
    LOG.debug('Retrieving mail archive from uri: %s', uri)
    content = _read_uri(uri)
    gzip_fd = gzip.GzipFile(fileobj=StringIO.StringIO(content))
    content = gzip_fd.read()
    LOG.debug('Mail archive is loaded, start processing')

    content += TRAILING_RECORD

    for rec in re.finditer(MAIL_BOX_PATTERN, content):
        author_email = rec.group(1).replace(' at ', '@', 1)
        if not utils.check_email_validity(author_email):
            continue

        author_name = rec.group(2)
        date = int(time.mktime(email_utils.parsedate(rec.group(3))))
        subject = rec.group(4)
        message_id = rec.group(5)
        body = rec.group(6)

        email = {
            'message_id': message_id,
            'author_name': author_name,
            'author_email': author_email,
            'subject': subject,
            'date': date,
        }

        for pattern_name, pattern in MESSAGE_PATTERNS.iteritems():
            collection = set()
            for item in re.finditer(pattern, body):
                groups = item.groupdict()
                collection.add(groups['id'])
                if 'module' in groups:
                    email['module'] = groups['module']
            email[pattern_name] = list(collection)

        yield email


def log(uri, runtime_storage_inst):
    links = _get_mail_archive_links(uri)
    for link in links:
        if _link_content_changed(link, runtime_storage_inst):
            for mail in _retrieve_mails(link):
                LOG.debug('New mail: %s', mail)
                yield mail
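
A rough usage sketch for the new module: mls.log() walks the archive index page, skips *.txt.gz files whose Last-Modified header is unchanged, and yields one dict per parsed message (runtime_storage_inst is assumed to be an existing RuntimeStorage instance):

    from stackalytics.processor import mls

    uri = 'http://lists.openstack.org/pipermail/openstack-dev/'
    for mail in mls.log(uri, runtime_storage_inst):
        # each dict carries message_id, author_name, author_email, subject,
        # date (unix timestamp), plus bug_id / blueprint_id lists and an
        # optional 'module' extracted from links in the body
        print(mail['subject'])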


@ -78,4 +78,5 @@ NORMALIZERS = {
def normalize_default_data(default_data):
for key, normalizer in NORMALIZERS.iteritems():
normalizer(default_data[key])
if key in default_data:
normalizer(default_data[key])


@ -14,7 +14,6 @@
# limitations under the License.
import bisect
import re
from stackalytics.openstack.common import log as logging
from stackalytics.processor import normalizer
@ -34,12 +33,32 @@ class RecordProcessor(object):
self.releases = runtime_storage_inst.get_by_key('releases')
self.releases_dates = [r['end_date'] for r in self.releases]
self.modules = None
self.updated_users = set()
def _get_release(self, timestamp):
release_index = bisect.bisect(self.releases_dates, timestamp)
return self.releases[release_index]['release_name']
def _get_modules(self):
if self.modules is None:
self.modules = set()
for repo in utils.load_repos(self.runtime_storage_inst):
module = repo['module'].lower()
add = True
for m in self.modules:
if module.find(m) >= 0:
add = False
break
if m.find(module) >= 0:
self.modules.remove(m)
break
if add:
self.modules.add(module)
return self.modules
def _find_company(self, companies, date):
for r in companies:
if date < r['end_date']:
@ -75,7 +94,7 @@ class RecordProcessor(object):
def _get_lp_info(self, email):
lp_profile = None
if not re.match(r'[\w\d_\.-]+@([\w\d_\.-]+\.)+[\w]+', email):
if not utils.check_email_validity(email):
LOG.debug('User email is not valid %s' % email)
else:
LOG.debug('Lookup user email %s at Launchpad' % email)
@ -217,6 +236,34 @@ class RecordProcessor(object):
for r in gen(record):
yield r
def _guess_module(self, record):
subject = record['subject'].lower()
pos = len(subject)
best_guess_module = None
for module in self._get_modules():
find = subject.find(module)
if (find >= 0) and (find < pos):
pos = find
best_guess_module = module
if best_guess_module:
if (((pos > 0) and (subject[pos - 1] == '[')) or
(not record.get('module'))):
record['module'] = best_guess_module
if not record.get('module'):
record['module'] = 'unknown'
def _process_email(self, record):
record['primary_key'] = record['message_id']
record['author_email'] = record['author_email'].lower()
self._update_record_and_user(record)
self._guess_module(record)
yield record
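
The subject-line heuristic prefers the module mentioned earliest in the subject and overrides a module already derived from the body only when the match is an explicit '[module]' tag. A simplified standalone version for illustration (the real method builds the module set from the repo list and also handles the 'unknown' fallback):

    def guess_module(subject, modules):
        subject = subject.lower()
        pos, best_guess = len(subject), None
        for module in modules:
            found = subject.find(module)
            if 0 <= found < pos:
                pos, best_guess = found, module
        return best_guess

    guess_module('[openstack-dev] [Neutron] [Nova] Integration issue',
                 {'nova', 'neutron'})
    # -> 'neutron', matching test_process_mail_guessed below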
def _apply_type_based_processing(self, record):
if record['record_type'] == 'commit':
for r in self._process_commit(record):
@ -224,6 +271,9 @@ class RecordProcessor(object):
elif record['record_type'] == 'review':
for r in self._process_review(record):
yield r
elif record['record_type'] == 'email':
for r in self._process_email(record):
yield r
def process(self, record_iterator):
for record in record_iterator:


@ -15,6 +15,7 @@
import datetime
import json
import re
import time
import urllib
@ -42,6 +43,10 @@ def week_to_date(week):
strftime('%Y-%m-%d %H:%M:%S'))
def check_email_validity(email):
return re.match(r'[\w\d_\.-]+@([\w\d_\.-]+\.)+[\w]+', email)
def read_json_from_uri(uri):
try:
fd = urllib.urlopen(uri)
@ -66,3 +71,7 @@ def store_user(runtime_storage_inst, user):
def load_user(runtime_storage_inst, user_id):
return runtime_storage_inst.get_by_key('user:%s' % user_id)
def load_repos(runtime_storage_inst):
return runtime_storage_inst.get_by_key('repos') or []


@ -67,10 +67,10 @@ GIT_LOG_PATTERN = re.compile(''.join([(r[0] + ':(.*?)\n')
re.DOTALL)
MESSAGE_PATTERNS = {
'bug_id': re.compile(r'(bug)[\s#:]*(\d+)', re.IGNORECASE),
'blueprint_id': re.compile(r'\b(blueprint|bp)\b[ \t]*[#:]?[ \t]*(\S+)',
re.IGNORECASE),
'change_id': re.compile('(Change-Id): (I[0-9a-f]{40})', re.IGNORECASE),
'bug_id': re.compile(r'bug[\s#:]*(?P<id>\d+)', re.IGNORECASE),
'blueprint_id': re.compile(r'\b(?:blueprint|bp)\b[ \t]*[#:]?[ \t]*'
r'(?P<id>\S+)', re.IGNORECASE),
'change_id': re.compile('Change-Id: (?P<id>I[0-9a-f]{40})', re.IGNORECASE),
}
@ -158,12 +158,11 @@ class Git(Vcs):
commit['lines_added'] = int(lines_changed or 0)
commit['lines_deleted'] = int(lines_deleted or 0)
for key in MESSAGE_PATTERNS:
match = re.search(MESSAGE_PATTERNS[key], commit['message'])
if match:
commit[key] = match.group(2)
else:
commit[key] = None
for pattern_name, pattern in MESSAGE_PATTERNS.iteritems():
collection = set()
for item in re.finditer(pattern, commit['message']):
collection.add(item.group('id'))
commit[pattern_name] = list(collection)
commit['date'] = int(commit['date'])
commit['module'] = self.repo['module']
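
Commit messages can reference several bugs, so the single scalar match per pattern becomes a list collected with finditer, and named 'id' groups replace positional group numbers. A short sketch of the new extraction applied to a synthetic message, reusing the MESSAGE_PATTERNS dict defined above:

    import re

    message = 'Fixes bug: 1234567\nAlso fixes bug 987654\nblueprint mail-analysis'
    commit = {}
    for pattern_name, pattern in MESSAGE_PATTERNS.items():
        commit[pattern_name] = list(set(
            match.group('id') for match in re.finditer(pattern, message)))
    # commit['bug_id']       -> ['1234567', '987654'] (order not guaranteed)
    # commit['blueprint_id'] -> ['mail-analysis']
    # commit['change_id']    -> []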

tests/unit/test_mls.py (new file, 59 lines)

@ -0,0 +1,59 @@
# Copyright (c) 2013 Mirantis Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import testtools
from stackalytics.processor import mls
class TestMls(testtools.TestCase):
def setUp(self):
super(TestMls, self).setUp()
def test_mail_parse_regex(self):
content = '''
URL: <http://lists.openstack.org/pipermail/openstack-dev/>
From sorlando at nicira.com Tue Jul 17 07:30:43 2012
From: sorlando at nicira.com (Salvatore Orlando)
Date: Tue, 17 Jul 2012 00:30:43 -0700
Subject: [openstack-dev] [nova] [pci device passthrough] fails with
"NameError: global name '_' is not defined"
In-Reply-To: <5004FBF1.1080102@redhat.com>
References: <5004FBF1.1080102@redhat.com>
Message-ID: <CAGR=i3htLvDOdh5u6mxqmo0zVP1eKKYAxAhj=e1-rpQWZOiF6Q@gmail.com>
Good morning Gary!
test works :)
From sorlando at nicira.com Tue Jul 17 07:30:43 2012
From: sorlando at nicira.com (Salvatore Orlando)
'''
match = re.search(mls.MAIL_BOX_PATTERN, content)
self.assertTrue(match)
self.assertEqual('sorlando at nicira.com', match.group(1))
self.assertEqual('Salvatore Orlando', match.group(2))
self.assertEqual('Tue, 17 Jul 2012 00:30:43 -0700', match.group(3))
self.assertEqual('[openstack-dev] [nova] [pci device passthrough] '
'fails with\n "NameError: global name \'_\' is not '
'defined"', match.group(4))
self.assertEqual('<CAGR=i3htLvDOdh5u6mxqmo0zVP1eKKYAxAhj='
'e1-rpQWZOiF6Q@gmail.com>', match.group(5))
self.assertEqual('Good morning Gary!\n\ntest works :)\n',
match.group(6))


@ -68,6 +68,15 @@ RELEASES = [
},
]
REPOS = [
{
"branches": ["master"],
"module": "stackalytics",
"project_type": "stackforge",
"uri": "git://github.com/stackforge/stackalytics.git"
}
]
class TestRecordProcessor(testtools.TestCase):
def setUp(self):
@ -271,6 +280,67 @@ class TestRecordProcessor(testtools.TestCase):
self.assertEquals(0, len(updated))
def test_process_mail(self):
record_processor_inst = make_record_processor()
commit_generator = generate_emails(
subject='[openstack-dev] [Stackalytics] Configuration files')
commit = list(record_processor_inst.process(commit_generator))[0]
self.assertEquals('SuperCompany', commit['company_name'])
self.assertEquals('john_doe', commit['launchpad_id'])
self.assertEquals('stackalytics', commit['module'])
def test_process_mail_guessed(self):
runtime_storage_inst = make_runtime_storage(
repos=[{'module': 'nova'}, {'module': 'neutron'}])
record_processor_inst = make_record_processor(runtime_storage_inst)
commit_generator = generate_emails(
subject='[openstack-dev] [Neutron] [Nova] Integration issue')
commit = list(record_processor_inst.process(commit_generator))[0]
self.assertEquals('neutron', commit['module'])
def test_process_mail_guessed_module_in_body_override(self):
runtime_storage_inst = make_runtime_storage(
repos=[{'module': 'nova'}, {'module': 'heat'}])
record_processor_inst = make_record_processor(runtime_storage_inst)
commit_generator = generate_emails(
subject='[openstack-dev] [heat] Comments/questions on the',
module='nova')
commit = list(record_processor_inst.process(commit_generator))[0]
self.assertEquals('heat', commit['module'])
def test_process_mail_guessed_module_in_body(self):
runtime_storage_inst = make_runtime_storage(
repos=[{'module': 'nova'}, {'module': 'heat'}])
record_processor_inst = make_record_processor(runtime_storage_inst)
commit_generator = generate_emails(
subject='[openstack-dev] Comments/questions on the heat',
module='nova')
commit = list(record_processor_inst.process(commit_generator))[0]
self.assertEquals('nova', commit['module'])
def test_process_mail_unmatched(self):
record_processor_inst = make_record_processor()
commit_generator = generate_emails(
subject='[openstack-dev] [Photon] Configuration files')
commit = list(record_processor_inst.process(commit_generator))[0]
self.assertEquals('SuperCompany', commit['company_name'])
self.assertEquals('john_doe', commit['launchpad_id'])
self.assertEquals('unknown', commit['module'])
def test_get_modules(self):
record_processor_inst = make_record_processor()
with mock.patch('stackalytics.processor.utils.load_repos') as patch:
patch.return_value = [{'module': 'nova'},
{'module': 'python-novaclient'},
{'module': 'neutron'}]
modules = record_processor_inst._get_modules()
self.assertEqual(set(['nova', 'neutron']), set(modules))
# Helpers
@ -287,16 +357,32 @@ def generate_commits(email='johndoe@gmail.com', date=1999999999):
}
def make_runtime_storage(users=None, companies=None, releases=None):
def get_by_key(table):
if table == 'companies':
def generate_emails(email='johndoe@gmail.com', date=1999999999,
subject='[openstack-dev]', module=None):
yield {
'record_type': 'email',
'message_id': 'de7e8f297c193fb310f22815334a54b9c76a0be1',
'author_name': 'John Doe',
'author_email': email,
'date': date,
'subject': subject,
'module': module,
}
def make_runtime_storage(users=None, companies=None, releases=None,
repos=None):
def get_by_key(collection):
if collection == 'companies':
return _make_companies(companies or COMPANIES)
elif table == 'users':
elif collection == 'users':
return _make_users(users or USERS)
elif table == 'releases':
elif collection == 'releases':
return releases or RELEASES
elif collection == 'repos':
return repos or REPOS
else:
raise Exception('Wrong table %s' % table)
raise Exception('Wrong collection: %s' % collection)
rs = mock.Mock(runtime_storage.RuntimeStorage)
rs.get_by_key = mock.Mock(side_effect=get_by_key)


@ -42,3 +42,11 @@ class TestUtils(testtools.TestCase):
def test_make_range_5_26_10(self):
self._test_one_range(5, 26, 10)
def test_email_valid(self):
self.assertTrue(utils.check_email_validity('pupkin@gmail.com'))
self.assertTrue(utils.check_email_validity('v.pup_kin2@ntt.co.jp'))
def test_email_invalid(self):
self.assertFalse(utils.check_email_validity('pupkin@localhost'))
self.assertFalse(utils.check_email_validity('222@some.(trash)'))


@ -71,7 +71,8 @@ date:1369831203
author_name:Mark McClain
author_email:mark.mcclain@dreamhost.com
subject:add readme for 2.2.2
message:Fix bug: 1234567
message:Fixes bug: 1234567
Also fixes bug 987654
Change-Id: Id32a4a72ec1d13992b306c4a38e73605758e26c7
diff_stat:
@ -106,7 +107,7 @@ diff_stat:
self.assertEquals(21, commits[0]['files_changed'])
self.assertEquals(340, commits[0]['lines_added'])
self.assertEquals(408, commits[0]['lines_deleted'])
self.assertEquals('1167901', commits[0]['bug_id'])
self.assertEquals(['1167901'], commits[0]['bug_id'])
self.assertEquals(1, commits[1]['files_changed'])
self.assertEquals(0, commits[1]['lines_added'])
@ -115,7 +116,8 @@ diff_stat:
self.assertEquals(1, commits[2]['files_changed'])
self.assertEquals(8, commits[2]['lines_added'])
self.assertEquals(0, commits[2]['lines_deleted'])
self.assertEquals('1234567', commits[2]['bug_id'])
self.assertEquals(set(['987654', '1234567']),
set(commits[2]['bug_id']))
self.assertEquals(0, commits[3]['files_changed'])
self.assertEquals(0, commits[3]['lines_added'])