From 70bfc5e092cf7ca5ff950feed5963af5f53dc17c Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Wed, 17 Jun 2015 19:19:35 +0000 Subject: [PATCH] Track bug_modified_since per project When running stackalytics from scratch, it takes a long, long, long time to import all the stats. As such, the bug import can be optimized by tracking bug_modified_since per project instead of only once after a complete successful run. Change-Id: Id02e09696f7f27a9b0bd00edd032eeaca0fd21de Signed-off-by: Paul Belanger --- stackalytics/processor/main.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/stackalytics/processor/main.py b/stackalytics/processor/main.py index ba08055a0..51085078a 100644 --- a/stackalytics/processor/main.py +++ b/stackalytics/processor/main.py @@ -96,7 +96,7 @@ def _process_reviews(record_iterator, ci_map, module, branch): def _process_repo(repo, runtime_storage_inst, record_processor_inst, - rcs_inst, bug_modified_since): + rcs_inst): uri = repo['uri'] LOG.info('Processing repo uri: %s', uri) @@ -109,6 +109,10 @@ def _process_repo(repo, runtime_storage_inst, record_processor_inst, utils.merge_records) LOG.debug('Processing bugs for repo uri: %s', uri) + current_date = utils.date_to_timestamp('now') + bug_modified_since = runtime_storage_inst.get_by_key( + 'bug_modified_since-%s' % repo['module']) + bug_iterator = bps.log(repo, bug_modified_since) bug_iterator_typed = _record_typer(bug_iterator, 'bug') processed_bug_iterator = record_processor_inst.process( @@ -116,6 +120,9 @@ def _process_repo(repo, runtime_storage_inst, record_processor_inst, runtime_storage_inst.set_records(processed_bug_iterator, utils.merge_records) + runtime_storage_inst.set_by_key( + 'bug_modified_since-%s' % repo['module'], current_date) + vcs_inst = vcs.get_vcs(repo, cfg.CONF.sources_root) vcs_inst.fetch() @@ -201,21 +208,16 @@ def _post_process_records(record_processor_inst, repos): def process(runtime_storage_inst, record_processor_inst): repos = utils.load_repos(runtime_storage_inst) - current_date = 
utils.date_to_timestamp('now') - bug_modified_since = runtime_storage_inst.get_by_key('bug_modified_since') - rcs_inst = rcs.get_rcs(cfg.CONF.review_uri) rcs_inst.setup(key_filename=cfg.CONF.ssh_key_filename, username=cfg.CONF.ssh_username) for repo in repos: _process_repo(repo, runtime_storage_inst, record_processor_inst, - rcs_inst, bug_modified_since) + rcs_inst) rcs_inst.close() - runtime_storage_inst.set_by_key('bug_modified_since', current_date) - LOG.info('Processing mail lists') mail_lists = runtime_storage_inst.get_by_key('mail_lists') or [] for mail_list in mail_lists: