From 12acc9491901627bf6e5ee8e07bdf2c8f58ee7a9 Mon Sep 17 00:00:00 2001 From: Evgeny Fadeev Date: Mon, 20 Jan 2014 23:14:05 -0300 Subject: [PATCH] modified the launchpad answers importer script Change-Id: I22b61258ee30046c4924851be6b84fa25f407f4d --- launchpad.py | 248 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 160 insertions(+), 88 deletions(-) diff --git a/launchpad.py b/launchpad.py index f963023..9c8c42d 100644 --- a/launchpad.py +++ b/launchpad.py @@ -22,9 +22,13 @@ def no_credential(): sys.exit() """Logs into Launchpad """ -cachedir = "/home/fifieldt/.launchpadlib/cache/" -launchpad = Launchpad.login_with('Extract Answers', version='devel', - credential_save_failed=no_credential) +#cachedir = '/Users/evgenyfadeev/.launchadlib/cache' +Launchpad.logout() +launchpad = Launchpad.login_with( + 'Extract Answers', + version='devel', + credential_save_failed=no_credential +) user_mapping = {} @@ -34,101 +38,105 @@ def get_questions(project_name): project = launchpad.projects[project_name] return project.searchQuestions() - -def find_or_create_user(user_link): - """Takes a Launchpad user link string of the format akin to - https://api.staging.launchpad.net/devel/~mat-rush - separates out the username, then uses the Launchpad user object - to determine whether a user exists in AskBot based on the email - addressesand username. If not, it creates one using the information +def get_user_data(user_link): + """returns dictionary with keys: + * username + * confirmed_email_addresses """ # check out user cache first if user_link in user_mapping: return user_mapping[user_link] username = user_link.split('~')[1] + + user_data = { + 'username': username, + 'confirmed_email_addresses': list() + } + lp_user = launchpad.people[username] - ab_user = None + + for email in lp_user.confirmed_email_addresses: + # search for the user based on their email + email = str(email).split('/')[-1] + user_data['confirmed_email_addresses'].append(email) + + user_mapping[user_link] = user_data + return user_data + +def get_or_create_user(user_data): + """returns Askbot user. + If user corresponding to the given data does not exist, + it is created + """ + username = user_data['username'] + + #check the cache by user name + if username in user_mapping: + return user_mapping[username] try: # find using identical username first - ab_user = models.User.objects.get(username=username) + user = models.User.objects.get(username=username) except models.User.DoesNotExist: # we haven't created the user yet - for email in lp_user.confirmed_email_addresses: - # search for the user based on their email - stripped_email = str(email).split('/')[-1] - try: - ab_user = models.User.objects.get(email=stripped_email) - except models.User.DoesNotExist: - pass - - if ab_user is None: - # we didn't find a user, create a new one - try: - first_email = str(lp_user.confirmed_email_addresses[0]).split('/')[-1] - ab_user = models.User(username=username, email=first_email) - ab_user.save() - except IndexError: - try: - ab_user = models.User(username=username) - ab_user.save() - except IntegrityError: - # the user already exists, but we didn't find it somehow - print "user is corrupt: " + user_link + str(e) - pass + try: + user = models.User.objects.filter(email__in=user_data['confirmed_email_addresses'])[0] + except: + user = models.User(username=username) + if len(user_data['confirmed_email_addresses']): + user.email = user_data['confirmed_email_addresses'][0] + user.save() # cache the users we've seen so far to avoid API calls - user_mapping[user_link] = ab_user - if ab_user is None: - print "ab_user still none " + user_link - return ab_user + user_mapping[username] = user + return user -def import_questions(questions, project_name): - """loops through all items in launchpad Question format, and - adds them as askbot Questions and Answers""" +def save_questions(questions, project_name, data_filename): + """gets data from the launchpad answers and then + saves it in the python pickled format + so that the data can be uploaded elsewhere + """ - status_file = open('write.status', 'r') + #create data file if not exists + data_file = open(data_filename, 'a+') + data_file.close() + + #read the data file try: - import_log = pickle.load(status_file) - if not isinstance(import_log, dict): - import_log = {} - except: - import_log = {} + data_file = open(data_filename, 'r') + question_data = pickle.load(data_file) + data_file.close() + except EOFError: + question_data = dict() - for question in questions: - print '"' + question.title + '",' + str(question.date_created) + try: + for question in questions: + print '"' + question.title + '",' + str(question.date_created) - try: - responses = question.messages_collection.entries - print str(len(responses)) - except AttributeError: - print "No Answers for question" + str(question) - responses = None + if question.self_link in question_data: + continue - if question.self_link in import_log: - print "Already imported - skipping the above question" - continue + try: + responses = question.messages_collection.entries + print str(len(responses)) + except AttributeError: + print "No Answers for question" + str(question) + responses = None - question_user = find_or_create_user(question.owner_link) - - # post the question - try: - ab_question = question_user.post_question( - title=question.title, - body_text=question.description, - timestamp=question.date_created.replace(tzinfo=None), - tags=project_name + " migrated", - ) - except IntegrityError: - # the question already exists, but we didn't find it somehow - print "Had an IntegrityError" - continue - if responses is not None: - # post all the answers + question_datum = { + 'owner': get_user_data(question.owner_link), + 'self_link': question.self_link, + 'title': question.title, + 'body_text': question.description, + 'timestamp': question.date_created.replace(tzinfo=None), + 'tags': project_name + ' migrated' + } + question_data[question.self_link] = question_datum + + answer_data = list() for response in responses: - response_user = find_or_create_user(response['owner_link']) try: timestamp=datetime.strptime(response['date_created'][0:-6], '%Y-%m-%dT%H:%M:%S.%f') @@ -136,27 +144,91 @@ def import_questions(questions, project_name): #some timestamps don't have the millisectons, thanks LP! timestamp=datetime.strptime(response['date_created'][0:-6], '%Y-%m-%dT%H:%M:%S') - if len(response['content']) > 1: + if 'content' in response and len(response['content']) > 1: #for some reason, Launchpad allows blank answers - answer = response_user.post_answer( - question=ab_question, - body_text=response['content'], - timestamp=timestamp - ) - import_log[question.self_link] = 1 #mark as imported + answer = { + 'owner': get_user_data(response['owner_link']), + 'body_text': response['content'], + 'timestamp': timestamp + } + answer_data.append(answer) + question_datum['responses'] = answer_data + finally: + data_file = open(data_filename, 'w') + pickle.dump(question_data, data_file) + data_file.close() + + +def import_questions(data_filename): + """loops through all items in launchpad Question format, and + adds them as askbot Questions and Answers""" + + status_file = open('write.status', 'a') + try: + import_log = pickle.load(status_file) + if not isinstance(import_log, dict): + import_log = {} + except: + import_log = {} + + data_file = open(data_filename, 'r') + questions = pickle.load(data_file) + + for question in questions.values(): + print '"' + question['title'] + '",' + str(question['timestamp']) + + try: + responses = question['responses'] + print str(len(responses)) + except AttributeError: + responses = None + print "No Answers" + + if question['self_link'] in import_log: + print "Already imported - skipping the above question" + continue + + # post the question + question_user = get_or_create_user(question['owner']) + try: + ab_question = question_user.post_question( + title=question['title'], + body_text=question['body_text'], + timestamp=question['timestamp'], + tags=question['tags'] + ) + except IntegrityError: + # the question already exists, but we didn't find it somehow + print "Had an IntegrityError" + continue + + for response in question['responses']: + if len(response['body_text']) == 0: + continue + response_user = get_or_create_user(response['owner']) + #for some reason, Launchpad allows blank answers + + answer = response_user.post_answer( + question=ab_question, + body_text=response['body_text'], + timestamp=response['timestamp'] + ) + + import_log[question['self_link']] = 1 #mark as imported status_file.close() status_file = open('write.status', 'w') pickle.dump(import_log, status_file) -def main(): - translation.activate('en') +def main_read(): questions = get_questions('nova') + save_questions(questions, 'nova', 'launchpad.dat') + print str(len(questions)) + " found" + +def main_write(): + translation.activate('en') setting_backup = askbot_settings.LIMIT_ONE_ANSWER_PER_USER askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', False) - print str(len(questions)) + " found" - import_questions(questions, 'nova') + import_questions('launchpad.dat') askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', setting_backup) -if __name__ == "__main__": - main()