Modified the Launchpad Answers importer script

Change-Id: I22b61258ee30046c4924851be6b84fa25f407f4d
This commit is contained in:
Evgeny Fadeev 2014-01-20 23:14:05 -03:00
parent 3556e8a650
commit 12acc94919

View File

@ -22,9 +22,13 @@ def no_credential():
sys.exit() sys.exit()
"""Logs into Launchpad """ """Logs into Launchpad """
cachedir = "/home/fifieldt/.launchpadlib/cache/" #cachedir = '/Users/evgenyfadeev/.launchadlib/cache'
launchpad = Launchpad.login_with('Extract Answers', version='devel', Launchpad.logout()
credential_save_failed=no_credential) launchpad = Launchpad.login_with(
'Extract Answers',
version='devel',
credential_save_failed=no_credential
)
user_mapping = {} user_mapping = {}
@ -34,101 +38,105 @@ def get_questions(project_name):
project = launchpad.projects[project_name] project = launchpad.projects[project_name]
return project.searchQuestions() return project.searchQuestions()
def get_user_data(user_link):
def find_or_create_user(user_link): """returns dictionary with keys:
"""Takes a Launchpad user link string of the format akin to * username
https://api.staging.launchpad.net/devel/~mat-rush * confirmed_email_addresses
separates out the username, then uses the Launchpad user object
to determine whether a user exists in AskBot based on the email
addresses and username. If not, it creates one using the information
""" """
# check out user cache first # check out user cache first
if user_link in user_mapping: if user_link in user_mapping:
return user_mapping[user_link] return user_mapping[user_link]
username = user_link.split('~')[1] username = user_link.split('~')[1]
user_data = {
'username': username,
'confirmed_email_addresses': list()
}
lp_user = launchpad.people[username] lp_user = launchpad.people[username]
ab_user = None
for email in lp_user.confirmed_email_addresses:
# search for the user based on their email
email = str(email).split('/')[-1]
user_data['confirmed_email_addresses'].append(email)
user_mapping[user_link] = user_data
return user_data
def get_or_create_user(user_data):
"""returns Askbot user.
If user corresponding to the given data does not exist,
it is created
"""
username = user_data['username']
#check the cache by user name
if username in user_mapping:
return user_mapping[username]
try: try:
# find using identical username first # find using identical username first
ab_user = models.User.objects.get(username=username) user = models.User.objects.get(username=username)
except models.User.DoesNotExist: except models.User.DoesNotExist:
# we haven't created the user yet # we haven't created the user yet
for email in lp_user.confirmed_email_addresses: try:
# search for the user based on their email user = models.User.objects.filter(email__in=user_data['confirmed_email_addresses'])[0]
stripped_email = str(email).split('/')[-1] except:
try: user = models.User(username=username)
ab_user = models.User.objects.get(email=stripped_email) if len(user_data['confirmed_email_addresses']):
except models.User.DoesNotExist: user.email = user_data['confirmed_email_addresses'][0]
pass user.save()
if ab_user is None:
# we didn't find a user, create a new one
try:
first_email = str(lp_user.confirmed_email_addresses[0]).split('/')[-1]
ab_user = models.User(username=username, email=first_email)
ab_user.save()
except IndexError:
try:
ab_user = models.User(username=username)
ab_user.save()
except IntegrityError:
# the user already exists, but we didn't find it somehow
print "user is corrupt: " + user_link + str(e)
pass
# cache the users we've seen so far to avoid API calls # cache the users we've seen so far to avoid API calls
user_mapping[user_link] = ab_user user_mapping[username] = user
if ab_user is None: return user
print "ab_user still none " + user_link
return ab_user
def import_questions(questions, project_name): def save_questions(questions, project_name, data_filename):
"""loops through all items in launchpad Question format, and """gets data from the launchpad answers and then
adds them as askbot Questions and Answers""" saves it in the python pickled format
so that the data can be uploaded elsewhere
"""
status_file = open('write.status', 'r') #create data file if not exists
data_file = open(data_filename, 'a+')
data_file.close()
#read the data file
try: try:
import_log = pickle.load(status_file) data_file = open(data_filename, 'r')
if not isinstance(import_log, dict): question_data = pickle.load(data_file)
import_log = {} data_file.close()
except: except EOFError:
import_log = {} question_data = dict()
for question in questions: try:
print '"' + question.title + '",' + str(question.date_created) for question in questions:
print '"' + question.title + '",' + str(question.date_created)
try: if question.self_link in question_data:
responses = question.messages_collection.entries continue
print str(len(responses))
except AttributeError:
print "No Answers for question" + str(question)
responses = None
if question.self_link in import_log: try:
print "Already imported - skipping the above question" responses = question.messages_collection.entries
continue print str(len(responses))
except AttributeError:
print "No Answers for question" + str(question)
responses = None
question_user = find_or_create_user(question.owner_link) question_datum = {
'owner': get_user_data(question.owner_link),
# post the question 'self_link': question.self_link,
try: 'title': question.title,
ab_question = question_user.post_question( 'body_text': question.description,
title=question.title, 'timestamp': question.date_created.replace(tzinfo=None),
body_text=question.description, 'tags': project_name + ' migrated'
timestamp=question.date_created.replace(tzinfo=None), }
tags=project_name + " migrated", question_data[question.self_link] = question_datum
)
except IntegrityError: answer_data = list()
# the question already exists, but we didn't find it somehow
print "Had an IntegrityError"
continue
if responses is not None:
# post all the answers
for response in responses: for response in responses:
response_user = find_or_create_user(response['owner_link'])
try: try:
timestamp=datetime.strptime(response['date_created'][0:-6], timestamp=datetime.strptime(response['date_created'][0:-6],
'%Y-%m-%dT%H:%M:%S.%f') '%Y-%m-%dT%H:%M:%S.%f')
@ -136,27 +144,91 @@ def import_questions(questions, project_name):
#some timestamps don't have the milliseconds, thanks LP! #some timestamps don't have the milliseconds, thanks LP!
timestamp=datetime.strptime(response['date_created'][0:-6], timestamp=datetime.strptime(response['date_created'][0:-6],
'%Y-%m-%dT%H:%M:%S') '%Y-%m-%dT%H:%M:%S')
if len(response['content']) > 1: if 'content' in response and len(response['content']) > 1:
#for some reason, Launchpad allows blank answers #for some reason, Launchpad allows blank answers
answer = response_user.post_answer( answer = {
question=ab_question, 'owner': get_user_data(response['owner_link']),
body_text=response['content'], 'body_text': response['content'],
timestamp=timestamp 'timestamp': timestamp
) }
import_log[question.self_link] = 1 #mark as imported answer_data.append(answer)
question_datum['responses'] = answer_data
finally:
data_file = open(data_filename, 'w')
pickle.dump(question_data, data_file)
data_file.close()
def import_questions(data_filename):
"""loops through all items in launchpad Question format, and
adds them as askbot Questions and Answers"""
status_file = open('write.status', 'a')
try:
import_log = pickle.load(status_file)
if not isinstance(import_log, dict):
import_log = {}
except:
import_log = {}
data_file = open(data_filename, 'r')
questions = pickle.load(data_file)
for question in questions.values():
print '"' + question['title'] + '",' + str(question['timestamp'])
try:
responses = question['responses']
print str(len(responses))
except AttributeError:
responses = None
print "No Answers"
if question['self_link'] in import_log:
print "Already imported - skipping the above question"
continue
# post the question
question_user = get_or_create_user(question['owner'])
try:
ab_question = question_user.post_question(
title=question['title'],
body_text=question['body_text'],
timestamp=question['timestamp'],
tags=question['tags']
)
except IntegrityError:
# the question already exists, but we didn't find it somehow
print "Had an IntegrityError"
continue
for response in question['responses']:
if len(response['body_text']) == 0:
continue
response_user = get_or_create_user(response['owner'])
#for some reason, Launchpad allows blank answers
answer = response_user.post_answer(
question=ab_question,
body_text=response['body_text'],
timestamp=response['timestamp']
)
import_log[question['self_link']] = 1 #mark as imported
status_file.close() status_file.close()
status_file = open('write.status', 'w') status_file = open('write.status', 'w')
pickle.dump(import_log, status_file) pickle.dump(import_log, status_file)
def main(): def main_read():
translation.activate('en')
questions = get_questions('nova') questions = get_questions('nova')
save_questions(questions, 'nova', 'launchpad.dat')
print str(len(questions)) + " found"
def main_write():
translation.activate('en')
setting_backup = askbot_settings.LIMIT_ONE_ANSWER_PER_USER setting_backup = askbot_settings.LIMIT_ONE_ANSWER_PER_USER
askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', False) askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', False)
print str(len(questions)) + " found" import_questions('launchpad.dat')
import_questions(questions, 'nova')
askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', setting_backup) askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', setting_backup)
if __name__ == "__main__":
main()