modified the launchpad answers importer script
Change-Id: I22b61258ee30046c4924851be6b84fa25f407f4d
This commit is contained in:
parent
3556e8a650
commit
12acc94919
224
launchpad.py
224
launchpad.py
@ -22,9 +22,13 @@ def no_credential():
|
||||
sys.exit()
|
||||
|
||||
"""Logs into Launchpad """
|
||||
cachedir = "/home/fifieldt/.launchpadlib/cache/"
|
||||
launchpad = Launchpad.login_with('Extract Answers', version='devel',
|
||||
credential_save_failed=no_credential)
|
||||
#cachedir = '/Users/evgenyfadeev/.launchadlib/cache'
|
||||
Launchpad.logout()
|
||||
launchpad = Launchpad.login_with(
|
||||
'Extract Answers',
|
||||
version='devel',
|
||||
credential_save_failed=no_credential
|
||||
)
|
||||
|
||||
user_mapping = {}
|
||||
|
||||
@ -34,72 +38,86 @@ def get_questions(project_name):
|
||||
project = launchpad.projects[project_name]
|
||||
return project.searchQuestions()
|
||||
|
||||
|
||||
def find_or_create_user(user_link):
|
||||
"""Takes a Launchpad user link string of the format akin to
|
||||
https://api.staging.launchpad.net/devel/~mat-rush
|
||||
separates out the username, then uses the Launchpad user object
|
||||
to determine whether a user exists in AskBot based on the email
|
||||
addressesand username. If not, it creates one using the information
|
||||
def get_user_data(user_link):
|
||||
"""returns dictionary with keys:
|
||||
* username
|
||||
* confirmed_email_addresses
|
||||
"""
|
||||
# check out user cache first
|
||||
if user_link in user_mapping:
|
||||
return user_mapping[user_link]
|
||||
|
||||
username = user_link.split('~')[1]
|
||||
|
||||
user_data = {
|
||||
'username': username,
|
||||
'confirmed_email_addresses': list()
|
||||
}
|
||||
|
||||
lp_user = launchpad.people[username]
|
||||
ab_user = None
|
||||
|
||||
for email in lp_user.confirmed_email_addresses:
|
||||
# search for the user based on their email
|
||||
email = str(email).split('/')[-1]
|
||||
user_data['confirmed_email_addresses'].append(email)
|
||||
|
||||
user_mapping[user_link] = user_data
|
||||
return user_data
|
||||
|
||||
def get_or_create_user(user_data):
|
||||
"""returns Askbot user.
|
||||
If user corresponding to the given data does not exist,
|
||||
it is created
|
||||
"""
|
||||
username = user_data['username']
|
||||
|
||||
#check the cache by user name
|
||||
if username in user_mapping:
|
||||
return user_mapping[username]
|
||||
|
||||
try:
|
||||
# find using identical username first
|
||||
ab_user = models.User.objects.get(username=username)
|
||||
user = models.User.objects.get(username=username)
|
||||
except models.User.DoesNotExist:
|
||||
# we haven't created the user yet
|
||||
for email in lp_user.confirmed_email_addresses:
|
||||
# search for the user based on their email
|
||||
stripped_email = str(email).split('/')[-1]
|
||||
try:
|
||||
ab_user = models.User.objects.get(email=stripped_email)
|
||||
except models.User.DoesNotExist:
|
||||
pass
|
||||
|
||||
if ab_user is None:
|
||||
# we didn't find a user, create a new one
|
||||
try:
|
||||
first_email = str(lp_user.confirmed_email_addresses[0]).split('/')[-1]
|
||||
ab_user = models.User(username=username, email=first_email)
|
||||
ab_user.save()
|
||||
except IndexError:
|
||||
try:
|
||||
ab_user = models.User(username=username)
|
||||
ab_user.save()
|
||||
except IntegrityError:
|
||||
# the user already exists, but we didn't find it somehow
|
||||
print "user is corrupt: " + user_link + str(e)
|
||||
pass
|
||||
user = models.User.objects.filter(email__in=user_data['confirmed_email_addresses'])[0]
|
||||
except:
|
||||
user = models.User(username=username)
|
||||
if len(user_data['confirmed_email_addresses']):
|
||||
user.email = user_data['confirmed_email_addresses'][0]
|
||||
user.save()
|
||||
|
||||
# cache the users we've seen so far to avoid API calls
|
||||
user_mapping[user_link] = ab_user
|
||||
if ab_user is None:
|
||||
print "ab_user still none " + user_link
|
||||
return ab_user
|
||||
user_mapping[username] = user
|
||||
return user
|
||||
|
||||
|
||||
def import_questions(questions, project_name):
|
||||
"""loops through all items in launchpad Question format, and
|
||||
adds them as askbot Questions and Answers"""
|
||||
def save_questions(questions, project_name, data_filename):
|
||||
"""gets data from the launchpad answers and then
|
||||
saves it in the python pickled format
|
||||
so that the data can be uploaded elsewhere
|
||||
"""
|
||||
|
||||
status_file = open('write.status', 'r')
|
||||
#create data file if not exists
|
||||
data_file = open(data_filename, 'a+')
|
||||
data_file.close()
|
||||
|
||||
#read the data file
|
||||
try:
|
||||
import_log = pickle.load(status_file)
|
||||
if not isinstance(import_log, dict):
|
||||
import_log = {}
|
||||
except:
|
||||
import_log = {}
|
||||
data_file = open(data_filename, 'r')
|
||||
question_data = pickle.load(data_file)
|
||||
data_file.close()
|
||||
except EOFError:
|
||||
question_data = dict()
|
||||
|
||||
try:
|
||||
for question in questions:
|
||||
print '"' + question.title + '",' + str(question.date_created)
|
||||
|
||||
if question.self_link in question_data:
|
||||
continue
|
||||
|
||||
try:
|
||||
responses = question.messages_collection.entries
|
||||
print str(len(responses))
|
||||
@ -107,28 +125,18 @@ def import_questions(questions, project_name):
|
||||
print "No Answers for question" + str(question)
|
||||
responses = None
|
||||
|
||||
if question.self_link in import_log:
|
||||
print "Already imported - skipping the above question"
|
||||
continue
|
||||
question_datum = {
|
||||
'owner': get_user_data(question.owner_link),
|
||||
'self_link': question.self_link,
|
||||
'title': question.title,
|
||||
'body_text': question.description,
|
||||
'timestamp': question.date_created.replace(tzinfo=None),
|
||||
'tags': project_name + ' migrated'
|
||||
}
|
||||
question_data[question.self_link] = question_datum
|
||||
|
||||
question_user = find_or_create_user(question.owner_link)
|
||||
|
||||
# post the question
|
||||
try:
|
||||
ab_question = question_user.post_question(
|
||||
title=question.title,
|
||||
body_text=question.description,
|
||||
timestamp=question.date_created.replace(tzinfo=None),
|
||||
tags=project_name + " migrated",
|
||||
)
|
||||
except IntegrityError:
|
||||
# the question already exists, but we didn't find it somehow
|
||||
print "Had an IntegrityError"
|
||||
continue
|
||||
if responses is not None:
|
||||
# post all the answers
|
||||
answer_data = list()
|
||||
for response in responses:
|
||||
response_user = find_or_create_user(response['owner_link'])
|
||||
try:
|
||||
timestamp=datetime.strptime(response['date_created'][0:-6],
|
||||
'%Y-%m-%dT%H:%M:%S.%f')
|
||||
@ -136,27 +144,91 @@ def import_questions(questions, project_name):
|
||||
#some timestamps don't have the millisectons, thanks LP!
|
||||
timestamp=datetime.strptime(response['date_created'][0:-6],
|
||||
'%Y-%m-%dT%H:%M:%S')
|
||||
if len(response['content']) > 1:
|
||||
if 'content' in response and len(response['content']) > 1:
|
||||
#for some reason, Launchpad allows blank answers
|
||||
answer = {
|
||||
'owner': get_user_data(response['owner_link']),
|
||||
'body_text': response['content'],
|
||||
'timestamp': timestamp
|
||||
}
|
||||
answer_data.append(answer)
|
||||
question_datum['responses'] = answer_data
|
||||
finally:
|
||||
data_file = open(data_filename, 'w')
|
||||
pickle.dump(question_data, data_file)
|
||||
data_file.close()
|
||||
|
||||
|
||||
def import_questions(data_filename):
|
||||
"""loops through all items in launchpad Question format, and
|
||||
adds them as askbot Questions and Answers"""
|
||||
|
||||
status_file = open('write.status', 'a')
|
||||
try:
|
||||
import_log = pickle.load(status_file)
|
||||
if not isinstance(import_log, dict):
|
||||
import_log = {}
|
||||
except:
|
||||
import_log = {}
|
||||
|
||||
data_file = open(data_filename, 'r')
|
||||
questions = pickle.load(data_file)
|
||||
|
||||
for question in questions.values():
|
||||
print '"' + question['title'] + '",' + str(question['timestamp'])
|
||||
|
||||
try:
|
||||
responses = question['responses']
|
||||
print str(len(responses))
|
||||
except AttributeError:
|
||||
responses = None
|
||||
print "No Answers"
|
||||
|
||||
if question['self_link'] in import_log:
|
||||
print "Already imported - skipping the above question"
|
||||
continue
|
||||
|
||||
# post the question
|
||||
question_user = get_or_create_user(question['owner'])
|
||||
try:
|
||||
ab_question = question_user.post_question(
|
||||
title=question['title'],
|
||||
body_text=question['body_text'],
|
||||
timestamp=question['timestamp'],
|
||||
tags=question['tags']
|
||||
)
|
||||
except IntegrityError:
|
||||
# the question already exists, but we didn't find it somehow
|
||||
print "Had an IntegrityError"
|
||||
continue
|
||||
|
||||
for response in question['responses']:
|
||||
if len(response['body_text']) == 0:
|
||||
continue
|
||||
response_user = get_or_create_user(response['owner'])
|
||||
#for some reason, Launchpad allows blank answers
|
||||
|
||||
answer = response_user.post_answer(
|
||||
question=ab_question,
|
||||
body_text=response['content'],
|
||||
timestamp=timestamp
|
||||
body_text=response['body_text'],
|
||||
timestamp=response['timestamp']
|
||||
)
|
||||
import_log[question.self_link] = 1 #mark as imported
|
||||
|
||||
import_log[question['self_link']] = 1 #mark as imported
|
||||
status_file.close()
|
||||
status_file = open('write.status', 'w')
|
||||
pickle.dump(import_log, status_file)
|
||||
|
||||
|
||||
def main():
|
||||
translation.activate('en')
|
||||
def main_read():
|
||||
questions = get_questions('nova')
|
||||
save_questions(questions, 'nova', 'launchpad.dat')
|
||||
print str(len(questions)) + " found"
|
||||
|
||||
def main_write():
|
||||
translation.activate('en')
|
||||
setting_backup = askbot_settings.LIMIT_ONE_ANSWER_PER_USER
|
||||
askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', False)
|
||||
print str(len(questions)) + " found"
|
||||
import_questions(questions, 'nova')
|
||||
import_questions('launchpad.dat')
|
||||
askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', setting_backup)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
Loading…
Reference in New Issue
Block a user