modified the launchpad answers importer script
Change-Id: I22b61258ee30046c4924851be6b84fa25f407f4d
This commit is contained in:
parent
3556e8a650
commit
12acc94919
248
launchpad.py
248
launchpad.py
@ -22,9 +22,13 @@ def no_credential():
|
|||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
"""Logs into Launchpad """
|
"""Logs into Launchpad """
|
||||||
cachedir = "/home/fifieldt/.launchpadlib/cache/"
|
#cachedir = '/Users/evgenyfadeev/.launchadlib/cache'
|
||||||
launchpad = Launchpad.login_with('Extract Answers', version='devel',
|
Launchpad.logout()
|
||||||
credential_save_failed=no_credential)
|
launchpad = Launchpad.login_with(
|
||||||
|
'Extract Answers',
|
||||||
|
version='devel',
|
||||||
|
credential_save_failed=no_credential
|
||||||
|
)
|
||||||
|
|
||||||
user_mapping = {}
|
user_mapping = {}
|
||||||
|
|
||||||
@ -34,101 +38,105 @@ def get_questions(project_name):
|
|||||||
project = launchpad.projects[project_name]
|
project = launchpad.projects[project_name]
|
||||||
return project.searchQuestions()
|
return project.searchQuestions()
|
||||||
|
|
||||||
|
def get_user_data(user_link):
|
||||||
def find_or_create_user(user_link):
|
"""returns dictionary with keys:
|
||||||
"""Takes a Launchpad user link string of the format akin to
|
* username
|
||||||
https://api.staging.launchpad.net/devel/~mat-rush
|
* confirmed_email_addresses
|
||||||
separates out the username, then uses the Launchpad user object
|
|
||||||
to determine whether a user exists in AskBot based on the email
|
|
||||||
addresses and username. If not, it creates one using the information
|
|
||||||
"""
|
"""
|
||||||
# check out user cache first
|
# check out user cache first
|
||||||
if user_link in user_mapping:
|
if user_link in user_mapping:
|
||||||
return user_mapping[user_link]
|
return user_mapping[user_link]
|
||||||
|
|
||||||
username = user_link.split('~')[1]
|
username = user_link.split('~')[1]
|
||||||
|
|
||||||
|
user_data = {
|
||||||
|
'username': username,
|
||||||
|
'confirmed_email_addresses': list()
|
||||||
|
}
|
||||||
|
|
||||||
lp_user = launchpad.people[username]
|
lp_user = launchpad.people[username]
|
||||||
ab_user = None
|
|
||||||
|
for email in lp_user.confirmed_email_addresses:
|
||||||
|
# search for the user based on their email
|
||||||
|
email = str(email).split('/')[-1]
|
||||||
|
user_data['confirmed_email_addresses'].append(email)
|
||||||
|
|
||||||
|
user_mapping[user_link] = user_data
|
||||||
|
return user_data
|
||||||
|
|
||||||
|
def get_or_create_user(user_data):
|
||||||
|
"""returns Askbot user.
|
||||||
|
If user corresponding to the given data does not exist,
|
||||||
|
it is created
|
||||||
|
"""
|
||||||
|
username = user_data['username']
|
||||||
|
|
||||||
|
#check the cache by user name
|
||||||
|
if username in user_mapping:
|
||||||
|
return user_mapping[username]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# find using identical username first
|
# find using identical username first
|
||||||
ab_user = models.User.objects.get(username=username)
|
user = models.User.objects.get(username=username)
|
||||||
except models.User.DoesNotExist:
|
except models.User.DoesNotExist:
|
||||||
# we haven't created the user yet
|
# we haven't created the user yet
|
||||||
for email in lp_user.confirmed_email_addresses:
|
try:
|
||||||
# search for the user based on their email
|
user = models.User.objects.filter(email__in=user_data['confirmed_email_addresses'])[0]
|
||||||
stripped_email = str(email).split('/')[-1]
|
except:
|
||||||
try:
|
user = models.User(username=username)
|
||||||
ab_user = models.User.objects.get(email=stripped_email)
|
if len(user_data['confirmed_email_addresses']):
|
||||||
except models.User.DoesNotExist:
|
user.email = user_data['confirmed_email_addresses'][0]
|
||||||
pass
|
user.save()
|
||||||
|
|
||||||
if ab_user is None:
|
|
||||||
# we didn't find a user, create a new one
|
|
||||||
try:
|
|
||||||
first_email = str(lp_user.confirmed_email_addresses[0]).split('/')[-1]
|
|
||||||
ab_user = models.User(username=username, email=first_email)
|
|
||||||
ab_user.save()
|
|
||||||
except IndexError:
|
|
||||||
try:
|
|
||||||
ab_user = models.User(username=username)
|
|
||||||
ab_user.save()
|
|
||||||
except IntegrityError:
|
|
||||||
# the user already exists, but we didn't find it somehow
|
|
||||||
print "user is corrupt: " + user_link + str(e)
|
|
||||||
pass
|
|
||||||
|
|
||||||
# cache the users we've seen so far to avoid API calls
|
# cache the users we've seen so far to avoid API calls
|
||||||
user_mapping[user_link] = ab_user
|
user_mapping[username] = user
|
||||||
if ab_user is None:
|
return user
|
||||||
print "ab_user still none " + user_link
|
|
||||||
return ab_user
|
|
||||||
|
|
||||||
|
|
||||||
def import_questions(questions, project_name):
|
def save_questions(questions, project_name, data_filename):
|
||||||
"""loops through all items in launchpad Question format, and
|
"""gets data from the launchpad answers and then
|
||||||
adds them as askbot Questions and Answers"""
|
saves it in the python pickled format
|
||||||
|
so that the data can be uploaded elsewhere
|
||||||
|
"""
|
||||||
|
|
||||||
status_file = open('write.status', 'r')
|
#create data file if not exists
|
||||||
|
data_file = open(data_filename, 'a+')
|
||||||
|
data_file.close()
|
||||||
|
|
||||||
|
#read the data file
|
||||||
try:
|
try:
|
||||||
import_log = pickle.load(status_file)
|
data_file = open(data_filename, 'r')
|
||||||
if not isinstance(import_log, dict):
|
question_data = pickle.load(data_file)
|
||||||
import_log = {}
|
data_file.close()
|
||||||
except:
|
except EOFError:
|
||||||
import_log = {}
|
question_data = dict()
|
||||||
|
|
||||||
for question in questions:
|
try:
|
||||||
print '"' + question.title + '",' + str(question.date_created)
|
for question in questions:
|
||||||
|
print '"' + question.title + '",' + str(question.date_created)
|
||||||
|
|
||||||
try:
|
if question.self_link in question_data:
|
||||||
responses = question.messages_collection.entries
|
continue
|
||||||
print str(len(responses))
|
|
||||||
except AttributeError:
|
|
||||||
print "No Answers for question" + str(question)
|
|
||||||
responses = None
|
|
||||||
|
|
||||||
if question.self_link in import_log:
|
try:
|
||||||
print "Already imported - skipping the above question"
|
responses = question.messages_collection.entries
|
||||||
continue
|
print str(len(responses))
|
||||||
|
except AttributeError:
|
||||||
|
print "No Answers for question" + str(question)
|
||||||
|
responses = None
|
||||||
|
|
||||||
question_user = find_or_create_user(question.owner_link)
|
question_datum = {
|
||||||
|
'owner': get_user_data(question.owner_link),
|
||||||
# post the question
|
'self_link': question.self_link,
|
||||||
try:
|
'title': question.title,
|
||||||
ab_question = question_user.post_question(
|
'body_text': question.description,
|
||||||
title=question.title,
|
'timestamp': question.date_created.replace(tzinfo=None),
|
||||||
body_text=question.description,
|
'tags': project_name + ' migrated'
|
||||||
timestamp=question.date_created.replace(tzinfo=None),
|
}
|
||||||
tags=project_name + " migrated",
|
question_data[question.self_link] = question_datum
|
||||||
)
|
|
||||||
except IntegrityError:
|
answer_data = list()
|
||||||
# the question already exists, but we didn't find it somehow
|
|
||||||
print "Had an IntegrityError"
|
|
||||||
continue
|
|
||||||
if responses is not None:
|
|
||||||
# post all the answers
|
|
||||||
for response in responses:
|
for response in responses:
|
||||||
response_user = find_or_create_user(response['owner_link'])
|
|
||||||
try:
|
try:
|
||||||
timestamp=datetime.strptime(response['date_created'][0:-6],
|
timestamp=datetime.strptime(response['date_created'][0:-6],
|
||||||
'%Y-%m-%dT%H:%M:%S.%f')
|
'%Y-%m-%dT%H:%M:%S.%f')
|
||||||
@ -136,27 +144,91 @@ def import_questions(questions, project_name):
|
|||||||
#some timestamps don't have the milliseconds, thanks LP!
|
#some timestamps don't have the milliseconds, thanks LP!
|
||||||
timestamp=datetime.strptime(response['date_created'][0:-6],
|
timestamp=datetime.strptime(response['date_created'][0:-6],
|
||||||
'%Y-%m-%dT%H:%M:%S')
|
'%Y-%m-%dT%H:%M:%S')
|
||||||
if len(response['content']) > 1:
|
if 'content' in response and len(response['content']) > 1:
|
||||||
#for some reason, Launchpad allows blank answers
|
#for some reason, Launchpad allows blank answers
|
||||||
answer = response_user.post_answer(
|
answer = {
|
||||||
question=ab_question,
|
'owner': get_user_data(response['owner_link']),
|
||||||
body_text=response['content'],
|
'body_text': response['content'],
|
||||||
timestamp=timestamp
|
'timestamp': timestamp
|
||||||
)
|
}
|
||||||
import_log[question.self_link] = 1 #mark as imported
|
answer_data.append(answer)
|
||||||
|
question_datum['responses'] = answer_data
|
||||||
|
finally:
|
||||||
|
data_file = open(data_filename, 'w')
|
||||||
|
pickle.dump(question_data, data_file)
|
||||||
|
data_file.close()
|
||||||
|
|
||||||
|
|
||||||
|
def import_questions(data_filename):
|
||||||
|
"""loops through all items in launchpad Question format, and
|
||||||
|
adds them as askbot Questions and Answers"""
|
||||||
|
|
||||||
|
status_file = open('write.status', 'a')
|
||||||
|
try:
|
||||||
|
import_log = pickle.load(status_file)
|
||||||
|
if not isinstance(import_log, dict):
|
||||||
|
import_log = {}
|
||||||
|
except:
|
||||||
|
import_log = {}
|
||||||
|
|
||||||
|
data_file = open(data_filename, 'r')
|
||||||
|
questions = pickle.load(data_file)
|
||||||
|
|
||||||
|
for question in questions.values():
|
||||||
|
print '"' + question['title'] + '",' + str(question['timestamp'])
|
||||||
|
|
||||||
|
try:
|
||||||
|
responses = question['responses']
|
||||||
|
print str(len(responses))
|
||||||
|
except AttributeError:
|
||||||
|
responses = None
|
||||||
|
print "No Answers"
|
||||||
|
|
||||||
|
if question['self_link'] in import_log:
|
||||||
|
print "Already imported - skipping the above question"
|
||||||
|
continue
|
||||||
|
|
||||||
|
# post the question
|
||||||
|
question_user = get_or_create_user(question['owner'])
|
||||||
|
try:
|
||||||
|
ab_question = question_user.post_question(
|
||||||
|
title=question['title'],
|
||||||
|
body_text=question['body_text'],
|
||||||
|
timestamp=question['timestamp'],
|
||||||
|
tags=question['tags']
|
||||||
|
)
|
||||||
|
except IntegrityError:
|
||||||
|
# the question already exists, but we didn't find it somehow
|
||||||
|
print "Had an IntegrityError"
|
||||||
|
continue
|
||||||
|
|
||||||
|
for response in question['responses']:
|
||||||
|
if len(response['body_text']) == 0:
|
||||||
|
continue
|
||||||
|
response_user = get_or_create_user(response['owner'])
|
||||||
|
#for some reason, Launchpad allows blank answers
|
||||||
|
|
||||||
|
answer = response_user.post_answer(
|
||||||
|
question=ab_question,
|
||||||
|
body_text=response['body_text'],
|
||||||
|
timestamp=response['timestamp']
|
||||||
|
)
|
||||||
|
|
||||||
|
import_log[question['self_link']] = 1 #mark as imported
|
||||||
status_file.close()
|
status_file.close()
|
||||||
status_file = open('write.status', 'w')
|
status_file = open('write.status', 'w')
|
||||||
pickle.dump(import_log, status_file)
|
pickle.dump(import_log, status_file)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main_read():
|
||||||
translation.activate('en')
|
|
||||||
questions = get_questions('nova')
|
questions = get_questions('nova')
|
||||||
|
save_questions(questions, 'nova', 'launchpad.dat')
|
||||||
|
print str(len(questions)) + " found"
|
||||||
|
|
||||||
|
def main_write():
|
||||||
|
translation.activate('en')
|
||||||
setting_backup = askbot_settings.LIMIT_ONE_ANSWER_PER_USER
|
setting_backup = askbot_settings.LIMIT_ONE_ANSWER_PER_USER
|
||||||
askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', False)
|
askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', False)
|
||||||
print str(len(questions)) + " found"
|
import_questions('launchpad.dat')
|
||||||
import_questions(questions, 'nova')
|
|
||||||
askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', setting_backup)
|
askbot_settings.update('LIMIT_ONE_ANSWER_PER_USER', setting_backup)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
|
Loading…
Reference in New Issue
Block a user