Re-home migrate-to-phab.sql

This used to live in puppet-phabricator but as it has nothing to
do with the puppet module, it is being moved here.

Change-Id: I17d7cbcd3f7bb0147305bcd1d2976dd7c409fec8
Implements: spec maniphest migration
This commit is contained in:
Craige McWhirter 2015-11-01 07:41:30 +10:00
parent 55d8ad9b91
commit 8df9be2332

View File

@ -0,0 +1,528 @@
-- (c) 2015 Hewlett-Packard Development Company, L.P.
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-- implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
-- Migrate data from a storyboard schema to a phabricator schema
-- Assumes standard phabricator schema and that storyboard schema is loaded
-- in storyboard adjacent to the phabricator schema
use storyboard;
delimiter //
-- phabricator uses an artificial id for everything to facilitate vertical
-- sharding without needing cross-repo joins. You can tell they started life
-- at Facebook
drop function if exists make_phid //
create function make_phid(slug varbinary(9))
returns varbinary(30)
BEGIN
return concat(
'PHID-',
concat(slug, concat('-', left(
concat(
lower(conv(floor(rand() * 99999999999999), 20, 36)),
lower(conv(floor(rand() * 99999999999999), 20, 36))
),
24-length(slug)
))));
END //
-- There are several places where columns need a random string of a length
-- This is kinda lame way to make it (make a 256 character random string, then
-- return the first len characters, but it gets the job done
drop function if exists make_cert //
create function make_cert(len integer)
returns varbinary(255)
BEGIN
return left(
concat(
md5(rand()),
md5(rand()),
md5(rand()),
md5(rand()),
md5(rand()),
md5(rand()),
md5(rand()),
md5(rand())
), len);
END //
-- We probably won't eventually use this, but this creates an apache style
-- 1000 md5s in a row password hash that we can use to seed the database
-- with a bunch of passwords
drop function if exists make_hash //
create function make_hash(input varbinary(255))
returns varbinary(255)
BEGIN
DECLARE x int;
DECLARE str VARBINARY(255);
SET x = 0;
SET str = input;
while x < 1000 DO
SET str = md5(str);
SET x = x + 1;
END WHILE;
return str;
END //
delimiter ;
-- We're going to generate PHID values in each of the original tables
-- so that we can inject via joins
alter table users add column phid varbinary(64);
alter table stories add column phid varbinary(64);
alter table tasks add column phid varbinary(64);
alter table tasks add column storyPHID varbinary(64);
alter table projects add column phid varbinary(64);
alter table project_groups add column phid varbinary(64);
alter table comments add column phid varbinary(64);
alter table comments add column transPHID varbinary(64);
-- Add PHIDs to everything
update users set phid = make_phid('USER');
update stories set phid = make_phid('TASK');
update tasks set phid = make_phid('TASK');
update projects set phid = make_phid('PROJ');
update project_groups set phid = make_phid('PROJ');
update comments set phid = make_phid('XCMT');
update comments set transPHID = make_phid('XACT-TASK');
-- We want to track what story the task was related to without needing a
-- backreference join
update stories, tasks
set tasks.storyPHID=stories.phid
where stories.id=tasks.story_id;
-- There are a bunch of duplicate users in the storyboard db that are listed
-- with @example.com email addresses. username is unique in phabricator
update users
set id=concat(id, '_')
where email like '%example.com' and id not like '%_';
-- Create temporary table that helps us sort stories with a single task
-- from stories with multiple tasks
drop table if exists task_count;
create table task_count
select story_id, storyPHID, count(storyPHID) as count
from tasks group by storyPHID;
-- Scrub the data into something a bit more usable before we import
alter table tasks
modify column `priority` enum('low', 'medium', 'high', 'wishlist');
update tasks set priority='wishlist' where priority is null;
update tasks set status='todo' where status is NULL;
-- We're straight re-using the ids, so we need to make sure story and task ids
-- don't conflict.
-- Also, id's start with a T now, so we don't need to do as much to avoid
-- overlap with launchpad ids
alter table tasks drop foreign key tasks_ibfk_4;
update stories set id = id+3000 where id < 3000;
update tasks set story_id = story_id + 3000 where story_id < 3000;
update events set story_id = story_id + 3000 where story_id < 3000;
update stories set id = id - 2000000 + 4000 where id >= 2000000;
update tasks set story_id = story_id - 2000000 + 4000 where story_id >= 2000000;
update events set story_id = story_id - 2000000 + 4000 where story_id < 2000000;
use phabricator_user
delete from user;
delete from user_email;
insert into user
select
id as id,
phid as phid,
email as userName,
if(full_name is NULL, email, full_name) as realName,
NULL as sex,
NULL as translation,
storyboard.make_cert(32) as passwordSalt,
'' as passwordHash,
unix_timestamp(created_at) as dateCreated,
if(updated_at is NULL, unix_timestamp(now()), unix_timestamp(updated_at)) as dateModified,
NULL as profileImagePHID,
0 as consoleEnabled,
0 as consoleVisible,
'' as consoleTab,
storyboard.make_cert(255) as conduitCertificate,
0 as isSystemAgent,
0 as isDisabled,
is_superuser as isAdmin,
'UTC' as timezoneIdentifier,
0 as isEmailVerified,
1 as isApproved,
1 as accountSecret,
1 as isEnrolledInMultiFactor,
NULL as profileImageCache,
NULL as availabilityCache,
NULL as availabilityCacheTTL,
0 as isMailingList
from storyboard.users;
update user
set passwordHash = concat(
'md5:', storyboard.make_hash(
concat(username, 'password', phid, passwordSalt)));
insert into user_email
select
id, phid, email, 1, 1,
storyboard.make_cert(24),
unix_timestamp(created_at),
if(updated_at is NULL, unix_timestamp(now()), unix_timestamp(updated_at))
from storyboard.users;
use phabricator_maniphest
-- priorities
-- 100 = Unbreak Now!
-- 90 = Needs Triage
-- 80 = High
-- 50 = Normal
-- 25 = Low
-- 0 = Wishlist
delete from maniphest_task;
delete from edge;
delete from maniphest_transaction;
delete from maniphest_transaction_comment;
-- stories with one task get collapsed in a single new task
insert into maniphest_task
select
s.id, -- id
s.phid, -- phid
if(s.creator_id is NULL, '', s.creator_id), -- second pass authorPHID
if(t.assignee_id is NULL, NULL, t.assignee_id), -- second pass ownerPHID
'', -- attached
case t.status -- status
when 'todo' then 'open'
when 'inprogress' then 'inprogress'
when 'invalid' then 'invalid'
when 'review' then 'review'
when 'merged' then 'merged'
when 'invalid' then 'invalid'
end,
case t.priority -- priority
when 'high' then 80
when 'medium' then 50
when 'low' then 25
when 'wishlist' then 0
end,
s.title, -- title
s.title, -- originalTitle
s.description, -- description
unix_timestamp(s.created_at), -- dateCreated
if(t.updated_at is NULL, unix_timestamp(t.created_at), unix_timestamp(t.updated_at)), -- dateUpdated
'[]', -- update in second pass - projectPHIDs
storyboard.make_cert(20), -- mailKey
NULL, -- ownerOrdering
NULL, -- originalEmailSource
0, -- subpriority
'users', -- viewPolicy
'users', -- editPolicy
NULL -- spacePHID
from storyboard.stories s, storyboard.tasks t, storyboard.task_count c
where s.id = t.story_id and c.story_id=s.id and c.count = 1;
-- For stories with more than one task, each task becomes a new task
insert into maniphest_task
select
t.id,
t.phid,
if(t.creator_id is NULL, '', t.creator_id), -- u.phid,
if(t.assignee_id is NULL, NULL, t.assignee_id), -- second pass
'',
case t.status
when 'todo' then 'open'
when 'inprogress' then 'inprogress'
when 'invalid' then 'invalid'
when 'review' then 'review'
when 'merged' then 'merged'
when 'invalid' then 'invalid'
end,
case t.priority
when 'high' then 80
when 'medium' then 50
when 'low' then 25
when 'wishlist' then 0
end,
t.title,
t.title,
'',
unix_timestamp(t.created_at),
if(t.updated_at is NULL, unix_timestamp(t.created_at), unix_timestamp(t.updated_at)),
'[]', -- update in second pass
storyboard.make_cert(20),
NULL,
NULL,
0,
'users',
'users',
NULL -- spacePHID
from storyboard.stories s, storyboard.tasks t, storyboard.task_count c
where s.id = t.story_id and c.story_id=s.id and c.count > 1;
-- For stories with more than one task, each story also becomes a task, but
-- it doesn't have a project associated with it
insert into maniphest_task
select
s.id,
s.phid,
if(s.creator_id is NULL, '', s.creator_id), -- u.phid,
NULL,
'',
'open',
50,
s.title,
s.title,
s.description,
unix_timestamp(s.created_at),
if(s.updated_at is NULL, unix_timestamp(s.created_at), unix_timestamp(s.updated_at)),
'[]',
storyboard.make_cert(20),
NULL,
NULL,
0,
'users',
'users',
NULL -- spacePHID
from storyboard.stories s, storyboard.task_count c, storyboard.users u
where c.story_id=s.id and c.count > 1
and u.id = s.creator_id;
-- Set the author and owner PHIDs as a second pass to avoid really crazy
-- join semantics above. It could be done ... but why?
update maniphest_task, storyboard.users
set maniphest_task.authorPHID=storyboard.users.phid
where maniphest_task.authorPHID=storyboard.users.id;
update maniphest_task, storyboard.users
set maniphest_task.ownerPHID=storyboard.users.phid
where maniphest_task.ownerPHID=storyboard.users.id;
-- Releationships are edges in a DAG, so set up relationships between
-- tasks with their owners and authors in both directions
insert into edge
select authorPHID, 22, phid, 0, 0, NULL from maniphest_task;
insert into edge
select phid, 21, authorPHID, 0, 0, NULL from maniphest_task;
replace into edge
select ownerPHID, 22, phid, 0, 0, NULL from maniphest_task where ownerPHID is not null;
replace into edge
select phid, 21, ownerPHID, 0, 0, NULL from maniphest_task where ownerPHID is not null;
-- Comments have two parts - the first is an entry in the transaction table
-- indicating that a comment happened and associating the comment with the task
insert into maniphest_transaction
select
c.id, -- id
c.transPHID, -- phid
u.phid, -- authorPHID
s.phid, -- objectPHID
'public', -- viewPolicy
u.phid, -- editPolicy
c.phid, -- commentPHID
1, -- commentVersion
'core:comment', -- transactionType
'null', -- oldValue
'null', -- newValue
'{"source":"web"}', -- contentSource
'[]', -- metadata
unix_timestamp(c.created_at), -- dateCreated
if(c.updated_at is null, unix_timestamp(c.created_at), unix_timestamp(c.updated_at)) -- dateUpdated
from storyboard.comments c, storyboard.events e, storyboard.stories s, storyboard.users u
where c.id = e.comment_id and s.id = e.story_id
and e.event_type='user_comment' and s.creator_id = u.id;
-- The second part is the comment payload itself
insert into maniphest_transaction_comment
select
c.id, -- id
c.phid, -- phid
c.transPHID, -- transactionPHID
u.phid, -- author
'public', -- viewPolicy
u.phid, -- editPolicy
1, --
if(c.content is NULL, '', c.content),
'{"source":"web"}',
if(c.is_active, false, true),
unix_timestamp(c.created_at),
if(c.updated_at is null, unix_timestamp(c.created_at), unix_timestamp(c.updated_at))
from storyboard.comments c, storyboard.events e, storyboard.stories s, storyboard.users u
where c.id = e.comment_id and s.id = e.story_id
and e.event_type='user_comment' and s.creator_id = u.id;
-- We go back over to storyboard repo to create some calculated tables to
-- help us do project mapping. We needed to run in the tasks first so that
-- we can easily tell which tasks we need projects for.
use storyboard
drop table if exists task_project;
drop table if exists task_project_list;
drop table if exists task_project_grouping;
-- This is a table mapping tasks to every project it's associated with in a
-- clean and easy fashion for later
create table task_project
select t.phid as task_phid, p.phid as project_phid, t.project_id as project_id
from tasks t, phabricator_maniphest.maniphest_task m, projects p
where m.phid=t.phid and t.project_id is not null and t.project_id = p.id;
-- We also add project groups to this table
insert into task_project
select t.task_phid, g.phid, t.project_id
from task_project t, project_groups g, project_group_mapping m
where t.project_id = m.project_id and g.id = m.project_group_id;
-- based on that table, we make a new table so that we can modify and create
-- the comma-separated json list
create table task_project_list
select task_phid, project_phid from task_project;
update task_project_list set project_phid = concat('"', project_phid, '"');
-- use group_concat to get a row for each task and then a comma-sep list of
-- projects. Since we wrapped them all in " above, this will be comma-sep and
-- quoted
create table task_project_grouping
select task_phid, group_concat(project_phid) as phids
from task_project_list
group by task_phid;
-- Finally, wrap the results in [ and ]
update task_project_grouping set phids = concat('[', phids, ']');
-- We need to map tasks to dependent tasks. Lucky for us, Storyboard only
-- groks one level of this. Make a table for easy of importing later
drop table if exists task_subtask;
create table task_subtask
select tasks.phid, tasks.storyPHID
from tasks, phabricator_maniphest.maniphest_task
where tasks.phid = phabricator_maniphest.maniphest_task.phid;
-- Grab a PHID to use as an author for the projects.
-- TODO: Make a system/bot account that we can use as the "owner" of these
-- projects. But I'll do for now.
select phid into @author_phid from users where email='craige@mcwhirter.com.au';
use phabricator_project
delete from project;
insert into project
select
id, -- id
name, -- name
phid, -- phid
@author_phid,
unix_timestamp(created_at),
if(updated_at is NULL, unix_timestamp(created_at), unix_timestamp(updated_at)),
0, -- status
'[]', -- subprojectPHIDs
concat(replace(lower(name), '/', '_'), '/'),
'users',
'users',
'users',
0,
NULL,
'fa-briefcase',
'blue',
'12345678901234567890' -- mailKey
from storyboard.projects;
insert into project
select
id + 1000,
name,
phid,
@author_phid,
unix_timestamp(created_at),
if(updated_at is NULL, unix_timestamp(created_at), unix_timestamp(updated_at)),
0, -- status
'[]', -- subprojectPHIDs
concat(replace(lower(name), '/', '_'), '/'),
'users',
'users',
'users',
0,
NULL,
'fa-briefcase',
'blue',
'12345678901234567890' -- mailKey
from storyboard.projects;
from storyboard.project_groups;
delete from project_slug;
insert into project_slug
select
id,
phid,
replace(lower(name), '/', '_'),
dateCreated,
dateModified
from project;
-- insert into project_datasourcetoken (need to split name on - and do a row for each value) for typeahead search in boxes
insert into project_datasourcetoken (projectID, token)
SELECT p.id, SUBSTRING_INDEX(SUBSTRING_INDEX(
p.name, '-', n.n), '-', -1) value
FROM project p CROSS JOIN (
SELECT a.N + b.N * 10 + 1 n
FROM
(SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) a,
(SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) b
ORDER BY n
) n
WHERE n.n <= 1 + (LENGTH(p.name) - LENGTH(REPLACE(p.name, '-', '')))
ORDER BY value;
-- More DAG magic - this will map a project to a task in the projects DB
insert into edge
select
project_phid, 42, task_phid, 0, 0, NULL
from storyboard.task_project;
use phabricator_maniphest
-- We have projects now, so inject them into the name mapping table in the
-- bug system
insert into maniphest_nameindex
select id, phid, name from phabricator_project.project;
update maniphest_task t, storyboard.task_project_grouping g
set t.projectPHIDs = g.phids
where t.phid = g.task_phid;
-- Associate tasks with projects from the task side
insert into edge
select
task_phid, 41, project_phid, 0, 0, NULL
from storyboard.task_project;
-- Relationship
-- If the task phid matches a storyboard task.phid, then it's a
-- subtask, and we should take the storyPHID from that storyboard.task
-- and use it to create the parent/child edges
-- Create Parent Edge:
-- src = story.phid, type = 3, dst = task.phid
-- Create Child Backref:
-- src = task.phid, type = 4, dst = story.phid
insert into edge
select
storyPHID, 3, phid, 0, 0, NULL
from storyboard.task_subtask;
insert into edge
select
phid, 4, storyPHID, 0, 0, NULL
from storyboard.task_subtask;