Merge "relinker: Track part_power/next_part_power in state file"
This commit is contained in:
commit
aade63b217
@ -50,13 +50,18 @@ def hook_pre_device(locks, states, datadir, device_path):
|
||||
locks[0] = fd
|
||||
|
||||
state_file = os.path.join(device_path, STATE_FILE.format(datadir=datadir))
|
||||
states.clear()
|
||||
states["state"].clear()
|
||||
try:
|
||||
with open(state_file, 'rt') as f:
|
||||
tmp = json.load(f)
|
||||
states.update(tmp)
|
||||
except ValueError:
|
||||
# Invalid JSON: remove the file to restart from scratch
|
||||
state_from_disk = json.load(f)
|
||||
if state_from_disk["next_part_power"] != states["next_part_power"]:
|
||||
raise ValueError
|
||||
if state_from_disk["part_power"] != states["part_power"]:
|
||||
states["prev_part_power"] = state_from_disk["part_power"]
|
||||
raise ValueError
|
||||
states["state"].update(state_from_disk["state"])
|
||||
except (ValueError, TypeError, KeyError):
|
||||
# Bad state file: remove the file to restart from scratch
|
||||
os.unlink(state_file)
|
||||
except IOError as err:
|
||||
# Ignore file not found error
|
||||
@ -69,33 +74,39 @@ def hook_post_device(locks, _):
|
||||
locks[0] = None
|
||||
|
||||
|
||||
def partitions_filter(states, step, part_power, next_part_power,
|
||||
def partitions_filter(states, part_power, next_part_power,
|
||||
datadir_path, partitions):
|
||||
# Remove all non partitions first (eg: auditor_status_ALL.json)
|
||||
partitions = [p for p in partitions if p.isdigit()]
|
||||
|
||||
if not (step == STEP_CLEANUP and part_power == next_part_power):
|
||||
# This is not a cleanup after cancel, partitions in the upper half are
|
||||
# new partitions, there is nothing to relink/cleanup from there
|
||||
partitions = [p for p in partitions
|
||||
if int(p) < 2 ** next_part_power / 2]
|
||||
relinking = (part_power != next_part_power)
|
||||
if relinking:
|
||||
# All partitions in the upper half are new partitions and there is
|
||||
# nothing to relink there
|
||||
partitions = [part for part in partitions
|
||||
if int(part) < 2 ** part_power]
|
||||
elif "prev_part_power" in states:
|
||||
# All partitions in the upper half are new partitions and there is
|
||||
# nothing to clean up there
|
||||
partitions = [part for part in partitions
|
||||
if int(part) < 2 ** states["prev_part_power"]]
|
||||
|
||||
# Format: { 'part': [relinked, cleaned] }
|
||||
if states:
|
||||
missing = list(set(partitions) - set(states.keys()))
|
||||
# Format: { 'part': processed }
|
||||
if states["state"]:
|
||||
missing = list(set(partitions) - set(states["state"].keys()))
|
||||
if missing:
|
||||
# All missing partitions was created after the first run of
|
||||
# relink, so after the new ring was distribued, so they already
|
||||
# are hardlinked in both partitions, but they will need to
|
||||
# cleaned.. Just update the state file.
|
||||
# All missing partitions were created after the first run of the
|
||||
# relinker with this part_power/next_part_power pair. This is
|
||||
# expected when relinking, where new partitions appear that are
|
||||
# appropriate for the target part power. In such cases, there's
|
||||
# nothing to be done. Err on the side of caution during cleanup,
|
||||
# however.
|
||||
for part in missing:
|
||||
states[part] = [True, False]
|
||||
if step == STEP_RELINK:
|
||||
partitions = [str(p) for p, (r, c) in states.items() if not r]
|
||||
elif step == STEP_CLEANUP:
|
||||
partitions = [str(p) for p, (r, c) in states.items() if not c]
|
||||
states["state"][part] = relinking
|
||||
partitions = [str(part) for part, processed in states["state"].items()
|
||||
if not processed]
|
||||
else:
|
||||
states.update({str(p): [False, False] for p in partitions})
|
||||
states["state"].update({str(part): False for part in partitions})
|
||||
|
||||
# Always scan the partitions in reverse order to minimize the amount of IO
|
||||
# (it actually only matters for relink, not for cleanup).
|
||||
@ -107,7 +118,7 @@ def partitions_filter(states, step, part_power, next_part_power,
|
||||
# If the relinker then scan partition 1, it will listdir that object while
|
||||
# it's unnecessary. By working in reverse order of partitions, this is
|
||||
# avoided.
|
||||
partitions = sorted(partitions, key=lambda x: int(x), reverse=True)
|
||||
partitions = sorted(partitions, key=lambda part: int(part), reverse=True)
|
||||
|
||||
return partitions
|
||||
|
||||
@ -124,10 +135,8 @@ def hook_post_partition(states, step,
|
||||
state_file = os.path.join(device_path,
|
||||
STATE_FILE.format(datadir=datadir_name))
|
||||
|
||||
if step == STEP_RELINK:
|
||||
states[part][0] = True
|
||||
elif step == STEP_CLEANUP:
|
||||
states[part][1] = True
|
||||
if step in (STEP_RELINK, STEP_CLEANUP):
|
||||
states["state"][part] = True
|
||||
with open(state_tmp_file, 'wt') as f:
|
||||
json.dump(states, f)
|
||||
os.fsync(f.fileno())
|
||||
@ -164,14 +173,17 @@ def relink(swift_dir='/etc/swift',
|
||||
datadir = diskfile.get_data_dir(policy)
|
||||
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {
|
||||
"part_power": part_power,
|
||||
"next_part_power": next_part_power,
|
||||
"state": {},
|
||||
}
|
||||
relink_devices_filter = partial(devices_filter, device)
|
||||
relink_hook_pre_device = partial(hook_pre_device, locks, states,
|
||||
datadir)
|
||||
relink_hook_post_device = partial(hook_post_device, locks)
|
||||
relink_partition_filter = partial(partitions_filter,
|
||||
states, STEP_RELINK,
|
||||
part_power, next_part_power)
|
||||
states, part_power, next_part_power)
|
||||
relink_hook_post_partition = partial(hook_post_partition,
|
||||
states, STEP_RELINK)
|
||||
relink_hashes_filter = partial(hashes_filter, next_part_power)
|
||||
@ -228,14 +240,17 @@ def cleanup(swift_dir='/etc/swift',
|
||||
datadir = diskfile.get_data_dir(policy)
|
||||
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {
|
||||
"part_power": part_power,
|
||||
"next_part_power": next_part_power,
|
||||
"state": {},
|
||||
}
|
||||
cleanup_devices_filter = partial(devices_filter, device)
|
||||
cleanup_hook_pre_device = partial(hook_pre_device, locks, states,
|
||||
datadir)
|
||||
cleanup_hook_post_device = partial(hook_post_device, locks)
|
||||
cleanup_partition_filter = partial(partitions_filter,
|
||||
states, STEP_CLEANUP,
|
||||
part_power, next_part_power)
|
||||
states, part_power, next_part_power)
|
||||
cleanup_hook_post_partition = partial(hook_post_partition,
|
||||
states, STEP_CLEANUP)
|
||||
cleanup_hashes_filter = partial(hashes_filter, next_part_power)
|
||||
|
@ -178,16 +178,30 @@ class TestRelinker(unittest.TestCase):
|
||||
self._save_ring()
|
||||
relinker.relink(self.testdir, self.devices, True)
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {str(self.part): [True, False]})
|
||||
orig_inode = os.stat(state_file).st_ino
|
||||
self.assertEqual(json.load(f), {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {str(self.part): True}})
|
||||
|
||||
self.rb.increase_partition_power()
|
||||
self.rb._ring = None # Force builder to reload ring
|
||||
self._save_ring()
|
||||
relinker.cleanup(self.testdir, self.devices, True)
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f),
|
||||
{str(self.part): [True, True],
|
||||
str(self.next_part): [True, True]})
|
||||
# Keep the state file open during cleanup so the inode can't be
|
||||
# released/re-used when it gets unlinked
|
||||
self.assertEqual(orig_inode, os.stat(state_file).st_ino)
|
||||
relinker.cleanup(self.testdir, self.devices, True)
|
||||
self.assertNotEqual(orig_inode, os.stat(state_file).st_ino)
|
||||
with open(state_file, 'rt') as f:
|
||||
# NB: part_power/next_part_power tuple changed, so state was reset
|
||||
# (though we track prev_part_power for an efficient clean up)
|
||||
self.assertEqual(json.load(f), {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {str(self.part): True,
|
||||
str(self.next_part): True}})
|
||||
|
||||
def test_devices_filter_filtering(self):
|
||||
# With no filtering, returns all devices
|
||||
@ -210,7 +224,8 @@ class TestRelinker(unittest.TestCase):
|
||||
lock_file = os.path.join(device_path, '.relink.%s.lock' % datadir)
|
||||
|
||||
# The first run gets the lock
|
||||
relinker.hook_pre_device(locks, {}, datadir, device_path)
|
||||
states = {"state": {}}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertNotEqual([None], locks)
|
||||
|
||||
# A following run would block
|
||||
@ -232,20 +247,21 @@ class TestRelinker(unittest.TestCase):
|
||||
datadir_path = os.path.join(device_path, datadir)
|
||||
state_file = os.path.join(device_path, 'relink.%s.json' % datadir)
|
||||
|
||||
def call_partition_filter(step, parts):
|
||||
def call_partition_filter(part_power, next_part_power, parts):
|
||||
# Partition 312 will be ignored because it must have been created
|
||||
# by the relinker
|
||||
return relinker.partitions_filter(states, step,
|
||||
PART_POWER, PART_POWER + 1,
|
||||
return relinker.partitions_filter(states,
|
||||
part_power, next_part_power,
|
||||
datadir_path, parts)
|
||||
|
||||
# Start relinking
|
||||
states = {}
|
||||
states = {"part_power": PART_POWER, "next_part_power": PART_POWER + 1,
|
||||
"state": {}}
|
||||
|
||||
# Load the states: As it starts, it must be empty
|
||||
locks = [None]
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual({}, states)
|
||||
self.assertEqual({}, states["state"])
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# Partition 312 is ignored because it must have been created with the
|
||||
@ -253,80 +269,134 @@ class TestRelinker(unittest.TestCase):
|
||||
# 96 and 227 are reverse ordered
|
||||
# auditor_status_ALL.json is ignored because it's not a partition
|
||||
self.assertEqual(['227', '96'],
|
||||
call_partition_filter(relinker.STEP_RELINK,
|
||||
call_partition_filter(PART_POWER, PART_POWER + 1,
|
||||
['96', '227', '312',
|
||||
'auditor_status.json']))
|
||||
self.assertEqual(states, {'96': [False, False], '227': [False, False]})
|
||||
self.assertEqual(states["state"], {'96': False, '227': False})
|
||||
|
||||
# Ack partition 96
|
||||
relinker.hook_post_partition(states, relinker.STEP_RELINK,
|
||||
os.path.join(datadir_path, '96'))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [False, False]})
|
||||
self.assertEqual(states["state"], {'96': True, '227': False})
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {'96': [True, False],
|
||||
'227': [False, False]})
|
||||
self.assertEqual(json.load(f), {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': True, '227': False}})
|
||||
|
||||
# Restart relinking after only part 96 was done
|
||||
self.assertEqual(['227'],
|
||||
call_partition_filter(relinker.STEP_RELINK,
|
||||
call_partition_filter(PART_POWER, PART_POWER + 1,
|
||||
['96', '227', '312']))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [False, False]})
|
||||
self.assertEqual(states["state"], {'96': True, '227': False})
|
||||
|
||||
# Ack partition 227
|
||||
relinker.hook_post_partition(states, relinker.STEP_RELINK,
|
||||
os.path.join(datadir_path, '227'))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [True, False]})
|
||||
self.assertEqual(states["state"], {'96': True, '227': True})
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {'96': [True, False],
|
||||
'227': [True, False]})
|
||||
self.assertEqual(json.load(f), {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': True, '227': True}})
|
||||
|
||||
# If the process restarts, it reload the state
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {},
|
||||
}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states, {'96': [True, False], '227': [True, False]})
|
||||
self.assertEqual(states, {
|
||||
"part_power": PART_POWER,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': True, '227': True}})
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# Start cleanup -- note that part_power and next_part_power now match!
|
||||
states = {
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {},
|
||||
}
|
||||
# ...which means our state file was ignored
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states, {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {}})
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# Start cleanup
|
||||
self.assertEqual(['227', '96'],
|
||||
call_partition_filter(relinker.STEP_CLEANUP,
|
||||
call_partition_filter(PART_POWER + 1, PART_POWER + 1,
|
||||
['96', '227', '312']))
|
||||
# Ack partition 227
|
||||
relinker.hook_post_partition(states, relinker.STEP_CLEANUP,
|
||||
os.path.join(datadir_path, '227'))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [True, True]})
|
||||
self.assertEqual(states["state"],
|
||||
{'96': False, '227': True})
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {'96': [True, False],
|
||||
'227': [True, True]})
|
||||
self.assertEqual(json.load(f), {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': False, '227': True}})
|
||||
|
||||
# Restart cleanup after only part 227 was done
|
||||
self.assertEqual(['96'],
|
||||
call_partition_filter(relinker.STEP_CLEANUP,
|
||||
call_partition_filter(PART_POWER + 1, PART_POWER + 1,
|
||||
['96', '227', '312']))
|
||||
self.assertEqual(states, {'96': [True, False], '227': [True, True]})
|
||||
self.assertEqual(states["state"],
|
||||
{'96': False, '227': True})
|
||||
|
||||
# Ack partition 96
|
||||
relinker.hook_post_partition(states, relinker.STEP_CLEANUP,
|
||||
os.path.join(datadir_path, '96'))
|
||||
self.assertEqual(states, {'96': [True, True], '227': [True, True]})
|
||||
self.assertEqual(states["state"],
|
||||
{'96': True, '227': True})
|
||||
with open(state_file, 'rt') as f:
|
||||
self.assertEqual(json.load(f), {'96': [True, True],
|
||||
'227': [True, True]})
|
||||
self.assertEqual(json.load(f), {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {'96': True, '227': True}})
|
||||
|
||||
# At the end, the state is still accurate
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {
|
||||
"prev_part_power": PART_POWER,
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 1,
|
||||
"state": {},
|
||||
}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states, {'96': [True, True], '227': [True, True]})
|
||||
self.assertEqual(states["state"],
|
||||
{'96': True, '227': True})
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# If the part_power/next_part_power tuple differs, restart from scratch
|
||||
locks = [None]
|
||||
states = {
|
||||
"part_power": PART_POWER + 1,
|
||||
"next_part_power": PART_POWER + 2,
|
||||
"state": {},
|
||||
}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states["state"], {})
|
||||
self.assertFalse(os.path.exists(state_file))
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
# If the file gets corrupted, restart from scratch
|
||||
with open(state_file, 'wt') as f:
|
||||
f.write('NOT JSON')
|
||||
locks = [None]
|
||||
states = {}
|
||||
states = {"part_power": PART_POWER, "next_part_power": PART_POWER + 1,
|
||||
"state": {}}
|
||||
relinker.hook_pre_device(locks, states, datadir, device_path)
|
||||
self.assertEqual(states, {})
|
||||
self.assertEqual(states["state"], {})
|
||||
self.assertFalse(os.path.exists(state_file))
|
||||
os.close(locks[0]) # Release the lock
|
||||
|
||||
def test_cleanup_not_yet_relinked(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user