Add internal method to increase ring partition power
This method increases the partition power of an existing ring by one. It does not move any data, nor does it expose a CLI command yet; it is only intended to be used in a future version to do the actual ring modification itself. An existing object that is currently located on partition X will be placed either on partition 2*X or 2*X+1 after the partition power has been increased. The reason for this is the Ring.get_part() method, which does a bitwise shift to the right. To avoid actual data movement to different disks or even nodes, the allocation of partitions to nodes needs to be changed. The allocation is pairwise due to the above-mentioned new partition scheme. Therefore devices are allocated like this, with the partition being the index and the value being the device id: OLD: 0, 3, 7, 5, 2, 1, ... NEW: 0, 0, 3, 3, 7, 7, 5, 5, 2, 2, 1, 1, ... If an operator stops the cluster, increases the partition power, and renames & hardlinks the existing data, it is possible to do a power shift without actually moving data. Please see the partition power spec for further details on this. Change-Id: I063fd8077497ee8c14d9065f07b4ec0fb5cbe180 Partially-Implements: spec increasing_partition_power
This commit is contained in:
parent
5930d74d81
commit
3ff94cb785
@ -1688,3 +1688,38 @@ class RingBuilder(object):
|
||||
if matched:
|
||||
matched_devs.append(dev)
|
||||
return matched_devs
|
||||
|
||||
def increase_partition_power(self):
    """
    Increase the ring partition power by one.

    Every existing partition is split into two consecutive partitions that
    both keep the device of the partition they came from:

    OLD: 0, 3, 7, 5, 2, 1, ...
    NEW: 0, 0, 3, 3, 7, 7, 5, 5, 2, 2, 1, 1, ...

    The replica tables, the per-device ``parts`` counters, the last-moved
    table, ``part_power``, ``parts`` and ``version`` are all updated on
    this builder; nothing is returned.
    """
    # Rebuild each replica row with every device id written twice in a row.
    # The whole new table is built before the attribute is replaced.
    self._replica2part2dev = [
        array('H', (dev_id for dev_id in row for _ in (0, 1)))
        for row in self._replica2part2dev
    ]

    # Twice as many partitions means each device holds twice as many.
    for dev in self._iter_devs():
        dev['parts'] *= 2

    # The last-moved table has one entry per partition, so it has to be
    # doubled too: both new partitions inherit the entry of their origin.
    # NOTE(review): the result is a plain list whatever container was
    # stored before -- confirm nothing depends on the container type.
    doubled_moves = []
    for moved in self._last_part_moves:
        doubled_moves.extend((moved, moved))
    self._last_part_moves = doubled_moves

    self.part_power += 1
    self.parts *= 2
    self.version += 1
|
@ -2424,6 +2424,64 @@ class TestRingBuilder(unittest.TestCase):
|
||||
except exceptions.DuplicateDeviceError:
|
||||
self.fail("device hole not reused")
|
||||
|
||||
def test_increase_partition_power(self):
    """Increasing the partition power must not relocate any object.

    Builds and rebalances a ring, records where one object lives,
    increases the partition power and then checks that the follow-up
    rebalance changes nothing, that the doubled tables are pairwise
    identical and that the object still maps to the same nodes.
    """
    rb = ring.RingBuilder(8, 3.0, 1)
    self.assertEqual(rb.part_power, 8)

    # add more devices than replicas to the ring
    for i in range(10):
        dev = "sdx%s" % i
        rb.add_dev({'id': i, 'region': 0, 'zone': 0, 'weight': 1,
                    'ip': '127.0.0.1', 'port': 10000, 'device': dev})
    rb.rebalance(seed=1)

    # Let's save the ring, and get the nodes for an object
    ring_file = os.path.join(self.testdir, 'test_partpower.ring.gz')
    rd = rb.get_ring()
    rd.save(ring_file)
    r = ring.Ring(ring_file)
    old_part, old_nodes = r.get_nodes("acc", "cont", "obj")
    old_version = rb.version

    rb.increase_partition_power()
    rb.validate()
    changed_parts, _balance, removed_devs = rb.rebalance()

    # The doubled ring must already be balanced: nothing moves.
    self.assertEqual(changed_parts, 0)
    self.assertEqual(removed_devs, 0)

    rd = rb.get_ring()
    rd.save(ring_file)
    r = ring.Ring(ring_file)
    new_part, new_nodes = r.get_nodes("acc", "cont", "obj")

    # sanity checks
    self.assertEqual(rb.part_power, 9)
    # increase_partition_power() bumps the version once; the second bump
    # presumably comes from the rebalance() call above.
    self.assertEqual(rb.version, old_version + 2)

    # make sure there is always the same device assigned to every pair of
    # partitions
    for replica in rb._replica2part2dev:
        for part in range(0, len(replica), 2):
            dev = replica[part]
            next_dev = replica[part + 1]
            self.assertEqual(dev, next_dev)

    # same for last_part moves; size the loop by the table being checked
    # instead of by the loop variable left over from the block above
    last_part_moves = rb._last_part_moves
    for part in range(0, len(last_part_moves), 2):
        this_last_moved = last_part_moves[part]
        next_last_moved = last_part_moves[part + 1]
        self.assertEqual(this_last_moved, next_last_moved)

    # Due to the increased partition power, the partition each object is
    # assigned to has changed. If the old partition was X, it will now be
    # either located in 2*X or 2*X+1
    self.assertIn(new_part, [old_part * 2, old_part * 2 + 1])

    # Importantly, we expect the objects to be placed on the same nodes
    # after increasing the partition power
    self.assertEqual(old_nodes, new_nodes)
|
||||
|
||||
|
||||
class TestGetRequiredOverload(unittest.TestCase):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user