Add internal method to increase ring partition power

This method increases the partition power of an existing ring by one. It
neither moves any data nor exposes a CLI command yet; it is only intended to
be used in a future version to do the actual ring modification itself.

An existing object that is currently located on partition X will be placed
either on partition 2*X or 2*X+1 after the partition power has been increased.
The reason for this is the Ring.get_part() method, which does a bitwise shift
to the right.

To avoid actual data movement to different disks or even nodes, the allocation
of partitions to nodes needs to be changed. The allocation is pairwise due to
the above-mentioned new partition scheme. Therefore, devices are allocated like
this, with the partition being the index and the value being the device id:

OLD: 0,    3,    7,    5,    2,    1,    ...
NEW: 0, 0, 3, 3, 7, 7, 5, 5, 2, 2, 1, 1, ...

If an operator stops the cluster, increases the partition power, and renames
and hardlinks the existing data, it is possible to do a power shift without
actually moving data. Please see the partition power spec for further details
on this.

Change-Id: I063fd8077497ee8c14d9065f07b4ec0fb5cbe180
Partially-Implements: spec increasing_partition_power
This commit is contained in:
Christian Schwede 2016-02-15 19:16:08 +00:00 committed by Christian Schwede
parent 5930d74d81
commit 3ff94cb785
2 changed files with 93 additions and 0 deletions

View File

@ -1688,3 +1688,38 @@ class RingBuilder(object):
if matched:
matched_devs.append(dev)
return matched_devs
def increase_partition_power(self):
    """
    Raise the partition power of this builder by one.

    The number of partitions doubles: every old partition X becomes the
    pair of new partitions 2*X and 2*X+1, and both members of the pair are
    assigned to the device that was assigned to X. With the partition as
    index and the device id as value, an assignment row changes like this:

    OLD: 0, 3, 7, 5, 2, 1, ...
    NEW: 0, 0, 3, 3, 7, 7, 5, 5, 2, 2, 1, 1, ...

    The per-device partition counts and the per-partition last-moved
    values are doubled accordingly, and the builder version is bumped.
    """
    # Repeat every device id twice, one replica row at a time, so that
    # new partitions 2*X and 2*X+1 both point at the device of old X.
    self._replica2part2dev = [
        array('H', (dev_id for dev_id in part2dev for _ in range(2)))
        for part2dev in self._replica2part2dev]

    # Each device is now assigned twice as many partitions.
    for dev in self._iter_devs():
        dev['parts'] *= 2

    # The last-moved bookkeeping holds one entry per partition, so it has
    # to be doubled too: each entry is duplicated for the two partitions
    # that replace the old one.
    self._last_part_moves = [
        moved for moved in self._last_part_moves for _ in range(2)]

    self.version += 1
    self.parts *= 2
    self.part_power += 1

View File

@ -2424,6 +2424,64 @@ class TestRingBuilder(unittest.TestCase):
except exceptions.DuplicateDeviceError:
self.fail("device hole not reused")
def test_increase_partition_power(self):
    """
    Increasing the partition power must not change object placement.

    Builds and rebalances a ring, records the partition and nodes of one
    object, increases the partition power and then checks that the
    subsequent rebalance moves nothing, that partitions are assigned
    pairwise, and that the object still maps to the same nodes.
    """
    rb = ring.RingBuilder(8, 3.0, 1)
    self.assertEqual(rb.part_power, 8)

    # add more devices than replicas to the ring
    for i in range(10):
        dev = "sdx%s" % i
        rb.add_dev({'id': i, 'region': 0, 'zone': 0, 'weight': 1,
                    'ip': '127.0.0.1', 'port': 10000, 'device': dev})
    rb.rebalance(seed=1)

    # Let's save the ring, and get the nodes for an object
    ring_file = os.path.join(self.testdir, 'test_partpower.ring.gz')
    rd = rb.get_ring()
    rd.save(ring_file)
    r = ring.Ring(ring_file)
    old_part, old_nodes = r.get_nodes("acc", "cont", "obj")
    old_version = rb.version

    rb.increase_partition_power()
    rb.validate()
    changed_parts, _balance, removed_devs = rb.rebalance()

    # Doubling the partitions pairwise must leave nothing to rebalance
    self.assertEqual(changed_parts, 0)
    self.assertEqual(removed_devs, 0)

    rd = rb.get_ring()
    rd.save(ring_file)
    r = ring.Ring(ring_file)
    new_part, new_nodes = r.get_nodes("acc", "cont", "obj")

    # sanity checks; the version is expected to have advanced twice since
    # old_version was recorded (once per call made above)
    self.assertEqual(rb.part_power, 9)
    self.assertEqual(rb.version, old_version + 2)

    # make sure there is always the same device assigned to every pair of
    # partitions
    for replica in rb._replica2part2dev:
        for part in range(0, len(replica), 2):
            dev = replica[part]
            next_dev = replica[part + 1]
            self.assertEqual(dev, next_dev)

    # same for last_part_moves. It is indexed by partition, so walk all
    # partitions of the builder instead of borrowing the length of
    # whichever replica row the loop above happened to visit last.
    for part in range(0, rb.parts, 2):
        this_last_moved = rb._last_part_moves[part]
        next_last_moved = rb._last_part_moves[part + 1]
        self.assertEqual(this_last_moved, next_last_moved)

    # Due to the increased partition power, the partition each object is
    # assigned to has changed. If the old partition was X, it will now be
    # either located in 2*X or 2*X+1
    self.assertIn(new_part, [old_part * 2, old_part * 2 + 1])

    # Importantly, we expect the objects to be placed on the same nodes
    # after increasing the partition power
    self.assertEqual(old_nodes, new_nodes)
class TestGetRequiredOverload(unittest.TestCase):