From c70abba52958a78d21c00a08488d1c2a7c3df554 Mon Sep 17 00:00:00 2001 From: Clay Gerrard Date: Thu, 17 Mar 2016 11:55:14 -0700 Subject: [PATCH] Adjust replica count before pulling parts from failed devices When your device count falls below your replica count you can either add devices or reduce the replica count. Trying to reduce your replica count fails about half the time because removing parts from from failed devices temporarily invalidates your _replica2part2dev table with NONE_DEV which can result in an IndexError in _adjust_replica2part2dev_size. If you adjust the replica count first you won't have to worry about tracking unassigned parts from failed devices. Closes-Bug: #1558751 Change-Id: I99dc776fd260a2ba68ca77d7b5ed5120d10b06de --- swift/common/ring/builder.py | 4 ++-- test/unit/common/ring/test_builder.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py index 0459cd60de..ee25ad7caa 100644 --- a/swift/common/ring/builder.py +++ b/swift/common/ring/builder.py @@ -443,10 +443,10 @@ class RingBuilder(object): self._set_parts_wanted(replica_plan) assign_parts = defaultdict(list) - # gather parts from failed devices - removed_devs = self._gather_parts_from_failed_devices(assign_parts) # gather parts from replica count adjustment self._adjust_replica2part2dev_size(assign_parts) + # gather parts from failed devices + removed_devs = self._gather_parts_from_failed_devices(assign_parts) # gather parts for dispersion (N.B. this only picks up parts that # *must* disperse according to the replica plan) self._gather_parts_for_dispersion(assign_parts, replica_plan) diff --git a/test/unit/common/ring/test_builder.py b/test/unit/common/ring/test_builder.py index 454c6a130a..c8c5023a30 100644 --- a/test/unit/common/ring/test_builder.py +++ b/test/unit/common/ring/test_builder.py @@ -1385,6 +1385,21 @@ class TestRingBuilder(unittest.TestCase): rb.rebalance() # this would crash since parts_wanted was not set rb.validate() + def test_reduce_replicas_after_remove_device(self): + rb = ring.RingBuilder(8, 3, 1) + rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 3, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 1, 'region': 0, 'zone': 0, 'weight': 3, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 3, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.rebalance() + rb.remove_dev(0) + self.assertRaises(exceptions.RingValidationError, rb.rebalance) + rb.set_replicas(2) + rb.rebalance() + rb.validate() + def test_rebalance_post_upgrade(self): rb = ring.RingBuilder(8, 3, 1) # 5 devices: 5 is the smallest number that does not divide 3 * 2^8,