diff --git a/swift/cli/ringbuilder.py b/swift/cli/ringbuilder.py index a6860c98de..62ff6e9c4b 100755 --- a/swift/cli/ringbuilder.py +++ b/swift/cli/ringbuilder.py @@ -784,6 +784,7 @@ swift-ring-builder rebalance [options] if options.debug: logger = logging.getLogger("swift.ring.builder") + logger.disabled = False logger.setLevel(logging.DEBUG) handler = logging.StreamHandler(stdout) formatter = logging.Formatter("%(levelname)s: %(message)s") diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py index 2feff95d3e..a43887173f 100644 --- a/swift/common/ring/builder.py +++ b/swift/common/ring/builder.py @@ -22,6 +22,8 @@ import math import random import six.moves.cPickle as pickle from copy import deepcopy +from contextlib import contextmanager +import warnings from array import array from collections import defaultdict @@ -36,6 +38,10 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree, \ MAX_BALANCE = 999.99 +class RingValidationWarning(Warning): + pass + + try: # python 2.7+ from logging import NullHandler @@ -112,9 +118,24 @@ class RingBuilder(object): self.logger = logging.getLogger("swift.ring.builder") if not self.logger.handlers: + self.logger.disabled = True # silence "no handler for X" error messages self.logger.addHandler(NullHandler()) + @contextmanager + def debug(self): + """ + Temporarily enables debug logging, useful in tests, e.g. + + with rb.debug(): + rb.rebalance() + """ + self.logger.disabled = False + try: + yield + finally: + self.logger.disabled = True + def weight_of_one_part(self): """ Returns the weight of each partition as calculated from the @@ -376,13 +397,24 @@ class RingBuilder(object): :returns: (number_of_partitions_altered, resulting_balance) """ + num_devices = len([d for d in self._iter_devs() if d['weight'] > 0]) + if num_devices < self.replicas: + warnings.warn(RingValidationWarning( + "Replica count of %(replicas)s requires more " + "than %(num_devices)s devices" % { + 'replicas': self.replicas, + 'num_devices': num_devices, + })) old_replica2part2dev = copy.deepcopy(self._replica2part2dev) if seed is not None: random.seed(seed) - self._effective_overload = min(self.overload, - self.get_required_overload()) + self._effective_overload = self.overload + if self.overload and self.dispersion <= 0: + # iff we're fully dispersed we want to bring in overload + self._effective_overload = min(self.overload, + self.get_required_overload()) self.logger.debug("Using effective overload of %f", self._effective_overload) @@ -547,20 +579,44 @@ class RingBuilder(object): for dev_id in part2dev: dev_usage[dev_id] += 1 - for part, replica in self._each_part_replica(): - dev_id = self._replica2part2dev[replica][part] - if dev_id >= dev_len or not self.devs[dev_id]: - raise exceptions.RingValidationError( - "Partition %d, replica %d was not allocated " - "to a device." % - (part, replica)) - for dev in self._iter_devs(): if not isinstance(dev['port'], int): raise exceptions.RingValidationError( "Device %d has port %r, which is not an integer." % (dev['id'], dev['port'])) + int_replicas = int(math.ceil(self.replicas)) + rep2part_len = map(len, self._replica2part2dev) + # check the assignments of each part's replicas + for part in range(self.parts): + devs_for_part = [] + for replica, part_len in enumerate(rep2part_len): + if part_len <= part: + # last replica may be short on parts because of floating + # replica count + if replica + 1 < int_replicas: + raise exceptions.RingValidationError( + "The partition assignments of replica %r were " + "shorter than expected (%s < %s) - this should " + "only happen for the last replica" % ( + replica, + len(self._replica2part2dev[replica]), + self.parts, + )) + break + dev_id = self._replica2part2dev[replica][part] + if dev_id >= dev_len or not self.devs[dev_id]: + raise exceptions.RingValidationError( + "Partition %d, replica %d was not allocated " + "to a device." % + (part, replica)) + devs_for_part.append(dev_id) + if len(devs_for_part) != len(set(devs_for_part)): + warnings.warn(RingValidationWarning( + "The partition %s has been assigned to " + "duplicate devices %r" % ( + part, devs_for_part))) + if stats: weight_of_one_part = self.weight_of_one_part() worst = 0 @@ -1292,9 +1348,26 @@ class RingBuilder(object): if (roomiest_tier is None or (other_replicas[roomiest_tier] > other_replicas[fudgiest_tier])): - tier = fudgiest_tier + subtier = fudgiest_tier else: - tier = roomiest_tier + subtier = roomiest_tier + # no putting multiples on the same device + if len(subtier) == 4 and ( + subtier in occupied_tiers_by_tier_len[4]): + sibling_tiers = [ + (d['region'], d['zone'], d['ip'], d['id']) + for d in tier2devs[tier]] + unused_sibling_tiers = [ + t for t in sibling_tiers + if t not in occupied_tiers_by_tier_len[4]] + if unused_sibling_tiers: + # anything is better than the alternative + subtier = random.choice(unused_sibling_tiers) + else: + warnings.warn(RingValidationWarning( + "All devices in tier %r already " + "contain a replica" % (tier,))) + tier = subtier depth += 1 dev = tier2devs[tier][-1] diff --git a/test/unit/cli/test_ring_builder_analyzer.py b/test/unit/cli/test_ring_builder_analyzer.py index f69bfcef1b..db23fec30a 100644 --- a/test/unit/cli/test_ring_builder_analyzer.py +++ b/test/unit/cli/test_ring_builder_analyzer.py @@ -31,7 +31,9 @@ class TestRunScenario(unittest.TestCase): scenario = { 'replicas': 3, 'part_power': 8, 'random_seed': 123, 'overload': 0, 'rounds': [[['add', 'r1z2-3.4.5.6:7/sda8', 100], - ['add', 'z2-3.4.5.6:7/sda9', 200]], + ['add', 'z2-3.4.5.6:7/sda9', 200], + ['add', 'z2-3.4.5.6:7/sda10', 200], + ['add', 'z2-3.4.5.6:7/sda11', 200]], [['set_weight', 0, 150]], [['remove', 1]], [['save', builder_path]]]} diff --git a/test/unit/cli/test_ringbuilder.py b/test/unit/cli/test_ringbuilder.py index fac1391cc3..e1fd35ca0e 100644 --- a/test/unit/cli/test_ringbuilder.py +++ b/test/unit/cli/test_ringbuilder.py @@ -72,12 +72,13 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): except OSError: pass - def create_sample_ring(self): - """ Create a sample ring with two devices + def create_sample_ring(self, part_power=6): + """ Create a sample ring with four devices - At least two devices are needed to test removing - a device, since removing the last device of a ring - is not allowed """ + At least four devices are needed to test removing + a device, since having less devices than replicas + is not allowed. + """ # Ensure there is no existing test builder file because # create_sample_ring() might be used more than once in a single test @@ -86,7 +87,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): except OSError: pass - ring = RingBuilder(6, 3, 1) + ring = RingBuilder(part_power, 3, 1) ring.add_dev({'weight': 100.0, 'region': 0, 'zone': 0, @@ -102,6 +103,20 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): 'port': 6001, 'device': 'sda2' }) + ring.add_dev({'weight': 100.0, + 'region': 2, + 'zone': 2, + 'ip': '127.0.0.3', + 'port': 6002, + 'device': 'sdc3' + }) + ring.add_dev({'weight': 100.0, + 'region': 3, + 'zone': 3, + 'ip': '127.0.0.4', + 'port': 6003, + 'device': 'sdd4' + }) ring.save(self.tmpfile) def test_parse_search_values_old_format(self): @@ -153,13 +168,19 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): rb = RingBuilder(8, 3, 0) rb.add_dev({'id': 0, 'region': 1, 'zone': 0, 'weight': 100, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 3, 'region': 1, 'zone': 0, 'weight': 100, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb1'}) rb.add_dev({'id': 1, 'region': 1, 'zone': 1, 'weight': 100, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 4, 'region': 1, 'zone': 1, 'weight': 100, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdb1'}) rb.add_dev({'id': 2, 'region': 1, 'zone': 2, 'weight': 100, 'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'}) + rb.add_dev({'id': 5, 'region': 1, 'zone': 2, 'weight': 100, + 'ip': '127.0.0.1', 'port': 10002, 'device': 'sdb1'}) rb.rebalance() - rb.add_dev({'id': 3, 'region': 2, 'zone': 1, 'weight': 10, + rb.add_dev({'id': 6, 'region': 2, 'zone': 1, 'weight': 10, 'ip': '127.0.0.1', 'port': 10004, 'device': 'sda1'}) rb.pretend_min_part_hours_passed() rb.rebalance() @@ -270,7 +291,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Check that device was created with given data ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['region'], 2) self.assertEqual(dev['zone'], 3) self.assertEqual(dev['ip'], '127.0.0.1') @@ -293,7 +314,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Check that device was created with given data ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['region'], 2) self.assertEqual(dev['zone'], 3) self.assertEqual(dev['ip'], '2001:0:1234::c1c0:abcd:876') @@ -323,7 +344,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Check that device was created with given data ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['region'], 2) self.assertEqual(dev['zone'], 3) self.assertEqual(dev['ip'], '127.0.0.2') @@ -353,7 +374,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Check that device was created with given data ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['region'], 2) self.assertEqual(dev['zone'], 3) self.assertEqual(dev['ip'], '3001:0:1234::c1c0:abcd:876') @@ -383,7 +404,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Check that device was created with given data ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['region'], 2) self.assertEqual(dev['zone'], 3) self.assertEqual(dev['ip'], 'test.test.com') @@ -426,11 +447,10 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that weight was set to 0 - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 0) # Check that device is in list of devices to be removed - dev = [d for d in ring._remove_devs if d['id'] == 0][0] self.assertEqual(dev['region'], 0) self.assertEqual(dev['zone'], 0) self.assertEqual(dev['ip'], '127.0.0.1') @@ -442,7 +462,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): self.assertEqual(dev['meta'], 'some meta data') # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) self.assertFalse([d for d in ring._remove_devs if d['id'] == 1]) @@ -459,11 +479,10 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that weight was set to 0 - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 0) # Check that device is in list of devices to be removed - dev = [d for d in ring._remove_devs if d['id'] == 0][0] self.assertEqual(dev['region'], 0) self.assertEqual(dev['zone'], 0) self.assertEqual(dev['ip'], '127.0.0.1') @@ -475,7 +494,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): self.assertEqual(dev['meta'], 'some meta data') # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) self.assertFalse([d for d in ring._remove_devs if d['id'] == 1]) @@ -499,27 +518,26 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Test ipv6(old format) argv = ["", self.tmpfile, "remove", - "d2r2z3-[2001:0000:1234:0000:0000:C1C0:ABCD:0876]:6000" + "d4r2z3-[2001:0000:1234:0000:0000:C1C0:ABCD:0876]:6000" "R[2::10]:7000/sda3_some meta data"] self.assertRaises(SystemExit, ringbuilder.main, argv) ring = RingBuilder.load(self.tmpfile) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 100) self.assertFalse([d for d in ring._remove_devs if d['id'] == 0]) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) self.assertFalse([d for d in ring._remove_devs if d['id'] == 1]) # Check that weight was set to 0 - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['weight'], 0) # Check that device is in list of devices to be removed - dev = [d for d in ring._remove_devs if d['id'] == 2][0] self.assertEqual(dev['region'], 2) self.assertEqual(dev['zone'], 3) self.assertEqual(dev['ip'], '2001:0:1234::c1c0:abcd:876') @@ -549,11 +567,10 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that weight was set to 0 - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 0) # Check that device is in list of devices to be removed - dev = [d for d in ring._remove_devs if d['id'] == 0][0] self.assertEqual(dev['region'], 0) self.assertEqual(dev['zone'], 0) self.assertEqual(dev['ip'], '127.0.0.1') @@ -565,7 +582,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): self.assertEqual(dev['meta'], 'some meta data') # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) self.assertFalse([d for d in ring._remove_devs if d['id'] == 1]) @@ -589,7 +606,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Test ipv6(new format) argv = \ ["", self.tmpfile, "remove", - "--id", "2", "--region", "2", "--zone", "3", + "--id", "4", "--region", "2", "--zone", "3", "--ip", "[3001:0000:1234:0000:0000:C1C0:ABCD:0876]", "--port", "8000", "--replication-ip", "[3::10]", @@ -599,21 +616,20 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 100) self.assertFalse([d for d in ring._remove_devs if d['id'] == 0]) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) self.assertFalse([d for d in ring._remove_devs if d['id'] == 1]) # Check that weight was set to 0 - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['weight'], 0) # Check that device is in list of devices to be removed - dev = [d for d in ring._remove_devs if d['id'] == 2][0] self.assertEqual(dev['region'], 2) self.assertEqual(dev['zone'], 3) self.assertEqual(dev['ip'], '3001:0:1234::c1c0:abcd:876') @@ -645,7 +661,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Test domain name argv = \ ["", self.tmpfile, "remove", - "--id", "2", "--region", "2", "--zone", "3", + "--id", "4", "--region", "2", "--zone", "3", "--ip", "test.test.com", "--port", "6000", "--replication-ip", "r.test.com", @@ -655,21 +671,20 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 100) self.assertFalse([d for d in ring._remove_devs if d['id'] == 0]) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) self.assertFalse([d for d in ring._remove_devs if d['id'] == 1]) # Check that weight was set to 0 - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['weight'], 0) # Check that device is in list of devices to be removed - dev = [d for d in ring._remove_devs if d['id'] == 2][0] self.assertEqual(dev['region'], 2) self.assertEqual(dev['zone'], 3) self.assertEqual(dev['ip'], 'test.test.com') @@ -716,11 +731,11 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that weight was changed - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 3.14159265359) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) # Final check, rebalance and check ring is ok @@ -737,11 +752,11 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that weight was changed - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 3.14159265359) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) # Final check, rebalance and check ring is ok @@ -764,21 +779,21 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Test ipv6(old format) argv = ["", self.tmpfile, "set_weight", - "d2r2z3-[2001:0000:1234:0000:0000:C1C0:ABCD:0876]:6000" + "d4r2z3-[2001:0000:1234:0000:0000:C1C0:ABCD:0876]:6000" "R[2::10]:7000/sda3_some meta data", "3.14159265359"] self.assertRaises(SystemExit, ringbuilder.main, argv) ring = RingBuilder.load(self.tmpfile) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 100) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) # Check that weight was changed - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['weight'], 3.14159265359) # Final check, rebalance and check ring is ok @@ -800,11 +815,11 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that weight was changed - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 3.14159265359) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) # Final check, rebalance and check ring is ok @@ -828,7 +843,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Test ipv6(new format) argv = \ ["", self.tmpfile, "set_weight", - "--id", "2", "--region", "2", "--zone", "3", + "--id", "4", "--region", "2", "--zone", "3", "--ip", "[2001:0000:1234:0000:0000:C1C0:ABCD:0876]", "--port", "6000", "--replication-ip", "[2::10]", @@ -838,15 +853,15 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 100) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) # Check that weight was changed - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['weight'], 3.14159265359) # Final check, rebalance and check ring is ok @@ -870,7 +885,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Test domain name argv = \ ["", self.tmpfile, "set_weight", - "--id", "2", "--region", "2", "--zone", "3", + "--id", "4", "--region", "2", "--zone", "3", "--ip", "test.test.com", "--port", "6000", "--replication-ip", "r.test.com", @@ -880,15 +895,15 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['weight'], 100) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['weight'], 100) # Check that weight was changed - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['weight'], 3.14159265359) # Final check, rebalance and check ring is ok @@ -927,14 +942,14 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Check that device was created with given data ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['ip'], '127.0.1.1') self.assertEqual(dev['port'], 8000) self.assertEqual(dev['device'], 'sda1') self.assertEqual(dev['meta'], 'other meta data') # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['ip'], '127.0.0.2') self.assertEqual(dev['port'], 6001) self.assertEqual(dev['device'], 'sda2') @@ -954,7 +969,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Check that device was created with given data ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['ip'], '127.0.1.1') self.assertEqual(dev['port'], 8000) self.assertEqual(dev['replication_ip'], '127.0.1.1') @@ -963,7 +978,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): self.assertEqual(dev['meta'], 'other meta data') # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['ip'], '127.0.0.2') self.assertEqual(dev['port'], 6001) self.assertEqual(dev['device'], 'sda2') @@ -989,7 +1004,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Test ipv6(old format) argv = ["", self.tmpfile, "set_info", - "d2r2z3-[2001:0000:1234:0000:0000:C1C0:ABCD:0876]:6000" + "d4r2z3-[2001:0000:1234:0000:0000:C1C0:ABCD:0876]:6000" "R[2::10]:7000/sda3_some meta data", "[3001:0000:1234:0000:0000:C1C0:ABCD:0876]:8000" "R[3::10]:8000/sda30_other meta data"] @@ -997,7 +1012,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['ip'], '127.0.0.1') self.assertEqual(dev['port'], 6000) self.assertEqual(dev['replication_ip'], '127.0.0.1') @@ -1006,15 +1021,14 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): self.assertEqual(dev['meta'], 'some meta data') # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['ip'], '127.0.0.2') self.assertEqual(dev['port'], 6001) self.assertEqual(dev['device'], 'sda2') self.assertEqual(dev['meta'], '') # Check that device was created with given data - ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['ip'], '3001:0:1234::c1c0:abcd:876') self.assertEqual(dev['port'], 8000) self.assertEqual(dev['replication_ip'], '3::10') @@ -1046,7 +1060,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Check that device was created with given data ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['ip'], '127.0.2.1') self.assertEqual(dev['port'], 9000) self.assertEqual(dev['replication_ip'], '127.0.2.1') @@ -1055,7 +1069,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): self.assertEqual(dev['meta'], 'other meta data') # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['ip'], '127.0.0.2') self.assertEqual(dev['port'], 6001) self.assertEqual(dev['device'], 'sda2') @@ -1082,7 +1096,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Test ipv6(new format) argv = \ ["", self.tmpfile, "set_info", - "--id", "2", "--region", "2", "--zone", "3", + "--id", "4", "--region", "2", "--zone", "3", "--ip", "[2001:0000:1234:0000:0000:C1C0:ABCD:0876]", "--port", "6000", "--replication-ip", "[2::10]", @@ -1097,7 +1111,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['ip'], '127.0.0.1') self.assertEqual(dev['port'], 6000) self.assertEqual(dev['replication_ip'], '127.0.0.1') @@ -1106,7 +1120,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): self.assertEqual(dev['meta'], 'some meta data') # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['ip'], '127.0.0.2') self.assertEqual(dev['port'], 6001) self.assertEqual(dev['device'], 'sda2') @@ -1114,7 +1128,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Check that device was created with given data ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['ip'], '4001:0:1234::c1c0:abcd:876') self.assertEqual(dev['port'], 9000) self.assertEqual(dev['replication_ip'], '4::10') @@ -1143,7 +1157,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): # Test domain name argv = \ ["", self.tmpfile, "set_info", - "--id", "2", "--region", "2", "--zone", "3", + "--id", "4", "--region", "2", "--zone", "3", "--ip", "test.test.com", "--port", "6000", "--replication-ip", "r.test.com", @@ -1158,7 +1172,7 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): ring = RingBuilder.load(self.tmpfile) # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 0][0] + dev = ring.devs[0] self.assertEqual(dev['ip'], '127.0.0.1') self.assertEqual(dev['port'], 6000) self.assertEqual(dev['replication_ip'], '127.0.0.1') @@ -1167,15 +1181,14 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): self.assertEqual(dev['meta'], 'some meta data') # Check that second device in ring is not affected - dev = [d for d in ring.devs if d['id'] == 1][0] + dev = ring.devs[1] self.assertEqual(dev['ip'], '127.0.0.2') self.assertEqual(dev['port'], 6001) self.assertEqual(dev['device'], 'sda2') self.assertEqual(dev['meta'], '') # Check that device was created with given data - ring = RingBuilder.load(self.tmpfile) - dev = [d for d in ring.devs if d['id'] == 2][0] + dev = ring.devs[-1] self.assertEqual(dev['ip'], 'test.test2.com') self.assertEqual(dev['port'], 9000) self.assertEqual(dev['replication_ip'], 'r.test2.com') @@ -1705,7 +1718,15 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): self.assertEqual(err.code, 2) def test_warn_at_risk(self): - self.create_sample_ring() + # when the number of total part replicas (3 * 2 ** 4 = 48 in + # this ring) is less than the total units of weight (310 in this + # ring) the relative number of parts per unit of weight (called + # weight_of_one_part) is less than 1 - and each whole part + # placed takes up a larger ratio of the fractional number of + # parts the device wants - so it's much more difficult to + # satisfy a device's weight exactly - that is to say less parts + # to go around tends to make things lumpy + self.create_sample_ring(4) ring = RingBuilder.load(self.tmpfile) ring.devs[0]['weight'] = 10 ring.save(self.tmpfile) @@ -1717,6 +1738,27 @@ class TestCommands(unittest.TestCase, RunSwiftRingBuilderMixin): err = e self.assertEqual(err.code, 1) + def test_no_warn_when_balanced(self): + # when the number of total part replicas (3 * 2 ** 10 = 3072 in + # this ring) is larger than the total units of weight (310 in + # this ring) the relative number of parts per unit of weight + # (called weight_of_one_part) is more than 1 - and each whole + # part placed takes up a smaller ratio of the larger number of + # parts the device wants - so it's much easier to satisfy a + # device's weight exactly - that is to say more parts to go + # around tends to smooth things out + self.create_sample_ring(10) + ring = RingBuilder.load(self.tmpfile) + ring.devs[0]['weight'] = 10 + ring.save(self.tmpfile) + argv = ["", self.tmpfile, "rebalance"] + err = None + try: + ringbuilder.main(argv) + except SystemExit as e: + err = e + self.assertEqual(err.code, 0) + def test_invalid_device_name(self): self.create_sample_ring() for device_name in ["", " ", " sda1", "sda1 ", " meta "]: diff --git a/test/unit/common/ring/test_builder.py b/test/unit/common/ring/test_builder.py index 367ea05239..d52faa60c8 100644 --- a/test/unit/common/ring/test_builder.py +++ b/test/unit/common/ring/test_builder.py @@ -25,12 +25,13 @@ from collections import defaultdict from math import ceil from tempfile import mkdtemp from shutil import rmtree +import warnings from six.moves import range from swift.common import exceptions from swift.common import ring -from swift.common.ring.builder import MAX_BALANCE +from swift.common.ring.builder import MAX_BALANCE, RingValidationWarning class TestRingBuilder(unittest.TestCase): @@ -41,15 +42,15 @@ class TestRingBuilder(unittest.TestCase): def tearDown(self): rmtree(self.testdir, ignore_errors=1) - def _partition_counts(self, builder): + def _partition_counts(self, builder, key='id'): """ - Returns a dictionary mapping (device ID) to (number of partitions - assigned to that device). + Returns a dictionary mapping the given device key to (number of + partitions assigned to to that key). """ - counts = {} + counts = defaultdict(int) for part2dev_id in builder._replica2part2dev: for dev_id in part2dev_id: - counts[dev_id] = counts.get(dev_id, 0) + 1 + counts[builder.devs[dev_id][key]] += 1 return counts def _get_population_by_region(self, builder): @@ -57,13 +58,7 @@ class TestRingBuilder(unittest.TestCase): Returns a dictionary mapping region to number of partitions in that region. """ - population_by_region = defaultdict(int) - r = builder.get_ring() - for part2dev_id in r._replica2part2dev_id: - for dev_id in part2dev_id: - dev = r.devs[dev_id] - population_by_region[dev['region']] += 1 - return dict(population_by_region.items()) + return self._partition_counts(builder, key='region') def test_init(self): rb = ring.RingBuilder(8, 3, 1) @@ -91,12 +86,24 @@ class TestRingBuilder(unittest.TestCase): rb = ring.RingBuilder(8, 3, 1) rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 4, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb1'}) + rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 5, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdb1'}) + rb.add_dev({'id': 2, 'region': 0, 'zone': 2, 'weight': 1, 'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'}) + rb.add_dev({'id': 6, 'region': 0, 'zone': 2, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10002, 'device': 'sdb1'}) + + # more devices in zone #1 rb.add_dev({'id': 3, 'region': 0, 'zone': 1, 'weight': 1, - 'ip': '127.0.0.1', 'port': 10004, 'device': 'sda1'}) + 'ip': '127.0.0.1', 'port': 10004, 'device': 'sdc1'}) + rb.add_dev({'id': 7, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10004, 'device': 'sdd1'}) rb.rebalance() rb_copy = copy.deepcopy(rb) @@ -134,9 +141,13 @@ class TestRingBuilder(unittest.TestCase): ring_builders = [] for n in range(3): rb = ring.RingBuilder(8, 3, 1) - for idx, (zone, port) in enumerate(devs): - rb.add_dev({'id': idx, 'region': 0, 'zone': zone, 'weight': 1, - 'ip': '127.0.0.1', 'port': port, 'device': 'sda1'}) + idx = 0 + for zone, port in devs: + for d in ('sda1', 'sdb1'): + rb.add_dev({'id': idx, 'region': 0, 'zone': zone, + 'ip': '127.0.0.1', 'port': port, + 'device': d, 'weight': 1}) + idx += 1 ring_builders.append(rb) rb0 = ring_builders[0] @@ -471,22 +482,28 @@ class TestRingBuilder(unittest.TestCase): (part, dev_id, replica_count, counts['dev_id'])) def test_multitier_overfull(self): - # Multitier test, #replicas > #devs + 2 (to prove even distribution) + # Multitier test, #replicas > #zones (to prove even distribution) rb = ring.RingBuilder(8, 8, 1) rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) rb.add_dev({'id': 1, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb'}) + rb.add_dev({'id': 6, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdg'}) rb.add_dev({'id': 2, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc'}) rb.add_dev({'id': 3, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdd'}) + rb.add_dev({'id': 7, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdh'}) rb.add_dev({'id': 4, 'region': 0, 'zone': 2, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sde'}) rb.add_dev({'id': 5, 'region': 0, 'zone': 2, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdf'}) + rb.add_dev({'id': 8, 'region': 0, 'zone': 2, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdi'}) rb.rebalance() rb.validate() @@ -513,21 +530,33 @@ class TestRingBuilder(unittest.TestCase): def test_multitier_expansion_more_devices(self): rb = ring.RingBuilder(8, 6, 1) - rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, + rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) - rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 1, + rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb'}) - rb.add_dev({'id': 2, 'region': 0, 'zone': 2, 'weight': 1, + rb.add_dev({'id': 2, 'region': 0, 'zone': 2, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc'}) + rb.add_dev({'id': 6, 'region': 0, 'zone': 0, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 7, 'region': 0, 'zone': 1, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb'}) + rb.add_dev({'id': 8, 'region': 0, 'zone': 2, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc'}) rb.rebalance() rb.validate() - rb.add_dev({'id': 3, 'region': 0, 'zone': 0, 'weight': 1, + rb.add_dev({'id': 3, 'region': 0, 'zone': 0, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd'}) - rb.add_dev({'id': 4, 'region': 0, 'zone': 1, 'weight': 1, + rb.add_dev({'id': 4, 'region': 0, 'zone': 1, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sde'}) - rb.add_dev({'id': 5, 'region': 0, 'zone': 2, 'weight': 1, + rb.add_dev({'id': 5, 'region': 0, 'zone': 2, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdf'}) + rb.add_dev({'id': 9, 'region': 0, 'zone': 0, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd'}) + rb.add_dev({'id': 10, 'region': 0, 'zone': 1, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sde'}) + rb.add_dev({'id': 11, 'region': 0, 'zone': 2, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdf'}) for _ in range(5): @@ -544,13 +573,18 @@ class TestRingBuilder(unittest.TestCase): counts['dev_id'][dev['id']] += 1 self.assertEquals({0: 2, 1: 2, 2: 2}, dict(counts['zone'])) - self.assertEquals({0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1}, - dict(counts['dev_id'])) + # each part is assigned once to six unique devices + self.assertEqual((counts['dev_id'].values()), [1] * 6) + self.assertEqual(len(set(counts['dev_id'].keys())), 6) def test_multitier_part_moves_with_0_min_part_hours(self): rb = ring.RingBuilder(8, 3, 0) rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 3, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd1'}) + rb.add_dev({'id': 4, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sde1'}) rb.rebalance() rb.validate() @@ -576,14 +610,18 @@ class TestRingBuilder(unittest.TestCase): rb = ring.RingBuilder(8, 3, 99) rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 3, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd1'}) + rb.add_dev({'id': 4, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sde1'}) rb.rebalance() rb.validate() # min_part_hours is >0, so we'll only be able to move 1 # replica to a new home - rb.add_dev({'id': 1, 'region': 0, 'zone': 0, 'weight': 1, + rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb1'}) - rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 1, + rb.add_dev({'id': 2, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc1'}) rb.pretend_min_part_hours_passed() rb.rebalance() @@ -593,17 +631,20 @@ class TestRingBuilder(unittest.TestCase): devs = set() for replica in range(rb.replicas): devs.add(rb._replica2part2dev[replica][part]) - - if len(devs) != 2: + if not any(rb.devs[dev_id]['zone'] == 1 for dev_id in devs): raise AssertionError( - "Partition %d not on 2 devs (got %r)" % (part, devs)) + "Partition %d did not move (got %r)" % (part, devs)) def test_multitier_dont_move_too_many_replicas(self): rb = ring.RingBuilder(8, 3, 0) # there'll be at least one replica in z0 and z1 - rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, + rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) - rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 1, + rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb1'}) + rb.add_dev({'id': 5, 'region': 0, 'zone': 0, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 6, 'region': 0, 'zone': 1, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb1'}) rb.rebalance() rb.validate() @@ -677,6 +718,7 @@ class TestRingBuilder(unittest.TestCase): 'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'}) rb.rebalance() + rb.validate() rb.add_dev({'id': 3, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10003, 'device': 'sda1'}) @@ -686,19 +728,32 @@ class TestRingBuilder(unittest.TestCase): 'ip': '127.0.0.1', 'port': 10005, 'device': 'sda1'}) rb.rebalance() + rb.validate() rb.remove_dev(1) + # well now we have only one device in z0 + rb.set_overload(0.5) + rb.rebalance() + rb.validate() def test_remove_last_partition_from_zero_weight(self): rb = ring.RingBuilder(4, 3, 1) rb.add_dev({'id': 0, 'region': 0, 'zone': 1, 'weight': 1.0, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) - rb.add_dev({'id': 1, 'region': 0, 'zone': 2, 'weight': 2.0, + + rb.add_dev({'id': 1, 'region': 0, 'zone': 2, 'weight': 1.0, 'ip': '127.0.0.2', 'port': 10000, 'device': 'sda'}) - rb.add_dev({'id': 2, 'region': 0, 'zone': 3, 'weight': 3.0, + rb.add_dev({'id': 4, 'region': 0, 'zone': 2, 'weight': 1.0, + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sdb'}) + + rb.add_dev({'id': 2, 'region': 0, 'zone': 3, 'weight': 1.0, 'ip': '127.0.0.3', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 5, 'region': 0, 'zone': 3, 'weight': 1.0, + 'ip': '127.0.0.3', 'port': 10000, 'device': 'sdb'}) + rb.add_dev({'id': 6, 'region': 0, 'zone': 3, 'weight': 1.0, + 'ip': '127.0.0.3', 'port': 10000, 'device': 'sdc'}) rb.add_dev({'id': 3, 'region': 0, 'zone': 3, 'weight': 0.5, 'ip': '127.0.0.3', 'port': 10001, 'device': 'zero'}) @@ -722,11 +777,11 @@ class TestRingBuilder(unittest.TestCase): # big to include here. rb._replica2part2dev = [ # these are the relevant ones - # | | | | - # v v v v - array('H', [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), - array('H', [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2]), - array('H', [0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 2, 2, 2, 2])] + # | | | + # v v v + array('H', [2, 5, 6, 2, 5, 6, 2, 5, 6, 2, 5, 6, 2, 5, 6, 2]), + array('H', [1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4]), + array('H', [0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 5, 6, 2, 5, 6])] rb.set_dev_weight(zero_weight_dev, 0.0) rb.pretend_min_part_hours_passed() @@ -788,10 +843,23 @@ class TestRingBuilder(unittest.TestCase): def test_adding_region_slowly_with_unbalanceable_ring(self): rb = ring.RingBuilder(8, 3, 1) - rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 2, + rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) - rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 2, + rb.add_dev({'id': 4, 'region': 0, 'zone': 0, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb1'}) + rb.add_dev({'id': 5, 'region': 0, 'zone': 0, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc1'}) + rb.add_dev({'id': 6, 'region': 0, 'zone': 0, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd1'}) + + rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 0.5, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 7, 'region': 0, 'zone': 1, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdb1'}) + rb.add_dev({'id': 8, 'region': 0, 'zone': 1, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdc1'}) + rb.add_dev({'id': 9, 'region': 0, 'zone': 1, 'weight': 0.5, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdd1'}) rb.rebalance(seed=2) rb.add_dev({'id': 2, 'region': 1, 'zone': 0, 'weight': 0.25, @@ -807,12 +875,15 @@ class TestRingBuilder(unittest.TestCase): population_by_region = self._get_population_by_region(rb) self.assertEquals(population_by_region, {0: 682, 1: 86}) - self.assertEqual(87, changed_parts) + # only 86 parts *should* move (to the new region) but randomly some + # parts will flop around devices in the original region too + self.assertEqual(90, changed_parts) # and since there's not enough room, subsequent rebalances will not # cause additional assignments to r1 rb.pretend_min_part_hours_passed() rb.rebalance(seed=2) + rb.validate() population_by_region = self._get_population_by_region(rb) self.assertEquals(population_by_region, {0: 682, 1: 86}) @@ -821,6 +892,7 @@ class TestRingBuilder(unittest.TestCase): rb.set_dev_weight(3, 0.5) rb.pretend_min_part_hours_passed() rb.rebalance(seed=2) + rb.validate() population_by_region = self._get_population_by_region(rb) self.assertEquals(population_by_region, {0: 614, 1: 154}) @@ -828,6 +900,7 @@ class TestRingBuilder(unittest.TestCase): rb.set_dev_weight(3, 1.0) rb.pretend_min_part_hours_passed() rb.rebalance(seed=2) + rb.validate() population_by_region = self._get_population_by_region(rb) self.assertEquals(population_by_region, {0: 512, 1: 256}) @@ -848,6 +921,7 @@ class TestRingBuilder(unittest.TestCase): rb.set_dev_weight(5, weight) rb.pretend_min_part_hours_passed() changed_parts, _balance = rb.rebalance(seed=2) + rb.validate() moved_partitions.append(changed_parts) # Ensure that the second region has enough partitions # Otherwise there will be replicas at risk @@ -857,7 +931,7 @@ class TestRingBuilder(unittest.TestCase): # Number of partitions moved on each rebalance # 10/510 * 768 ~ 15.06 -> move at least 15 partitions in first step - ref = [0, 17, 16, 16, 14, 15, 13, 13, 12, 12, 14] + ref = [0, 17, 16, 17, 13, 15, 13, 12, 11, 13, 13] self.assertEqual(ref, moved_partitions) def test_set_replicas_increase(self): @@ -866,6 +940,8 @@ class TestRingBuilder(unittest.TestCase): 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 2, 'region': 0, 'zone': 2, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) rb.rebalance() rb.validate() @@ -888,6 +964,14 @@ class TestRingBuilder(unittest.TestCase): 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 3, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 4, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 5, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) rb.rebalance() rb.validate() @@ -912,6 +996,8 @@ class TestRingBuilder(unittest.TestCase): 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) rb.add_dev({'id': 1, 'region': 0, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 2, 'region': 0, 'region': 0, 'zone': 2, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) rb.rebalance() # passes by not crashing rb.validate() # also passes by not crashing self.assertEqual([len(p2d) for p2d in rb._replica2part2dev], @@ -921,6 +1007,12 @@ class TestRingBuilder(unittest.TestCase): rb = ring.RingBuilder(8, 3, 1) rb.add_dev({'id': 0, 'region': 0, 'region': 0, 'zone': 0, 'weight': 3, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 1, 'region': 0, 'region': 0, 'zone': 0, 'weight': 3, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 2, 'region': 0, 'region': 0, 'zone': 0, 'weight': 3, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 3, 'region': 0, 'region': 0, 'zone': 0, 'weight': 3, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) rb.set_replicas(4) rb.rebalance() # this would crash since parts_wanted was not set rb.validate() @@ -1022,14 +1114,36 @@ class TestRingBuilder(unittest.TestCase): rb = ring.RingBuilder(8, 3, 1) rb.add_dev({'id': 0, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 3, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd'}) + rb.add_dev({'id': 4, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sde'}) + rb.add_dev({'id': 5, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdf'}) + rb.add_dev({'id': 1, 'region': 0, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdb'}) + rb.add_dev({'id': 6, 'region': 0, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdg'}) + rb.add_dev({'id': 7, 'region': 0, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdh'}) + rb.add_dev({'id': 8, 'region': 0, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdi'}) + rb.add_dev({'id': 2, 'region': 0, 'region': 0, 'zone': 2, 'weight': 2, 'ip': '127.0.0.2', 'port': 10002, 'device': 'sdc'}) + rb.add_dev({'id': 9, 'region': 0, 'region': 0, 'zone': 2, 'weight': 2, + 'ip': '127.0.0.2', 'port': 10002, 'device': 'sdj'}) + rb.add_dev({'id': 10, 'region': 0, 'region': 0, 'zone': 2, 'weight': 2, + 'ip': '127.0.0.2', 'port': 10002, 'device': 'sdk'}) + rb.add_dev({'id': 11, 'region': 0, 'region': 0, 'zone': 2, 'weight': 2, + 'ip': '127.0.0.2', 'port': 10002, 'device': 'sdl'}) + rb.rebalance(seed=12345) + rb.validate() # sanity check: balance respects weights, so default - part_counts = self._partition_counts(rb) + part_counts = self._partition_counts(rb, key='zone') self.assertEqual(part_counts[0], 192) self.assertEqual(part_counts[1], 192) self.assertEqual(part_counts[2], 384) @@ -1041,7 +1155,7 @@ class TestRingBuilder(unittest.TestCase): rb.pretend_min_part_hours_passed() rb.rebalance(seed=12345) - part_counts = self._partition_counts(rb) + part_counts = self._partition_counts(rb, key='zone') self.assertEqual(part_counts[0], 212) self.assertEqual(part_counts[1], 212) self.assertEqual(part_counts[2], 344) @@ -1053,7 +1167,7 @@ class TestRingBuilder(unittest.TestCase): rb.pretend_min_part_hours_passed() rb.rebalance(seed=12345) - part_counts = self._partition_counts(rb) + part_counts = self._partition_counts(rb, key='zone') self.assertEqual(part_counts[0], 256) self.assertEqual(part_counts[1], 256) self.assertEqual(part_counts[2], 256) @@ -1066,7 +1180,7 @@ class TestRingBuilder(unittest.TestCase): rb.pretend_min_part_hours_passed() rb.rebalance(seed=12345) - part_counts = self._partition_counts(rb) + part_counts = self._partition_counts(rb, key='zone') self.assertEqual(part_counts[0], 256) self.assertEqual(part_counts[1], 256) self.assertEqual(part_counts[2], 256) @@ -1079,51 +1193,76 @@ class TestRingBuilder(unittest.TestCase): rb.set_overload(0.125) rb.add_dev({'id': 0, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 3, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 4, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 5, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 1, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, - 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb'}) + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sdb'}) + rb.add_dev({'id': 6, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sdb'}) + rb.add_dev({'id': 7, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sdb'}) + rb.add_dev({'id': 8, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sdb'}) + rb.add_dev({'id': 2, 'region': 0, 'region': 0, 'zone': 0, 'weight': 2, - 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc'}) + 'ip': '127.0.0.3', 'port': 10000, 'device': 'sdc'}) + rb.add_dev({'id': 9, 'region': 0, 'region': 0, 'zone': 0, 'weight': 2, + 'ip': '127.0.0.3', 'port': 10000, 'device': 'sdc'}) + rb.add_dev({'id': 10, 'region': 0, 'region': 0, 'zone': 0, 'weight': 2, + 'ip': '127.0.0.3', 'port': 10000, 'device': 'sdc'}) + rb.add_dev({'id': 11, 'region': 0, 'region': 0, 'zone': 0, 'weight': 2, + 'ip': '127.0.0.3', 'port': 10000, 'device': 'sdc'}) rb.rebalance(seed=12345) # sanity check: our overload is big enough to balance things - part_counts = self._partition_counts(rb) - self.assertEqual(part_counts[0], 216) - self.assertEqual(part_counts[1], 216) - self.assertEqual(part_counts[2], 336) + part_counts = self._partition_counts(rb, key='ip') + self.assertEqual(part_counts['127.0.0.1'], 216) + self.assertEqual(part_counts['127.0.0.2'], 216) + self.assertEqual(part_counts['127.0.0.3'], 336) # Add some weight: balance improves - rb.set_dev_weight(0, 1.5) - rb.set_dev_weight(1, 1.5) + for dev in rb.devs: + if dev['ip'] in ('127.0.0.1', '127.0.0.2'): + rb.set_dev_weight(dev['id'], 1.5) rb.pretend_min_part_hours_passed() rb.rebalance(seed=12345) - part_counts = self._partition_counts(rb) - self.assertEqual(part_counts[0], 236) - self.assertEqual(part_counts[1], 236) - self.assertEqual(part_counts[2], 296) + part_counts = self._partition_counts(rb, key='ip') + self.assertEqual(part_counts['127.0.0.1'], 236) + self.assertEqual(part_counts['127.0.0.2'], 236) + self.assertEqual(part_counts['127.0.0.3'], 296) # Even out the weights: balance becomes perfect - rb.set_dev_weight(0, 2) - rb.set_dev_weight(1, 2) + for dev in rb.devs: + if dev['ip'] in ('127.0.0.1', '127.0.0.2'): + rb.set_dev_weight(dev['id'], 2) rb.pretend_min_part_hours_passed() rb.rebalance(seed=12345) - part_counts = self._partition_counts(rb) - self.assertEqual(part_counts[0], 256) - self.assertEqual(part_counts[1], 256) - self.assertEqual(part_counts[2], 256) + part_counts = self._partition_counts(rb, key='ip') + self.assertEqual(part_counts['127.0.0.1'], 256) + self.assertEqual(part_counts['127.0.0.2'], 256) + self.assertEqual(part_counts['127.0.0.3'], 256) - # Add some new devices: balance stays optimal - rb.add_dev({'id': 3, 'region': 0, 'region': 0, 'zone': 0, - 'weight': 2.0 / 3, - 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd'}) - rb.add_dev({'id': 4, 'region': 0, 'region': 0, 'zone': 0, - 'weight': 2.0 / 3, - 'ip': '127.0.0.1', 'port': 10000, 'device': 'sde'}) - rb.add_dev({'id': 5, 'region': 0, 'region': 0, 'zone': 0, - 'weight': 2.0 / 3, - 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdf'}) + # Add a new server: balance stays optimal + rb.add_dev({'id': 12, 'region': 0, 'region': 0, 'zone': 0, + 'weight': 2, + 'ip': '127.0.0.4', 'port': 10000, 'device': 'sdd'}) + rb.add_dev({'id': 13, 'region': 0, 'region': 0, 'zone': 0, + 'weight': 2, + 'ip': '127.0.0.4', 'port': 10000, 'device': 'sde'}) + rb.add_dev({'id': 14, 'region': 0, 'region': 0, 'zone': 0, + 'weight': 2, + 'ip': '127.0.0.4', 'port': 10000, 'device': 'sdf'}) + rb.add_dev({'id': 15, 'region': 0, 'region': 0, 'zone': 0, + 'weight': 2, + 'ip': '127.0.0.4', 'port': 10000, 'device': 'sdf'}) # we're moving more than 1/3 of the replicas but fewer than 2/3, so # we have to do this twice @@ -1132,13 +1271,11 @@ class TestRingBuilder(unittest.TestCase): rb.pretend_min_part_hours_passed() rb.rebalance(seed=12345) - part_counts = self._partition_counts(rb) - self.assertEqual(part_counts[0], 192) - self.assertEqual(part_counts[1], 192) - self.assertEqual(part_counts[2], 192) - self.assertEqual(part_counts[3], 64) - self.assertEqual(part_counts[4], 64) - self.assertEqual(part_counts[5], 64) + part_counts = self._partition_counts(rb, key='ip') + self.assertEqual(part_counts['127.0.0.1'], 192) + self.assertEqual(part_counts['127.0.0.2'], 192) + self.assertEqual(part_counts['127.0.0.3'], 192) + self.assertEqual(part_counts['127.0.0.4'], 192) def test_overload_keeps_balanceable_things_balanced_initially(self): rb = ring.RingBuilder(8, 3, 1) @@ -1501,22 +1638,42 @@ class TestRingBuilder(unittest.TestCase): rb = ring.RingBuilder(8, 3, 1) rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 4, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 5, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 6, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 7, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 8, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 9, 'region': 0, 'zone': 1, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) rb.add_dev({'id': 2, 'region': 0, 'zone': 2, 'weight': 2, 'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'}) + rb.add_dev({'id': 10, 'region': 0, 'zone': 2, 'weight': 2, + 'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'}) + rb.add_dev({'id': 11, 'region': 0, 'zone': 2, 'weight': 2, + 'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'}) + rb.add_dev({'id': 12, 'region': 0, 'zone': 2, 'weight': 2, + 'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'}) rb.add_dev({'id': 3, 'region': 0, 'zone': 3, 'weight': 2, 'ip': '127.0.0.1', 'port': 10003, 'device': 'sda1'}) + rb.add_dev({'id': 13, 'region': 0, 'zone': 3, 'weight': 2, + 'ip': '127.0.0.1', 'port': 10003, 'device': 'sda1'}) + rb.add_dev({'id': 14, 'region': 0, 'zone': 3, 'weight': 2, + 'ip': '127.0.0.1', 'port': 10003, 'device': 'sda1'}) + rb.add_dev({'id': 15, 'region': 0, 'zone': 3, 'weight': 2, + 'ip': '127.0.0.1', 'port': 10003, 'device': 'sda1'}) # Degenerate case: devices added but not rebalanced yet self.assertRaises(exceptions.RingValidationError, rb.validate) rb.rebalance() - r = rb.get_ring() - counts = {} - for part2dev_id in r._replica2part2dev_id: - for dev_id in part2dev_id: - counts[dev_id] = counts.get(dev_id, 0) + 1 + counts = self._partition_counts(rb, key='zone') self.assertEquals(counts, {0: 128, 1: 128, 2: 256, 3: 256}) dev_usage, worst = rb.validate() @@ -1524,7 +1681,12 @@ class TestRingBuilder(unittest.TestCase): self.assertTrue(worst is None) dev_usage, worst = rb.validate(stats=True) - self.assertEquals(list(dev_usage), [128, 128, 256, 256]) + self.assertEquals(list(dev_usage), [32, 32, 64, 64, + 32, 32, 32, # added zone0 + 32, 32, 32, # added zone1 + 64, 64, 64, # added zone2 + 64, 64, 64, # added zone3 + ]) self.assertEquals(int(worst), 0) rb.set_dev_weight(2, 0) @@ -1566,12 +1728,70 @@ class TestRingBuilder(unittest.TestCase): # Validate that zero weight devices with no partitions don't count on # the 'worst' value. self.assertNotEquals(rb.validate(stats=True)[1], MAX_BALANCE) - rb.add_dev({'id': 4, 'region': 0, 'zone': 0, 'weight': 0, + rb.add_dev({'id': 16, 'region': 0, 'zone': 0, 'weight': 0, 'ip': '127.0.0.1', 'port': 10004, 'device': 'sda1'}) rb.pretend_min_part_hours_passed() rb.rebalance() self.assertNotEquals(rb.validate(stats=True)[1], MAX_BALANCE) + def test_validate_partial_replica(self): + rb = ring.RingBuilder(8, 2.5, 1) + rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 1, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdb'}) + rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdc'}) + rb.rebalance() + rb.validate() # sanity + self.assertEqual(len(rb._replica2part2dev[0]), 256) + self.assertEqual(len(rb._replica2part2dev[1]), 256) + self.assertEqual(len(rb._replica2part2dev[2]), 128) + + # now swap partial replica part maps + rb._replica2part2dev[1], rb._replica2part2dev[2] = \ + rb._replica2part2dev[2], rb._replica2part2dev[1] + self.assertRaises(exceptions.RingValidationError, rb.validate) + + def test_validate_duplicate_part_assignment(self): + rb = ring.RingBuilder(8, 3, 1) + rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 1, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdb'}) + rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdc'}) + rb.rebalance() + rb.validate() # sanity + # now double up a device assignment + rb._replica2part2dev[1][200] = rb._replica2part2dev[2][200] + + class SubStringMatcher(object): + def __init__(self, substr): + self.substr = substr + + def __eq__(self, other): + return self.substr in other + + with warnings.catch_warnings(): + # we're firing the warning twice in this test and resetwarnings + # doesn't work - https://bugs.python.org/issue4180 + warnings.simplefilter('always') + + # by default things will work, but log a warning + with mock.patch('sys.stderr') as mock_stderr: + rb.validate() + expected = SubStringMatcher( + 'RingValidationWarning: The partition 200 has been assigned ' + 'to duplicate devices') + # ... but the warning is written to stderr + self.assertEqual(mock_stderr.method_calls, + [mock.call.write(expected)]) + # if you make warnings errors it blows up + with warnings.catch_warnings(): + warnings.filterwarnings('error') + self.assertRaises(RingValidationWarning, rb.validate) + def test_get_part_devices(self): rb = ring.RingBuilder(8, 3, 1) self.assertEqual(rb.get_part_devices(0), []) @@ -1580,11 +1800,13 @@ class TestRingBuilder(unittest.TestCase): 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 2, 'region': 0, 'zone': 2, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) rb.rebalance() part_devs = sorted(rb.get_part_devices(0), key=operator.itemgetter('id')) - self.assertEqual(part_devs, [rb.devs[0], rb.devs[1]]) + self.assertEqual(part_devs, [rb.devs[0], rb.devs[1], rb.devs[2]]) def test_get_part_devices_partial_replicas(self): rb = ring.RingBuilder(8, 2.5, 1) @@ -1592,7 +1814,9 @@ class TestRingBuilder(unittest.TestCase): 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'}) rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 1, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) - rb.rebalance() + rb.add_dev({'id': 2, 'region': 0, 'zone': 2, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.rebalance(seed=9) # note: partition 255 will only have 2 replicas part_devs = sorted(rb.get_part_devices(255), @@ -1606,52 +1830,60 @@ class TestRingBuilder(unittest.TestCase): 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) rb.add_dev({'id': 1, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb'}) - # and a zero weight device - rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 0, + rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc'}) + # and a zero weight device + rb.add_dev({'id': 3, 'region': 0, 'zone': 0, 'weight': 0, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd'}) rb.rebalance() self.assertEqual(rb.dispersion, 0.0) self.assertEqual(rb._dispersion_graph, { (0,): [0, 0, 0, 256], (0, 0): [0, 0, 0, 256], (0, 0, '127.0.0.1'): [0, 0, 0, 256], - (0, 0, '127.0.0.1', 0): [0, 128, 128, 0], - (0, 0, '127.0.0.1', 1): [0, 128, 128, 0], + (0, 0, '127.0.0.1', 0): [0, 256, 0, 0], + (0, 0, '127.0.0.1', 1): [0, 256, 0, 0], + (0, 0, '127.0.0.1', 2): [0, 256, 0, 0], }) def test_dispersion_with_zero_weight_devices_with_parts(self): rb = ring.RingBuilder(8, 3.0, 1) - # add three devices to a single server in a single zone + # add four devices to a single server in a single zone rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) rb.add_dev({'id': 1, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb'}) rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 1, 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc'}) + rb.add_dev({'id': 3, 'region': 0, 'zone': 0, 'weight': 1, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdd'}) rb.rebalance(seed=1) self.assertEqual(rb.dispersion, 0.0) self.assertEqual(rb._dispersion_graph, { (0,): [0, 0, 0, 256], (0, 0): [0, 0, 0, 256], (0, 0, '127.0.0.1'): [0, 0, 0, 256], - (0, 0, '127.0.0.1', 0): [0, 256, 0, 0], - (0, 0, '127.0.0.1', 1): [0, 256, 0, 0], - (0, 0, '127.0.0.1', 2): [0, 256, 0, 0], + (0, 0, '127.0.0.1', 0): [64, 192, 0, 0], + (0, 0, '127.0.0.1', 1): [64, 192, 0, 0], + (0, 0, '127.0.0.1', 2): [64, 192, 0, 0], + (0, 0, '127.0.0.1', 3): [64, 192, 0, 0], }) # now mark a device 2 for decom rb.set_dev_weight(2, 0.0) # we'll rebalance but can't move any parts rb.rebalance(seed=1) - # zero weight tier has one copy of *every* part - self.assertEqual(rb.dispersion, 100.0) + # zero weight tier has one copy of 1/4 part-replica + self.assertEqual(rb.dispersion, 75.0) self.assertEqual(rb._dispersion_graph, { (0,): [0, 0, 0, 256], (0, 0): [0, 0, 0, 256], (0, 0, '127.0.0.1'): [0, 0, 0, 256], - (0, 0, '127.0.0.1', 0): [0, 256, 0, 0], - (0, 0, '127.0.0.1', 1): [0, 256, 0, 0], - (0, 0, '127.0.0.1', 2): [0, 256, 0, 0], + (0, 0, '127.0.0.1', 0): [64, 192, 0, 0], + (0, 0, '127.0.0.1', 1): [64, 192, 0, 0], + (0, 0, '127.0.0.1', 2): [64, 192, 0, 0], + (0, 0, '127.0.0.1', 3): [64, 192, 0, 0], }) + # unlock the stuck parts rb.pretend_min_part_hours_passed() rb.rebalance(seed=3) self.assertEqual(rb.dispersion, 0.0) @@ -1659,10 +1891,115 @@ class TestRingBuilder(unittest.TestCase): (0,): [0, 0, 0, 256], (0, 0): [0, 0, 0, 256], (0, 0, '127.0.0.1'): [0, 0, 0, 256], - (0, 0, '127.0.0.1', 0): [0, 128, 128, 0], - (0, 0, '127.0.0.1', 1): [0, 128, 128, 0], + (0, 0, '127.0.0.1', 0): [0, 256, 0, 0], + (0, 0, '127.0.0.1', 1): [0, 256, 0, 0], + (0, 0, '127.0.0.1', 3): [0, 256, 0, 0], }) + def test_effective_overload(self): + rb = ring.RingBuilder(8, 3, 1) + # z0 + rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 100, + 'ip': '127.0.0.0', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 1, 'region': 0, 'zone': 0, 'weight': 100, + 'ip': '127.0.0.0', 'port': 10000, 'device': 'sdb'}) + rb.add_dev({'id': 2, 'region': 0, 'zone': 0, 'weight': 100, + 'ip': '127.0.0.0', 'port': 10000, 'device': 'sdb'}) + # z1 + rb.add_dev({'id': 3, 'region': 0, 'zone': 1, 'weight': 100, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 4, 'region': 0, 'zone': 1, 'weight': 100, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdb'}) + rb.add_dev({'id': 5, 'region': 0, 'zone': 1, 'weight': 100, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sdc'}) + # z2 + rb.add_dev({'id': 6, 'region': 0, 'zone': 2, 'weight': 100, + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sda'}) + rb.add_dev({'id': 7, 'region': 0, 'zone': 2, 'weight': 100, + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sdb'}) + + # this ring requires overload + required = rb.get_required_overload() + self.assertGreater(required, 0.1) + + # and we'll use a little bit + rb.set_overload(0.1) + + rb.rebalance(seed=7) + rb.validate() + + # but with-out enough overload we're not dispersed + self.assertGreater(rb.dispersion, 0) + + # add the other dev to z2 + rb.add_dev({'id': 8, 'region': 0, 'zone': 2, 'weight': 100, + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sdc'}) + # but also fail another device in the same! + rb.remove_dev(6) + + # we still require overload + required = rb.get_required_overload() + self.assertGreater(required, 0.1) + + rb.pretend_min_part_hours_passed() + rb.rebalance(seed=7) + rb.validate() + + # ... and without enough we're full dispersed + self.assertGreater(rb.dispersion, 0) + + # ok, let's fix z2's weight for real + rb.add_dev({'id': 6, 'region': 0, 'zone': 2, 'weight': 100, + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sda'}) + + # ... technically, we no longer require overload + self.assertEqual(rb.get_required_overload(), 0.0) + + # so let's rebalance w/o resetting min_part_hours + rb.rebalance(seed=7) + rb.validate() + + # ok, we didn't quite disperse + self.assertGreater(rb.dispersion, 0) + + # ... but let's unlock some parts + rb.pretend_min_part_hours_passed() + rb.rebalance(seed=7) + rb.validate() + + # ... and that got it! + self.assertEqual(rb.dispersion, 0) + + def strawman_test(self): + """ + This test demonstrates a trivial failure of part-replica placement. + + If you turn warnings into errors this will fail. + + i.e. + + export PYTHONWARNINGS=error:::swift.common.ring.builder + + N.B. try not to get *too* hung up on doing something silly to make + this particular case pass w/o warnings - it's trivial to write up a + dozen more. + """ + rb = ring.RingBuilder(8, 3, 1) + # z0 + rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 100, + 'ip': '127.0.0.0', 'port': 10000, 'device': 'sda'}) + # z1 + rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 100, + 'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'}) + # z2 + rb.add_dev({'id': 2, 'region': 0, 'zone': 2, 'weight': 200, + 'ip': '127.0.0.2', 'port': 10000, 'device': 'sda'}) + + with warnings.catch_warnings(record=True) as w: + rb.rebalance(seed=7) + rb.validate() + self.assertEqual(len(w), 65) + class TestGetRequiredOverload(unittest.TestCase): def assertApproximately(self, a, b, error=1e-6): diff --git a/test/unit/common/ring/test_utils.py b/test/unit/common/ring/test_utils.py index efd073fde5..370a687abd 100644 --- a/test/unit/common/ring/test_utils.py +++ b/test/unit/common/ring/test_utils.py @@ -644,16 +644,40 @@ class TestUtils(unittest.TestCase): rb = ring.RingBuilder(8, 3, 0) rb.add_dev({'id': 0, 'region': 1, 'zone': 0, 'weight': 100, 'ip': '127.0.0.0', 'port': 10000, 'device': 'sda1'}) + rb.add_dev({'id': 3, 'region': 1, 'zone': 0, 'weight': 100, + 'ip': '127.0.0.0', 'port': 10000, 'device': 'sdb1'}) + rb.add_dev({'id': 4, 'region': 1, 'zone': 0, 'weight': 100, + 'ip': '127.0.0.0', 'port': 10000, 'device': 'sdc1'}) + rb.add_dev({'id': 5, 'region': 1, 'zone': 0, 'weight': 100, + 'ip': '127.0.0.0', 'port': 10000, 'device': 'sdd1'}) + rb.add_dev({'id': 1, 'region': 1, 'zone': 1, 'weight': 200, 'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'}) + rb.add_dev({'id': 6, 'region': 1, 'zone': 1, 'weight': 200, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdb1'}) + rb.add_dev({'id': 7, 'region': 1, 'zone': 1, 'weight': 200, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdc1'}) + rb.add_dev({'id': 8, 'region': 1, 'zone': 1, 'weight': 200, + 'ip': '127.0.0.1', 'port': 10001, 'device': 'sdd1'}) + rb.add_dev({'id': 2, 'region': 1, 'zone': 1, 'weight': 200, 'ip': '127.0.0.2', 'port': 10002, 'device': 'sda1'}) - rb.rebalance(seed=10) + rb.add_dev({'id': 9, 'region': 1, 'zone': 1, 'weight': 200, + 'ip': '127.0.0.2', 'port': 10002, 'device': 'sdb1'}) + rb.add_dev({'id': 10, 'region': 1, 'zone': 1, 'weight': 200, + 'ip': '127.0.0.2', 'port': 10002, 'device': 'sdc1'}) + rb.add_dev({'id': 11, 'region': 1, 'zone': 1, 'weight': 200, + 'ip': '127.0.0.2', 'port': 10002, 'device': 'sdd1'}) - self.assertEqual(rb.dispersion, 39.84375) + # this ring is pretty volatile and the assertions are pretty brittle + # so we use a specific seed + rb.rebalance(seed=100) + rb.validate() + + self.assertEqual(rb.dispersion, 39.0625) report = dispersion_report(rb) self.assertEqual(report['worst_tier'], 'r1z1') - self.assertEqual(report['max_dispersion'], 39.84375) + self.assertEqual(report['max_dispersion'], 39.0625) def build_tier_report(max_replicas, placed_parts, dispersion, replicas): @@ -669,31 +693,36 @@ class TestUtils(unittest.TestCase): # zone 1 are stored at least twice on the nodes expected = [ ['r1z1', build_tier_report( - 2, 256, 39.84375, [0, 0, 154, 102])], + 2, 256, 39.0625, [0, 0, 156, 100])], ['r1z1-127.0.0.1', build_tier_report( - 1, 256, 19.921875, [0, 205, 51, 0])], - ['r1z1-127.0.0.1/sda1', build_tier_report( - 1, 256, 19.921875, [0, 205, 51, 0])], + 1, 256, 19.53125, [0, 206, 50, 0])], ['r1z1-127.0.0.2', build_tier_report( - 1, 256, 19.921875, [0, 205, 51, 0])], - ['r1z1-127.0.0.2/sda1', build_tier_report( - 1, 256, 19.921875, [0, 205, 51, 0])], + 1, 256, 19.53125, [0, 206, 50, 0])], ] - report = dispersion_report(rb, 'r1z1.*', verbose=True) + report = dispersion_report(rb, 'r1z1[^/]*$', verbose=True) graph = report['graph'] - for i in range(len(expected)): - self.assertEqual(expected[i][0], graph[i][0]) - self.assertEqual(expected[i][1], graph[i][1]) + for i, (expected_key, expected_report) in enumerate(expected): + key, report = graph[i] + self.assertEqual( + (key, report), + (expected_key, expected_report) + ) # overcompensate in r1z0 - rb.add_dev({'id': 3, 'region': 1, 'zone': 0, 'weight': 500, - 'ip': '127.0.0.1', 'port': 10003, 'device': 'sda1'}) + rb.add_dev({'id': 12, 'region': 1, 'zone': 0, 'weight': 500, + 'ip': '127.0.0.3', 'port': 10003, 'device': 'sda1'}) + rb.add_dev({'id': 13, 'region': 1, 'zone': 0, 'weight': 500, + 'ip': '127.0.0.3', 'port': 10003, 'device': 'sdb1'}) + rb.add_dev({'id': 14, 'region': 1, 'zone': 0, 'weight': 500, + 'ip': '127.0.0.3', 'port': 10003, 'device': 'sdc1'}) + rb.add_dev({'id': 15, 'region': 1, 'zone': 0, 'weight': 500, + 'ip': '127.0.0.3', 'port': 10003, 'device': 'sdd1'}) rb.rebalance(seed=10) report = dispersion_report(rb) - self.assertEqual(rb.dispersion, 40.234375) - self.assertEqual(report['worst_tier'], 'r1z0-127.0.0.1') - self.assertEqual(report['max_dispersion'], 30.078125) + self.assertEqual(rb.dispersion, 44.53125) + self.assertEqual(report['worst_tier'], 'r1z0-127.0.0.3') + self.assertEqual(report['max_dispersion'], 32.520325203252035) def test_parse_address_old_format(self): # Test old format