Fix ring-builder crash
If you had a partition with 1 replica on a deleted device, 1 on a zero-weight device, and 1 on a normal device, then rebalancing would crash.

This was because the ring builder was memoizing tiers_for_dev() only for *available* devices (i.e. weight > 0 and not deleted), but depended on it being present for all devices that still had partitions on them. Since the builder moved the replica off the deleted device but left the one on the zero-weight device alone, an unavailable device could still hold a partition replica, triggering the crash.

Now we memoize tiers_for_dev() for *all* devices, not just available ones, thereby fixing the crash.

Change-Id: Ie0b58b65e0353732cf785ab772e95e699f3a5b5d
This commit is contained in:
parent
86668aa1ac
commit
e7fb089f59
@ -754,6 +754,7 @@ class RingBuilder(object):
|
||||
"""
|
||||
for dev in self._iter_devs():
|
||||
dev['sort_key'] = self._sort_key_for(dev)
|
||||
dev['tiers'] = tiers_for_dev(dev)
|
||||
|
||||
available_devs = \
|
||||
sorted((d for d in self._iter_devs() if d['weight']),
|
||||
@ -764,7 +765,6 @@ class RingBuilder(object):
|
||||
tier2dev_sort_key = defaultdict(list)
|
||||
max_tier_depth = 0
|
||||
for dev in available_devs:
|
||||
dev['tiers'] = tiers_for_dev(dev)
|
||||
for tier in dev['tiers']:
|
||||
tier2devs[tier].append(dev) # <-- starts out sorted!
|
||||
tier2dev_sort_key[tier].append(dev['sort_key'])
|
||||
@ -889,7 +889,7 @@ class RingBuilder(object):
|
||||
# Just to save memory and keep from accidental reuse.
|
||||
for dev in self._iter_devs():
|
||||
del dev['sort_key']
|
||||
dev.pop('tiers', None) # May be absent for devices w/o weight
|
||||
del dev['tiers']
|
||||
|
||||
def _sort_key_for(self, dev):
    """Build the tuple that orders devices for partition assignment.

    Sorts primarily by how many partitions the device still wants; a
    random middle component breaks ties so equally-needy devices are
    chosen evenly, and the device id makes the ordering total and
    therefore stable for equal (want, tiebreak) pairs.
    """
    want = dev['parts_wanted']
    tiebreak = random.randint(0, 0xFFFF)
    return (want, tiebreak, dev['id'])
|
||||
|
@ -107,6 +107,33 @@ class TestRingBuilder(unittest.TestCase):
|
||||
self.assertNotEquals(r0.to_dict(), r1.to_dict())
|
||||
self.assertEquals(r1.to_dict(), r2.to_dict())
|
||||
|
||||
def test_rebalance_part_on_deleted_other_part_on_drained(self):
    """Regression test: rebalance must not crash when one replica of a
    partition lives on a removed device and another on a zero-weight
    (drained) device.
    """
    rb = ring.RingBuilder(8, 3, 1)
    for dev_id in range(6):
        rb.add_dev({'id': dev_id, 'region': 1, 'zone': 1, 'weight': 1,
                    'ip': '127.0.0.1', 'port': 10000 + dev_id,
                    'device': 'sda1'})

    rb.rebalance(seed=1)
    # We want a partition where 1 replica is on a removed device, 1
    # replica is on a 0-weight device, and 1 on a normal device. To
    # guarantee we have one, we see where partition 123 is, then
    # manipulate its devices accordingly.
    zero_weight_dev_id = rb._replica2part2dev[1][123]
    delete_dev_id = rb._replica2part2dev[2][123]

    rb.set_dev_weight(zero_weight_dev_id, 0.0)
    rb.remove_dev(delete_dev_id)
    rb.rebalance()
|
||||
|
||||
def test_set_replicas(self):
|
||||
rb = ring.RingBuilder(8, 3.2, 1)
|
||||
rb.devs_changed = False
|
||||
@ -125,7 +152,7 @@ class TestRingBuilder(unittest.TestCase):
|
||||
self.assertRaises(exceptions.DuplicateDeviceError, rb.add_dev, dev)
|
||||
self.assertEqual(dev_id, 0)
|
||||
rb = ring.RingBuilder(8, 3, 1)
|
||||
#test add new dev with no id
|
||||
# test add new dev with no id
|
||||
dev_id = rb.add_dev({'zone': 0, 'region': 1, 'weight': 1,
|
||||
'ip': '127.0.0.1', 'port': 6000})
|
||||
self.assertEquals(rb.devs[0]['id'], 0)
|
||||
|
Loading…
x
Reference in New Issue
Block a user