Merge "Use just IP, not port, when determining partition placement"

This commit is contained in:
Jenkins 2015-06-18 18:10:17 +00:00 committed by Gerrit Code Review
commit 65fce49b3b
5 changed files with 157 additions and 124 deletions

View File

@ -179,18 +179,17 @@ class Ring(object):
# doing it on every call to get_more_nodes().
regions = set()
zones = set()
ip_ports = set()
ips = set()
self._num_devs = 0
for dev in self._devs:
if dev:
regions.add(dev['region'])
zones.add((dev['region'], dev['zone']))
ip_ports.add((dev['region'], dev['zone'],
dev['ip'], dev['port']))
ips.add((dev['region'], dev['zone'], dev['ip']))
self._num_devs += 1
self._num_regions = len(regions)
self._num_zones = len(zones)
self._num_ip_ports = len(ip_ports)
self._num_ips = len(ips)
def _rebuild_tier_data(self):
self.tier2devs = defaultdict(list)
@ -329,8 +328,8 @@ class Ring(object):
used = set(d['id'] for d in primary_nodes)
same_regions = set(d['region'] for d in primary_nodes)
same_zones = set((d['region'], d['zone']) for d in primary_nodes)
same_ip_ports = set((d['region'], d['zone'], d['ip'], d['port'])
for d in primary_nodes)
same_ips = set(
(d['region'], d['zone'], d['ip']) for d in primary_nodes)
parts = len(self._replica2part2dev_id[0])
start = struct.unpack_from(
@ -356,9 +355,9 @@ class Ring(object):
used.add(dev_id)
same_regions.add(region)
zone = dev['zone']
ip_port = (region, zone, dev['ip'], dev['port'])
ip = (region, zone, dev['ip'])
same_zones.add((region, zone))
same_ip_ports.add(ip_port)
same_ips.add(ip)
if len(same_regions) == self._num_regions:
hit_all_regions = True
break
@ -380,17 +379,17 @@ class Ring(object):
yield dev
used.add(dev_id)
same_zones.add(zone)
ip_port = zone + (dev['ip'], dev['port'])
same_ip_ports.add(ip_port)
ip = zone + (dev['ip'],)
same_ips.add(ip)
if len(same_zones) == self._num_zones:
hit_all_zones = True
break
hit_all_ip_ports = len(same_ip_ports) == self._num_ip_ports
hit_all_ips = len(same_ips) == self._num_ips
for handoff_part in chain(xrange(start, parts, inc),
xrange(inc - ((parts - start) % inc),
start, inc)):
if hit_all_ip_ports:
if hit_all_ips:
# We've exhausted the pool of unused backends, so stop
# looking.
break
@ -398,14 +397,13 @@ class Ring(object):
if handoff_part < len(part2dev_id):
dev_id = part2dev_id[handoff_part]
dev = self._devs[dev_id]
ip_port = (dev['region'], dev['zone'],
dev['ip'], dev['port'])
if dev_id not in used and ip_port not in same_ip_ports:
ip = (dev['region'], dev['zone'], dev['ip'])
if dev_id not in used and ip not in same_ips:
yield dev
used.add(dev_id)
same_ip_ports.add(ip_port)
if len(same_ip_ports) == self._num_ip_ports:
hit_all_ip_ports = True
same_ips.add(ip)
if len(same_ips) == self._num_ips:
hit_all_ips = True
break
hit_all_devs = len(used) == self._num_devs

View File

@ -29,7 +29,7 @@ def tiers_for_dev(dev):
"""
t1 = dev['region']
t2 = dev['zone']
t3 = "{ip}:{port}".format(ip=dev.get('ip'), port=dev.get('port'))
t3 = dev['ip']
t4 = dev['id']
return ((t1,),
@ -48,40 +48,40 @@ def build_tier_tree(devices):
Example:
region 1 -+---- zone 1 -+---- 192.168.101.1:6000 -+---- device id 0
| | |
| | +---- device id 1
| | |
| | +---- device id 2
region 1 -+---- zone 1 -+---- 192.168.101.1 -+---- device id 0
| | |
| | +---- device id 1
| | |
| | +---- device id 2
| |
| +---- 192.168.101.2:6000 -+---- device id 3
| |
| +---- device id 4
| |
| +---- device id 5
| +---- 192.168.101.2 -+---- device id 3
| |
| +---- device id 4
| |
| +---- device id 5
|
+---- zone 2 -+---- 192.168.102.1:6000 -+---- device id 6
| |
| +---- device id 7
| |
| +---- device id 8
+---- zone 2 -+---- 192.168.102.1 -+---- device id 6
| |
| +---- device id 7
| |
| +---- device id 8
|
+---- 192.168.102.2:6000 -+---- device id 9
|
+---- device id 10
+---- 192.168.102.2 -+---- device id 9
|
+---- device id 10
region 2 -+---- zone 1 -+---- 192.168.201.1:6000 -+---- device id 12
| |
| +---- device id 13
| |
| +---- device id 14
region 2 -+---- zone 1 -+---- 192.168.201.1 -+---- device id 12
| |
| +---- device id 13
| |
| +---- device id 14
|
+---- 192.168.201.2:6000 -+---- device id 15
|
+---- device id 16
|
+---- device id 17
+---- 192.168.201.2 -+---- device id 15
|
+---- device id 16
|
+---- device id 17
The tier tree would look like:
{
@ -90,30 +90,30 @@ def build_tier_tree(devices):
(1,): [(1, 1), (1, 2)],
(2,): [(2, 1)],
(1, 1): [(1, 1, 192.168.101.1:6000),
(1, 1, 192.168.101.2:6000)],
(1, 2): [(1, 2, 192.168.102.1:6000),
(1, 2, 192.168.102.2:6000)],
(2, 1): [(2, 1, 192.168.201.1:6000),
(2, 1, 192.168.201.2:6000)],
(1, 1): [(1, 1, 192.168.101.1),
(1, 1, 192.168.101.2)],
(1, 2): [(1, 2, 192.168.102.1),
(1, 2, 192.168.102.2)],
(2, 1): [(2, 1, 192.168.201.1),
(2, 1, 192.168.201.2)],
(1, 1, 192.168.101.1:6000): [(1, 1, 192.168.101.1:6000, 0),
(1, 1, 192.168.101.1:6000, 1),
(1, 1, 192.168.101.1:6000, 2)],
(1, 1, 192.168.101.2:6000): [(1, 1, 192.168.101.2:6000, 3),
(1, 1, 192.168.101.2:6000, 4),
(1, 1, 192.168.101.2:6000, 5)],
(1, 2, 192.168.102.1:6000): [(1, 2, 192.168.102.1:6000, 6),
(1, 2, 192.168.102.1:6000, 7),
(1, 2, 192.168.102.1:6000, 8)],
(1, 2, 192.168.102.2:6000): [(1, 2, 192.168.102.2:6000, 9),
(1, 2, 192.168.102.2:6000, 10)],
(2, 1, 192.168.201.1:6000): [(2, 1, 192.168.201.1:6000, 12),
(2, 1, 192.168.201.1:6000, 13),
(2, 1, 192.168.201.1:6000, 14)],
(2, 1, 192.168.201.2:6000): [(2, 1, 192.168.201.2:6000, 15),
(2, 1, 192.168.201.2:6000, 16),
(2, 1, 192.168.201.2:6000, 17)],
(1, 1, 192.168.101.1): [(1, 1, 192.168.101.1, 0),
(1, 1, 192.168.101.1, 1),
(1, 1, 192.168.101.1, 2)],
(1, 1, 192.168.101.2): [(1, 1, 192.168.101.2, 3),
(1, 1, 192.168.101.2, 4),
(1, 1, 192.168.101.2, 5)],
(1, 2, 192.168.102.1): [(1, 2, 192.168.102.1, 6),
(1, 2, 192.168.102.1, 7),
(1, 2, 192.168.102.1, 8)],
(1, 2, 192.168.102.2): [(1, 2, 192.168.102.2, 9),
(1, 2, 192.168.102.2, 10)],
(2, 1, 192.168.201.1): [(2, 1, 192.168.201.1, 12),
(2, 1, 192.168.201.1, 13),
(2, 1, 192.168.201.1, 14)],
(2, 1, 192.168.201.2): [(2, 1, 192.168.201.2, 15),
(2, 1, 192.168.201.2, 16),
(2, 1, 192.168.201.2, 17)],
}
:devices: device dicts from which to generate the tree

View File

@ -1178,6 +1178,46 @@ class TestRingBuilder(unittest.TestCase):
9: 64,
})
def test_server_per_port(self):
# 3 servers, 3 disks each, with each disk on its own port
rb = ring.RingBuilder(8, 3, 1)
rb.add_dev({'id': 0, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1,
'ip': '10.0.0.1', 'port': 10000, 'device': 'sdx'})
rb.add_dev({'id': 1, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1,
'ip': '10.0.0.1', 'port': 10001, 'device': 'sdy'})
rb.add_dev({'id': 3, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1,
'ip': '10.0.0.2', 'port': 10000, 'device': 'sdx'})
rb.add_dev({'id': 4, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1,
'ip': '10.0.0.2', 'port': 10001, 'device': 'sdy'})
rb.add_dev({'id': 6, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1,
'ip': '10.0.0.3', 'port': 10000, 'device': 'sdx'})
rb.add_dev({'id': 7, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1,
'ip': '10.0.0.3', 'port': 10001, 'device': 'sdy'})
rb.rebalance(seed=1)
rb.add_dev({'id': 2, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1,
'ip': '10.0.0.1', 'port': 10002, 'device': 'sdz'})
rb.add_dev({'id': 5, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1,
'ip': '10.0.0.2', 'port': 10002, 'device': 'sdz'})
rb.add_dev({'id': 8, 'region': 0, 'region': 0, 'zone': 0, 'weight': 1,
'ip': '10.0.0.3', 'port': 10002, 'device': 'sdz'})
rb.pretend_min_part_hours_passed()
rb.rebalance(seed=1)
poorly_dispersed = []
for part in range(rb.parts):
on_nodes = set()
for replica in range(rb.replicas):
dev_id = rb._replica2part2dev[replica][part]
on_nodes.add(rb.devs[dev_id]['ip'])
if len(on_nodes) < rb.replicas:
poorly_dispersed.append(part)
self.assertEqual(poorly_dispersed, [])
def test_load(self):
rb = ring.RingBuilder(8, 3, 1)
devs = [{'id': 0, 'region': 0, 'zone': 0, 'weight': 1,
@ -1503,9 +1543,9 @@ class TestRingBuilder(unittest.TestCase):
self.assertEqual(rb._dispersion_graph, {
(0,): [0, 0, 0, 256],
(0, 0): [0, 0, 0, 256],
(0, 0, '127.0.0.1:10000'): [0, 0, 0, 256],
(0, 0, '127.0.0.1:10000', 0): [0, 128, 128, 0],
(0, 0, '127.0.0.1:10000', 1): [0, 128, 128, 0],
(0, 0, '127.0.0.1'): [0, 0, 0, 256],
(0, 0, '127.0.0.1', 0): [0, 128, 128, 0],
(0, 0, '127.0.0.1', 1): [0, 128, 128, 0],
})
def test_dispersion_with_zero_weight_devices_with_parts(self):
@ -1522,10 +1562,10 @@ class TestRingBuilder(unittest.TestCase):
self.assertEqual(rb._dispersion_graph, {
(0,): [0, 0, 0, 256],
(0, 0): [0, 0, 0, 256],
(0, 0, '127.0.0.1:10000'): [0, 0, 0, 256],
(0, 0, '127.0.0.1:10000', 0): [0, 256, 0, 0],
(0, 0, '127.0.0.1:10000', 1): [0, 256, 0, 0],
(0, 0, '127.0.0.1:10000', 2): [0, 256, 0, 0],
(0, 0, '127.0.0.1'): [0, 0, 0, 256],
(0, 0, '127.0.0.1', 0): [0, 256, 0, 0],
(0, 0, '127.0.0.1', 1): [0, 256, 0, 0],
(0, 0, '127.0.0.1', 2): [0, 256, 0, 0],
})
# now mark a device 2 for decom
rb.set_dev_weight(2, 0.0)
@ -1536,10 +1576,10 @@ class TestRingBuilder(unittest.TestCase):
self.assertEqual(rb._dispersion_graph, {
(0,): [0, 0, 0, 256],
(0, 0): [0, 0, 0, 256],
(0, 0, '127.0.0.1:10000'): [0, 0, 0, 256],
(0, 0, '127.0.0.1:10000', 0): [0, 256, 0, 0],
(0, 0, '127.0.0.1:10000', 1): [0, 256, 0, 0],
(0, 0, '127.0.0.1:10000', 2): [0, 256, 0, 0],
(0, 0, '127.0.0.1'): [0, 0, 0, 256],
(0, 0, '127.0.0.1', 0): [0, 256, 0, 0],
(0, 0, '127.0.0.1', 1): [0, 256, 0, 0],
(0, 0, '127.0.0.1', 2): [0, 256, 0, 0],
})
rb.pretend_min_part_hours_passed()
rb.rebalance(seed=3)
@ -1547,9 +1587,9 @@ class TestRingBuilder(unittest.TestCase):
self.assertEqual(rb._dispersion_graph, {
(0,): [0, 0, 0, 256],
(0, 0): [0, 0, 0, 256],
(0, 0, '127.0.0.1:10000'): [0, 0, 0, 256],
(0, 0, '127.0.0.1:10000', 0): [0, 128, 128, 0],
(0, 0, '127.0.0.1:10000', 1): [0, 128, 128, 0],
(0, 0, '127.0.0.1'): [0, 0, 0, 256],
(0, 0, '127.0.0.1', 0): [0, 128, 128, 0],
(0, 0, '127.0.0.1', 1): [0, 128, 128, 0],
})

View File

@ -480,7 +480,8 @@ class TestRing(TestRingBase):
for device in xrange(1, 4):
rb.add_dev({'id': next_dev_id,
'ip': '1.2.%d.%d' % (zone, server),
'port': 1234, 'zone': zone, 'region': 0,
'port': 1234 + device,
'zone': zone, 'region': 0,
'weight': 1.0})
next_dev_id += 1
rb.rebalance(seed=1)

View File

@ -70,8 +70,8 @@ class TestUtils(unittest.TestCase):
tiers_for_dev(self.test_dev),
((1,),
(1, 1),
(1, 1, '192.168.1.1:6000'),
(1, 1, '192.168.1.1:6000', 0)))
(1, 1, '192.168.1.1'),
(1, 1, '192.168.1.1', 0)))
def test_build_tier_tree(self):
ret = build_tier_tree(self.test_devs)
@ -79,27 +79,27 @@ class TestUtils(unittest.TestCase):
self.assertEqual(ret[()], set([(1,)]))
self.assertEqual(ret[(1,)], set([(1, 1), (1, 2)]))
self.assertEqual(ret[(1, 1)],
set([(1, 1, '192.168.1.2:6000'),
(1, 1, '192.168.1.1:6000')]))
set([(1, 1, '192.168.1.2'),
(1, 1, '192.168.1.1')]))
self.assertEqual(ret[(1, 2)],
set([(1, 2, '192.168.2.2:6000'),
(1, 2, '192.168.2.1:6000')]))
self.assertEqual(ret[(1, 1, '192.168.1.1:6000')],
set([(1, 1, '192.168.1.1:6000', 0),
(1, 1, '192.168.1.1:6000', 1),
(1, 1, '192.168.1.1:6000', 2)]))
self.assertEqual(ret[(1, 1, '192.168.1.2:6000')],
set([(1, 1, '192.168.1.2:6000', 3),
(1, 1, '192.168.1.2:6000', 4),
(1, 1, '192.168.1.2:6000', 5)]))
self.assertEqual(ret[(1, 2, '192.168.2.1:6000')],
set([(1, 2, '192.168.2.1:6000', 6),
(1, 2, '192.168.2.1:6000', 7),
(1, 2, '192.168.2.1:6000', 8)]))
self.assertEqual(ret[(1, 2, '192.168.2.2:6000')],
set([(1, 2, '192.168.2.2:6000', 9),
(1, 2, '192.168.2.2:6000', 10),
(1, 2, '192.168.2.2:6000', 11)]))
set([(1, 2, '192.168.2.2'),
(1, 2, '192.168.2.1')]))
self.assertEqual(ret[(1, 1, '192.168.1.1')],
set([(1, 1, '192.168.1.1', 0),
(1, 1, '192.168.1.1', 1),
(1, 1, '192.168.1.1', 2)]))
self.assertEqual(ret[(1, 1, '192.168.1.2')],
set([(1, 1, '192.168.1.2', 3),
(1, 1, '192.168.1.2', 4),
(1, 1, '192.168.1.2', 5)]))
self.assertEqual(ret[(1, 2, '192.168.2.1')],
set([(1, 2, '192.168.2.1', 6),
(1, 2, '192.168.2.1', 7),
(1, 2, '192.168.2.1', 8)]))
self.assertEqual(ret[(1, 2, '192.168.2.2')],
set([(1, 2, '192.168.2.2', 9),
(1, 2, '192.168.2.2', 10),
(1, 2, '192.168.2.2', 11)]))
def test_is_valid_ip(self):
self.assertTrue(is_valid_ip("127.0.0.1"))
@ -623,11 +623,11 @@ class TestUtils(unittest.TestCase):
def test_dispersion_report(self):
rb = ring.RingBuilder(8, 3, 0)
rb.add_dev({'id': 0, 'region': 1, 'zone': 0, 'weight': 100,
'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'})
'ip': '127.0.0.0', 'port': 10000, 'device': 'sda1'})
rb.add_dev({'id': 1, 'region': 1, 'zone': 1, 'weight': 200,
'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'})
rb.add_dev({'id': 2, 'region': 1, 'zone': 1, 'weight': 200,
'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'})
'ip': '127.0.0.2', 'port': 10002, 'device': 'sda1'})
rb.rebalance(seed=10)
self.assertEqual(rb.dispersion, 39.84375)
@ -635,16 +635,6 @@ class TestUtils(unittest.TestCase):
self.assertEqual(report['worst_tier'], 'r1z1')
self.assertEqual(report['max_dispersion'], 39.84375)
# Each node should store 256 partitions to avoid multiple replicas
# 2/5 of total weight * 768 ~= 307 -> 51 partitions on each node in
# zone 1 are stored at least twice on the nodes
expected = [
['r1z1', 2, '0', '154', '102'],
['r1z1-127.0.0.1:10001', 1, '205', '51', '0'],
['r1z1-127.0.0.1:10001/sda1', 1, '205', '51', '0'],
['r1z1-127.0.0.1:10002', 1, '205', '51', '0'],
['r1z1-127.0.0.1:10002/sda1', 1, '205', '51', '0']]
def build_tier_report(max_replicas, placed_parts, dispersion,
replicas):
return {
@ -653,16 +643,20 @@ class TestUtils(unittest.TestCase):
'dispersion': dispersion,
'replicas': replicas,
}
# Each node should store 256 partitions to avoid multiple replicas
# 2/5 of total weight * 768 ~= 307 -> 51 partitions on each node in
# zone 1 are stored at least twice on the nodes
expected = [
['r1z1', build_tier_report(
2, 256, 39.84375, [0, 0, 154, 102])],
['r1z1-127.0.0.1:10001', build_tier_report(
['r1z1-127.0.0.1', build_tier_report(
1, 256, 19.921875, [0, 205, 51, 0])],
['r1z1-127.0.0.1:10001/sda1', build_tier_report(
['r1z1-127.0.0.1/sda1', build_tier_report(
1, 256, 19.921875, [0, 205, 51, 0])],
['r1z1-127.0.0.1:10002', build_tier_report(
['r1z1-127.0.0.2', build_tier_report(
1, 256, 19.921875, [0, 205, 51, 0])],
['r1z1-127.0.0.1:10002/sda1', build_tier_report(
['r1z1-127.0.0.2/sda1', build_tier_report(
1, 256, 19.921875, [0, 205, 51, 0])],
]
report = dispersion_report(rb, 'r1z1.*', verbose=True)
@ -678,7 +672,7 @@ class TestUtils(unittest.TestCase):
report = dispersion_report(rb)
self.assertEqual(rb.dispersion, 40.234375)
self.assertEqual(report['worst_tier'], 'r1z0-127.0.0.1:10003')
self.assertEqual(report['worst_tier'], 'r1z0-127.0.0.1')
self.assertEqual(report['max_dispersion'], 30.078125)