swift-manage-shard-ranges repair: check for parent-child overlaps.
Stuck shard ranges have been seen in production. The root cause was traced to
s-m-s-r failing to detect parent-child relationships in overlaps, so it either
shrank child shard ranges into their parents or vice versa. A patch has already
been added to check a minimum age before s-m-s-r performs a repair, which will
most likely prevent this from happening again, but we also need to check
explicitly for parent-child relationships in overlaps during repairs. This
patch does that: it removes any parent or child shard ranges from the donors,
preventing s-m-s-r from shrinking them into acceptor shard ranges.

Drive-by 1: fix up the gap repair probe test. The probe test is no longer
appropriate because repairing parent-child overlaps is no longer allowed, so
replace the test with a manually created gap.

Drive-by 2: address probe test TODOs. The commented assertion would fail
because the node filtering comparison failed to account for the same node
having different indexes when generated for the root versus the shard. Adding
a new iterable function, exclude_nodes, makes the node filtering behave as
expected.

Co-Authored-By: Alistair Coles <alistairncoles@gmail.com>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Change-Id: Iaa89e94a2746ba939fb62449e24bdab9666d7bab
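The parent-child check hinges on ShardRange.is_child_of(), which infers the
relationship purely from shard range names (see the is_child_of docstring
change below). A minimal sketch of that inference, assuming ShardRange and
Timestamp are importable from swift.common.utils and using the same
make_path() naming convention as the new unit tests:

```python
from swift.common.utils import ShardRange, Timestamp

ts_parent = Timestamp.now()
# a parent shard spanning the whole namespace of root container 'c'
parent = ShardRange(ShardRange.make_path('.shards_a', 'c', 'c', ts_parent, 0),
                    ts_parent, lower='', upper='')

ts_child = Timestamp.now()
# a child created by sharding the parent: its name is derived from the
# parent's container name, which is what is_child_of() keys on
child = ShardRange(
    ShardRange.make_path('.shards_a', 'c', parent.container, ts_child, 0),
    ts_child, lower='', upper='p')

assert child.is_child_of(parent)       # inferred from the names alone
assert not parent.is_child_of(child)
```

During repair, any donor that is the parent or the child of an acceptor on the
chosen path is dropped from the donor set instead of being shrunk into the
acceptor.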
parent c9b898972f
commit a53270a15a
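Drive-by 2 works around node dicts that differ only in their 'index' key. The
sketch below, using made-up node dicts, shows why a direct `n != node`
comparison keeps the wrong nodes and how the exclude_nodes() helper added to
test.probe.common (below) matches nodes while ignoring 'index':

```python
from unittest import mock


def exclude_nodes(nodes, *excludes):
    # same logic as the helper added in this change: treat 'index' as a
    # wildcard when comparing node dicts
    for node in nodes:
        match_node = {k: mock.ANY if k == 'index' else v
                      for k, v in node.items()}
        if any(exclude == match_node for exclude in excludes):
            continue
        yield node


# hypothetical node dicts: the same device gets a different 'index' when the
# ring generates nodes for the root partition vs. the shard partition
root_node_0 = {'id': 1, 'device': 'sdb1', 'index': 0}
shard_nodes = [{'id': 1, 'device': 'sdb1', 'index': 2},
               {'id': 2, 'device': 'sdb2', 'index': 0}]

# a plain inequality test keeps both nodes because the indexes differ;
# exclude_nodes drops the matching node regardless of its index
assert [n for n in shard_nodes if n != root_node_0] == shard_nodes
assert [n['id'] for n in exclude_nodes(shard_nodes, root_node_0)] == [2]
```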
@@ -564,9 +564,25 @@ def compact_shard_ranges(broker, args):
    return EXIT_SUCCESS


def _remove_young_overlapping_ranges(acceptor_path, overlapping_donors, args):
    # For range shard repair subcommand, check possible parent-children
    # relationship between acceptors and donors.
def _remove_illegal_overlapping_donors(
        acceptor_path, overlapping_donors, args):
    # Check parent-children relationship in overlaps between acceptors and
    # donors, remove any overlapping parent or child shard range from donors.
    # Note: we can use set() here, since shard range object is hashed by
    # id and all shard ranges in overlapping_donors are unique already.
    parent_child_donors = set()
    for acceptor in acceptor_path:
        parent_child_donors.update(
            [donor for donor in overlapping_donors
             if acceptor.is_child_of(donor) or donor.is_child_of(acceptor)])
    if parent_child_donors:
        overlapping_donors = ShardRangeList(
            [sr for sr in overlapping_donors
             if sr not in parent_child_donors])
        print('%d donor shards ignored due to parent-child relationship '
              'checks' % len(parent_child_donors))

    # Check minimum age requirement in overlaps between acceptors and donors.
    if args.min_shard_age == 0:
        return acceptor_path, overlapping_donors
    ts_now = Timestamp.now()
@@ -583,18 +599,18 @@ def _remove_young_overlapping_ranges(acceptor_path, overlapping_donors, args):
        return acceptor_path, None
    # Remove those overlapping donors whose overlapping acceptors were created
    # within age limit.
    possible_parent_donors = set()
    donors_with_young_overlap_acceptor = set()
    for acceptor_sr in acceptor_path:
        if float(acceptor_sr.timestamp) + args.min_shard_age < float(ts_now):
            continue
        possible_parent_donors.update([sr for sr in qualified_donors
                                       if acceptor_sr.overlaps(sr)])
    if possible_parent_donors:
        donors_with_young_overlap_acceptor.update(
            [sr for sr in qualified_donors if acceptor_sr.overlaps(sr)])
    if donors_with_young_overlap_acceptor:
        qualified_donors = ShardRangeList(
            [sr for sr in qualified_donors
             if sr not in possible_parent_donors])
             if sr not in donors_with_young_overlap_acceptor])
        print('%d donor shards ignored due to existence of overlapping young '
              'acceptors' % len(possible_parent_donors))
              'acceptors' % len(donors_with_young_overlap_acceptor))

    return acceptor_path, qualified_donors
@@ -649,7 +665,7 @@ def _find_overlapping_donors(shard_ranges, own_sr, args):
        'Isolated cleaved and/or active shard ranges in donor ranges',
        acceptor_path, overlapping_donors)

    return _remove_young_overlapping_ranges(
    return _remove_illegal_overlapping_donors(
        acceptor_path, overlapping_donors, args)
@@ -5386,7 +5386,7 @@ class ShardRange(object):
        Test if this shard range is a child of another shard range. The
        parent-child relationship is inferred from the names of the shard
        ranges. This method is limited to work only within the scope of the
        same account.
        same user-facing account (with and without shard prefix).

        :param parent: an instance of ``ShardRange``.
        :return: True if ``parent`` is the parent of this shard range, False
@@ -18,6 +18,7 @@ from __future__ import print_function
import errno
import gc
import json
import mock
import os
from subprocess import Popen, PIPE
import sys
@@ -345,6 +346,27 @@ class Body(object):
    next = __next__


def exclude_nodes(nodes, *excludes):
    """
    Iterate over ``nodes`` yielding only those not in ``excludes``.

    The index key of the node dicts is ignored when matching nodes against the
    ``excludes`` nodes. Index is not a fundamental property of a node but a
    variable annotation added by the Ring depending upon the partition for
    which the nodes were generated.

    :param nodes: an iterable of node dicts.
    :param *excludes: one or more node dicts that should not be yielded.
    :return: yields node dicts.
    """
    for node in nodes:
        match_node = {k: mock.ANY if k == 'index' else v
                      for k, v in node.items()}
        if any(exclude == match_node for exclude in excludes):
            continue
        yield node


class ProbeTest(unittest.TestCase):
    """
    Don't instantiate this directly, use a child class instead.
@@ -41,7 +41,7 @@ from test import annotate_failure
from test.probe import PROXY_BASE_URL
from test.probe.brain import BrainSplitter
from test.probe.common import ReplProbeTest, get_server_number, \
    wait_for_server_to_hangup, ENABLED_POLICIES
    wait_for_server_to_hangup, ENABLED_POLICIES, exclude_nodes
import mock

try:
@@ -3427,9 +3427,9 @@ class TestManagedContainerSharding(BaseTestContainerSharding):

        self.assert_container_listing(expected_obj_names)

    def test_manage_shard_ranges_repair_root_shrinking_gaps(self):
        # provoke shrinking/shrunk gaps by prematurely repairing a transient
        # overlap in root container; repair the gap.
    def test_manage_shard_ranges_repair_parent_child_ranges(self):
        # Test repairing a transient parent-child shard range overlap in the
        # root container, expect no repairs to be done.
        # note: be careful not to add a container listing to this test which
        # would get shard ranges into memcache
        obj_names = self._make_object_names(4)
@@ -3477,19 +3477,17 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
            shard_ranges[0].account, shard_ranges[0].container)
        self.container_replicators.once(
            additional_args='--partitions=%s' % shard_part)
        # TODO: get this assertion working (node filtering wonky??)
        # for node in [n for n in shard_nodes if n != self.brain.nodes[0]]:
        #     self.assert_container_state(
        #         node, 'unsharded', 2, account=shard_ranges[0].account,
        #         container=shard_ranges[0].container, part=shard_part)
        for node in exclude_nodes(shard_nodes, self.brain.nodes[0]):
            self.assert_container_state(
                node, 'unsharded', 2, account=shard_ranges[0].account,
                container=shard_ranges[0].container, part=shard_part)
        self.sharders_once(additional_args='--partitions=%s' % shard_part)
        # get shards to update state from parent...
        self.sharders_once()
        # TODO: get this assertion working (node filtering wonky??)
        # for node in [n for n in shard_nodes if n != self.brain.nodes[0]]:
        #     self.assert_container_state(
        #         node, 'sharded', 2, account=shard_ranges[0].account,
        #         container=shard_ranges[0].container, part=shard_part)
        for node in exclude_nodes(shard_nodes, self.brain.nodes[0]):
            self.assert_container_state(
                node, 'sharded', 2, account=shard_ranges[0].account,
                container=shard_ranges[0].container, part=shard_part)

        # put an object into the second of the 2 sub-shards so that the shard
        # will update the root next time the sharder is run; do this before
@@ -3540,34 +3538,96 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
            [(sr.state, sr.deleted, sr.lower, sr.upper)
             for sr in root_brokers[0].get_shard_ranges(include_deleted=True)])

        # we are allowed to fix the overlap...
        # try to fix the overlap and expect no repair has been done.
        msg = self.assert_subprocess_success(
            ['swift-manage-shard-ranges', root_0_db_file, 'repair', '--yes',
             '--min-shard-age', '0'])
        self.assertIn(
            b'Repairs necessary to remove overlapping shard ranges.', msg)
            b'1 donor shards ignored due to parent-child relationship checks',
            msg)

        # verify parent-child checks has prevented repair to be done.
        self.assertEqual(
            [(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[1]),
             (ShardRange.SHRINKING, False, obj_names[0], obj_names[1]),
             # note: overlap!
             (ShardRange.ACTIVE, False, obj_names[0], obj_names[1]),
             (ShardRange.ACTIVE, False, obj_names[1], ShardRange.MAX)],
            [(sr.state, sr.deleted, sr.lower, sr.upper)
             for sr in root_brokers[0].get_shard_ranges(include_deleted=True)])

        # the transient overlap is 'fixed' in subsequent sharder cycles...
        self.sharders_once()
        self.sharders_once()
        self.container_replicators.once()

        # boo :'( ... we made gap
        for broker in root_brokers:
            self.assertEqual(
                [(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[0]),
                 (ShardRange.ACTIVE, False, obj_names[0], obj_names[1]),
                 (ShardRange.SHARDED, True, ShardRange.MIN, obj_names[1]),
                 (ShardRange.SHRUNK, True, obj_names[0], obj_names[1]),
                 (ShardRange.ACTIVE, False, obj_names[1], ShardRange.MAX)],
                [(sr.state, sr.deleted, sr.lower, sr.upper)
                 for sr in broker.get_shard_ranges(include_deleted=True)])

    def test_manage_shard_ranges_repair_root_gap(self):
        # create a gap in root container; repair the gap.
        # note: be careful not to add a container listing to this test which
        # would get shard ranges into memcache
        obj_names = self._make_object_names(8)
        self.put_objects(obj_names)

        client.post_container(self.url, self.admin_token, self.container_name,
                              headers={'X-Container-Sharding': 'on'})

        # run replicators first time to get sync points set
        self.container_replicators.once(
            additional_args='--partitions=%s' % self.brain.part)

        # shard root
        root_0_db_file = self.get_db_file(self.brain.part, self.brain.nodes[0])
        self.assert_subprocess_success([
            'swift-manage-shard-ranges',
            root_0_db_file,
            'find_and_replace', '2', '--enable'])
        self.container_replicators.once(
            additional_args='--partitions=%s' % self.brain.part)
        for node in self.brain.nodes:
            self.assert_container_state(node, 'unsharded', 4)
        self.sharders_once(additional_args='--partitions=%s' % self.brain.part)
        # get shards to update state from parent...
        self.sharders_once()
        for node in self.brain.nodes:
            self.assert_container_state(node, 'sharded', 4)

        # sanity check, all is well
        msg = self.assert_subprocess_success([
            'swift-manage-shard-ranges', root_0_db_file, 'repair', '--gaps',
            '--dry-run'])
        self.assertIn(b'No repairs necessary.', msg)

        # deliberately create a gap in root shard ranges (don't ever do this
        # for real)
        # TODO: replace direct broker modification with s-m-s-r merge
        root_brokers = [self.get_broker(self.brain.part, node)
                        for node in self.brain.nodes]
        shard_ranges = root_brokers[0].get_shard_ranges()
        self.assertEqual(4, len(shard_ranges))
        shard_ranges[2].set_deleted()
        root_brokers[0].merge_shard_ranges(shard_ranges)
        shard_ranges = root_brokers[0].get_shard_ranges()
        self.assertEqual(3, len(shard_ranges))
        self.container_replicators.once()

        # confirm that we made a gap.
        for broker in root_brokers:
            self.assertEqual(
                [(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[1]),
                 (ShardRange.ACTIVE, False, obj_names[1], obj_names[3]),
                 (ShardRange.ACTIVE, True, obj_names[3], obj_names[5]),
                 (ShardRange.ACTIVE, False, obj_names[5], ShardRange.MAX)],
                [(sr.state, sr.deleted, sr.lower, sr.upper)
                 for sr in broker.get_shard_ranges(include_deleted=True)])

        msg = self.assert_subprocess_success([
            'swift-manage-shard-ranges', root_0_db_file, 'repair', '--gaps',
            '--yes'])
@@ -3580,10 +3640,10 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
        # yay! we fixed the gap (without creating an overlap)
        for broker in root_brokers:
            self.assertEqual(
                [(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[0]),
                 (ShardRange.SHARDED, True, ShardRange.MIN, obj_names[1]),
                 (ShardRange.SHRUNK, True, obj_names[0], obj_names[1]),
                 (ShardRange.ACTIVE, False, obj_names[0], ShardRange.MAX)],
                [(ShardRange.ACTIVE, False, ShardRange.MIN, obj_names[1]),
                 (ShardRange.ACTIVE, False, obj_names[1], obj_names[3]),
                 (ShardRange.ACTIVE, True, obj_names[3], obj_names[5]),
                 (ShardRange.ACTIVE, False, obj_names[3], ShardRange.MAX)],
                [(sr.state, sr.deleted, sr.lower, sr.upper)
                 for sr in broker.get_shard_ranges(include_deleted=True)])
@@ -3596,8 +3656,14 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
            '--dry-run'])
        self.assertIn(b'No repairs necessary.', msg)

        self.assert_container_listing(
            [obj_names[0], new_obj_name] + obj_names[1:])
        # put an object into the gap namespace
        new_objs = [obj_names[4] + 'a']
        self.put_objects(new_objs)
        # get root stats up to date
        self.sharders_once()
        # new object is in listing but old objects in the gap have been lost -
        # don't delete shard ranges!
        self.assert_container_listing(obj_names[:4] + new_objs + obj_names[6:])

    def test_manage_shard_ranges_unsharded_deleted_root(self):
        # verify that a deleted DB will still be sharded
@@ -2391,6 +2391,117 @@ class TestManageShardRanges(unittest.TestCase):
                          key=ShardRange.sort_key)
        self.assert_shard_ranges_equal(expected, updated_ranges)

    def test_repair_parent_overlaps_with_children_donors(self):
        # Verify that the overlap repair command ignores expected transient
        # overlaps between parent shard acceptor and child donor shards.
        root_broker = self._make_broker()
        root_broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
        self.assertTrue(root_broker.is_root_container())

        # The parent shard range would have been set to state SHARDING in the
        # shard container but is still showing as ACTIVE in the root container.
        # (note: it is valid for a single shard to span entire namespace)
        ts_parent = next(self.ts_iter)
        parent_shard = ShardRange(
            ShardRange.make_path('.shards_a', 'c', 'c', ts_parent, 0),
            ts_parent, lower='', upper='', object_count=10,
            state=ShardRange.ACTIVE)

        # Children shards have reported themselves to root as CLEAVING/
        # CREATED.
        ts_child = next(self.ts_iter)
        child_shards = [
            ShardRange(
                ShardRange.make_path(
                    '.shards_a', 'c', parent_shard.container, ts_child, 0),
                ts_child, lower='', upper='p', object_count=1,
                state=ShardRange.CLEAVED),
            ShardRange(
                ShardRange.make_path(
                    '.shards_a', 'c', parent_shard.container, ts_child, 1),
                ts_child, lower='p', upper='', object_count=1,
                state=ShardRange.CLEAVED)]
        root_broker.merge_shard_ranges([parent_shard] + child_shards)

        out = StringIO()
        err = StringIO()
        ts_now = next(self.ts_iter)
        with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err), \
                mock_timestamp_now(ts_now):
            ret = main([root_broker.db_file, 'repair',
                        '--yes', '--min-shard-age', '0'])
        err_lines = err.getvalue().split('\n')
        out_lines = out.getvalue().split('\n')
        self.assertEqual(0, ret, err_lines + out_lines)
        self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
        self.assertNotIn(
            'Repairs necessary to remove overlapping shard ranges.',
            out_lines)
        self.assertEqual(
            ['2 donor shards ignored due to parent-child relationship'
             ' checks'], out_lines[:1])
        updated_ranges = root_broker.get_shard_ranges()
        # Expect no change to shard ranges.
        expected = sorted([parent_shard] + child_shards,
                          key=ShardRange.sort_key)
        self.assert_shard_ranges_equal(expected, updated_ranges)

    def test_repair_children_overlaps_with_parent_donor(self):
        # Verify that the overlap repair command ignores expected transient
        # overlaps between child shard acceptors and parent donor shards.
        root_broker = self._make_broker()
        root_broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
        self.assertTrue(root_broker.is_root_container())

        # The parent shard range would have been set to state SHARDING in the
        # shard container but is still showing as ACTIVE in the root container.
        # (note: it is valid for a single shard to span entire namespace)
        ts_parent = next(self.ts_iter)
        parent_shard = ShardRange(
            ShardRange.make_path('.shards_a', 'c', 'c', ts_parent, 0),
            ts_parent, lower='', upper='', object_count=5,
            state=ShardRange.ACTIVE)

        # Children shards have reported themselves to root as CLEAVING/CREATED,
        # but they will end up with becoming acceptor shards due to having more
        # objects than the above parent shard.
        ts_child = next(self.ts_iter)
        child_shards = [
            ShardRange(
                ShardRange.make_path(
                    '.shards_a', 'c', parent_shard.container, ts_child, 0),
                ts_child, lower='', upper='p', object_count=5,
                state=ShardRange.CLEAVED),
            ShardRange(
                ShardRange.make_path(
                    '.shards_a', 'c', parent_shard.container, ts_child, 1),
                ts_child, lower='p', upper='', object_count=5,
                state=ShardRange.CLEAVED)]
        root_broker.merge_shard_ranges([parent_shard] + child_shards)

        out = StringIO()
        err = StringIO()
        ts_now = next(self.ts_iter)
        with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err), \
                mock_timestamp_now(ts_now):
            ret = main([root_broker.db_file, 'repair',
                        '--yes', '--min-shard-age', '0'])
        err_lines = err.getvalue().split('\n')
        out_lines = out.getvalue().split('\n')
        self.assertEqual(0, ret, err_lines + out_lines)
        self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
        self.assertNotIn(
            'Repairs necessary to remove overlapping shard ranges.',
            out_lines)
        self.assertEqual(
            ['1 donor shards ignored due to parent-child relationship'
             ' checks'], out_lines[:1])
        updated_ranges = root_broker.get_shard_ranges()
        # Expect no change to shard ranges.
        expected = sorted([parent_shard] + child_shards,
                          key=ShardRange.sort_key)
        self.assert_shard_ranges_equal(expected, updated_ranges)

    @with_tempdir
    def test_show_and_analyze(self, tempdir):
        broker = self._make_broker()