swift-manage-shard-ranges repair: ignore recent overlaps

Add an option for the swift-manage-shard-ranges repair tool to ignore
 overlaps where the overlapping shards appear to be recently created,
 since this may indicate that they are shards of the parent with
which they overlap.

The new option is --min-shard-age with a default of 14400 seconds.

Change-Id: Ib82219a506732303a1157c2c9e1c452b4a56061b
This commit is contained in:
Jianjian Huo 2022-08-11 16:54:15 -07:00
parent d8af3d7a0d
commit 61624ab837
3 changed files with 280 additions and 37 deletions

View File

@ -167,7 +167,7 @@ from six.moves import input
from swift.common.utils import Timestamp, get_logger, ShardRange, readconf, \
ShardRangeList
ShardRangeList, non_negative_int, config_positive_int_value
from swift.container.backend import ContainerBroker, UNSHARDED
from swift.container.sharder import make_shard_ranges, sharding_enabled, \
CleavingContext, process_compactible_shard_sequences, \
@ -180,6 +180,8 @@ EXIT_ERROR = 1
EXIT_INVALID_ARGS = 2 # consistent with argparse exit code for invalid args
EXIT_USER_QUIT = 3
MIN_SHARD_RANGE_AGE_FOR_REPAIR = 4 * 3600
# Some CLI options derive their default values from DEFAULT_SHARDER_CONF if
# they have not been set. It is therefore important that the CLI parser
# provides None as a default so that we can detect that no value was set on the
@ -206,6 +208,25 @@ class InvalidSolutionException(ManageShardRangesException):
self.overlapping_donors = overlapping_donors
def wrap_for_argparse(func, msg=None):
"""
Wrap the given ``func`` to catch any ``ValueError`` and raise an
``argparse.ArgumentTypeError`` instead.
:param func: a function.
:param msg: an optional message to use with any exception that is used; if
not given then the string representation of the ValueError will be
used.
:return: a function wrapper.
"""
def wrapped_func(*args, **kwargs):
try:
return func(*args, **kwargs)
except ValueError as err:
raise argparse.ArgumentTypeError(str(err) if msg is None else msg)
return wrapped_func
def _proceed(args):
if args.dry_run:
choice = 'no'
@ -543,6 +564,41 @@ def compact_shard_ranges(broker, args):
return EXIT_SUCCESS
def _remove_young_overlapping_ranges(acceptor_path, overlapping_donors, args):
# For range shard repair subcommand, check possible parent-children
# relationship between acceptors and donors.
if args.min_shard_age == 0:
return acceptor_path, overlapping_donors
ts_now = Timestamp.now()
# Remove overlapping donor shard ranges who were created recently within
# 'min_shard_age' age limit.
qualified_donors = ShardRangeList(
[sr for sr in overlapping_donors
if float(sr.timestamp) + args.min_shard_age < float(ts_now)])
young_donors = len(overlapping_donors) - len(qualified_donors)
if young_donors > 0:
print('%d overlapping donor shards ignored due to minimum age '
'limit' % young_donors)
if not qualified_donors:
return acceptor_path, None
# Remove those overlapping donors whose overlapping acceptors were created
# within age limit.
possible_parent_donors = set()
for acceptor_sr in acceptor_path:
if float(acceptor_sr.timestamp) + args.min_shard_age < float(ts_now):
continue
possible_parent_donors.update([sr for sr in qualified_donors
if acceptor_sr.overlaps(sr)])
if possible_parent_donors:
qualified_donors = ShardRangeList(
[sr for sr in qualified_donors
if sr not in possible_parent_donors])
print('%d donor shards ignored due to existence of overlapping young '
'acceptors' % len(possible_parent_donors))
return acceptor_path, qualified_donors
def _find_overlapping_donors(shard_ranges, own_sr, args):
shard_ranges = ShardRangeList(shard_ranges)
if ShardRange.SHARDING in shard_ranges.states:
@ -593,7 +649,8 @@ def _find_overlapping_donors(shard_ranges, own_sr, args):
'Isolated cleaved and/or active shard ranges in donor ranges',
acceptor_path, overlapping_donors)
return acceptor_path, overlapping_donors
return _remove_young_overlapping_ranges(
acceptor_path, overlapping_donors, args)
def _fix_gaps(broker, args, paths_with_gaps):
@ -775,13 +832,6 @@ def analyze_shard_ranges(args):
return EXIT_SUCCESS
def _positive_int(arg):
val = int(arg)
if val <= 0:
raise argparse.ArgumentTypeError('must be > 0')
return val
def _add_find_args(parser):
parser.add_argument(
'rows_per_shard', nargs='?', type=int, default=USE_SHARDER_DEFAULT,
@ -790,7 +840,8 @@ def _add_find_args(parser):
'given in a conf file specified with --config, otherwise %s.'
% DEFAULT_SHARDER_CONF['rows_per_shard'])
parser.add_argument(
'--minimum-shard-size', type=_positive_int,
'--minimum-shard-size',
type=wrap_for_argparse(config_positive_int_value, 'must be > 0'),
default=USE_SHARDER_DEFAULT,
help='Minimum size of the final shard range. If this is greater than '
'one then the final shard range may be extended to more than '
@ -839,11 +890,12 @@ def _add_prompt_args(parser):
def _add_max_expanding_arg(parser):
parser.add_argument('--max-expanding', nargs='?',
type=_positive_int,
default=USE_SHARDER_DEFAULT,
help='Maximum number of shards that should be '
'expanded. Defaults to unlimited.')
parser.add_argument(
'--max-expanding', nargs='?',
type=wrap_for_argparse(config_positive_int_value, 'must be > 0'),
default=USE_SHARDER_DEFAULT,
help='Maximum number of shards that should be '
'expanded. Defaults to unlimited.')
def _make_parser():
@ -940,32 +992,35 @@ def _make_parser():
'of rows. This command only works on root containers.')
_add_prompt_args(compact_parser)
compact_parser.add_argument(
'--shrink-threshold', nargs='?', type=_positive_int,
'--shrink-threshold', nargs='?',
type=wrap_for_argparse(config_positive_int_value, 'must be > 0'),
default=USE_SHARDER_DEFAULT,
help='The number of rows below which a shard can qualify for '
'shrinking. '
'Defaults to %d' % DEFAULT_SHARDER_CONF['shrink_threshold'])
'shrinking. '
'Defaults to %d' % DEFAULT_SHARDER_CONF['shrink_threshold'])
compact_parser.add_argument(
'--expansion-limit', nargs='?', type=_positive_int,
'--expansion-limit', nargs='?',
type=wrap_for_argparse(config_positive_int_value, 'must be > 0'),
default=USE_SHARDER_DEFAULT,
help='Maximum number of rows for an expanding shard to have after '
'compaction has completed. '
'Defaults to %d' % DEFAULT_SHARDER_CONF['expansion_limit'])
'compaction has completed. '
'Defaults to %d' % DEFAULT_SHARDER_CONF['expansion_limit'])
# If just one donor shard is chosen to shrink to an acceptor then the
# expanded acceptor will handle object listings as soon as the donor shard
# has shrunk. If more than one donor shard are chosen to shrink to an
# acceptor then the acceptor may not handle object listings for some donor
# shards that have shrunk until *all* donors have shrunk, resulting in
# temporary gap(s) in object listings where the shrunk donors are missing.
compact_parser.add_argument('--max-shrinking', nargs='?',
type=_positive_int,
default=USE_SHARDER_DEFAULT,
help='Maximum number of shards that should be '
'shrunk into each expanding shard. '
'Defaults to 1. Using values greater '
'than 1 may result in temporary gaps in '
'object listings until all selected '
'shards have shrunk.')
compact_parser.add_argument(
'--max-shrinking', nargs='?',
type=wrap_for_argparse(config_positive_int_value, 'must be > 0'),
default=USE_SHARDER_DEFAULT,
help='Maximum number of shards that should be '
'shrunk into each expanding shard. '
'Defaults to 1. Using values greater '
'than 1 may result in temporary gaps in '
'object listings until all selected '
'shards have shrunk.')
_add_max_expanding_arg(compact_parser)
compact_parser.set_defaults(func=compact_shard_ranges)
@ -975,6 +1030,14 @@ def _make_parser():
help='Repair overlapping shard ranges. No action will be taken '
'without user confirmation unless the -y option is used.')
_add_prompt_args(repair_parser)
repair_parser.add_argument(
'--min-shard-age', nargs='?',
type=wrap_for_argparse(non_negative_int, 'must be >= 0'),
default=MIN_SHARD_RANGE_AGE_FOR_REPAIR,
help='Minimum age of a shard for it to be considered as an overlap '
'that is due for repair. Overlapping shards younger than this '
'age will be ignored. Value of 0 means no recent shards will be '
'ignored. Defaults to %d.' % MIN_SHARD_RANGE_AGE_FOR_REPAIR)
# TODO: maybe this should be a separate subcommand given that it needs
# some extra options vs repairing overlaps?
repair_parser.add_argument(
@ -988,6 +1051,14 @@ def _make_parser():
'analyze',
help='Analyze shard range json data read from file. Use -v to see '
'more detailed analysis.')
analyze_parser.add_argument(
'--min-shard-age', nargs='?',
type=wrap_for_argparse(non_negative_int, 'must be >= 0'),
default=0,
help='Minimum age of a shard for it to be considered as an overlap '
'that is due for repair. Overlapping shards younger than this '
'age will be ignored. Value of 0 means no recent shards will be '
'ignored. Defaults to 0.')
analyze_parser.set_defaults(func=analyze_shard_ranges)
return parser

View File

@ -3142,7 +3142,8 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
# path with most cleaving progress, and so shrink shard ranges 0.*.
db_file = self.get_db_file(self.brain.part, self.brain.nodes[0])
self.assert_subprocess_success(
['swift-manage-shard-ranges', db_file, 'repair', '--yes'])
['swift-manage-shard-ranges', db_file, 'repair', '--yes',
'--min-shard-age', '0'])
# make sure all root replicas now sync their shard ranges
self.replicators.once()
@ -3397,7 +3398,8 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
# container
db_file = self.get_db_file(self.brain.part, self.brain.nodes[0])
self.assert_subprocess_success(
['swift-manage-shard-ranges', db_file, 'repair', '--yes'])
['swift-manage-shard-ranges', db_file, 'repair', '--yes',
'--min-shard-age', '0'])
self.replicators.once()
self.sharders_once()
self.sharders_once()
@ -3539,8 +3541,9 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
for sr in root_brokers[0].get_shard_ranges(include_deleted=True)])
# we are allowed to fix the overlap...
msg = self.assert_subprocess_success([
'swift-manage-shard-ranges', root_0_db_file, 'repair', '--yes'])
msg = self.assert_subprocess_success(
['swift-manage-shard-ranges', root_0_db_file, 'repair', '--yes',
'--min-shard-age', '0'])
self.assertIn(
b'Repairs necessary to remove overlapping shard ranges.', msg)
@ -3586,7 +3589,7 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
msg = self.assert_subprocess_success([
'swift-manage-shard-ranges', root_0_db_file, 'repair',
'--dry-run'])
'--dry-run', '--min-shard-age', '0'])
self.assertIn(b'No repairs necessary.', msg)
msg = self.assert_subprocess_success([
'swift-manage-shard-ranges', root_0_db_file, 'repair', '--gaps',

View File

@ -2137,7 +2137,9 @@ class TestManageShardRanges(unittest.TestCase):
mock_timestamp_now(ts_now), \
mock.patch('swift.cli.manage_shard_ranges.input',
return_value=user_input):
ret = main([broker.db_file, 'repair'] + options)
ret = main(
[broker.db_file, 'repair', '--min-shard-age', '0'] +
options)
self.assertEqual(exit_code, ret)
err_lines = err.getvalue().split('\n')
self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
@ -2185,6 +2187,172 @@ class TestManageShardRanges(unittest.TestCase):
key=ShardRange.sort_key)
self.assert_shard_ranges_equal(expected, updated_ranges)
def test_repair_younger_overlapping_donor_shards(self):
# test shard range repair on the normal acceptor ranges and young
# overlapping shard ranges which are younger than '--min-shard-age',
# expect them not to be repaired.
broker = self._make_broker()
broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
ts_now = next(self.ts_iter)
with mock_timestamp_now(Timestamp(float(ts_now) - 61)):
acceptor_ranges = make_shard_ranges(
broker, self.shard_data, '.shards_')
with mock_timestamp_now(ts_now):
overlap_donor_ranges = make_shard_ranges(
broker, self.overlap_shard_data_2, '.shards_')
broker.merge_shard_ranges(acceptor_ranges + overlap_donor_ranges)
self.assertTrue(broker.is_root_container())
out = StringIO()
err = StringIO()
with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err):
ret = main(
[broker.db_file, 'repair', '--min-shard-age', '60', '-y'])
self.assertEqual(0, ret)
err_lines = err.getvalue().split('\n')
self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
out_lines = out.getvalue().split('\n')
self.assertEqual(
['2 overlapping donor shards ignored due to minimum age limit'],
out_lines[:1])
updated_ranges = broker.get_shard_ranges()
expected = sorted(
acceptor_ranges + overlap_donor_ranges,
key=ShardRange.sort_key)
self.assert_shard_ranges_equal(expected, updated_ranges)
def test_repair_younger_acceptor_with_overlapping_donor_shards(self):
# test shard range repair on the overlapping normal donor ranges and
# young acceptor shard ranges who are younger than '--min-shard-age',
# expect no overlapping ranges to be repaired.
broker = self._make_broker()
broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
ts_now = next(self.ts_iter)
with mock_timestamp_now(Timestamp(float(ts_now) + 3601)):
acceptor_ranges = make_shard_ranges(
broker, self.shard_data, '.shards_')
with mock_timestamp_now(ts_now):
overlap_donor_ranges = make_shard_ranges(
broker, self.overlap_shard_data_2, '.shards_')
broker.merge_shard_ranges(acceptor_ranges + overlap_donor_ranges)
self.assertTrue(broker.is_root_container())
out = StringIO()
err = StringIO()
with mock.patch('sys.stdout', out), \
mock.patch('sys.stderr', err), \
mock_timestamp_now(Timestamp(float(ts_now) + 3601 + 59)):
ret = main(
[broker.db_file, 'repair', '--min-shard-age', '60', '-y'])
self.assertEqual(0, ret)
err_lines = err.getvalue().split('\n')
self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
out_lines = out.getvalue().split('\n')
self.assertEqual(
['2 donor shards ignored due to existence of overlapping young'
' acceptors'], out_lines[:1])
updated_ranges = broker.get_shard_ranges()
expected = sorted(
acceptor_ranges + overlap_donor_ranges,
key=ShardRange.sort_key)
self.assert_shard_ranges_equal(expected, updated_ranges)
def test_repair_older_overlapping_donor_and_acceptor_shards(self):
# test shard range repair on the overlapping donor and acceptor shard
# ranges which all are older than '--min-shard-age', expect them to be
# repaired.
broker = self._make_broker()
broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
ts_now = next(self.ts_iter)
with mock_timestamp_now(ts_now):
acceptor_ranges = make_shard_ranges(
broker, self.shard_data, '.shards_')
with mock_timestamp_now(Timestamp(float(ts_now) + 1800)):
overlap_donor_ranges = make_shard_ranges(
broker, self.overlap_shard_data_2, '.shards_')
broker.merge_shard_ranges(acceptor_ranges + overlap_donor_ranges)
self.assertTrue(broker.is_root_container())
out = StringIO()
err = StringIO()
ts_1hr_after = Timestamp(float(ts_now) + 3601)
with mock.patch('sys.stdout', out), \
mock.patch('sys.stderr', err), \
mock_timestamp_now(ts_1hr_after):
ret = main(
[broker.db_file, 'repair', '--min-shard-age', '60', '-y'])
self.assertEqual(0, ret)
err_lines = err.getvalue().split('\n')
self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
out_lines = out.getvalue().split('\n')
self.assertEqual(
['Repairs necessary to remove overlapping shard ranges.'],
out_lines[:1])
updated_ranges = broker.get_shard_ranges()
for sr in overlap_donor_ranges:
sr.update_state(ShardRange.SHRINKING, ts_1hr_after)
sr.epoch = ts_1hr_after
expected = sorted(
acceptor_ranges + overlap_donor_ranges,
key=ShardRange.sort_key)
self.assert_shard_ranges_equal(expected, updated_ranges)
def test_repair_overlapping_donor_and_acceptor_shards_default(self):
# test shard range repair on the overlapping donor and acceptor shard
# ranges wth default '--min-shard-age' value.
broker = self._make_broker()
broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
ts_now = next(self.ts_iter)
with mock_timestamp_now(ts_now):
acceptor_ranges = make_shard_ranges(
broker, self.shard_data, '.shards_')
with mock_timestamp_now(Timestamp(int(ts_now) + 1)):
overlap_donor_ranges = make_shard_ranges(
broker, self.overlap_shard_data_2, '.shards_')
broker.merge_shard_ranges(acceptor_ranges + overlap_donor_ranges)
self.assertTrue(broker.is_root_container())
out = StringIO()
err = StringIO()
ts_repair = Timestamp(int(ts_now) + 4 * 3600 - 1)
with mock.patch('sys.stdout', out), \
mock.patch('sys.stderr', err), \
mock_timestamp_now(ts_repair):
# default min-shard-age prevents repair...
ret = main([broker.db_file, 'repair', '-y'])
self.assertEqual(0, ret)
err_lines = err.getvalue().split('\n')
self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
out_lines = out.getvalue().split('\n')
self.assertEqual(
['2 overlapping donor shards ignored due to minimum age limit'],
out_lines[:1])
updated_ranges = broker.get_shard_ranges()
expected = sorted(
acceptor_ranges + overlap_donor_ranges,
key=ShardRange.sort_key)
self.assert_shard_ranges_equal(expected, updated_ranges)
out = StringIO()
err = StringIO()
ts_repair = Timestamp(int(ts_now) + 4 * 3600 + 2)
with mock.patch('sys.stdout', out), \
mock.patch('sys.stderr', err), \
mock_timestamp_now(ts_repair):
# default min-shard-age allows repair now...
ret = main([broker.db_file, 'repair', '-y'])
self.assertEqual(0, ret)
err_lines = err.getvalue().split('\n')
self.assert_starts_with(err_lines[0], 'Loaded db broker for ')
out_lines = out.getvalue().split('\n')
self.assertEqual(
['Repairs necessary to remove overlapping shard ranges.'],
out_lines[:1])
updated_ranges = broker.get_shard_ranges()
for sr in overlap_donor_ranges:
sr.update_state(ShardRange.SHRINKING, ts_repair)
sr.epoch = ts_repair
expected = sorted(
acceptor_ranges + overlap_donor_ranges,
key=ShardRange.sort_key)
self.assert_shard_ranges_equal(expected, updated_ranges)
def test_repair_two_complete_sequences_one_incomplete(self):
broker = self._make_broker()
broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
@ -2205,7 +2373,8 @@ class TestManageShardRanges(unittest.TestCase):
ts_now = next(self.ts_iter)
with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err), \
mock_timestamp_now(ts_now):
ret = main([broker.db_file, 'repair', '--yes'])
ret = main([broker.db_file, 'repair', '--yes',
'--min-shard-age', '0'])
self.assertEqual(0, ret)
err_lines = err.getvalue().split('\n')
self.assert_starts_with(err_lines[0], 'Loaded db broker for ')