From ed6586c46002e22b99970f0e53c92da1ec11cbaf Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Wed, 3 Feb 2021 21:38:34 +0000 Subject: [PATCH] sharder: stall cleaving at shard range gaps Previously the sharder cleaving process would skip over gaps in shard ranges. Gaps are not normally expected, but could occur if, for example, multiple inconsistent decisions are made to configure shards for shrinking, resulting in a shrinking shard having insufficient acceptor shards to cover its namespace. In these circumstances the shrinking shard's cleaving process should stall when it encounters a gap in the acceptors. This is achieved by always checking that the lower bound of the next shard range to cleave is less than or equal to the current cleaving cursor. Cleaving will resume when a suitable acceptor becomes available to cover the namespace gap. Change-Id: I1046a5cf809d2a905ede5e1f285939c91843074d --- swift/container/sharder.py | 7 +++++++ test/unit/container/test_sharder.py | 9 ++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/swift/container/sharder.py b/swift/container/sharder.py index 0cd0883706..875d18f4d9 100644 --- a/swift/container/sharder.py +++ b/swift/container/sharder.py @@ -1706,6 +1706,8 @@ class ContainerSharder(ContainerReplicator): quote(broker.path)) else: cleaving_context.start() + own_shard_range = broker.get_own_shard_range() + cleaving_context.cursor = own_shard_range.lower_str cleaving_context.ranges_todo = len(ranges_todo) self.logger.debug('Starting to cleave (%s todo): %s', cleaving_context.ranges_todo, quote(broker.path)) @@ -1722,6 +1724,11 @@ class ContainerSharder(ContainerReplicator): if len(ranges_done) == self.cleave_batch_size: break + if shard_range.lower > cleaving_context.cursor: + self.logger.info('Stopped cleave at gap: %r - %r' % + (cleaving_context.cursor, shard_range.lower)) + break + if shard_range.state not in (ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE): diff --git 
a/test/unit/container/test_sharder.py b/test/unit/container/test_sharder.py index 45ce51fad4..f84f3b26b5 100644 --- a/test/unit/container/test_sharder.py +++ b/test/unit/container/test_sharder.py @@ -1878,14 +1878,13 @@ class TestSharder(BaseTestSharder): do_test(ShardRange.CLEAVED, (('d', 'k'), ('k', 't')), expect_delete) # shrinking to incomplete acceptors, gap at start and end of namespace do_test(ShardRange.CREATED, (('k', 't'),), expect_delete, - exp_progress_bounds=(('k', 't'),)) + exp_progress_bounds=()) # shrinking to incomplete acceptors, gap at start of namespace - expect_delete = True do_test(ShardRange.CLEAVED, (('k', 't'), ('t', '')), expect_delete, - exp_progress_bounds=(('k', 't'), ('t', ''))) - # shrinking to incomplete acceptors, gap in middle + exp_progress_bounds=()) + # shrinking to incomplete acceptors, gap in middle - some progress do_test(ShardRange.CLEAVED, (('d', 'k'), ('t', '')), expect_delete, - exp_progress_bounds=(('d', 'k'), ('t', ''))) + exp_progress_bounds=(('d', 'k'),)) def test_cleave_repeated(self): # verify that if new objects are merged into retiring db after cleaving