Address a sharder/replicator race
In the previous patch, we could clean up all container DBs, but only if the daemons went in a specific order (which cannot be guaranteed in a production system). Once a reclaim age passes, there's a race: If the container-replicator processes the root container before the container-sharder processes the shards, the deleted shards would get reaped from the root so they won't be available for the sharder. The shard containers then hang around indefinitely. Now, be willing to mark shard DBs as deleted even when we can't find our own shard range in the root. Fortunately, the shard already knows that its range has been deleted; we don't need to get that info from the root. Change-Id: If08bccf753490157f27c95b4038f3dd33d3d7f8c Related-Change: Icba98f1c9e17e8ade3f0e1b9a23360cf5ab8c86b
This commit is contained in:
parent
a4cf508a38
commit
36bd21488e
@ -746,8 +746,9 @@ class ContainerSharder(ContainerReplicator):
|
||||
# root may not yet know about this shard container
|
||||
warnings.append('root has no matching shard range')
|
||||
shard_range = None
|
||||
else:
|
||||
elif not own_shard_range.deleted:
|
||||
warnings.append('unable to get shard ranges from root')
|
||||
# else, our shard range is deleted, so root may have reclaimed it
|
||||
else:
|
||||
errors.append('missing own shard range')
|
||||
|
||||
@ -767,14 +768,16 @@ class ContainerSharder(ContainerReplicator):
|
||||
self.logger.debug('Updating shard from root %s', dict(shard_range))
|
||||
broker.merge_shard_ranges(shard_range)
|
||||
own_shard_range = broker.get_own_shard_range()
|
||||
delete_age = time.time() - self.reclaim_age
|
||||
if (own_shard_range.state == ShardRange.SHARDED and
|
||||
own_shard_range.deleted and
|
||||
own_shard_range.timestamp < delete_age and
|
||||
broker.empty()):
|
||||
broker.delete_db(Timestamp.now().internal)
|
||||
self.logger.debug('Deleted shard container %s (%s)',
|
||||
broker.db_file, quote(broker.path))
|
||||
|
||||
delete_age = time.time() - self.reclaim_age
|
||||
if (own_shard_range.state == ShardRange.SHARDED and
|
||||
own_shard_range.deleted and
|
||||
own_shard_range.timestamp < delete_age and
|
||||
broker.empty()):
|
||||
broker.delete_db(Timestamp.now().internal)
|
||||
self.logger.debug('Deleted shard container %s (%s)',
|
||||
broker.db_file, quote(broker.path))
|
||||
|
||||
self._increment_stat('audit_shard', 'success', statsd=True)
|
||||
return True
|
||||
|
||||
|
@ -4626,6 +4626,7 @@ class TestSharder(BaseTestSharder):
|
||||
own_shard_range.deleted = 1
|
||||
own_shard_range.timestamp = Timestamp.now()
|
||||
broker.merge_shard_ranges([own_shard_range])
|
||||
del shard_ranges[:] # root responds with no shard ranges
|
||||
assert_ok()
|
||||
self.assertTrue(broker.is_deleted())
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user