Do not delete root_path on ContainerBroker.delete_db

When a shard is deleted, all of its sysmeta is deleted along with it. But
this is problematic because we support dealing with misplaced objects in
deleted shards, which is required behaviour.

Currently, when we deal with misplaced objects in a shard broker
marked as deleted we push the objects into a handoff broker and set
this broker's root_path attribute from the deleted shard.
Unfortunately, root_path is itself stored in sysmeta, which has been
removed. So the deleted broker falls back to using its own account and
container in the root_path.  This in turn gets pushed to the shard
responsible for the misplaced objects and because these objects are
pushed by replication the root_path meta has a newer timestamp,
replacing the shard's pointer to the root.

As a consequence listings all still works because it's root driven,
but the shard will never pull the latest shard range details from the
_real_ root container during audits.

This patch contains a probe test that demonstrates this issue and also
fixes it by making 'X-Container-Sysmeta-Shard-Quoted-Root' and
'X-Container-Sysmeta-Shard-Root' whitelisted from being cleared on
delete, meaning a deleted shard retains its knowledge of the root so it
can correctly deal with misplaced objects.

Co-Authored-By: Alistair Coles <alistairncoles@gmail.com>
Change-Id: I3315f349e5b965cecadc78af9e1c66c3f3bcfe83
This commit is contained in:
Matthew Oliver 2021-02-03 16:30:33 +11:00 committed by Alistair Coles
parent 1f9b879547
commit 6c62c8d072
5 changed files with 222 additions and 32 deletions

View File

@ -230,6 +230,8 @@ def get_db_connection(path, timeout=30, logger=None, okay_to_create=False):
class DatabaseBroker(object):
"""Encapsulates working with a database."""
delete_meta_whitelist = []
def __init__(self, db_file, timeout=BROKER_TIMEOUT, logger=None,
account=None, container=None, pending_timeout=None,
stale_reads_ok=False, skip_commits=False):
@ -366,6 +368,8 @@ class DatabaseBroker(object):
# first, clear the metadata
cleared_meta = {}
for k in self.metadata:
if k.lower() in self.delete_meta_whitelist:
continue
cleared_meta[k] = ('', timestamp)
self.update_metadata(cleared_meta)
# then mark the db as deleted

View File

@ -332,6 +332,8 @@ class ContainerBroker(DatabaseBroker):
db_type = 'container'
db_contains_type = 'object'
db_reclaim_timestamp = 'created_at'
delete_meta_whitelist = ['x-container-sysmeta-shard-quoted-root',
'x-container-sysmeta-shard-root']
def __init__(self, db_file, timeout=BROKER_TIMEOUT, logger=None,
account=None, container=None, pending_timeout=None,
@ -2127,10 +2129,13 @@ class ContainerBroker(DatabaseBroker):
"""
_, path = self._get_root_meta()
if path is not None:
# We have metadata telling us where the root is; it's authoritative
# We have metadata telling us where the root is; it's
# authoritative; shards should always have this metadata even when
# deleted
return self.path == path
# Else, we're either a root or a deleted shard.
# Else, we're either a root or a legacy deleted shard whose sharding
# sysmeta was deleted
# Use internal method so we don't try to update stats.
own_shard_range = self._own_shard_range(no_default=True)

View File

@ -209,6 +209,13 @@ class BaseTestContainerSharding(ReplProbeTest):
return ContainerBroker(
self.get_db_file(part, node, account, container))
def get_shard_broker(self, shard_range, node_index=0):
    """
    Return a ContainerBroker for one replica of the given shard range.

    :param shard_range: a ShardRange whose account/container name the
        broker db path.
    :param node_index: index into the ring's primary nodes for the shard
        container (defaults to the first primary).
    :return: a ContainerBroker for the shard container db on that node.
    """
    part, nodes = self.brain.ring.get_nodes(
        shard_range.account, shard_range.container)
    chosen = nodes[node_index]
    return self.get_broker(
        part, chosen, shard_range.account, shard_range.container)
def categorize_container_dir_content(self, account=None, container=None):
account = account or self.brain.account
container = container or self.container_name
@ -2319,6 +2326,93 @@ class TestContainerSharding(BaseTestContainerSharding):
self.assert_container_object_count(1)
self.assert_container_delete_fails()
def test_misplaced_object_movement_from_deleted_shard(self):
    """
    Probe that an object record merged into a *deleted* shard db is still
    moved to the correct shard by the sharder, and that the deleted shard
    does not push a bogus root path sysmeta to its peer shard while doing
    so (the deleted shard must retain its knowledge of the real root).
    """
    def merge_object(shard_range, name, deleted=0):
        # it's hard to get a test to put a misplaced object into a shard,
        # so this hack is used to force an object record directly into a
        # shard container db. Note: the actual object won't exist, we're
        # just using this to test object records in container dbs.
        shard_part, shard_nodes = self.brain.ring.get_nodes(
            shard_range.account, shard_range.container)
        shard_broker = self.get_shard_broker(shard_range)
        # In this test we want to merge into a deleted container shard
        shard_broker.delete_db(Timestamp.now().internal)
        shard_broker.merge_items(
            [{'name': name, 'created_at': Timestamp.now().internal,
              'size': 0, 'content_type': 'text/plain',
              'etag': md5(usedforsecurity=False).hexdigest(),
              'deleted': deleted,
              'storage_policy_index': shard_broker.storage_policy_index}])
        # return the node holding the db the record was merged into so the
        # caller can run the sharder on exactly that node
        return shard_nodes[0]

    all_obj_names = self._make_object_names(self.max_shard_size)
    self.put_objects(all_obj_names)
    # Shard the container
    client.post_container(self.url, self.admin_token, self.container_name,
                          headers={'X-Container-Sharding': 'on'})
    for n in self.brain.node_numbers:
        self.sharders.once(
            number=n, additional_args='--partitions=%s' % self.brain.part)
    # sanity checks
    for node in self.brain.nodes:
        self.assert_container_state(node, 'sharded', 2)
    self.assert_container_delete_fails()
    self.assert_container_has_shard_sysmeta()
    self.assert_container_post_ok('sharded')
    self.assert_container_listing(all_obj_names)
    # delete all objects in first shard range - updates redirected to shard
    shard_ranges = self.get_container_shard_ranges()
    self.assertLengthEqual(shard_ranges, 2)
    shard_0_objects = [name for name in all_obj_names
                       if name in shard_ranges[0]]
    shard_1_objects = [name for name in all_obj_names
                       if name in shard_ranges[1]]
    self.delete_objects(shard_0_objects)
    self.assert_container_listing(shard_1_objects)
    self.assert_container_post_ok('has objects')
    # run sharder on first shard container to update root stats
    self.run_sharders(shard_ranges[0])
    self.assert_container_object_count(len(shard_1_objects))
    # First, test a misplaced object moving from one shard to another.
    # run sharder on root to discover first shrink candidate
    self.sharders.once(additional_args='--partitions=%s' % self.brain.part)
    # then run sharder on first shard range to shrink it
    self.run_sharders(shard_ranges[0])
    # force a misplaced object into the shrunken shard range to simulate
    # a client put that was in flight when it started to shrink
    misplaced_node = merge_object(shard_ranges[0], 'alpha', deleted=0)
    # root sees first shard has shrunk, only second shard range used for
    # listing so alpha object not in listing
    self.assertLengthEqual(self.get_container_shard_ranges(), 1)
    self.assert_container_listing(shard_1_objects)
    self.assert_container_object_count(len(shard_1_objects))
    # until sharder runs on that node to move the misplaced object to the
    # second shard range
    shard_part, shard_nodes_numbers = self.get_part_and_node_numbers(
        shard_ranges[0])
    self.sharders.once(additional_args='--partitions=%s' % shard_part,
                       number=misplaced_node['id'] + 1)
    self.assert_container_listing(['alpha'] + shard_1_objects)
    # root not yet updated
    self.assert_container_object_count(len(shard_1_objects))
    # check the deleted shard did not push the wrong root path into the
    # other container
    for replica in 0, 1, 2:
        shard_x_broker = self.get_shard_broker(shard_ranges[1], replica)
        self.assertEqual("%s/%s" % (self.account, self.container_name),
                         shard_x_broker.root_path)
    # run the sharder of the existing shard to update the root stats
    # to prove the misplaced object was moved to the other shard _and_
    # the other shard still has the correct root because it updates root's
    # stats
    self.run_sharders(shard_ranges[1])
    self.assert_container_object_count(len(shard_1_objects) + 1)
def test_replication_to_sharded_container_from_unsharded_old_primary(self):
primary_ids = [n['id'] for n in self.brain.nodes]
handoff_node = next(n for n in self.brain.ring.devs

View File

@ -716,12 +716,13 @@ class TestDatabaseBroker(unittest.TestCase):
broker.initialize, normalize_timestamp('1'))
def test_delete_db(self):
meta = {'foo': ['bar', normalize_timestamp('0')]}
def init_stub(conn, put_timestamp, **kwargs):
conn.execute('CREATE TABLE test (one TEXT)')
conn.execute('''CREATE TABLE test_stat (
id TEXT, put_timestamp TEXT, delete_timestamp TEXT,
status TEXT, status_changed_at TEXT, metadata TEXT)''')
meta = {'foo': ('bar', normalize_timestamp('0'))}
conn.execute(
'''INSERT INTO test_stat (
id, put_timestamp, delete_timestamp, status,
@ -730,35 +731,63 @@ class TestDatabaseBroker(unittest.TestCase):
conn.execute('INSERT INTO test (one) VALUES ("1")')
conn.commit()
broker = DatabaseBroker(':memory:')
broker.db_type = 'test'
broker._initialize = init_stub
# Initializes a good broker for us
broker.initialize(normalize_timestamp('1'))
info = broker.get_info()
self.assertEqual('0', info['delete_timestamp'])
self.assertEqual('', info['status'])
self.assertIsNotNone(broker.conn)
broker.delete_db(normalize_timestamp('2'))
info = broker.get_info()
self.assertEqual(normalize_timestamp('2'), info['delete_timestamp'])
self.assertEqual('DELETED', info['status'])
def do_test(expected_metadata, delete_meta_whitelist=None):
if not delete_meta_whitelist:
delete_meta_whitelist = []
broker = DatabaseBroker(':memory:')
broker.delete_meta_whitelist = delete_meta_whitelist
broker.db_type = 'test'
broker._initialize = init_stub
# Initializes a good broker for us
broker.initialize(normalize_timestamp('1'))
info = broker.get_info()
self.assertEqual('0', info['delete_timestamp'])
self.assertEqual('', info['status'])
self.assertIsNotNone(broker.conn)
broker.delete_db(normalize_timestamp('2'))
info = broker.get_info()
self.assertEqual(normalize_timestamp('2'),
info['delete_timestamp'])
self.assertEqual('DELETED', info['status'])
broker = DatabaseBroker(os.path.join(self.testdir, '1.db'))
broker.db_type = 'test'
broker._initialize = init_stub
broker.initialize(normalize_timestamp('1'))
info = broker.get_info()
self.assertEqual('0', info['delete_timestamp'])
self.assertEqual('', info['status'])
broker.delete_db(normalize_timestamp('2'))
info = broker.get_info()
self.assertEqual(normalize_timestamp('2'), info['delete_timestamp'])
self.assertEqual('DELETED', info['status'])
# check meta
m2 = broker.metadata
self.assertEqual(m2, expected_metadata)
# ensure that metadata was cleared
m2 = broker.metadata
self.assertEqual(m2, {'foo': ['', normalize_timestamp('2')]})
broker = DatabaseBroker(os.path.join(self.testdir,
'%s.db' % uuid4()))
broker.delete_meta_whitelist = delete_meta_whitelist
broker.db_type = 'test'
broker._initialize = init_stub
broker.initialize(normalize_timestamp('1'))
info = broker.get_info()
self.assertEqual('0', info['delete_timestamp'])
self.assertEqual('', info['status'])
broker.delete_db(normalize_timestamp('2'))
info = broker.get_info()
self.assertEqual(normalize_timestamp('2'),
info['delete_timestamp'])
self.assertEqual('DELETED', info['status'])
# check meta
m2 = broker.metadata
self.assertEqual(m2, expected_metadata)
# ensure that metadata was cleared by default
do_test({'foo': ['', normalize_timestamp('2')]})
# If the meta is in the brokers delete_meta_whitelist it wont get
# cleared up
do_test(meta, ['foo'])
# delete_meta_whitelist things need to be in lower case, as the keys
# are lower()'ed before checked
meta["X-Container-Meta-Test"] = ['value', normalize_timestamp('0')]
meta["X-Something-else"] = ['other', normalize_timestamp('0')]
do_test({'foo': ['', normalize_timestamp('2')],
'X-Container-Meta-Test': ['value', normalize_timestamp('0')],
'X-Something-else': ['other', normalize_timestamp('0')]},
['x-container-meta-test', 'x-something-else'])
def test_get(self):
broker = DatabaseBroker(':memory:')

View File

@ -515,6 +515,7 @@ class TestContainerBroker(unittest.TestCase):
broker.initialize(next(self.ts).internal, 0)
broker.set_sharding_sysmeta('Quoted-Root', 'a/c')
self.assertFalse(broker.is_root_container())
self.assertEqual('a/c', broker.root_path)
def check_object_counted(broker_to_test, broker_with_object):
obj = {'name': 'o', 'created_at': next(self.ts).internal,
@ -593,12 +594,25 @@ class TestContainerBroker(unittest.TestCase):
own_sr.timestamp = next(self.ts)
broker.merge_shard_ranges([own_sr])
broker.delete_db(next(self.ts).internal)
self.assertFalse(broker.is_root_container())
self.assertEqual('a/c', broker.root_path)
# Get a fresh broker, with instance cache unset
broker = ContainerBroker(db_path, account='.shards_a', container='cc')
self.assertTrue(broker.empty())
self.assertTrue(broker.is_deleted())
self.assertFalse(broker.is_root_container())
self.assertEqual('a/c', broker.root_path)
# older versions *did* delete sharding sysmeta when db was deleted...
# but still know they are not root containers
broker.set_sharding_sysmeta('Quoted-Root', '')
self.assertFalse(broker.is_root_container())
self.assertEqual('a/c', broker.root_path)
# however, they have bogus root path once instance cache is cleared...
broker = ContainerBroker(db_path, account='.shards_a', container='cc')
self.assertFalse(broker.is_root_container())
self.assertEqual('.shards_a/cc', broker.root_path)
def test_reclaim(self):
broker = ContainerBroker(':memory:', account='test_account',
@ -1666,6 +1680,50 @@ class TestContainerBroker(unittest.TestCase):
fresh_broker.merge_shard_ranges([own_shard_range])
self.assertEqual(fresh_broker.get_db_state(), 'unsharded')
@with_tempdir
def test_delete_db_does_not_clear_root_path(self, tempdir):
    """
    delete_db() clears normal metadata but must leave the shard root
    path sysmeta intact, so a deleted shard still knows its root.
    """
    account = '.sharded_a'
    container = 'c'
    db_path = os.path.join(
        tempdir, '%s.db' % hash_path(account, container))
    put_ts = Timestamp(0).normal
    broker = ContainerBroker(db_path, account=account, container=container)
    broker.initialize(put_ts, 0)
    # mix ordinary user/sys metadata with both root path sysmeta variants
    broker.update_metadata({
        'foo': ('bar', put_ts),
        'icecream': ('sandwich', put_ts),
        'X-Container-Sysmeta-Some': ('meta', put_ts),
        'X-Container-Sysmeta-Shard-Quoted-Root': ('a/c', put_ts),
        'X-Container-Sysmeta-Shard-Root': ('a/c', put_ts)})
    self.assertEqual('a/c', broker.root_path)
    # now delete the db
    delete_ts = Timestamp(1).normal
    broker.delete_db(delete_ts)

    def check_metadata(broker):
        # ordinary metadata is cleared at the delete timestamp...
        meta = broker.metadata
        self.assertEqual(meta['foo'], ['', delete_ts])
        self.assertEqual(meta['icecream'], ['', delete_ts])
        self.assertEqual(meta['X-Container-Sysmeta-Some'],
                         ['', delete_ts])
        # ...but whitelisted root path sysmeta survives untouched
        self.assertEqual(meta['X-Container-Sysmeta-Shard-Quoted-Root'],
                         ['a/c', put_ts])
        self.assertEqual(meta['X-Container-Sysmeta-Shard-Root'],
                         ['a/c', put_ts])
        self.assertEqual('a/c', broker.root_path)
        self.assertFalse(broker.is_root_container())

    check_metadata(broker)
    # fresh broker in case values were cached in previous instance
    check_metadata(ContainerBroker(db_path))
@with_tempdir
def test_db_file(self, tempdir):
acct = 'account'
@ -4567,7 +4625,7 @@ class TestContainerBroker(unittest.TestCase):
check_broker_info(broker.get_info())
check_sharded_state(broker)
# delete the container - sharding sysmeta gets erased
# delete the container
broker.delete_db(next(self.ts).internal)
# but it is not considered deleted while shards have content
self.assertFalse(broker.is_deleted())
@ -4577,7 +4635,7 @@ class TestContainerBroker(unittest.TestCase):
meta_timestamp=next(self.ts))
for sr in shard_ranges]
broker.merge_shard_ranges(empty_shard_ranges)
# and no it is deleted
# and now it is deleted
self.assertTrue(broker.is_deleted())
check_sharded_state(broker)