Container-server: only check for shard range existence for DB state queries

Within ContainerBroker, at various places, for example get_db_state()
and sharding_initiated(), they query the number of shard ranges of
this container by pulling out all shard ranges from shard range table,
instantiating ShardRange objects and then counting how many they are.
Those operations are very expensive, and causing HEAD/GET into large
containers to be very slow.

Instead, this patch only checks if there is any qualified shard range
exists in the shard table with optimized SQL query, which can be very
fast. On a container server setup which serves a container with ~12000
shard ranges, this patch alone improves HTTP HEAD request rate by ~10X,
and improves HTTP GET request rate by ~2X; and together with other
optimizations (patch 888310 & 888575) targeting to fix similar problems,
strong synergistic effects are seen to bring total ~22X improvement to
HTTP HEAD and ~27X to HTTP GET request rates.

Change-Id: I01fd4f3e395c8846280f44e17a56935fc6210444
This commit is contained in:
Jianjian Huo 2023-07-12 17:11:42 -07:00
parent 7cc8a01729
commit bfbe8f909f
2 changed files with 110 additions and 3 deletions

View File

@ -446,7 +446,7 @@ class ContainerBroker(DatabaseBroker):
return UNSHARDED
if self.db_epoch != self.get_own_shard_range().epoch:
return UNSHARDED
if not self.get_shard_ranges():
if not self.has_other_shard_ranges():
return COLLAPSED
return SHARDED
@ -457,7 +457,7 @@ class ContainerBroker(DatabaseBroker):
"""
own_shard_range = self.get_own_shard_range()
if own_shard_range.state in ShardRange.CLEAVING_STATES:
return bool(self.get_shard_ranges())
return self.has_other_shard_ranges()
return False
def sharding_required(self):
@ -867,7 +867,7 @@ class ContainerBroker(DatabaseBroker):
Timestamp(info['put_timestamp']))
def is_empty_enough_to_reclaim(self):
if self.is_root_container() and (self.get_shard_ranges() or
if self.is_root_container() and (self.has_other_shard_ranges() or
self.get_db_state() == SHARDING):
return False
return self.empty()
@ -1956,6 +1956,28 @@ class ContainerBroker(DatabaseBroker):
return {'bytes_used': bytes_used,
'object_count': object_count}
def has_other_shard_ranges(self):
"""
This function tells if there is any shard range other than the
broker's own shard range, that is not marked as deleted.
:return: A boolean value as described above.
"""
with self.get() as conn:
sql = '''
SELECT 1 FROM %s
WHERE deleted = 0 AND name != ? LIMIT 1
''' % (SHARD_RANGE_TABLE)
try:
data = conn.execute(sql, [self.path])
data.row_factory = None
return True if [row for row in data] else False
except sqlite3.OperationalError as err:
if ('no such table: %s' % SHARD_RANGE_TABLE) in str(err):
return False
else:
raise
def get_all_shard_range_data(self):
"""
Returns a list of all shard range data, including own shard range and

View File

@ -70,6 +70,33 @@ class TestContainerBroker(test_db.TestDbBase):
self.assertEqual([dict(sr) for sr in expected],
[dict(sr) for sr in actual])
def _delete_table(self, broker, table):
"""
Delete the table ``table`` from broker database.
:param broker: an object instance of ContainerBroker.
:param table: the name of the table to delete.
"""
with broker.get() as conn:
try:
conn.execute("""
DROP TABLE %s
""" % table)
except sqlite3.OperationalError as err:
if ('no such table: %s' % table) in str(err):
return
else:
raise
def _add_shard_range_table(self, broker):
"""
Add the 'shard_range' table into the broker database.
:param broker: an object instance of ContainerBroker.
"""
with broker.get() as conn:
broker.create_shard_range_table(conn)
def test_creation(self):
# Test ContainerBroker.__init__
db_file = self.get_db_path()
@ -1699,6 +1726,64 @@ class TestContainerBroker(test_db.TestDbBase):
container='c')
self._test_put_object_multiple_encoded_timestamps(broker)
@with_tempdir
def test_has_other_shard_ranges(self, tempdir):
acct = 'account'
cont = 'container'
hsh = hash_path(acct, cont)
epoch = Timestamp.now()
db_file = "%s_%s.db" % (hsh, epoch.normal)
db_path = os.path.join(tempdir, db_file)
ts = Timestamp.now()
broker = ContainerBroker(db_path, account=acct,
container=cont, force_db_file=True)
# Create the test container database and all the tables.
broker.initialize(ts.internal, 0)
# Test the case which the 'shard_range' table doesn't exist yet.
self._delete_table(broker, 'shard_range')
self.assertFalse(broker.has_other_shard_ranges())
# Add the 'shard_range' table back to the database, but it doesn't
# have any shard range row in it yet.
self._add_shard_range_table(broker)
shard_ranges = broker.get_shard_ranges(
include_deleted=True, states=None, include_own=True)
self.assertEqual(shard_ranges, [])
self.assertFalse(broker.has_other_shard_ranges())
# Insert its 'own_shard_range' into this test database.
own_shard_range = broker.get_own_shard_range()
own_shard_range.update_state(ShardRange.SHARDING)
own_shard_range.epoch = epoch
broker.merge_shard_ranges([own_shard_range])
self.assertTrue(broker.get_shard_ranges(include_own=True))
self.assertFalse(broker.has_other_shard_ranges())
# Insert a child shard range into this test database.
first_child_sr = ShardRange(
'.shards_%s/%s_1' % (acct, cont), Timestamp.now())
broker.merge_shard_ranges([first_child_sr])
self.assertTrue(broker.has_other_shard_ranges())
# Mark the first child shard range as deleted.
first_child_sr.deleted = 1
first_child_sr.timestamp = Timestamp.now()
broker.merge_shard_ranges([first_child_sr])
self.assertFalse(broker.has_other_shard_ranges())
# Insert second child shard range into this test database.
second_child_sr = ShardRange(
'.shards_%s/%s_2' % (acct, cont), Timestamp.now())
broker.merge_shard_ranges([second_child_sr])
self.assertTrue(broker.has_other_shard_ranges())
# Mark the 'own_shard_range' as deleted.
own_shard_range.deleted = 1
own_shard_range.timestamp = Timestamp.now()
broker.merge_shard_ranges([own_shard_range])
self.assertTrue(broker.has_other_shard_ranges())
@with_tempdir
def test_get_db_state(self, tempdir):
acct = 'account'