From bfbe8f909ff502b4c2cd60a17301602f6fcb15e1 Mon Sep 17 00:00:00 2001 From: Jianjian Huo Date: Wed, 12 Jul 2023 17:11:42 -0700 Subject: [PATCH] Container-server: only check for shard range existence for DB state queries Within ContainerBroker, at various places, for example get_db_state() and sharding_initiated(), they query the number of shard ranges of this container by pulling out all shard ranges from shard range table, instantiating ShardRange objects and then counting how many they are. Those operations are very expensive, and causing HEAD/GET into large containers to be very slow. Instead, this patch only checks if there is any qualified shard range exists in the shard table with optimized SQL query, which can be very fast. On a container server setup which serves a container with ~12000 shard ranges, this patch alone improves HTTP HEAD request rate by ~10X, and improves HTTP GET request rate by ~2X; and together with other optimizations (patch 888310 & 888575) targeting to fix similar problems, strong synergistic effects are seen to bring total ~22X improvement to HTTP HEAD and ~27X to HTTP GET request rates. Change-Id: I01fd4f3e395c8846280f44e17a56935fc6210444 --- swift/container/backend.py | 28 +++++++++- test/unit/container/test_backend.py | 85 +++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 3 deletions(-) diff --git a/swift/container/backend.py b/swift/container/backend.py index 6cbee63e84..791d971c91 100644 --- a/swift/container/backend.py +++ b/swift/container/backend.py @@ -446,7 +446,7 @@ class ContainerBroker(DatabaseBroker): return UNSHARDED if self.db_epoch != self.get_own_shard_range().epoch: return UNSHARDED - if not self.get_shard_ranges(): + if not self.has_other_shard_ranges(): return COLLAPSED return SHARDED @@ -457,7 +457,7 @@ class ContainerBroker(DatabaseBroker): """ own_shard_range = self.get_own_shard_range() if own_shard_range.state in ShardRange.CLEAVING_STATES: - return bool(self.get_shard_ranges()) + return self.has_other_shard_ranges() return False def sharding_required(self): @@ -867,7 +867,7 @@ class ContainerBroker(DatabaseBroker): Timestamp(info['put_timestamp'])) def is_empty_enough_to_reclaim(self): - if self.is_root_container() and (self.get_shard_ranges() or + if self.is_root_container() and (self.has_other_shard_ranges() or self.get_db_state() == SHARDING): return False return self.empty() @@ -1956,6 +1956,28 @@ class ContainerBroker(DatabaseBroker): return {'bytes_used': bytes_used, 'object_count': object_count} + def has_other_shard_ranges(self): + """ + This function tells if there is any shard range other than the + broker's own shard range, that is not marked as deleted. + + :return: A boolean value as described above. + """ + with self.get() as conn: + sql = ''' + SELECT 1 FROM %s + WHERE deleted = 0 AND name != ? LIMIT 1 + ''' % (SHARD_RANGE_TABLE) + try: + data = conn.execute(sql, [self.path]) + data.row_factory = None + return True if [row for row in data] else False + except sqlite3.OperationalError as err: + if ('no such table: %s' % SHARD_RANGE_TABLE) in str(err): + return False + else: + raise + def get_all_shard_range_data(self): """ Returns a list of all shard range data, including own shard range and diff --git a/test/unit/container/test_backend.py b/test/unit/container/test_backend.py index f0688cded8..7bc01312d4 100644 --- a/test/unit/container/test_backend.py +++ b/test/unit/container/test_backend.py @@ -70,6 +70,33 @@ class TestContainerBroker(test_db.TestDbBase): self.assertEqual([dict(sr) for sr in expected], [dict(sr) for sr in actual]) + def _delete_table(self, broker, table): + """ + Delete the table ``table`` from broker database. + + :param broker: an object instance of ContainerBroker. + :param table: the name of the table to delete. + """ + with broker.get() as conn: + try: + conn.execute(""" + DROP TABLE %s + """ % table) + except sqlite3.OperationalError as err: + if ('no such table: %s' % table) in str(err): + return + else: + raise + + def _add_shard_range_table(self, broker): + """ + Add the 'shard_range' table into the broker database. + + :param broker: an object instance of ContainerBroker. + """ + with broker.get() as conn: + broker.create_shard_range_table(conn) + def test_creation(self): # Test ContainerBroker.__init__ db_file = self.get_db_path() @@ -1699,6 +1726,64 @@ class TestContainerBroker(test_db.TestDbBase): container='c') self._test_put_object_multiple_encoded_timestamps(broker) + @with_tempdir + def test_has_other_shard_ranges(self, tempdir): + acct = 'account' + cont = 'container' + hsh = hash_path(acct, cont) + epoch = Timestamp.now() + db_file = "%s_%s.db" % (hsh, epoch.normal) + db_path = os.path.join(tempdir, db_file) + ts = Timestamp.now() + broker = ContainerBroker(db_path, account=acct, + container=cont, force_db_file=True) + # Create the test container database and all the tables. + broker.initialize(ts.internal, 0) + + # Test the case which the 'shard_range' table doesn't exist yet. + self._delete_table(broker, 'shard_range') + self.assertFalse(broker.has_other_shard_ranges()) + + # Add the 'shard_range' table back to the database, but it doesn't + # have any shard range row in it yet. + self._add_shard_range_table(broker) + shard_ranges = broker.get_shard_ranges( + include_deleted=True, states=None, include_own=True) + self.assertEqual(shard_ranges, []) + self.assertFalse(broker.has_other_shard_ranges()) + + # Insert its 'own_shard_range' into this test database. + own_shard_range = broker.get_own_shard_range() + own_shard_range.update_state(ShardRange.SHARDING) + own_shard_range.epoch = epoch + broker.merge_shard_ranges([own_shard_range]) + self.assertTrue(broker.get_shard_ranges(include_own=True)) + self.assertFalse(broker.has_other_shard_ranges()) + + # Insert a child shard range into this test database. + first_child_sr = ShardRange( + '.shards_%s/%s_1' % (acct, cont), Timestamp.now()) + broker.merge_shard_ranges([first_child_sr]) + self.assertTrue(broker.has_other_shard_ranges()) + + # Mark the first child shard range as deleted. + first_child_sr.deleted = 1 + first_child_sr.timestamp = Timestamp.now() + broker.merge_shard_ranges([first_child_sr]) + self.assertFalse(broker.has_other_shard_ranges()) + + # Insert second child shard range into this test database. + second_child_sr = ShardRange( + '.shards_%s/%s_2' % (acct, cont), Timestamp.now()) + broker.merge_shard_ranges([second_child_sr]) + self.assertTrue(broker.has_other_shard_ranges()) + + # Mark the 'own_shard_range' as deleted. + own_shard_range.deleted = 1 + own_shard_range.timestamp = Timestamp.now() + broker.merge_shard_ranges([own_shard_range]) + self.assertTrue(broker.has_other_shard_ranges()) + @with_tempdir def test_get_db_state(self, tempdir): acct = 'account'