From 6e8d82c97e28026e03f71bf38ccbcd2f1c606afa Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Fri, 14 Jul 2023 14:24:33 +0100 Subject: [PATCH] container-server: use LIMIT in get_own_shard_range() query The get_own_shard_range() method is called during every container GET or HEAD request (as well as at other times). The method delegates to get_shard_ranges() which queries the DB for shard ranges. Although the query has conditions to only select a single shard range name, and the method will return only the first matching shard range, the query had no LIMIT. Adding a LIMIT of 1 significantly reduces execution time when the DB has many shard range rows. On the author's machine, using a DB with ~12000 shard ranges this patch reduces the get_own_shard_range() execution time by a factor of approximately 100. Change-Id: I43f79de3f0603b9fab8bf6f7b4c3b1892a8919b3 --- swift/container/backend.py | 29 ++++++++---- test/unit/container/test_backend.py | 68 +++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 9 deletions(-) diff --git a/swift/container/backend.py b/swift/container/backend.py index 5dec536ac2..9d86fbee2b 100644 --- a/swift/container/backend.py +++ b/swift/container/backend.py @@ -1688,7 +1688,8 @@ class ContainerBroker(DatabaseBroker): def _get_shard_range_rows(self, connection=None, marker=None, end_marker=None, includes=None, include_deleted=False, states=None, - include_own=False, exclude_others=False): + include_own=False, exclude_others=False, + limit=None): """ Returns a list of shard range rows. @@ -1717,6 +1718,14 @@ class ContainerBroker(DatabaseBroker): names do not match the broker's path are included in the returned list. If True, those rows are not included, otherwise they are included. Default is False. + :param limit: restricts the returned list to the given number of rows. + Should be a whole number; negative values will be ignored. + The ``limit`` parameter is useful to optimise a search + when the maximum number of expected matching rows is known, and + particularly when that maximum number is much less than the total + number of rows in the DB. However, the DB search is not ordered and + the subset of rows returned when ``limit`` is less than all + possible matching rows is therefore unpredictable. :return: a list of tuples. """ @@ -1761,6 +1770,8 @@ class ContainerBroker(DatabaseBroker): params.extend((includes, includes)) if conditions: condition = ' WHERE ' + ' AND '.join(conditions) + if limit is not None and limit >= 0: + condition += ' LIMIT %d' % limit columns = SHARD_RANGE_KEYS[:-2] for column in SHARD_RANGE_KEYS[-2:]: if column in defaults: @@ -1834,8 +1845,8 @@ class ContainerBroker(DatabaseBroker): def get_shard_ranges(self, marker=None, end_marker=None, includes=None, reverse=False, include_deleted=False, states=None, - include_own=False, - exclude_others=False, fill_gaps=False): + include_own=False, exclude_others=False, + fill_gaps=False): """ Returns a list of persisted shard ranges. @@ -1923,13 +1934,13 @@ class ContainerBroker(DatabaseBroker): default shard range is returned. :return: an instance of :class:`~swift.common.utils.ShardRange` """ - shard_ranges = self.get_shard_ranges(include_own=True, - include_deleted=True, - exclude_others=True) - if shard_ranges: - own_shard_range = shard_ranges[0] + rows = self._get_shard_range_rows( + include_own=True, include_deleted=True, exclude_others=True, + limit=1) + if rows: + own_shard_range = ShardRange(*rows[0]) elif no_default: - return None + own_shard_range = None else: own_shard_range = ShardRange( self.path, Timestamp.now(), ShardRange.MIN, ShardRange.MAX, diff --git a/test/unit/container/test_backend.py b/test/unit/container/test_backend.py index 3103aa656c..75d2c3a57b 100644 --- a/test/unit/container/test_backend.py +++ b/test/unit/container/test_backend.py @@ -4622,6 +4622,58 @@ class TestContainerBroker(test_db.TestDbBase): includes='l') self.assertEqual([shard_ranges[2]], actual) + @with_tempdir + def test_get_shard_range_rows_with_limit(self, tempdir): + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(self.ts).internal, 0) + shard_ranges = [ + ShardRange('a/c', next(self.ts), 'a', 'c'), + ShardRange('.a/c1', next(self.ts), 'c', 'd'), + ShardRange('.a/c2', next(self.ts), 'd', 'f'), + ShardRange('.a/c3', next(self.ts), 'd', 'f', deleted=1), + ] + broker.merge_shard_ranges(shard_ranges) + actual = broker._get_shard_range_rows(include_deleted=True, + include_own=True) + self.assertEqual(4, len(actual)) + # the order of rows is not predictable, but they should be unique + self.assertEqual(4, len(set(actual))) + actual = broker._get_shard_range_rows(include_deleted=True) + self.assertEqual(3, len(actual)) + self.assertEqual(3, len(set(actual))) + # negative -> unlimited + actual = broker._get_shard_range_rows(include_deleted=True, limit=-1) + self.assertEqual(3, len(actual)) + self.assertEqual(3, len(set(actual))) + # zero is applied + actual = broker._get_shard_range_rows(include_deleted=True, limit=0) + self.assertFalse(actual) + actual = broker._get_shard_range_rows(include_deleted=True, limit=1) + self.assertEqual(1, len(actual)) + self.assertEqual(1, len(set(actual))) + actual = broker._get_shard_range_rows(include_deleted=True, limit=2) + self.assertEqual(2, len(actual)) + self.assertEqual(2, len(set(actual))) + actual = broker._get_shard_range_rows(include_deleted=True, limit=3) + self.assertEqual(3, len(actual)) + self.assertEqual(3, len(set(actual))) + actual = broker._get_shard_range_rows(include_deleted=True, limit=4) + self.assertEqual(3, len(actual)) + self.assertEqual(3, len(set(actual))) + actual = broker._get_shard_range_rows(include_deleted=True, + include_own=True, + exclude_others=True, + limit=1) + self.assertEqual(1, len(actual)) + self.assertEqual(shard_ranges[0], ShardRange(*actual[0])) + actual = broker._get_shard_range_rows(include_deleted=True, + include_own=True, + exclude_others=True, + limit=4) + self.assertEqual(1, len(actual)) + self.assertEqual(shard_ranges[0], ShardRange(*actual[0])) + @with_tempdir def test_get_own_shard_range(self, tempdir): db_path = os.path.join(tempdir, 'container.db') @@ -4688,6 +4740,22 @@ class TestContainerBroker(test_db.TestDbBase): actual = broker.get_own_shard_range() self.assertEqual(dict(own_sr), dict(actual)) + orig_execute = GreenDBConnection.execute + mock_call_args = [] + + def mock_execute(*args, **kwargs): + mock_call_args.append(args) + return orig_execute(*args, **kwargs) + + with mock.patch('swift.common.db.GreenDBConnection.execute', + mock_execute): + actual = broker.get_own_shard_range() + self.assertEqual(dict(own_sr), dict(actual)) + self.assertEqual(1, len(mock_call_args)) + # verify that SQL is optimised with LIMIT + self.assertIn("WHERE name = ? LIMIT 1", mock_call_args[0][1]) + self.assertEqual(['.shards_a/shard_c'], mock_call_args[0][2]) + @with_tempdir def test_enable_sharding(self, tempdir): db_path = os.path.join(tempdir, 'container.db')