From f7d6d5806edafc79f9ee6ad1abaddfbd462787c6 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Wed, 12 Jul 2023 18:30:55 +0100 Subject: [PATCH] container-server: do [end_]marker filtering in SQL query Each time the sharder audits a shard container it makes a GET request to the root container for shard ranges, typically specifying [end_]marker constraints to select a single shard range. When a container DB has a large number of shard ranges, the execution time of the get_shard_ranges() method was dominated by the time taken to instantiate ShardRange objects for every shard range before then selecting only those that satisfied the [end_]marker constraints. The get_shard_ranges() call can be much faster if the [end_]marker filtering is performed by the SQL query *before* instantiating ShardRange objects for the selected shard ranges. On the author's machine, using a DB with ~12000 shard ranges and selecting a single shard range with marker and end_marker constraints, this patch reduces the get_shard_ranges() execution time from approximately 140ms to approximately 6ms. A similar change was previously made to optimise calls with the 'includes' constraint [1]. [1] Related-Change: Ie40b62432dd9ccfd314861655f7972a2123938fe Change-Id: Ic6e436f9e8fdff6a525466757f57f9be3a81c5b6 --- swift/container/backend.py | 42 ++++++++++---- test/unit/container/test_backend.py | 89 ++++++++++++++++++++++++++++- 2 files changed, 118 insertions(+), 13 deletions(-) diff --git a/swift/container/backend.py b/swift/container/backend.py index 6cbee63e84..15844eae4a 100644 --- a/swift/container/backend.py +++ b/swift/container/backend.py @@ -32,7 +32,7 @@ from swift.common.utils import Timestamp, encode_timestamps, \ decode_timestamps, extract_swift_bytes, storage_directory, hash_path, \ ShardRange, renamer, MD5_OF_EMPTY_STRING, mkdirs, get_db_files, \ parse_db_filename, make_db_file_path, split_path, RESERVED_BYTE, \ - filter_namespaces, ShardRangeList + ShardRangeList, Namespace from swift.common.db import DatabaseBroker, utf8encode, BROKER_TIMEOUT, \ zero_like, DatabaseAlreadyExists, SQLITE_ARG_LIMIT @@ -1685,7 +1685,8 @@ class ContainerBroker(DatabaseBroker): if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err): raise - def _get_shard_range_rows(self, connection=None, includes=None, + def _get_shard_range_rows(self, connection=None, marker=None, + end_marker=None, includes=None, include_deleted=False, states=None, include_own=False, exclude_others=False): """ @@ -1696,8 +1697,15 @@ class ContainerBroker(DatabaseBroker): ``exclude_others=True``. :param connection: db connection + :param marker: restricts the returned list to rows whose namespace + includes or is greater than the marker value. ``marker`` is ignored + if ``includes`` is specified. + :param end_marker: restricts the returned list to rows whose namespace + includes or is less than the end_marker value. ``end_marker`` is + ignored if ``includes`` is specified. :param includes: restricts the returned list to the shard range that - includes the given value + includes the given value; if ``includes`` is specified then + ``marker`` and ``end_marker`` are ignored :param include_deleted: include rows marked as deleted :param states: include only rows matching the given state(s); can be an int or a list of ints. @@ -1741,7 +1749,14 @@ class ContainerBroker(DatabaseBroker): if exclude_others: conditions.append('name = ?') params.append(self.path) - if includes is not None: + if includes is None: + if end_marker: + conditions.append('lower < ?') + params.append(end_marker) + if marker: + conditions.append("(upper = '' OR upper > ?)") + params.append(marker) + else: conditions.extend(('lower < ?', "(upper = '' OR upper >= ?)")) params.extend((includes, includes)) if conditions: @@ -1826,8 +1841,10 @@ class ContainerBroker(DatabaseBroker): :param marker: restricts the returned list to shard ranges whose namespace includes or is greater than the marker value. + ``marker`` is ignored if ``includes`` is specified. :param end_marker: restricts the returned list to shard ranges whose namespace includes or is less than the end_marker value. + ``end_marker`` is ignored if ``includes`` is specified. :param includes: restricts the returned list to the shard range that includes the given value; if ``includes`` is specified then ``marker`` and ``end_marker`` are ignored. @@ -1847,9 +1864,14 @@ class ContainerBroker(DatabaseBroker): :param fill_gaps: if True, insert a modified copy of own shard range to fill any gap between the end of any found shard ranges and the upper bound of own shard range. Gaps enclosed within the found - shard ranges are not filled. + shard ranges are not filled. ``fill_gaps`` is ignored if + ``includes`` is specified. :return: a list of instances of :class:`swift.common.utils.ShardRange` """ + if includes is None and (marker == Namespace.MAX + or end_marker == Namespace.MIN): + return [] + if reverse: marker, end_marker = end_marker, marker if marker and end_marker and marker >= end_marker: @@ -1858,17 +1880,13 @@ class ContainerBroker(DatabaseBroker): shard_ranges = [ ShardRange(*row) for row in self._get_shard_range_rows( - includes=includes, include_deleted=include_deleted, - states=states, include_own=include_own, - exclude_others=exclude_others)] - + marker=marker, end_marker=end_marker, includes=includes, + include_deleted=include_deleted, states=states, + include_own=include_own, exclude_others=exclude_others)] shard_ranges.sort(key=ShardRange.sort_key) if includes: return shard_ranges[:1] if shard_ranges else [] - shard_ranges = filter_namespaces( - shard_ranges, includes, marker, end_marker) - if fill_gaps: own_shard_range = self.get_own_shard_range() if shard_ranges: diff --git a/test/unit/container/test_backend.py b/test/unit/container/test_backend.py index f0688cded8..d10734c61b 100644 --- a/test/unit/container/test_backend.py +++ b/test/unit/container/test_backend.py @@ -41,7 +41,7 @@ from swift.common.db import DatabaseAlreadyExists, GreenDBConnection, \ TombstoneReclaimer, GreenDBCursor from swift.common.request_helpers import get_reserved_name from swift.common.utils import Timestamp, encode_timestamps, hash_path, \ - ShardRange, make_db_file_path, md5, ShardRangeList + ShardRange, make_db_file_path, md5, ShardRangeList, Namespace from swift.common.storage_policy import POLICIES import mock @@ -4211,6 +4211,76 @@ class TestContainerBroker(test_db.TestDbBase): actual = broker.get_shard_ranges(marker='e', end_marker='e') self.assertFalse([dict(sr) for sr in actual]) + # includes overrides include_own + actual = broker.get_shard_ranges(includes='b', include_own=True) + self.assertEqual([dict(shard_ranges[0])], [dict(sr) for sr in actual]) + # ... unless they coincide + actual = broker.get_shard_ranges(includes='t', include_own=True) + self.assertEqual([dict(own_shard_range)], [dict(sr) for sr in actual]) + + # exclude_others overrides includes + actual = broker.get_shard_ranges(includes='b', exclude_others=True) + self.assertFalse(actual) + + # include_deleted overrides includes + actual = broker.get_shard_ranges(includes='i', include_deleted=True) + self.assertEqual([dict(shard_ranges[-1])], [dict(sr) for sr in actual]) + actual = broker.get_shard_ranges(includes='i', include_deleted=False) + self.assertFalse(actual) + + # includes overrides marker/end_marker + actual = broker.get_shard_ranges(includes='b', marker='e', + end_marker='') + self.assertEqual([dict(shard_ranges[0])], [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(includes='b', marker=Namespace.MAX) + self.assertEqual([dict(shard_ranges[0])], [dict(sr) for sr in actual]) + + # end_marker is Namespace.MAX + actual = broker.get_shard_ranges(marker='e', end_marker='') + self.assertEqual([dict(sr) for sr in undeleted[2:]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(marker='e', end_marker='', + reverse=True) + self.assertEqual([dict(sr) for sr in reversed(undeleted[:3])], + [dict(sr) for sr in actual]) + + # marker is Namespace.MIN + actual = broker.get_shard_ranges(marker='', end_marker='d') + self.assertEqual([dict(sr) for sr in shard_ranges[:2]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(marker='', end_marker='d', + reverse=True, include_deleted=True) + self.assertEqual([dict(sr) for sr in reversed(shard_ranges[2:])], + [dict(sr) for sr in actual]) + + # marker, end_marker span entire namespace + actual = broker.get_shard_ranges(marker='', end_marker='') + self.assertEqual([dict(sr) for sr in undeleted], + [dict(sr) for sr in actual]) + + # marker, end_marker override include_own + actual = broker.get_shard_ranges(marker='', end_marker='k', + include_own=True) + self.assertEqual([dict(sr) for sr in undeleted], + [dict(sr) for sr in actual]) + actual = broker.get_shard_ranges(marker='u', end_marker='', + include_own=True) + self.assertFalse(actual) + # ...unless they coincide + actual = broker.get_shard_ranges(marker='t', end_marker='', + include_own=True) + self.assertEqual([dict(own_shard_range)], [dict(sr) for sr in actual]) + + # null namespace cases + actual = broker.get_shard_ranges(end_marker=Namespace.MIN) + self.assertFalse(actual) + + actual = broker.get_shard_ranges(marker=Namespace.MAX) + self.assertFalse(actual) + orig_execute = GreenDBConnection.execute mock_call_args = [] @@ -4227,6 +4297,19 @@ class TestContainerBroker(test_db.TestDbBase): # verify that includes keyword plumbs through to an SQL condition self.assertIn("WHERE deleted=0 AND name != ? AND lower < ? AND " "(upper = '' OR upper >= ?)", mock_call_args[0][1]) + self.assertEqual(['a/c', 'f', 'f'], mock_call_args[0][2]) + + mock_call_args = [] + with mock.patch('swift.common.db.GreenDBConnection.execute', + mock_execute): + actual = broker.get_shard_ranges(marker='c', end_marker='d') + self.assertEqual([dict(sr) for sr in shard_ranges[1:2]], + [dict(sr) for sr in actual]) + self.assertEqual(1, len(mock_call_args)) + # verify that marker & end_marker plumb through to an SQL condition + self.assertIn("WHERE deleted=0 AND name != ? AND lower < ? AND " + "(upper = '' OR upper > ?)", mock_call_args[0][1]) + self.assertEqual(['a/c', 'd', 'c'], mock_call_args[0][2]) actual = broker.get_shard_ranges(includes='i') self.assertFalse(actual) @@ -4253,6 +4336,10 @@ class TestContainerBroker(test_db.TestDbBase): filler.upper = 'k' self.assertEqual([dict(sr) for sr in undeleted + [filler]], [dict(sr) for sr in actual]) + # includes overrides fill_gaps + actual = broker.get_shard_ranges(includes='b', fill_gaps=True) + self.assertEqual([dict(shard_ranges[0])], [dict(sr) for sr in actual]) + # no filler needed... actual = broker.get_shard_ranges(fill_gaps=True, end_marker='h') self.assertEqual([dict(sr) for sr in undeleted],