Sharding: No stat updates before CLEAVED state

Once a shard container has been created as part of the sharder cycle it pulls the shard's own_shard_range, updates the object_count and bytes_used and pushes this to the root container. The root container can use these to display the current container stats. However, it is not until a shard gets to the CLEAVED state that it holds enough information for its namespace, so before this the number it returns is incorrect. Further, when we find and create a shard, it starts out with the number of objects, at the time, that are expected to go into it. This is a better answer than, say, nothing. So it's better for the shard to send its current own_shard_range but not update the stats until it can be authoritative for that answer. This patch adds a new SHARD_UPDATE_STAT_STATES that tracks what ShardRange states a shard needs to be in in order to be responsible; the current definition is: SHARD_UPDATE_STAT_STATES = [ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING, ShardRange.SHARDED, ShardRange.SHRINKING, ShardRange.SHRUNK] As we don't want to update the OSR (own shard range) stats and the meta_timestamp, also move tombstone updates to only happen when in a SHARD_UPDATE_STAT_STATES state. Change-Id: I838dbba3c791fffa6a36ffdcf73eceeaff718373
2022-08-08 16:25:55 +10:00 · 2022-08-08 16:25:55 +10:00 · ece4b04e82
commit ece4b04e82
parent 9063ea0ac7
4 changed files with 122 additions and 48 deletions
--- a/swift/container/backend.py
+++ b/swift/container/backend.py
@ -61,6 +61,12 @@ SHARD_AUDITING_STATES = [ShardRange.CREATED, ShardRange.CLEAVED,
                         ShardRange.ACTIVE, ShardRange.SHARDING,
                         ShardRange.SHARDED, ShardRange.SHRINKING,
                         ShardRange.SHRUNK]
 # shards may not be fully populated while in the FOUND and CREATED
 # states, so shards should only update their own shard range's object
 # stats when they are in the following states
 SHARD_UPDATE_STAT_STATES = [ShardRange.CLEAVED, ShardRange.ACTIVE,
                            ShardRange.SHARDING, ShardRange.SHARDED,
                            ShardRange.SHRINKING, ShardRange.SHRUNK]
 # attribute names in order used when transforming shard ranges from dicts to
 # tuples and vice-versa
@ -2351,10 +2357,14 @@ class ContainerBroker(DatabaseBroker):
                    # object_count
                    shard_size = object_count - progress
-            # NB shard ranges are created with a non-zero object count so that
+            # NB shard ranges are created with a non-zero object count for a
-            # the apparent container object count remains constant, and the
+            # few reasons:
-            # container is non-deletable while shards have been found but not
+            #  1. so that the apparent container object count remains
-            # yet cleaved
+            #     consistent;
            #  2. the container is non-deletable while shards have been found
            #     but not yet cleaved; and
            #  3. so we have a rough idea of what the size of the shards
            #     should be while cleaving.
            found_ranges.append(
                {'index': index,
                 'lower': str(last_shard_upper),
--- a/swift/container/sharder.py
+++ b/swift/container/sharder.py
@ -40,7 +40,7 @@ from swift.common.utils import get_logger, config_true_value, \
    Everything, config_auto_int_value, ShardRangeList, config_percent_value
 from swift.container.backend import ContainerBroker, \
    RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, COLLAPSED, \
-    SHARD_UPDATE_STATES, sift_shard_ranges
+    SHARD_UPDATE_STATES, sift_shard_ranges, SHARD_UPDATE_STAT_STATES
 from swift.container.replicator import ContainerReplicator
@ -2260,20 +2260,29 @@ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
        if not own_shard_range:
            return
-        # do a reclaim *now* in order to get best estimate of tombstone count
+        # Don't update the osr stats including tombstones unless it's CLEAVED+
-        # that is consistent with the current object_count
+        if own_shard_range.state in SHARD_UPDATE_STAT_STATES:
-        reclaimer = self._reclaim(broker)
+            # do a reclaim *now* in order to get best estimate of tombstone
-        tombstones = reclaimer.get_tombstone_count()
+            # count that is consistent with the current object_count
-        self.logger.debug('tombstones in %s = %d',
+            reclaimer = self._reclaim(broker)
-                          quote(broker.path), tombstones)
+            tombstones = reclaimer.get_tombstone_count()
-        own_shard_range.update_tombstones(tombstones)
+            self.logger.debug('tombstones in %s = %d',
-        update_own_shard_range_stats(broker, own_shard_range)
+                              quote(broker.path), tombstones)
            # shrinking candidates are found in the root DB so that's the only
            # place we need up to date tombstone stats.
            own_shard_range.update_tombstones(tombstones)
            update_own_shard_range_stats(broker, own_shard_range)
            if not own_shard_range.reported:
                broker.merge_shard_ranges(own_shard_range)
        # we can't use `state not in SHARD_UPDATE_STAT_STATES` to return
        # because there are cases we still want to update root even if the
        # stats are wrong. Such as it's a new shard or something else has
        # decided to remove the latch to update root.
        if own_shard_range.reported:
            # no change to the stats metadata
            return
        # stats metadata has been updated so persist it
        broker.merge_shard_ranges(own_shard_range)
        # now get a consistent list of own and other shard ranges
        shard_ranges = broker.get_shard_ranges(
            include_own=True,
--- a/test/probe/test_sharder.py
+++ b/test/probe/test_sharder.py
@ -2935,7 +2935,7 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
        self.sharders.once(**kwargs)
    def test_manage_shard_ranges(self):
-        obj_names = self._make_object_names(7)
+        obj_names = self._make_object_names(10)
        self.put_objects(obj_names)
        client.post_container(self.url, self.admin_token, self.container_name,
@ -2954,17 +2954,38 @@ class TestManagedContainerSharding(BaseTestContainerSharding):
            'swift-manage-shard-ranges',
            self.get_db_file(self.brain.part, self.brain.nodes[0]),
            'find_and_replace', '3', '--enable', '--minimum-shard-size', '2'])
-        self.assert_container_state(self.brain.nodes[0], 'unsharded', 2)
+        self.assert_container_state(self.brain.nodes[0], 'unsharded', 3)
        # "Run container-replicator to replicate them to other nodes."
        self.replicators.once()
        # "Run container-sharder on all nodes to shard the container."
        # first pass cleaves 2 shards
        self.sharders_once(additional_args='--partitions=%s' % self.brain.part)
        self.assert_container_state(self.brain.nodes[0], 'sharding', 3)
        self.assert_container_state(self.brain.nodes[1], 'sharding', 3)
        shard_ranges = self.assert_container_state(
            self.brain.nodes[2], 'sharding', 3)
        self.assert_container_listing(obj_names)
        # make the un-cleaved shard update the root container...
        self.assertEqual([3, 3, 4], [sr.object_count for sr in shard_ranges])
        shard_part, nodes = self.get_part_and_node_numbers(shard_ranges[2])
        self.sharders_once(additional_args='--partitions=%s' % shard_part)
        shard_ranges = self.assert_container_state(
            self.brain.nodes[2], 'sharding', 3)
        # ...it does not report zero-stats despite being empty, because it has
        # not yet reached CLEAVED state
        self.assertEqual([3, 3, 4], [sr.object_count for sr in shard_ranges])
        # second pass cleaves final shard
        self.sharders_once(additional_args='--partitions=%s' % self.brain.part)
        # Everybody's settled
-        self.assert_container_state(self.brain.nodes[0], 'sharded', 2)
+        self.assert_container_state(self.brain.nodes[0], 'sharded', 3)
-        self.assert_container_state(self.brain.nodes[1], 'sharded', 2)
+        self.assert_container_state(self.brain.nodes[1], 'sharded', 3)
-        self.assert_container_state(self.brain.nodes[2], 'sharded', 2)
+        shard_ranges = self.assert_container_state(
            self.brain.nodes[2], 'sharded', 3)
        self.assertEqual([3, 3, 4], [sr.object_count for sr in shard_ranges])
        self.assert_container_listing(obj_names)
    def test_manage_shard_ranges_compact(self):
--- a/test/unit/container/test_sharder.py
+++ b/test/unit/container/test_sharder.py
@ -1588,6 +1588,7 @@ class TestSharder(BaseTestSharder):
        # verify that objects are not missed if shard ranges change between
        # cleaving batches
        broker = self._make_broker()
        # this root db has very few object rows...
        objects = [
            ('a', self.ts_encoded(), 10, 'text/plain', 'etag_a', 0, 0),
            ('b', self.ts_encoded(), 10, 'text/plain', 'etag_b', 0, 0),
@ -1603,8 +1604,10 @@ class TestSharder(BaseTestSharder):
        broker.enable_sharding(Timestamp.now())
        shard_bounds = (('', 'd'), ('d', 'x'), ('x', ''))
        # shard ranges start life with object count that is typically much
        # larger than this DB's object population...
        shard_ranges = self._make_shard_ranges(
-            shard_bounds, state=ShardRange.CREATED)
+            shard_bounds, state=ShardRange.CREATED, object_count=500000)
        expected_shard_dbs = []
        for shard_range in shard_ranges:
            db_hash = hash_path(shard_range.account, shard_range.container)
@ -1634,8 +1637,8 @@ class TestSharder(BaseTestSharder):
        updated_shard_ranges = broker.get_shard_ranges()
        self.assertEqual(3, len(updated_shard_ranges))
-        # first 2 shard ranges should have updated object count, bytes used and
+        # now they have reached CLEAVED state, the first 2 shard ranges should
-        # meta_timestamp
+        # have updated object count, bytes used and meta_timestamp
        shard_ranges[0].bytes_used = 23
        shard_ranges[0].object_count = 4
        shard_ranges[0].state = ShardRange.CLEAVED
@ -1646,6 +1649,11 @@ class TestSharder(BaseTestSharder):
        self._check_shard_range(shard_ranges[1], updated_shard_ranges[1])
        self._check_objects(objects[:4], expected_shard_dbs[0])
        self._check_objects(objects[4:7], expected_shard_dbs[1])
        # the actual object counts were set in the new shard brokers' own_sr's
        shard_broker = ContainerBroker(expected_shard_dbs[0])
        self.assertEqual(4, shard_broker.get_own_shard_range().object_count)
        shard_broker = ContainerBroker(expected_shard_dbs[1])
        self.assertEqual(2, shard_broker.get_own_shard_range().object_count)
        self.assertFalse(os.path.exists(expected_shard_dbs[2]))
        # third shard range should be unchanged - not yet cleaved
@ -5276,6 +5284,7 @@ class TestSharder(BaseTestSharder):
    def test_update_root_container_own_range(self):
        broker = self._make_broker()
        obj_names = []
        # nothing to send
        with self._mock_sharder() as sharder:
@ -5290,16 +5299,27 @@ class TestSharder(BaseTestSharder):
            broker.merge_shard_ranges([own_shard_range])
            # add an object, expect to see it reflected in the own shard range
            # that is sent
-            broker.put_object(str(own_shard_range.object_count + 1),
+            obj_names.append(uuid4())
            broker.put_object(str(obj_names[-1]),
                              next(self.ts_iter).internal, 1, '', '')
            with mock_timestamp_now() as now:
-                # force own shard range meta updates to be at fixed timestamp
+                # check if the state is in SHARD_UPDATE_STAT_STATES
-                expected_sent = [
+                if state in [ShardRange.CLEAVED, ShardRange.ACTIVE,
-                    dict(own_shard_range,
+                             ShardRange.SHARDING, ShardRange.SHARDED,
-                         meta_timestamp=now.internal,
+                             ShardRange.SHRINKING, ShardRange.SHRUNK]:
-                         object_count=own_shard_range.object_count + 1,
+                    exp_obj_count = len(obj_names)
-                         bytes_used=own_shard_range.bytes_used + 1)]
+                    expected_sent = [
                        dict(own_shard_range,
                             meta_timestamp=now.internal,
                             object_count=len(obj_names),
                             bytes_used=len(obj_names))]
                else:
                    exp_obj_count = own_shard_range.object_count
                    expected_sent = [
                        dict(own_shard_range)]
                self.check_shard_ranges_sent(broker, expected_sent)
                self.assertEqual(
                    exp_obj_count, broker.get_own_shard_range().object_count)
        # initialise tombstones
        with mock_timestamp_now(next(self.ts_iter)):
@ -5311,6 +5331,8 @@ class TestSharder(BaseTestSharder):
            with annotate_failure(state):
                check_only_own_shard_range_sent(state)
        init_obj_count = len(obj_names)
        def check_tombstones_sent(state):
            own_shard_range = broker.get_own_shard_range()
            self.assertTrue(own_shard_range.update_state(
@ -5318,19 +5340,25 @@ class TestSharder(BaseTestSharder):
            broker.merge_shard_ranges([own_shard_range])
            # delete an object, expect to see it reflected in the own shard
            # range that is sent
-            broker.delete_object(str(own_shard_range.object_count),
+            broker.delete_object(str(obj_names.pop(-1)),
                                 next(self.ts_iter).internal)
            with mock_timestamp_now() as now:
-                # force own shard range meta updates to be at fixed timestamp
+                # check if the state is in SHARD_UPDATE_STAT_STATES
-                expected_sent = [
+                if state in [ShardRange.CLEAVED, ShardRange.ACTIVE,
-                    dict(own_shard_range,
+                             ShardRange.SHARDING, ShardRange.SHARDED,
-                         meta_timestamp=now.internal,
+                             ShardRange.SHRINKING, ShardRange.SHRUNK]:
-                         object_count=own_shard_range.object_count - 1,
+                    expected_sent = [
-                         bytes_used=own_shard_range.bytes_used - 1,
+                        dict(own_shard_range,
-                         tombstones=own_shard_range.tombstones + 1)]
+                             meta_timestamp=now.internal,
                             object_count=len(obj_names),
                             bytes_used=len(obj_names),
                             tombstones=init_obj_count - len(obj_names))]
                else:
                    expected_sent = [
                        dict(own_shard_range)]
                self.check_shard_ranges_sent(broker, expected_sent)
-        for state in ShardRange.STATES:
+        for i, state in enumerate(ShardRange.STATES):
            with annotate_failure(state):
                check_tombstones_sent(state)
@ -5371,6 +5399,7 @@ class TestSharder(BaseTestSharder):
        other_shard_ranges = self._make_shard_ranges((('', 'h'), ('h', '')))
        self.assertTrue(other_shard_ranges[0].set_deleted())
        broker.merge_shard_ranges(other_shard_ranges)
        obj_names = []
        # own range missing - send nothing
        with self._mock_sharder() as sharder:
@ -5385,17 +5414,22 @@ class TestSharder(BaseTestSharder):
            broker.merge_shard_ranges([own_shard_range])
            # add an object, expect to see it reflected in the own shard range
            # that is sent
-            broker.put_object(str(own_shard_range.object_count + 1),
+            obj_names.append(uuid4())
            broker.put_object(str(obj_names[-1]),
                              next(self.ts_iter).internal, 1, '', '')
            with mock_timestamp_now() as now:
                shard_ranges = broker.get_shard_ranges(include_deleted=True)
-                expected_sent = sorted([
+                exp_own_shard_range = own_shard_range.copy()
-                    own_shard_range.copy(
+                # check if the state is in SHARD_UPDATE_STAT_STATES
-                        meta_timestamp=now.internal,
+                if state in [ShardRange.CLEAVED, ShardRange.ACTIVE,
-                        object_count=own_shard_range.object_count + 1,
+                             ShardRange.SHARDING, ShardRange.SHARDED,
-                        bytes_used=own_shard_range.bytes_used + 1,
+                             ShardRange.SHRINKING, ShardRange.SHRUNK]:
-                        tombstones=0)] +
+                    exp_own_shard_range.object_count = len(obj_names)
-                    shard_ranges,
+                    exp_own_shard_range.bytes_used = len(obj_names)
                    exp_own_shard_range.meta_timestamp = now.internal
                    exp_own_shard_range.tombstones = 0
                expected_sent = sorted(
                    [exp_own_shard_range] + shard_ranges,
                    key=lambda sr: (sr.upper, sr.state, sr.lower))
                self.check_shard_ranges_sent(
                    broker, [dict(sr) for sr in expected_sent])