reconstructor: retire nondurable_purge_delay option

The nondurable_purge_delay option was introduced in [1] to prevent the
reconstructor removing non-durable data files on handoffs that were
about to be made durable. The DiskFileManager commit_window option has
since been introduced [2]; it specifies a similar time window during
which non-durable data files should not be removed. The commit_window
option can be re-used by the reconstructor, making the
nondurable_purge_delay option redundant.

The nondurable_purge_delay option has not been available in any tagged
release and is therefore removed with no backwards compatibility.

[1] Related-Change: I0d519ebaaade35249fb7b17bd5f419ffdaa616c0
[2] Related-Change: I5f3318a44af64b77a63713e6ff8d0fd3b6144f13
Change-Id: I1589a7517b7375fcc21472e2d514f26986bf5079
Alistair Coles 2021-07-16 12:27:25 +01:00
parent bbaed18e9b
commit 2696a79f09
4 changed files with 19 additions and 41 deletions
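
For a deployment that had experimented with the retired option, the change amounts to reading the existing DiskFileManager option instead of a reconstructor-specific one. A hedged sketch of how an override would migrate, using the 60.0 second default shown in the sample config below (the dicts are hypothetical, shown only for illustration):

    # values are strings exactly as they would appear in the conf file
    old_conf = {'nondurable_purge_delay': '60.0'}  # option retired by this commit
    new_conf = {'commit_window': '60.0'}           # DiskFileManager option reused instead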

etc/object-server.conf-sample

@@ -89,9 +89,13 @@ bind_port = 6200
 #
 # Non-durable data files may also get reclaimed if they are older than
 # reclaim_age, but not if the time they were written to disk (i.e. mtime) is
-# less than commit_window seconds ago. A commit_window greater than zero is
-# strongly recommended to avoid unintended reclamation of data files that were
-# about to become durable; commit_window should be much less than reclaim_age.
+# less than commit_window seconds ago. The commit_window also prevents the
+# reconstructor removing recently written non-durable data files from a handoff
+# node after reverting them to a primary. This gives the object-server a window
+# in which to finish a concurrent PUT on a handoff and mark the data durable. A
+# commit_window greater than zero is strongly recommended to avoid unintended
+# removal of data files that were about to become durable; commit_window should
+# be much less than reclaim_age.
 # commit_window = 60.0
 #
 # You can set scheduling priority of processes. Niceness values range from -20
@@ -433,12 +437,6 @@ use = egg:swift#recon
 # to be rebuilt). The minimum is only exceeded if request_node_count is
 # greater, and only for the purposes of quarantining.
 # request_node_count = 2 * replicas
-#
-# Sets a delay, in seconds, before the reconstructor removes non-durable data
-# files from a handoff node after reverting them to a primary. This gives the
-# object-server a window in which to finish a concurrent PUT on a handoff and
-# mark the data durable.
-# nondurable_purge_delay = 60.0

 [object-updater]
 # You can override the default log routing for this app here (don't use set!):
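
To make the window described above concrete, here is a minimal model (not Swift's implementation) of the check it implies: a non-durable .data file only becomes eligible for removal once its mtime is more than commit_window seconds in the past, whether it is being reclaimed by the reclaim_age sweep or purged by the reconstructor after a revert.

    import time

    # simplified stand-in for the commit_window check documented above
    def outside_commit_window(mtime, commit_window, now=None):
        now = time.time() if now is None else now
        return now - mtime > commit_window

    # with the default commit_window of 60.0, a non-durable file written ten
    # seconds ago is left alone; one written five minutes ago may be removed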

swift/obj/reconstructor.py

@@ -34,7 +34,7 @@ from swift.common.utils import (
     GreenAsyncPile, Timestamp, remove_file,
     load_recon_cache, parse_override_options, distribute_evenly,
     PrefixLoggerAdapter, remove_directory, config_request_node_count_value,
-    non_negative_int, non_negative_float)
+    non_negative_int)
 from swift.common.header_key_dict import HeaderKeyDict
 from swift.common.bufferedhttp import http_connect
 from swift.common.daemon import Daemon
@@ -241,8 +241,6 @@ class ObjectReconstructor(Daemon):
                      conf.get('reclaim_age', DEFAULT_RECLAIM_AGE)))
         self.request_node_count = config_request_node_count_value(
             conf.get('request_node_count', '2 * replicas'))
-        self.nondurable_purge_delay = non_negative_float(
-            conf.get('nondurable_purge_delay', '60'))
         # When upgrading from liberasurecode<=1.5.0, you may want to continue
         # writing legacy CRCs until all nodes are upgraded and capabale of
@@ -986,7 +984,7 @@ class ObjectReconstructor(Daemon):
                 # know the data file is durable so that legacy durable data
                 # files get purged
                 nondurable_purge_delay = (0 if timestamps.get('durable')
-                                          else self.nondurable_purge_delay)
+                                          else df_mgr.commit_window)
                 df.purge(timestamps['ts_data'], frag_index,
                          nondurable_purge_delay)
             except DiskFileError:
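
Read together with the hunk above, the revert path's choice of window reduces to the following; this is an illustrative restatement, not code from the commit:

    # a frag already known to be durable may be purged immediately (window 0);
    # otherwise the DiskFileManager's commit_window is passed through so that
    # an in-flight PUT still has time to mark the data durable
    def revert_purge_window(timestamps, df_mgr):
        return 0 if timestamps.get('durable') else df_mgr.commit_window

    # used as in the diff above:
    #   df.purge(timestamps['ts_data'], frag_index,
    #            revert_purge_window(timestamps, df_mgr))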

test/probe/test_reconstructor_revert.py

@@ -396,12 +396,12 @@ class TestReconstructorRevert(ECProbeTest):
         # fix the 507'ing primary
         self.revive_drive(pdevs[0])

-        # fire up reconstructor on handoff node only; nondurable_purge_delay is
+        # fire up reconstructor on handoff node only; commit_window is
         # set to zero to ensure the nondurable handoff frag is purged
         hnode_id = (hnodes[0]['port'] % 100) // 10
         self.run_custom_daemon(
             ObjectReconstructor, 'object-reconstructor', hnode_id,
-            {'nondurable_purge_delay': '0'})
+            {'commit_window': '0'})

         # primary now has only the newer non-durable frag
         self.assert_direct_get_fails(onodes[0], opart, 404)
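
The hnode_id arithmetic in the hunk above maps an object-server port to a node index. Assuming the usual SAIO-style layout where the four object servers listen on ports ending in 10, 20, 30 and 40 (the ports below are hypothetical; the probe environment takes the real ones from the ring):

    for port in (6210, 6220, 6230, 6240):
        print(port, (port % 100) // 10)   # -> 1, 2, 3, 4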

test/unit/obj/test_reconstructor.py

@@ -1184,7 +1184,8 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
                 for dirpath, files in visit_obj_dirs(context):
                     n_files_after += len(files)
                     for filename in files:
-                        self.assertFalse(filename.endswith(data_file_tail))
+                        self.assertFalse(
+                            filename.endswith(data_file_tail), filename)
             else:
                 self.assertFalse(context.get('include_non_durable'))
@@ -1192,8 +1193,8 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
         self.assertGreater(n_files, n_files_after)

     def test_delete_reverted_nondurable(self):
-        # verify reconstructor only deletes reverted nondurable fragments after
-        # nondurable_purge_delay
+        # verify reconstructor only deletes reverted nondurable fragments older
+        # than commit_window
         shutil.rmtree(self.ec_obj_path)
         ips = utils.whataremyips(self.reconstructor.bind_ip)
         local_devs = [dev for dev in self.ec_obj_ring.devs
@@ -1220,7 +1221,6 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
         self.assertTrue(os.path.exists(datafile_recent))
         self.assertTrue(os.path.exists(datafile_older))
         self.assertTrue(os.path.exists(datafile_durable))
-
         ssync_calls = []
         with mock.patch('swift.obj.reconstructor.ssync_sender',
                         self._make_fake_ssync(ssync_calls)):
@@ -1229,19 +1229,19 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
         for context in ssync_calls:
             self.assertEqual(REVERT, context['job']['job_type'])
             self.assertTrue(True, context.get('include_non_durable'))
-        # neither nondurable should be removed yet with default purge delay
+        # neither nondurable should be removed yet with default commit_window
         # because their mtimes are too recent
         self.assertTrue(os.path.exists(datafile_recent))
         self.assertTrue(os.path.exists(datafile_older))
         # but durable is purged
-        self.assertFalse(os.path.exists(datafile_durable))
+        self.assertFalse(os.path.exists(datafile_durable), datafile_durable)

         ssync_calls = []
         with mock.patch('swift.obj.reconstructor.ssync_sender',
                         self._make_fake_ssync(ssync_calls)):
             self.reconstructor.handoffs_only = True
-            # turn down the purge delay...
-            self.reconstructor.nondurable_purge_delay = 0
+            # turn down the commit_window...
+            df_older.manager.commit_window = 0
             self.reconstructor.reconstruct()
         for context in ssync_calls:
             self.assertEqual(REVERT, context['job']['job_type'])
@@ -5535,24 +5535,6 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
                 object_reconstructor.ObjectReconstructor(
                     {'quarantine_threshold': bad})

-    def test_nondurable_purge_delay_conf(self):
-        reconstructor = object_reconstructor.ObjectReconstructor({})
-        self.assertEqual(60, reconstructor.nondurable_purge_delay)
-
-        reconstructor = object_reconstructor.ObjectReconstructor(
-            {'nondurable_purge_delay': '0'})
-        self.assertEqual(0, reconstructor.nondurable_purge_delay)
-
-        reconstructor = object_reconstructor.ObjectReconstructor(
-            {'nondurable_purge_delay': '3.2'})
-        self.assertEqual(3.2, reconstructor.nondurable_purge_delay)
-
-        for bad in ('-1', -1, 'auto', 'bad'):
-            with annotate_failure(bad):
-                with self.assertRaises(ValueError):
-                    object_reconstructor.ObjectReconstructor(
-                        {'nondurable_purge_delay': bad})
-
     def test_quarantine_age_conf(self):
         # defaults to DEFAULT_RECLAIM_AGE
         reconstructor = object_reconstructor.ObjectReconstructor({})
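
The removed test_nondurable_purge_delay_conf exercised the option validation provided by non_negative_float, the helper dropped from the reconstructor's imports above. A minimal stand-in with the behaviour those assertions expected might look like this; it is a sketch, not Swift's helper:

    def non_negative_float(value):
        # accept anything float() accepts, but raise ValueError for negatives
        # and junk, as the removed assertions required for '-1', -1, 'auto'
        # and 'bad'
        try:
            result = float(value)
        except (TypeError, ValueError):
            raise ValueError('%r is not a non-negative float' % (value,))
        if result < 0:
            raise ValueError('%r is not a non-negative float' % (value,))
        return result

    # a default of 60 when the option is absent, as the removed test expected:
    #   non_negative_float(conf.get('nondurable_purge_delay', '60'))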