diff --git a/etc/object-server.conf-sample b/etc/object-server.conf-sample index 3e4dd8896f..db4d226360 100644 --- a/etc/object-server.conf-sample +++ b/etc/object-server.conf-sample @@ -89,9 +89,13 @@ bind_port = 6200 # # Non-durable data files may also get reclaimed if they are older than # reclaim_age, but not if the time they were written to disk (i.e. mtime) is -# less than commit_window seconds ago. A commit_window greater than zero is -# strongly recommended to avoid unintended reclamation of data files that were -# about to become durable; commit_window should be much less than reclaim_age. +# less than commit_window seconds ago. The commit_window also prevents the +# reconstructor removing recently written non-durable data files from a handoff +# node after reverting them to a primary. This gives the object-server a window +# in which to finish a concurrent PUT on a handoff and mark the data durable. A +# commit_window greater than zero is strongly recommended to avoid unintended +# removal of data files that were about to become durable; commit_window should +# be much less than reclaim_age. # commit_window = 60.0 # # You can set scheduling priority of processes. Niceness values range from -20 @@ -433,12 +437,6 @@ use = egg:swift#recon # to be rebuilt). The minimum is only exceeded if request_node_count is # greater, and only for the purposes of quarantining. # request_node_count = 2 * replicas -# -# Sets a delay, in seconds, before the reconstructor removes non-durable data -# files from a handoff node after reverting them to a primary. This gives the -# object-server a window in which to finish a concurrent PUT on a handoff and -# mark the data durable. -# nondurable_purge_delay = 60.0 [object-updater] # You can override the default log routing for this app here (don't use set!): diff --git a/swift/obj/reconstructor.py b/swift/obj/reconstructor.py index 7a2def7c82..edd9aec15b 100644 --- a/swift/obj/reconstructor.py +++ b/swift/obj/reconstructor.py @@ -34,7 +34,7 @@ from swift.common.utils import ( GreenAsyncPile, Timestamp, remove_file, load_recon_cache, parse_override_options, distribute_evenly, PrefixLoggerAdapter, remove_directory, config_request_node_count_value, - non_negative_int, non_negative_float) + non_negative_int) from swift.common.header_key_dict import HeaderKeyDict from swift.common.bufferedhttp import http_connect from swift.common.daemon import Daemon @@ -241,8 +241,6 @@ class ObjectReconstructor(Daemon): conf.get('reclaim_age', DEFAULT_RECLAIM_AGE))) self.request_node_count = config_request_node_count_value( conf.get('request_node_count', '2 * replicas')) - self.nondurable_purge_delay = non_negative_float( - conf.get('nondurable_purge_delay', '60')) # When upgrading from liberasurecode<=1.5.0, you may want to continue # writing legacy CRCs until all nodes are upgraded and capabale of @@ -986,7 +984,7 @@ class ObjectReconstructor(Daemon): # know the data file is durable so that legacy durable data # files get purged nondurable_purge_delay = (0 if timestamps.get('durable') - else self.nondurable_purge_delay) + else df_mgr.commit_window) df.purge(timestamps['ts_data'], frag_index, nondurable_purge_delay) except DiskFileError: diff --git a/test/probe/test_reconstructor_revert.py b/test/probe/test_reconstructor_revert.py index a58778606a..3065e95e98 100644 --- a/test/probe/test_reconstructor_revert.py +++ b/test/probe/test_reconstructor_revert.py @@ -396,12 +396,12 @@ class TestReconstructorRevert(ECProbeTest): # fix the 507'ing primary self.revive_drive(pdevs[0]) - # fire up reconstructor on handoff node only; nondurable_purge_delay is + # fire up reconstructor on handoff node only; commit_window is # set to zero to ensure the nondurable handoff frag is purged hnode_id = (hnodes[0]['port'] % 100) // 10 self.run_custom_daemon( ObjectReconstructor, 'object-reconstructor', hnode_id, - {'nondurable_purge_delay': '0'}) + {'commit_window': '0'}) # primary now has only the newer non-durable frag self.assert_direct_get_fails(onodes[0], opart, 404) diff --git a/test/unit/obj/test_reconstructor.py b/test/unit/obj/test_reconstructor.py index 81ae808152..7ef1460fb9 100644 --- a/test/unit/obj/test_reconstructor.py +++ b/test/unit/obj/test_reconstructor.py @@ -1184,7 +1184,8 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): for dirpath, files in visit_obj_dirs(context): n_files_after += len(files) for filename in files: - self.assertFalse(filename.endswith(data_file_tail)) + self.assertFalse( + filename.endswith(data_file_tail), filename) else: self.assertFalse(context.get('include_non_durable')) @@ -1192,8 +1193,8 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): self.assertGreater(n_files, n_files_after) def test_delete_reverted_nondurable(self): - # verify reconstructor only deletes reverted nondurable fragments after - # nondurable_purge_delay + # verify reconstructor only deletes reverted nondurable fragments older + # commit_window shutil.rmtree(self.ec_obj_path) ips = utils.whataremyips(self.reconstructor.bind_ip) local_devs = [dev for dev in self.ec_obj_ring.devs @@ -1220,7 +1221,6 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): self.assertTrue(os.path.exists(datafile_recent)) self.assertTrue(os.path.exists(datafile_older)) self.assertTrue(os.path.exists(datafile_durable)) - ssync_calls = [] with mock.patch('swift.obj.reconstructor.ssync_sender', self._make_fake_ssync(ssync_calls)): @@ -1229,19 +1229,19 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): for context in ssync_calls: self.assertEqual(REVERT, context['job']['job_type']) self.assertTrue(True, context.get('include_non_durable')) - # neither nondurable should be removed yet with default purge delay + # neither nondurable should be removed yet with default commit_window # because their mtimes are too recent self.assertTrue(os.path.exists(datafile_recent)) self.assertTrue(os.path.exists(datafile_older)) # but durable is purged - self.assertFalse(os.path.exists(datafile_durable)) + self.assertFalse(os.path.exists(datafile_durable), datafile_durable) ssync_calls = [] with mock.patch('swift.obj.reconstructor.ssync_sender', self._make_fake_ssync(ssync_calls)): self.reconstructor.handoffs_only = True - # turn down the purge delay... - self.reconstructor.nondurable_purge_delay = 0 + # turn down the commit_window... + df_older.manager.commit_window = 0 self.reconstructor.reconstruct() for context in ssync_calls: self.assertEqual(REVERT, context['job']['job_type']) @@ -5535,24 +5535,6 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor): object_reconstructor.ObjectReconstructor( {'quarantine_threshold': bad}) - def test_nondurable_purge_delay_conf(self): - reconstructor = object_reconstructor.ObjectReconstructor({}) - self.assertEqual(60, reconstructor.nondurable_purge_delay) - - reconstructor = object_reconstructor.ObjectReconstructor( - {'nondurable_purge_delay': '0'}) - self.assertEqual(0, reconstructor.nondurable_purge_delay) - - reconstructor = object_reconstructor.ObjectReconstructor( - {'nondurable_purge_delay': '3.2'}) - self.assertEqual(3.2, reconstructor.nondurable_purge_delay) - - for bad in ('-1', -1, 'auto', 'bad'): - with annotate_failure(bad): - with self.assertRaises(ValueError): - object_reconstructor.ObjectReconstructor( - {'nondurable_purge_delay': bad}) - def test_quarantine_age_conf(self): # defaults to DEFAULT_RECLAIM_AGE reconstructor = object_reconstructor.ObjectReconstructor({})