reconstructor: retire nondurable_purge_delay option

The nondurable_purge_delay option was introduced in [1] to prevent the
reconstructor from removing non-durable data files on handoffs that were
about to be made durable. The DiskFileManager commit_window option has
since been introduced [2], which specifies a similar time window during
which non-durable data files should not be removed. The commit_window
option can be re-used by the reconstructor, making the
nondurable_purge_delay option redundant.

The nondurable_purge_delay option has not been available in any tagged
release and is therefore removed with no backwards compatibility.

[1] Related-Change: I0d519ebaaade35249fb7b17bd5f419ffdaa616c0
[2] Related-Change: I5f3318a44af64b77a63713e6ff8d0fd3b6144f13
Change-Id: I1589a7517b7375fcc21472e2d514f26986bf5079
This commit is contained in:
Alistair Coles 2021-07-16 12:27:25 +01:00
parent bbaed18e9b
commit 2696a79f09
4 changed files with 19 additions and 41 deletions

View File

@ -89,9 +89,13 @@ bind_port = 6200
# #
# Non-durable data files may also get reclaimed if they are older than # Non-durable data files may also get reclaimed if they are older than
# reclaim_age, but not if the time they were written to disk (i.e. mtime) is # reclaim_age, but not if the time they were written to disk (i.e. mtime) is
# less than commit_window seconds ago. A commit_window greater than zero is # less than commit_window seconds ago. The commit_window also prevents the
# strongly recommended to avoid unintended reclamation of data files that were # reconstructor removing recently written non-durable data files from a handoff
# about to become durable; commit_window should be much less than reclaim_age. # node after reverting them to a primary. This gives the object-server a window
# in which to finish a concurrent PUT on a handoff and mark the data durable. A
# commit_window greater than zero is strongly recommended to avoid unintended
# removal of data files that were about to become durable; commit_window should
# be much less than reclaim_age.
# commit_window = 60.0 # commit_window = 60.0
# #
# You can set scheduling priority of processes. Niceness values range from -20 # You can set scheduling priority of processes. Niceness values range from -20
@ -433,12 +437,6 @@ use = egg:swift#recon
# to be rebuilt). The minimum is only exceeded if request_node_count is # to be rebuilt). The minimum is only exceeded if request_node_count is
# greater, and only for the purposes of quarantining. # greater, and only for the purposes of quarantining.
# request_node_count = 2 * replicas # request_node_count = 2 * replicas
#
# Sets a delay, in seconds, before the reconstructor removes non-durable data
# files from a handoff node after reverting them to a primary. This gives the
# object-server a window in which to finish a concurrent PUT on a handoff and
# mark the data durable.
# nondurable_purge_delay = 60.0
[object-updater] [object-updater]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):

View File

@ -34,7 +34,7 @@ from swift.common.utils import (
GreenAsyncPile, Timestamp, remove_file, GreenAsyncPile, Timestamp, remove_file,
load_recon_cache, parse_override_options, distribute_evenly, load_recon_cache, parse_override_options, distribute_evenly,
PrefixLoggerAdapter, remove_directory, config_request_node_count_value, PrefixLoggerAdapter, remove_directory, config_request_node_count_value,
non_negative_int, non_negative_float) non_negative_int)
from swift.common.header_key_dict import HeaderKeyDict from swift.common.header_key_dict import HeaderKeyDict
from swift.common.bufferedhttp import http_connect from swift.common.bufferedhttp import http_connect
from swift.common.daemon import Daemon from swift.common.daemon import Daemon
@ -241,8 +241,6 @@ class ObjectReconstructor(Daemon):
conf.get('reclaim_age', DEFAULT_RECLAIM_AGE))) conf.get('reclaim_age', DEFAULT_RECLAIM_AGE)))
self.request_node_count = config_request_node_count_value( self.request_node_count = config_request_node_count_value(
conf.get('request_node_count', '2 * replicas')) conf.get('request_node_count', '2 * replicas'))
self.nondurable_purge_delay = non_negative_float(
conf.get('nondurable_purge_delay', '60'))
# When upgrading from liberasurecode<=1.5.0, you may want to continue # When upgrading from liberasurecode<=1.5.0, you may want to continue
# writing legacy CRCs until all nodes are upgraded and capable of # writing legacy CRCs until all nodes are upgraded and capable of
@ -986,7 +984,7 @@ class ObjectReconstructor(Daemon):
# know the data file is durable so that legacy durable data # know the data file is durable so that legacy durable data
# files get purged # files get purged
nondurable_purge_delay = (0 if timestamps.get('durable') nondurable_purge_delay = (0 if timestamps.get('durable')
else self.nondurable_purge_delay) else df_mgr.commit_window)
df.purge(timestamps['ts_data'], frag_index, df.purge(timestamps['ts_data'], frag_index,
nondurable_purge_delay) nondurable_purge_delay)
except DiskFileError: except DiskFileError:

View File

@ -396,12 +396,12 @@ class TestReconstructorRevert(ECProbeTest):
# fix the 507'ing primary # fix the 507'ing primary
self.revive_drive(pdevs[0]) self.revive_drive(pdevs[0])
# fire up reconstructor on handoff node only; nondurable_purge_delay is # fire up reconstructor on handoff node only; commit_window is
# set to zero to ensure the nondurable handoff frag is purged # set to zero to ensure the nondurable handoff frag is purged
hnode_id = (hnodes[0]['port'] % 100) // 10 hnode_id = (hnodes[0]['port'] % 100) // 10
self.run_custom_daemon( self.run_custom_daemon(
ObjectReconstructor, 'object-reconstructor', hnode_id, ObjectReconstructor, 'object-reconstructor', hnode_id,
{'nondurable_purge_delay': '0'}) {'commit_window': '0'})
# primary now has only the newer non-durable frag # primary now has only the newer non-durable frag
self.assert_direct_get_fails(onodes[0], opart, 404) self.assert_direct_get_fails(onodes[0], opart, 404)

View File

@ -1184,7 +1184,8 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
for dirpath, files in visit_obj_dirs(context): for dirpath, files in visit_obj_dirs(context):
n_files_after += len(files) n_files_after += len(files)
for filename in files: for filename in files:
self.assertFalse(filename.endswith(data_file_tail)) self.assertFalse(
filename.endswith(data_file_tail), filename)
else: else:
self.assertFalse(context.get('include_non_durable')) self.assertFalse(context.get('include_non_durable'))
@ -1192,8 +1193,8 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
self.assertGreater(n_files, n_files_after) self.assertGreater(n_files, n_files_after)
def test_delete_reverted_nondurable(self): def test_delete_reverted_nondurable(self):
# verify reconstructor only deletes reverted nondurable fragments after # verify reconstructor only deletes reverted nondurable fragments older
# nondurable_purge_delay # than commit_window
shutil.rmtree(self.ec_obj_path) shutil.rmtree(self.ec_obj_path)
ips = utils.whataremyips(self.reconstructor.bind_ip) ips = utils.whataremyips(self.reconstructor.bind_ip)
local_devs = [dev for dev in self.ec_obj_ring.devs local_devs = [dev for dev in self.ec_obj_ring.devs
@ -1220,7 +1221,6 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
self.assertTrue(os.path.exists(datafile_recent)) self.assertTrue(os.path.exists(datafile_recent))
self.assertTrue(os.path.exists(datafile_older)) self.assertTrue(os.path.exists(datafile_older))
self.assertTrue(os.path.exists(datafile_durable)) self.assertTrue(os.path.exists(datafile_durable))
ssync_calls = [] ssync_calls = []
with mock.patch('swift.obj.reconstructor.ssync_sender', with mock.patch('swift.obj.reconstructor.ssync_sender',
self._make_fake_ssync(ssync_calls)): self._make_fake_ssync(ssync_calls)):
@ -1229,19 +1229,19 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
for context in ssync_calls: for context in ssync_calls:
self.assertEqual(REVERT, context['job']['job_type']) self.assertEqual(REVERT, context['job']['job_type'])
self.assertTrue(True, context.get('include_non_durable')) self.assertTrue(True, context.get('include_non_durable'))
# neither nondurable should be removed yet with default purge delay # neither nondurable should be removed yet with default commit_window
# because their mtimes are too recent # because their mtimes are too recent
self.assertTrue(os.path.exists(datafile_recent)) self.assertTrue(os.path.exists(datafile_recent))
self.assertTrue(os.path.exists(datafile_older)) self.assertTrue(os.path.exists(datafile_older))
# but durable is purged # but durable is purged
self.assertFalse(os.path.exists(datafile_durable)) self.assertFalse(os.path.exists(datafile_durable), datafile_durable)
ssync_calls = [] ssync_calls = []
with mock.patch('swift.obj.reconstructor.ssync_sender', with mock.patch('swift.obj.reconstructor.ssync_sender',
self._make_fake_ssync(ssync_calls)): self._make_fake_ssync(ssync_calls)):
self.reconstructor.handoffs_only = True self.reconstructor.handoffs_only = True
# turn down the purge delay... # turn down the commit_window...
self.reconstructor.nondurable_purge_delay = 0 df_older.manager.commit_window = 0
self.reconstructor.reconstruct() self.reconstructor.reconstruct()
for context in ssync_calls: for context in ssync_calls:
self.assertEqual(REVERT, context['job']['job_type']) self.assertEqual(REVERT, context['job']['job_type'])
@ -5535,24 +5535,6 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
object_reconstructor.ObjectReconstructor( object_reconstructor.ObjectReconstructor(
{'quarantine_threshold': bad}) {'quarantine_threshold': bad})
def test_nondurable_purge_delay_conf(self):
reconstructor = object_reconstructor.ObjectReconstructor({})
self.assertEqual(60, reconstructor.nondurable_purge_delay)
reconstructor = object_reconstructor.ObjectReconstructor(
{'nondurable_purge_delay': '0'})
self.assertEqual(0, reconstructor.nondurable_purge_delay)
reconstructor = object_reconstructor.ObjectReconstructor(
{'nondurable_purge_delay': '3.2'})
self.assertEqual(3.2, reconstructor.nondurable_purge_delay)
for bad in ('-1', -1, 'auto', 'bad'):
with annotate_failure(bad):
with self.assertRaises(ValueError):
object_reconstructor.ObjectReconstructor(
{'nondurable_purge_delay': bad})
def test_quarantine_age_conf(self): def test_quarantine_age_conf(self):
# defaults to DEFAULT_RECLAIM_AGE # defaults to DEFAULT_RECLAIM_AGE
reconstructor = object_reconstructor.ObjectReconstructor({}) reconstructor = object_reconstructor.ObjectReconstructor({})