reconstructor: make quarantine delay configurable
Previously the reconstructor would quarantine isolated durable fragments that were more than reclaim_age old. This patch adds a quarantine_age option for the reconstructor which defaults to reclaim_age but can be used to configure the age that a fragment must reach before quarantining. Change-Id: I867f3ea0cf60620c576da0c1f2c65cec2cf19aa0
This commit is contained in:
parent
2117a32b99
commit
2fd5b87dc5
@ -495,6 +495,19 @@ ionice_priority None I/O scheduling priority o
|
||||
Work only with ionice_class.
|
||||
Ignored if IOPRIO_CLASS_IDLE
|
||||
is set.
|
||||
quarantine_threshold 0 The reconstructor may quarantine
|
||||
stale isolated fragments
|
||||
when it fails to fetch
|
||||
more than the
|
||||
quarantine_threshold
|
||||
number of fragments
|
||||
(including the stale
|
||||
fragment) during an
|
||||
attempt to reconstruct.
|
||||
quarantine_age reclaim_age Fragments are not quarantined
|
||||
until they are older than
|
||||
quarantine_age, which defaults
|
||||
to the value of reclaim_age.
|
||||
=========================== ======================== ================================
|
||||
|
||||
****************
|
||||
|
@ -412,9 +412,12 @@ use = egg:swift#recon
|
||||
# Note: the quarantine_threshold applies equally to all policies, but for each
|
||||
# policy it is effectively capped at (ec_ndata - 1) so that a fragment is never
|
||||
# quarantined when sufficient fragments exist to reconstruct the object.
|
||||
# Fragments are not quarantined until they are older than the reclaim_age.
|
||||
# quarantine_threshold = 0
|
||||
#
|
||||
# Fragments are not quarantined until they are older than
|
||||
# quarantine_age, which defaults to the value of reclaim_age.
|
||||
# quarantine_age =
|
||||
#
|
||||
# Sets the maximum number of nodes to which requests will be made before
|
||||
# quarantining a fragment. You can use '* replicas' at the end to have it use
|
||||
# the number given times the number of replicas for the ring being used for the
|
||||
|
@ -44,7 +44,7 @@ from swift.obj.ssync_sender import Sender as ssync_sender
|
||||
from swift.common.http import HTTP_OK, HTTP_NOT_FOUND, \
|
||||
HTTP_INSUFFICIENT_STORAGE
|
||||
from swift.obj.diskfile import DiskFileRouter, get_data_dir, \
|
||||
get_tmp_dir
|
||||
get_tmp_dir, DEFAULT_RECLAIM_AGE
|
||||
from swift.common.storage_policy import POLICIES, EC_POLICY
|
||||
from swift.common.exceptions import ConnectionTimeout, DiskFileError, \
|
||||
SuffixSyncError, PartitionLockTimeout
|
||||
@ -236,6 +236,9 @@ class ObjectReconstructor(Daemon):
|
||||
'rebuild_handoff_node_count', 2))
|
||||
self.quarantine_threshold = non_negative_int(
|
||||
conf.get('quarantine_threshold', 0))
|
||||
self.quarantine_age = int(
|
||||
conf.get('quarantine_age',
|
||||
conf.get('reclaim_age', DEFAULT_RECLAIM_AGE)))
|
||||
self.request_node_count = config_request_node_count_value(
|
||||
conf.get('request_node_count', '2 * replicas'))
|
||||
self.nondurable_purge_delay = non_negative_float(
|
||||
@ -507,7 +510,7 @@ class ObjectReconstructor(Daemon):
|
||||
# worth more investigation
|
||||
return False
|
||||
|
||||
if time.time() - float(local_timestamp) <= df.manager.reclaim_age:
|
||||
if time.time() - float(local_timestamp) <= self.quarantine_age:
|
||||
# If the fragment has not yet passed reclaim age then it is
|
||||
# likely that a tombstone will be reverted to this node, or
|
||||
# neighbor frags will get reverted from handoffs to *other* nodes
|
||||
|
@ -425,7 +425,7 @@ class TestReconstructorRebuild(ECProbeTest):
|
||||
for conf_index in self.configs['object-reconstructor'].keys():
|
||||
reconstructor = self.run_custom_daemon(
|
||||
ObjectReconstructor, 'object-reconstructor', conf_index,
|
||||
{'reclaim_age': '0'})
|
||||
{'quarantine_age': '0'})
|
||||
logger = reconstructor.logger.logger
|
||||
error_lines.append(logger.get_lines_for_level('error'))
|
||||
warning_lines.append(logger.get_lines_for_level('warning'))
|
||||
@ -462,7 +462,7 @@ class TestReconstructorRebuild(ECProbeTest):
|
||||
for conf_index in self.configs['object-reconstructor'].keys():
|
||||
reconstructor = self.run_custom_daemon(
|
||||
ObjectReconstructor, 'object-reconstructor', conf_index,
|
||||
{'reclaim_age': '0', 'quarantine_threshold': '1'})
|
||||
{'quarantine_age': '0', 'quarantine_threshold': '1'})
|
||||
logger = reconstructor.logger.logger
|
||||
error_lines.append(logger.get_lines_for_level('error'))
|
||||
warning_lines.append(logger.get_lines_for_level('warning'))
|
||||
@ -515,7 +515,7 @@ class TestReconstructorRebuild(ECProbeTest):
|
||||
for conf_index in self.configs['object-reconstructor'].keys():
|
||||
reconstructor = self.run_custom_daemon(
|
||||
ObjectReconstructor, 'object-reconstructor', conf_index,
|
||||
{'reclaim_age': '0', 'quarantine_threshold': '1'})
|
||||
{'quarantine_age': '0', 'quarantine_threshold': '1'})
|
||||
logger = reconstructor.logger.logger
|
||||
error_lines.append(logger.get_lines_for_level('error'))
|
||||
warning_lines.append(logger.get_lines_for_level('warning'))
|
||||
|
@ -5407,6 +5407,37 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
|
||||
object_reconstructor.ObjectReconstructor(
|
||||
{'nondurable_purge_delay': bad})
|
||||
|
||||
def test_quarantine_age_conf(self):
|
||||
# defaults to DEFAULT_RECLAIM_AGE
|
||||
reconstructor = object_reconstructor.ObjectReconstructor({})
|
||||
self.assertEqual(604800, reconstructor.quarantine_age)
|
||||
|
||||
reconstructor = object_reconstructor.ObjectReconstructor(
|
||||
{'quarantine_age': '0'})
|
||||
self.assertEqual(0, reconstructor.quarantine_age)
|
||||
|
||||
reconstructor = object_reconstructor.ObjectReconstructor(
|
||||
{'quarantine_age': '1'})
|
||||
self.assertEqual(1, reconstructor.quarantine_age)
|
||||
|
||||
# trumps reclaim_age
|
||||
reconstructor = object_reconstructor.ObjectReconstructor(
|
||||
{'quarantine_age': '1', 'reclaim_age': 0})
|
||||
self.assertEqual(1, reconstructor.quarantine_age)
|
||||
reconstructor = object_reconstructor.ObjectReconstructor(
|
||||
{'quarantine_age': '1', 'reclaim_age': 2})
|
||||
self.assertEqual(1, reconstructor.quarantine_age)
|
||||
|
||||
reconstructor = object_reconstructor.ObjectReconstructor(
|
||||
{'quarantine_age': 2.2})
|
||||
self.assertEqual(2, reconstructor.quarantine_age)
|
||||
|
||||
for bad in ('1.1', 'auto', 'bad'):
|
||||
with annotate_failure(bad):
|
||||
with self.assertRaises(ValueError):
|
||||
object_reconstructor.ObjectReconstructor(
|
||||
{'quarantine_age': bad})
|
||||
|
||||
def test_request_node_count_conf(self):
|
||||
# default is 1 * replicas
|
||||
reconstructor = object_reconstructor.ObjectReconstructor({})
|
||||
@ -5600,6 +5631,18 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
|
||||
self._assert_diskfile_quarantined()
|
||||
self._verify_error_lines(2, other_responses, 2)
|
||||
|
||||
def test_reconstruct_fa_quarantine_threshold_two_with_quarantine_age(self):
|
||||
num_other_resps = 2 * self.policy.object_ring.replicas - 3
|
||||
other_responses = [(404, None, None)] * num_other_resps
|
||||
conf = {'quarantine_threshold': 2,
|
||||
'quarantine_age': 0, # quarantine age trumps reclaim age
|
||||
'reclaim_age': 1000}
|
||||
exc = self._do_test_reconstruct_insufficient_frags(
|
||||
conf, 2, other_responses)
|
||||
self.assertIsInstance(exc, DiskFileQuarantined)
|
||||
self._assert_diskfile_quarantined()
|
||||
self._verify_error_lines(2, other_responses, 2)
|
||||
|
||||
def test_reconstruct_fa_no_quarantine_more_than_threshold_frags(self):
|
||||
# default config
|
||||
num_other_resps = self.policy.object_ring.replicas - 2
|
||||
@ -5727,6 +5770,15 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
|
||||
self._assert_diskfile_not_quarantined()
|
||||
self._verify_error_lines(1, other_responses, 1)
|
||||
|
||||
exc = self._do_test_reconstruct_insufficient_frags(
|
||||
{'quarantine_threshold': 1,
|
||||
'quarantine_age': 10000, # quarantine_age trumps reclaim_age
|
||||
'reclaim_age': 0},
|
||||
1, other_responses)
|
||||
self.assertIsInstance(exc, DiskFileError)
|
||||
self._assert_diskfile_not_quarantined()
|
||||
self._verify_error_lines(1, other_responses, 1)
|
||||
|
||||
exc = self._do_test_reconstruct_insufficient_frags(
|
||||
{'quarantine_threshold': 1}, # default reclaim_age
|
||||
1, other_responses)
|
||||
|
Loading…
Reference in New Issue
Block a user