relinker: Stop reporting errors about reaped/cleaned-up datafiles

This isn't at all exceptional, and operators shouldn't need to bump up
reclaim_age to avoid false-positive errors in their logs.

Change-Id: I644d7afee393afb703eb37f19749204ed98a37db
This commit is contained in:
Tim Burke 2021-03-04 12:12:20 -08:00 committed by Matthew Oliver
parent 08ee65d6e1
commit b8aefd750e
2 changed files with 59 additions and 5 deletions

View File

@ -362,6 +362,29 @@ def cleanup(conf, logger, device):
locations = RateLimitedIterator(
locations, conf['files_per_second'])
for fname, device, partition in locations:
# Relinking will take a while; we'll likely have some tombstones
# transition to being reapable during the process. When we open
# them in the new partition space, they'll get cleaned up and
# raise DiskFileNotExist. Without replicators running, this is
# likely the first opportunity for clean-up. To avoid a false-
# positive error below, open up in the old space *first* -- if
# that raises DiskFileNotExist, ignore it and move on.
loc = diskfile.AuditLocation(
os.path.dirname(fname), device, partition, policy)
df = diskfile_mgr.get_diskfile_from_audit_location(loc)
try:
with df.open():
pass
except DiskFileQuarantined as exc:
logger.warning('ERROR Object %(obj)s failed audit and was'
' quarantined: %(err)r',
{'obj': loc, 'err': exc})
errors += 1
continue
except DiskFileNotExist:
logger.debug('Found reapable on-disk file: %s', fname)
continue
expected_fname = replace_partition_in_path(fname, part_power)
if fname == expected_fname:
continue

View File

@ -933,7 +933,7 @@ class TestRelinker(unittest.TestCase):
def test_cleanup_deleted(self):
self._common_test_cleanup()
# Pretend the object got deleted inbetween and there is a tombstone
# Pretend the object got deleted in between and there is a tombstone
fname_ts = self.expected_file[:-4] + "ts"
os.rename(self.expected_file, fname_ts)
@ -944,6 +944,35 @@ class TestRelinker(unittest.TestCase):
'--skip-mount',
]))
def test_cleanup_reapable(self):
# relink a tombstone
fname_ts = self.objname[:-4] + "ts"
os.rename(self.objname, fname_ts)
self.objname = fname_ts
self.expected_file = self.expected_file[:-4] + "ts"
self._common_test_cleanup()
self.assertTrue(os.path.exists(self.expected_file)) # sanity check
with mock.patch.object(relinker.logging, 'getLogger',
return_value=self.logger), \
mock.patch('time.time', return_value=1e11): # far, far future
self.assertEqual(0, relinker.main([
'cleanup',
'--swift-dir', self.testdir,
'--devices', self.devices,
'--skip-mount',
]))
self.assertEqual(self.logger.get_lines_for_level('error'), [])
self.assertEqual(self.logger.get_lines_for_level('warning'), [])
self.assertIn(
"Found reapable on-disk file: %s" % self.objname,
self.logger.get_lines_for_level('debug'))
# self.expected_file may or may not exist; it depends on whether the
# object was in the upper-half of the partition space. ultimately,
# that part doesn't really matter much -- but we definitely *don't*
# want self.objname around polluting the old partition space.
self.assertFalse(os.path.exists(self.objname))
def test_cleanup_doesnotexist(self):
self._common_test_cleanup()
@ -979,11 +1008,13 @@ class TestRelinker(unittest.TestCase):
'--skip-mount',
]))
log_lines = self.logger.get_lines_for_level('warning')
self.assertEqual(2, len(log_lines),
'Expected 2 log lines, got %r' % log_lines)
# Once for the cleanup...
self.assertEqual(3, len(log_lines),
'Expected 3 log lines, got %r' % log_lines)
# Once to check the old partition space...
self.assertIn('Bad fragment index: None', log_lines[0])
# ... then again for the rehash
# ... again for the new partition ...
self.assertIn('Bad fragment index: None', log_lines[0])
# ... and one last time for the rehash
self.assertIn('Bad fragment index: None', log_lines[1])
def test_cleanup_quarantined(self):