diskfile: Treat EUCLEAN like ENODATA

Found a new way filesystems can break in prod:

   object-auditor: ERROR Trying to audit .../7a7c4af06d2616f23eda274e2ad9c948:
   Traceback (most recent call last):
     File ".../swift/obj/diskfile.py", line 2630, in open
       files = os.listdir(self._datadir)
   OSError: [Errno 117] Structure needs cleaning: .../7a7c4af06d2616f23eda274e2ad9c948

   During handling of the above exception, another exception occurred:

   Traceback (most recent call last):
     File ".../swift/obj/auditor.py", line 238, in failsafe_object_audit
       self.object_audit(location)
     File ".../swift/obj/auditor.py", line 261, in object_audit
       with df.open(modernize=True):
     File ".../swift/obj/diskfile.py", line 2657, in open
       "Error listing directory %s: %s" % (self._datadir, err))
   swift.common.exceptions.DiskFileError:
   Error listing directory .../7a7c4af06d2616f23eda274e2ad9c948:
   [Errno 117] Structure needs cleaning: '.../7a7c4af06d2616f23eda274e2ad9c948'

Change-Id: If6731a6b6b16fbc4eebb61254ed10b53e1767a0f
This commit is contained in:
Tim Burke 2024-08-28 11:46:28 -07:00
parent 6a0153f545
commit 5e07963548
3 changed files with 101 additions and 19 deletions

View File

@ -600,7 +600,8 @@ def object_audit_location_generator(devices, datadir, mount_check=True,
try:
suffixes = listdir(part_path)
except OSError as e:
if e.errno not in (errno.ENOTDIR, errno.ENODATA):
if e.errno not in (errno.ENOTDIR, errno.ENODATA,
errno.EUCLEAN):
raise
continue
for asuffix in suffixes:
@ -608,7 +609,8 @@ def object_audit_location_generator(devices, datadir, mount_check=True,
try:
hashes = listdir(suff_path)
except OSError as e:
if e.errno not in (errno.ENOTDIR, errno.ENODATA):
if e.errno not in (errno.ENOTDIR, errno.ENODATA,
errno.EUCLEAN):
raise
continue
for hsh in hashes:
@ -1214,7 +1216,7 @@ class BaseDiskFileManager(object):
'it is not a directory', {'hsh_path': hsh_path,
'quar_path': quar_path})
continue
elif err.errno == errno.ENODATA:
elif err.errno in (errno.ENODATA, errno.EUCLEAN):
try:
# We've seen cases where bad sectors lead to ENODATA
# here; use a similar hack as above
@ -1569,7 +1571,7 @@ class BaseDiskFileManager(object):
'it is not a directory', {'object_path': object_path,
'quar_path': quar_path})
raise DiskFileNotExist()
elif err.errno == errno.ENODATA:
elif err.errno in (errno.ENODATA, errno.EUCLEAN):
try:
# We've seen cases where bad sectors lead to ENODATA here;
# use a similar hack as above
@ -2610,7 +2612,7 @@ class BaseDiskFile(object):
# want this one file and not its parent.
os.path.join(self._datadir, "made-up-filename"),
"Expected directory, found file at %s" % self._datadir)
elif err.errno == errno.ENODATA:
elif err.errno in (errno.ENODATA, errno.EUCLEAN):
try:
# We've seen cases where bad sectors lead to ENODATA here
raise self._quarantine(
@ -2640,7 +2642,7 @@ class BaseDiskFile(object):
self._fp = self._construct_from_data_file(
current_time=current_time, modernize=modernize, **file_info)
except IOError as e:
if e.errno == errno.ENODATA:
if e.errno in (errno.ENODATA, errno.EUCLEAN):
raise self._quarantine(
file_info['data_file'],
"Failed to open %s: %s" % (file_info['data_file'], e))

View File

@ -619,7 +619,8 @@ class ObjectReplicator(Daemon):
try:
tpool.execute(shutil.rmtree, path)
except OSError as e:
if e.errno not in (errno.ENOENT, errno.ENOTEMPTY, errno.ENODATA):
if e.errno not in (errno.ENOENT, errno.ENOTEMPTY, errno.ENODATA,
errno.EUCLEAN):
# Don't worry if there was a race to create or delete,
# or some disk corruption that happened after the sync
raise

View File

@ -1821,6 +1821,27 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
('/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900/' +
'made-up-filename'))
def test_get_diskfile_from_hash_unclean(self):
self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
with mock.patch(self._manager_mock('diskfile_cls')), \
mock.patch(self._manager_mock(
'cleanup_ondisk_files')) as cleanup, \
mock.patch('swift.obj.diskfile.read_metadata') as readmeta, \
mock.patch(self._manager_mock(
'quarantine_renamer')) as quarantine_renamer:
osexc = OSError()
osexc.errno = errno.EUCLEAN
cleanup.side_effect = osexc
readmeta.return_value = {'name': '/a/c/o'}
self.assertRaises(
DiskFileNotExist,
self.df_mgr.get_diskfile_from_hash,
'dev', '9', '9a7175077c01a23ade5956b8a2bba900', POLICIES[0])
quarantine_renamer.assert_called_once_with(
'/srv/dev/',
('/srv/dev/objects/9/900/9a7175077c01a23ade5956b8a2bba900/' +
'made-up-filename'))
def test_get_diskfile_from_hash_no_dir(self):
self.df_mgr.get_dev_path = mock.MagicMock(return_value='/srv/dev/')
with mock.patch(self._manager_mock('diskfile_cls')), \
@ -4896,6 +4917,21 @@ class DiskFileMixin(BaseDiskFileTestMixin):
'Failed to open %s: [Errno %d] -ENODATA fool!'
% (df._data_file, errno.ENODATA), str(err.exception))
def test_quarantine_ioerror_euclean(self):
df = self._get_open_disk_file()
def my_open(filename, mode, *args, **kwargs):
if mode == 'rb':
raise IOError(errno.EUCLEAN, '-EUCLEAN fool!')
return open(filename, mode, *args, **kwargs)
with mock.patch('swift.obj.diskfile.open', my_open):
with self.assertRaises(DiskFileQuarantined) as err:
df.open()
self.assertEqual(
'Failed to open %s: [Errno %d] -EUCLEAN fool!'
% (df._data_file, errno.EUCLEAN), str(err.exception))
def test_quarantine_hashdir_not_a_directory(self):
df, df_data = self._create_test_file(b'1234567890', account="abc",
container='123', obj='xyz')
@ -4914,19 +4950,21 @@ class DiskFileMixin(BaseDiskFileTestMixin):
self.assertTrue(os.path.exists(os.path.dirname(hashdir)))
def test_quarantine_hashdir_not_listable(self):
df, df_data = self._create_test_file(b'1234567890', account="abc",
container='123', obj='xyz')
hashdir = df._datadir
df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123',
'xyz', policy=POLICIES.legacy)
with mock.patch('os.listdir',
side_effect=OSError(errno.ENODATA, 'nope')):
self.assertRaises(DiskFileQuarantined, df.open)
for eno in (errno.ENODATA, errno.EUCLEAN):
df, df_data = self._create_test_file(b'1234567890', account="abc",
container='123', obj='xyz')
hashdir = df._datadir
df = self.df_mgr.get_diskfile(
self.existing_device, '0', 'abc', '123', 'xyz',
policy=POLICIES.legacy)
with mock.patch('os.listdir',
side_effect=OSError(eno, 'nope')):
self.assertRaises(DiskFileQuarantined, df.open)
# make sure the right thing got quarantined; the suffix dir should not
# have moved, as that could have many objects in it
self.assertFalse(os.path.exists(hashdir))
self.assertTrue(os.path.exists(os.path.dirname(hashdir)))
# make sure the right thing got quarantined; the suffix dir should
# not have moved, as that could have many objects in it
self.assertFalse(os.path.exists(hashdir))
self.assertTrue(os.path.exists(os.path.dirname(hashdir)))
def test_create_prealloc(self):
df = self.df_mgr.get_diskfile(self.existing_device, '0', 'abc', '123',
@ -8841,6 +8879,11 @@ class TestSuffixHashes(unittest.TestCase):
"b54", errno.ENODATA)):
self.assertEqual(expected, list_locations(tmpdir, 'objects'))
diskfile.clear_auditor_status(tmpdir, 'objects')
# EUCLEAN too
with mock.patch('os.listdir', splode_if_endswith(
"b54", errno.EUCLEAN)):
self.assertEqual(expected, list_locations(tmpdir, 'objects'))
diskfile.clear_auditor_status(tmpdir, 'objects')
# sanity the other way
expected = [(hashdir1, 'sdf', '2607', POLICIES[0])]
@ -8848,6 +8891,10 @@ class TestSuffixHashes(unittest.TestCase):
"2809", errno.ENODATA)):
self.assertEqual(expected, list_locations(tmpdir, 'objects'))
diskfile.clear_auditor_status(tmpdir, 'objects')
with mock.patch('os.listdir', splode_if_endswith(
"2809", errno.EUCLEAN)):
self.assertEqual(expected, list_locations(tmpdir, 'objects'))
diskfile.clear_auditor_status(tmpdir, 'objects')
with mock.patch('os.listdir', splode_if_endswith(
"afd", errno.ENOTDIR)):
self.assertEqual(expected, list_locations(tmpdir, 'objects'))
@ -8885,6 +8932,38 @@ class TestSuffixHashes(unittest.TestCase):
)
self.assertTrue(os.path.exists(quarantine_path))
def test_hash_suffix_cleanup_ondisk_files_euclean_quarantined(self):
for policy in self.iter_policies():
df = self.df_router[policy].get_diskfile(
self.existing_device, '0', 'a', 'c', 'o', policy=policy)
# make everything down to the hash directory
os.makedirs(df._datadir)
suffix = os.path.basename(os.path.dirname(df._datadir))
orig_listdir = os.listdir
def fake_listdir(path):
if path == df._datadir:
raise OSError(errno.EUCLEAN, 'nope')
return orig_listdir(path)
df_mgr = self.df_router[policy]
with mock.patch('os.listdir', side_effect=fake_listdir):
hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
policy)
self.assertEqual(hashes, {})
# and hash path is quarantined
self.assertFalse(os.path.exists(df._datadir))
# each device a quarantined directory
quarantine_base = os.path.join(self.devices,
self.existing_device, 'quarantined')
# the quarantine path is...
quarantine_path = os.path.join(
quarantine_base, # quarantine root
diskfile.get_data_dir(policy), # per-policy data dir
os.path.basename(df._datadir) # name of quarantined file
)
self.assertTrue(os.path.exists(quarantine_path))
def test_hash_suffix_cleanup_ondisk_files_other_oserror(self):
for policy in self.iter_policies():
timestamp = self.ts()