diskfile: Prevent get_hashes from creating missing partition dirs
The relinker currently blindly calls get_hashes on partitions in the new, upper half of the partition space, which can cause extra handoffs to be created if you're rerunning the relink step for safety. Even if the relinker were smarter, though, there's no reason that a curious operator should end up creating empty handoffs just because they're poking devices with REPLICATE requests.

Drive-by: be a little more strict about the "suffixes" we're willing to put in hashes.invalid and read out of hashes.pkl.

Change-Id: I9aace80088cd00d02c418fe4d782b662fb5c8bcf
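A minimal sketch of the drive-by tightening, mirroring the valid_suffix helper added in this change (sample values taken from the new test_valid_suffix test): a "suffix" is accepted only if it is a string of exactly three lowercase hex characters.

def valid_suffix(value):
    # a suffix directory name is exactly three lowercase hex characters,
    # e.g. '000' or 'fff'; anything else is rejected
    if not isinstance(value, str) or len(value) != 3:
        return False
    return all(c in '0123456789abcdef' for c in value)

assert valid_suffix('000') and valid_suffix('123') and valid_suffix('fff')
assert not valid_suffix(list('123'))  # wrong type
assert not valid_suffix(123)          # not a string
assert not valid_suffix(' 12')        # space is not a hex digit
assert not valid_suffix('-00')        # '-' is not a hex digit

With invalid suffixes filtered out and missing partition directories left alone, a REPLICATE against a partition that does not exist on disk now returns an empty hash map instead of creating an empty handoff directory.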
parent 38d5e1339b
commit ade3b28636
@@ -341,6 +341,12 @@ def quarantine_renamer(device_path, corrupted_file_path):
return to_dir


def valid_suffix(value):
if not isinstance(value, str) or len(value) != 3:
return False
return all(c in '0123456789abcdef' for c in value)


def read_hashes(partition_dir):
"""
Read the existing hashes.pkl
@@ -365,9 +371,9 @@ def read_hashes(partition_dir):
pass

# Check for corrupted data that could break os.listdir()
for suffix in hashes.keys():
if not suffix.isalnum():
return {'valid': False}
if not all(valid_suffix(key) or key in ('valid', 'updated')
for key in hashes):
return {'valid': False}

# hashes.pkl w/o valid updated key is "valid" but "forever old"
hashes.setdefault('valid', True)
@@ -1301,6 +1307,8 @@ class BaseDiskFileManager(object):
self.logger.debug('Run listdir on %s', partition_path)
hashes.update((suffix, None) for suffix in recalculate)
for suffix, hash_ in list(hashes.items()):
if suffix in ('valid', 'updated'):
continue
if not hash_:
suffix_dir = join(partition_path, suffix)
try:
@@ -1548,12 +1556,14 @@ class BaseDiskFileManager(object):
if not dev_path:
raise DiskFileDeviceUnavailable()
partition_path = get_part_path(dev_path, policy, partition)
if not os.path.exists(partition_path):
mkdirs(partition_path)
suffixes = [suf for suf in suffixes or [] if valid_suffix(suf)]

if skip_rehash:
for suffix in suffixes or []:
invalidate_hash(os.path.join(partition_path, suffix))
return None
for suffix in suffixes:
self.invalidate_hash(os.path.join(partition_path, suffix))
hashes = None
elif not os.path.exists(partition_path):
hashes = {}
else:
_junk, hashes = tpool.execute(
self._get_hashes, device, partition, policy,
@@ -422,6 +422,11 @@ class TestRelinker(unittest.TestCase):
self.assertEqual('foo', hashes[self._hash[-3:]])
self.assertFalse(os.path.exists(
os.path.join(self.part_dir, 'hashes.invalid')))
# Check that only the dirty partition in upper half of next part power
# has been created and rehashed
other_next_part = self.next_part ^ 1
other_next_part_dir = os.path.join(self.objects, str(other_next_part))
self.assertFalse(os.path.exists(other_next_part_dir))

def test_relink_link_already_exists(self):
self.rb.prepare_increase_partition_power()
@@ -6743,6 +6743,15 @@ class TestSuffixHashes(unittest.TestCase):
if suffix_dir != suffix_dir2:
return df2

def test_valid_suffix(self):
self.assertTrue(diskfile.valid_suffix('000'))
self.assertTrue(diskfile.valid_suffix('123'))
self.assertTrue(diskfile.valid_suffix('fff'))
self.assertFalse(diskfile.valid_suffix(list('123')))
self.assertFalse(diskfile.valid_suffix(123))
self.assertFalse(diskfile.valid_suffix(' 12'))
self.assertFalse(diskfile.valid_suffix('-00'))

def check_cleanup_ondisk_files(self, policy, input_files, output_files):
orig_unlink = os.unlink
file_list = list(input_files)
@@ -7104,7 +7113,12 @@ class TestSuffixHashes(unittest.TestCase):
def test_invalidate_hash_empty_file_exists(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
pkl_path = os.path.join(part_path, diskfile.HASH_FILE)
self.assertTrue(os.path.exists(pkl_path))
self.assertEqual(hashes, {})
# create something to hash
df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@@ -7127,6 +7141,7 @@ class TestSuffixHashes(unittest.TestCase):
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
inv_file = os.path.join(
part_path, diskfile.HASH_INVALIDATIONS_FILE)
hash_file = os.path.join(
@@ -7162,9 +7177,14 @@ class TestSuffixHashes(unittest.TestCase):
# get_hashes call
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
if existing:
mkdirs(part_path)
# force hashes.pkl to exist
df_mgr.get_hashes('sda1', '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
orig_listdir = os.listdir
df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
policy=policy)
@@ -7184,10 +7204,15 @@ class TestSuffixHashes(unittest.TestCase):
df2.delete(self.ts())
return result

if not existing:
self.assertFalse(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
with mock.patch('swift.obj.diskfile.os.listdir',
mock_listdir):
# creates pkl file
# creates pkl file if not already there
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))

# second suffix added after directory listing, it's added later
self.assertIn(suffix, hashes)
@@ -7216,7 +7241,12 @@ class TestSuffixHashes(unittest.TestCase):
orig_hash_suffix = df_mgr._hash_suffix
if existing:
# create hashes.pkl
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
df_mgr.get_hashes('sda1', '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))

df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
policy=policy)
@@ -7257,7 +7287,7 @@ class TestSuffixHashes(unittest.TestCase):
return result

with mock.patch.object(df_mgr, '_hash_suffix', mock_hash_suffix):
# creates pkl file and repeats listing when pkl modified
# repeats listing when pkl modified
hashes = df_mgr.get_hashes('sda1', '0', [], policy)

# first get_hashes should complete with suffix1 state
@@ -7452,6 +7482,12 @@ class TestSuffixHashes(unittest.TestCase):
# verify that if consolidate_hashes raises an exception then suffixes
# are rehashed and a hashes.pkl is written
for policy in self.iter_policies():
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
invalidations_file = os.path.join(
part_path, diskfile.HASH_INVALIDATIONS_FILE)

self.logger.clear()
df_mgr = self.df_router[policy]
# create something to hash
@@ -7461,10 +7497,14 @@ class TestSuffixHashes(unittest.TestCase):
# avoid getting O_TMPFILE warning in logs
if not utils.o_tmpfile_in_tmpdir_supported():
df.manager.use_linkat = False

self.assertFalse(os.path.exists(part_path))
df.delete(self.ts())
self.assertTrue(os.path.exists(invalidations_file))
suffix_dir = os.path.dirname(df._datadir)
suffix = os.path.basename(suffix_dir)
# no pre-existing hashes.pkl
self.assertFalse(os.path.exists(hashes_file))
with mock.patch.object(df_mgr, '_hash_suffix',
return_value='fake hash'):
with mock.patch.object(df_mgr, 'consolidate_hashes',
@@ -7473,10 +7513,6 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual({suffix: 'fake hash'}, hashes)

# sanity check hashes file
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
hashes_file = os.path.join(part_path, diskfile.HASH_FILE)

with open(hashes_file, 'rb') as f:
found_hashes = pickle.load(f)
found_hashes.pop('updated')
@@ -7497,9 +7533,6 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual({suffix: 'new fake hash'}, hashes)

# sanity check hashes file
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
hashes_file = os.path.join(part_path, diskfile.HASH_FILE)
with open(hashes_file, 'rb') as f:
found_hashes = pickle.load(f)
found_hashes.pop('updated')
@@ -8185,6 +8218,9 @@ class TestSuffixHashes(unittest.TestCase):
def test_hash_suffix_listdir_enoent(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)  # ensure we'll bother writing a pkl at all
orig_listdir = os.listdir
listdir_calls = []

@@ -8203,9 +8239,6 @@ class TestSuffixHashes(unittest.TestCase):
# does not exist!
df_mgr.get_hashes('sda1', '0', ['123'], policy)

part_path = os.path.join(self.devices, 'sda1',
diskfile.get_data_dir(policy), '0')

self.assertEqual(listdir_calls, [
# part path gets created automatically
(part_path, True),
@@ -8340,7 +8373,7 @@ class TestSuffixHashes(unittest.TestCase):

# get_hashes tests - behaviors

def test_get_hashes_creates_partition_and_pkl(self):
def test_get_hashes_does_not_create_partition(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@@ -8348,6 +8381,18 @@ class TestSuffixHashes(unittest.TestCase):
self.assertEqual(hashes, {})
part_path = os.path.join(
self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
self.assertFalse(os.path.exists(part_path))

def test_get_hashes_creates_pkl(self):
# like above, but -- if the partition already exists, make the pickle
for policy in self.iter_policies():
part_path = os.path.join(
self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
df_mgr = self.df_router[policy]
hashes = df_mgr.get_hashes(self.existing_device, '0', [],
policy)
self.assertEqual(hashes, {})
self.assertTrue(os.path.exists(part_path))
hashes_file = os.path.join(part_path,
diskfile.HASH_FILE)
@@ -8438,6 +8483,7 @@ class TestSuffixHashes(unittest.TestCase):
# create an empty stale pickle
part_path = os.path.join(
self.devices, 'sda1', diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
hashes_file = os.path.join(part_path,
diskfile.HASH_FILE)
hashes = df_mgr.get_hashes(self.existing_device, '0', [], policy)
@@ -8641,6 +8687,7 @@ class TestSuffixHashes(unittest.TestCase):
suffix2 = os.path.basename(os.path.dirname(df2._datadir))
part_path = os.path.dirname(os.path.dirname(
os.path.join(df._datadir)))
mkdirs(part_path)
hashfile_path = os.path.join(part_path, diskfile.HASH_FILE)
# create hashes.pkl
hashes = df_mgr.get_hashes(self.existing_device, '0', [],
@@ -8745,8 +8792,13 @@ class TestSuffixHashes(unittest.TestCase):
def test_get_hashes_modified_recursive_retry(self):
for policy in self.iter_policies():
df_mgr = self.df_router[policy]
part_path = os.path.join(self.devices, self.existing_device,
diskfile.get_data_dir(policy), '0')
mkdirs(part_path)
# first create an empty pickle
df_mgr.get_hashes(self.existing_device, '0', [], policy)
self.assertTrue(os.path.exists(os.path.join(
part_path, diskfile.HASH_FILE)))
non_local = {'suffix_count': 1}
calls = []

@@ -8788,19 +8840,19 @@ class TestHashesHelpers(unittest.TestCase):
rmtree(self.testdir, ignore_errors=1)

def test_read_legacy_hashes(self):
hashes = {'stub': 'fake'}
hashes = {'fff': 'fake'}
hashes_file = os.path.join(self.testdir, diskfile.HASH_FILE)
with open(hashes_file, 'wb') as f:
pickle.dump(hashes, f)
expected = {
'stub': 'fake',
'fff': 'fake',
'updated': -1,
'valid': True,
}
self.assertEqual(expected, diskfile.read_hashes(self.testdir))

def test_write_hashes_valid_updated(self):
hashes = {'stub': 'fake', 'valid': True}
hashes = {'888': 'fake', 'valid': True}
now = time()
with mock.patch('swift.obj.diskfile.time.time', return_value=now):
diskfile.write_hashes(self.testdir, hashes)
@@ -8808,7 +8860,7 @@ class TestHashesHelpers(unittest.TestCase):
with open(hashes_file, 'rb') as f:
data = pickle.load(f)
expected = {
'stub': 'fake',
'888': 'fake',
'updated': now,
'valid': True,
}
@@ -8843,7 +8895,7 @@ class TestHashesHelpers(unittest.TestCase):
self.assertEqual(expected, data)

def test_read_write_valid_hashes_mutation_and_transative_equality(self):
hashes = {'stub': 'fake', 'valid': True}
hashes = {'000': 'fake', 'valid': True}
diskfile.write_hashes(self.testdir, hashes)
# write_hashes mutates the passed in hashes, it adds the updated key
self.assertIn('updated', hashes)
@@ -7158,11 +7158,11 @@ class TestObjectController(unittest.TestCase):
def my_tpool_execute(func, *args, **kwargs):
return func(*args, **kwargs)

was_get_hashes = diskfile.DiskFileManager._get_hashes
was_tpool_exe = tpool.execute
try:
diskfile.DiskFileManager._get_hashes = fake_get_hashes
tpool.execute = my_tpool_execute
with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
fake_get_hashes), \
mock.patch.object(tpool, 'execute', my_tpool_execute), \
mock.patch('swift.obj.diskfile.os.path.exists',
return_value=True):
req = Request.blank('/sda1/p/',
environ={'REQUEST_METHOD': 'REPLICATE'},
headers={})
@@ -7170,9 +7170,6 @@ class TestObjectController(unittest.TestCase):
self.assertEqual(resp.status_int, 200)
p_data = pickle.loads(resp.body)
self.assertEqual(p_data, {1: 2})
finally:
tpool.execute = was_tpool_exe
diskfile.DiskFileManager._get_hashes = was_get_hashes

def test_REPLICATE_pickle_protocol(self):

@@ -7182,25 +7179,22 @@ class TestObjectController(unittest.TestCase):
def my_tpool_execute(func, *args, **kwargs):
return func(*args, **kwargs)

was_get_hashes = diskfile.DiskFileManager._get_hashes
was_tpool_exe = tpool.execute
try:
diskfile.DiskFileManager._get_hashes = fake_get_hashes
tpool.execute = my_tpool_execute
with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
fake_get_hashes), \
mock.patch.object(tpool, 'execute', my_tpool_execute), \
mock.patch('swift.obj.server.pickle.dumps') as fake_pickle, \
mock.patch('swift.obj.diskfile.os.path.exists',
return_value=True):
req = Request.blank('/sda1/p/',
environ={'REQUEST_METHOD': 'REPLICATE'},
headers={})
with mock.patch('swift.obj.server.pickle.dumps') as fake_pickle:
fake_pickle.return_value = b''
req.get_response(self.object_controller)
# This is the key assertion: starting in Python 3.0, the
# default protocol version is 3, but such pickles can't be read
# on Python 2. As long as we may need to talk to a Python 2
# process, we need to cap our protocol version.
fake_pickle.assert_called_once_with({1: 2}, protocol=2)
finally:
tpool.execute = was_tpool_exe
diskfile.DiskFileManager._get_hashes = was_get_hashes
fake_pickle.return_value = b''
req.get_response(self.object_controller)
# This is the key assertion: starting in Python 3.0, the
# default protocol version is 3, but such pickles can't be read
# on Python 2. As long as we may need to talk to a Python 2
# process, we need to cap our protocol version.
fake_pickle.assert_called_once_with({1: 2}, protocol=2)

def test_REPLICATE_timeout(self):

@@ -7210,18 +7204,17 @@ class TestObjectController(unittest.TestCase):
def my_tpool_execute(func, *args, **kwargs):
return func(*args, **kwargs)

was_get_hashes = diskfile.DiskFileManager._get_hashes
was_tpool_exe = tpool.execute
try:
with mock.patch.object(diskfile.DiskFileManager, '_get_hashes',
fake_get_hashes), \
mock.patch.object(tpool, 'execute', my_tpool_execute), \
mock.patch('swift.obj.diskfile.os.path.exists',
return_value=True):
diskfile.DiskFileManager._get_hashes = fake_get_hashes
tpool.execute = my_tpool_execute
req = Request.blank('/sda1/p/',
environ={'REQUEST_METHOD': 'REPLICATE'},
headers={})
self.assertRaises(Timeout, self.object_controller.REPLICATE, req)
finally:
tpool.execute = was_tpool_exe
diskfile.DiskFileManager._get_hashes = was_get_hashes

def test_REPLICATE_reclaims_tombstones(self):
conf = {'devices': self.testdir, 'mount_check': False,