Handle removed suffix dirs the same as empty suffix dirs
When hashing suffix dirs, the directory might have gotten cleaned up while still appearing in hashes.pkl. It would even get cleaned up the next time. For example, given this really old tombstone: objects/846/3d0/d3a20154d0a828a032aba6860397c3d0/1432267961.41808.ts Prior to this commit, a call to get_hashes() would reap the old tombstone and any empty containing dirs, but the resulting hashes.pkl would still contain {'3d0': 'd41d8cd98f00b204e9800998ecf8427e'} even though there's no such suffix dir any more. ("d41d8cd98f00b204e9800998ecf8427e" is the MD5 of the empty string.) Then, a *subsequent* get_hashes() call would omit 3d0 from the resulting hash, so then hashes.pkl would no longer contain 3d0. This difference would result in a little useless replication traffic while nodes without a particular part/suffix pair, but who disagreed on how that showed up in hashes.pkl, tried to push their version of nothing to one another. Now, an empty suffix dir doesn't appear in hashes.pkl at all, whether it's for replication or EC, or whether it's for the get_hashes() call that reaped the suffix dirs or not. Co-Author: Samuel Merritt <sam@swiftstack.com> Change-Id: Ie1bfb1cc56d0fc030c6db42f97b55d140695cf1f
This commit is contained in:
parent
a5af24dd92
commit
44917c8a90
@ -626,16 +626,18 @@ class BaseDiskFileManager(object):
|
|||||||
os.rmdir(hsh_path)
|
os.rmdir(hsh_path)
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
# we just deleted this hsh_path, why are we waiting
|
|
||||||
# until the next suffix hash to raise PathNotDir so that
|
|
||||||
# this suffix will get del'd from the suffix hashes?
|
|
||||||
for filename in files:
|
for filename in files:
|
||||||
key, value = mapper(filename)
|
key, value = mapper(filename)
|
||||||
hashes[key].update(value)
|
hashes[key].update(value)
|
||||||
try:
|
try:
|
||||||
os.rmdir(path)
|
os.rmdir(path)
|
||||||
except OSError:
|
except OSError as e:
|
||||||
pass
|
if e.errno == errno.ENOENT:
|
||||||
|
raise PathNotDir()
|
||||||
|
else:
|
||||||
|
# if we remove it, pretend like it wasn't there to begin with so
|
||||||
|
# that the suffix key gets removed
|
||||||
|
raise PathNotDir()
|
||||||
return hashes
|
return hashes
|
||||||
|
|
||||||
def _hash_suffix(self, path, reclaim_age):
|
def _hash_suffix(self, path, reclaim_age):
|
||||||
|
@ -4244,19 +4244,14 @@ class TestSuffixHashes(unittest.TestCase):
|
|||||||
df_mgr = self.df_router[policy]
|
df_mgr = self.df_router[policy]
|
||||||
df = df_mgr.get_diskfile(
|
df = df_mgr.get_diskfile(
|
||||||
'sda1', '0', 'a', 'c', 'o', policy=policy)
|
'sda1', '0', 'a', 'c', 'o', policy=policy)
|
||||||
suffix = os.path.basename(os.path.dirname(df._datadir))
|
|
||||||
# scale back this tests manager's reclaim age a bit
|
# scale back this tests manager's reclaim age a bit
|
||||||
df_mgr.reclaim_age = 1000
|
df_mgr.reclaim_age = 1000
|
||||||
# write a tombstone that's just a *little* older
|
# write a tombstone that's just a *little* older
|
||||||
old_time = time() - 1001
|
old_time = time() - 1001
|
||||||
timestamp = Timestamp(old_time)
|
timestamp = Timestamp(old_time)
|
||||||
df.delete(timestamp.internal)
|
df.delete(timestamp.internal)
|
||||||
expected = {
|
|
||||||
REPL_POLICY: {suffix: EMPTY_ETAG},
|
|
||||||
EC_POLICY: {suffix: {}},
|
|
||||||
}[policy.policy_type]
|
|
||||||
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
|
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
|
||||||
self.assertEqual(hashes, expected)
|
self.assertEqual(hashes, {})
|
||||||
|
|
||||||
def test_hash_suffix_one_datafile(self):
|
def test_hash_suffix_one_datafile(self):
|
||||||
for policy in self.iter_policies():
|
for policy in self.iter_policies():
|
||||||
@ -4432,20 +4427,17 @@ class TestSuffixHashes(unittest.TestCase):
|
|||||||
hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
|
hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
|
||||||
# suffix dir cleaned up by get_hashes
|
# suffix dir cleaned up by get_hashes
|
||||||
self.assertFalse(os.path.exists(suffix_path))
|
self.assertFalse(os.path.exists(suffix_path))
|
||||||
expected = {
|
expected = {}
|
||||||
EC_POLICY: {'123': {}},
|
msg = 'expected %r != %r for policy %r' % (
|
||||||
REPL_POLICY: {'123': EMPTY_ETAG},
|
expected, hashes, policy)
|
||||||
}[policy.policy_type]
|
|
||||||
msg = 'expected %r != %r for policy %r' % (expected, hashes,
|
|
||||||
policy)
|
|
||||||
self.assertEqual(hashes, expected, msg)
|
self.assertEqual(hashes, expected, msg)
|
||||||
|
|
||||||
# now make the suffix path a file
|
# now make the suffix path a file
|
||||||
open(suffix_path, 'w').close()
|
open(suffix_path, 'w').close()
|
||||||
hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
|
hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
|
||||||
expected = {}
|
expected = {}
|
||||||
msg = 'expected %r != %r for policy %r' % (expected, hashes,
|
msg = 'expected %r != %r for policy %r' % (
|
||||||
policy)
|
expected, hashes, policy)
|
||||||
self.assertEqual(hashes, expected, msg)
|
self.assertEqual(hashes, expected, msg)
|
||||||
|
|
||||||
def test_hash_suffix_listdir_enoent(self):
|
def test_hash_suffix_listdir_enoent(self):
|
||||||
@ -4493,11 +4485,7 @@ class TestSuffixHashes(unittest.TestCase):
|
|||||||
df_mgr = self.df_router[policy]
|
df_mgr = self.df_router[policy]
|
||||||
hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
|
hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
|
||||||
policy)
|
policy)
|
||||||
expected = {
|
self.assertEqual(hashes, {})
|
||||||
REPL_POLICY: {suffix: EMPTY_ETAG},
|
|
||||||
EC_POLICY: {suffix: {}},
|
|
||||||
}[policy.policy_type]
|
|
||||||
self.assertEqual(hashes, expected)
|
|
||||||
# and hash path is quarantined
|
# and hash path is quarantined
|
||||||
self.assertFalse(os.path.exists(df._datadir))
|
self.assertFalse(os.path.exists(df._datadir))
|
||||||
# each device a quarantined directory
|
# each device a quarantined directory
|
||||||
@ -4705,12 +4693,9 @@ class TestSuffixHashes(unittest.TestCase):
|
|||||||
self.assertNotEqual(new_hashes, hashes)
|
self.assertNotEqual(new_hashes, hashes)
|
||||||
# and the empty suffix path is removed
|
# and the empty suffix path is removed
|
||||||
self.assertFalse(os.path.exists(suffix_path))
|
self.assertFalse(os.path.exists(suffix_path))
|
||||||
# ... but is hashed as "empty"
|
# ... and the suffix key is removed
|
||||||
expected = {
|
expected = {}
|
||||||
EC_POLICY: {},
|
self.assertEqual(expected, hashes)
|
||||||
REPL_POLICY: md5().hexdigest(),
|
|
||||||
}[policy.policy_type]
|
|
||||||
self.assertEqual({suffix: expected}, hashes)
|
|
||||||
|
|
||||||
def test_get_hashes_multi_file_multi_suffix(self):
|
def test_get_hashes_multi_file_multi_suffix(self):
|
||||||
paths, suffix = find_paths_with_matching_suffixes(needed_matches=2,
|
paths, suffix = find_paths_with_matching_suffixes(needed_matches=2,
|
||||||
@ -4887,10 +4872,7 @@ class TestSuffixHashes(unittest.TestCase):
|
|||||||
self.assertTrue(os.path.exists(suffix_path)) # sanity
|
self.assertTrue(os.path.exists(suffix_path)) # sanity
|
||||||
hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
|
hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
|
||||||
policy)
|
policy)
|
||||||
expected = {
|
expected = {}
|
||||||
EC_POLICY: {'123': {}},
|
|
||||||
REPL_POLICY: {'123': EMPTY_ETAG},
|
|
||||||
}[policy.policy_type]
|
|
||||||
msg = 'expected %r != %r for policy %r' % (expected, hashes,
|
msg = 'expected %r != %r for policy %r' % (expected, hashes,
|
||||||
policy)
|
policy)
|
||||||
self.assertEqual(hashes, expected, msg)
|
self.assertEqual(hashes, expected, msg)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user