Handle removed suffix dirs the same as empty suffix dirs

When hashing suffix dirs, the directory might have gotten cleaned up while
still appearing in hashes.pkl.  It would even get cleaned up the next time.

For example, given this really old tombstone:

    objects/846/3d0/d3a20154d0a828a032aba6860397c3d0/1432267961.41808.ts

Prior to this commit, a call to get_hashes() would reap the old
tombstone and any empty containing dirs, but the resulting hashes.pkl
would still contain {'3d0': 'd41d8cd98f00b204e9800998ecf8427e'} even
though there's no such suffix dir any more.
("d41d8cd98f00b204e9800998ecf8427e" is the MD5 of the empty string.)

Then, a *subsequent* get_hashes() call would omit 3d0 from the resulting
hash, so then hashes.pkl would no longer contain 3d0. This difference
would result in a little useless replication traffic while nodes that
both lacked a particular part/suffix pair, but disagreed on how that
absence showed up in hashes.pkl, tried to push their version of nothing
to one another.

Now, an empty suffix dir doesn't appear in hashes.pkl at all, whether
it's for replication or EC, or whether it's for the get_hashes() call
that reaped the suffix dirs or not.

Co-Author: Samuel Merritt <sam@swiftstack.com>

Change-Id: Ie1bfb1cc56d0fc030c6db42f97b55d140695cf1f
This commit is contained in:
Clay Gerrard 2015-07-20 13:06:48 -07:00 committed by Alistair Coles
parent a5af24dd92
commit 44917c8a90
2 changed files with 18 additions and 34 deletions

View File

@ -626,16 +626,18 @@ class BaseDiskFileManager(object):
os.rmdir(hsh_path)
except OSError:
pass
# we just deleted this hsh_path, why are we waiting
# until the next suffix hash to raise PathNotDir so that
# this suffix will get del'd from the suffix hashes?
for filename in files:
key, value = mapper(filename)
hashes[key].update(value)
try:
os.rmdir(path)
except OSError:
pass
except OSError as e:
if e.errno == errno.ENOENT:
raise PathNotDir()
else:
# if we remove it, pretend like it wasn't there to begin with so
# that the suffix key gets removed
raise PathNotDir()
return hashes
def _hash_suffix(self, path, reclaim_age):

View File

@ -4244,19 +4244,14 @@ class TestSuffixHashes(unittest.TestCase):
df_mgr = self.df_router[policy]
df = df_mgr.get_diskfile(
'sda1', '0', 'a', 'c', 'o', policy=policy)
suffix = os.path.basename(os.path.dirname(df._datadir))
# scale back this tests manager's reclaim age a bit
df_mgr.reclaim_age = 1000
# write a tombstone that's just a *little* older
old_time = time() - 1001
timestamp = Timestamp(old_time)
df.delete(timestamp.internal)
expected = {
REPL_POLICY: {suffix: EMPTY_ETAG},
EC_POLICY: {suffix: {}},
}[policy.policy_type]
hashes = df_mgr.get_hashes('sda1', '0', [], policy)
self.assertEqual(hashes, expected)
self.assertEqual(hashes, {})
def test_hash_suffix_one_datafile(self):
for policy in self.iter_policies():
@ -4432,20 +4427,17 @@ class TestSuffixHashes(unittest.TestCase):
hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
# suffix dir cleaned up by get_hashes
self.assertFalse(os.path.exists(suffix_path))
expected = {
EC_POLICY: {'123': {}},
REPL_POLICY: {'123': EMPTY_ETAG},
}[policy.policy_type]
msg = 'expected %r != %r for policy %r' % (expected, hashes,
policy)
expected = {}
msg = 'expected %r != %r for policy %r' % (
expected, hashes, policy)
self.assertEqual(hashes, expected, msg)
# now make the suffix path a file
open(suffix_path, 'w').close()
hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
expected = {}
msg = 'expected %r != %r for policy %r' % (expected, hashes,
policy)
msg = 'expected %r != %r for policy %r' % (
expected, hashes, policy)
self.assertEqual(hashes, expected, msg)
def test_hash_suffix_listdir_enoent(self):
@ -4493,11 +4485,7 @@ class TestSuffixHashes(unittest.TestCase):
df_mgr = self.df_router[policy]
hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
policy)
expected = {
REPL_POLICY: {suffix: EMPTY_ETAG},
EC_POLICY: {suffix: {}},
}[policy.policy_type]
self.assertEqual(hashes, expected)
self.assertEqual(hashes, {})
# and hash path is quarantined
self.assertFalse(os.path.exists(df._datadir))
# each device a quarantined directory
@ -4705,12 +4693,9 @@ class TestSuffixHashes(unittest.TestCase):
self.assertNotEqual(new_hashes, hashes)
# and the empty suffix path is removed
self.assertFalse(os.path.exists(suffix_path))
# ... but is hashed as "empty"
expected = {
EC_POLICY: {},
REPL_POLICY: md5().hexdigest(),
}[policy.policy_type]
self.assertEqual({suffix: expected}, hashes)
# ... and the suffix key is removed
expected = {}
self.assertEqual(expected, hashes)
def test_get_hashes_multi_file_multi_suffix(self):
paths, suffix = find_paths_with_matching_suffixes(needed_matches=2,
@ -4887,10 +4872,7 @@ class TestSuffixHashes(unittest.TestCase):
self.assertTrue(os.path.exists(suffix_path)) # sanity
hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix],
policy)
expected = {
EC_POLICY: {'123': {}},
REPL_POLICY: {'123': EMPTY_ETAG},
}[policy.policy_type]
expected = {}
msg = 'expected %r != %r for policy %r' % (expected, hashes,
policy)
self.assertEqual(hashes, expected, msg)