From 44917c8a90e79a85cd6c6821d8b6d043f83b322d Mon Sep 17 00:00:00 2001 From: Clay Gerrard Date: Mon, 20 Jul 2015 13:06:48 -0700 Subject: [PATCH] Handle removed suffix dirs the same as empty suffix dirs When hashes suffix dirs, the directory might have gotten cleaned up while still appearing in hashes.pkl. It would even get cleaned up the next time. For example, given this really old tombstone: objects/846/3d0/d3a20154d0a828a032aba6860397c3d0/1432267961.41808.ts Prior to this commit, a call to get_hashes() would reap the old tombstone and any empty containing dirs, but the resulting hashes.pkl would still contain {'3d0': 'd41d8cd98f00b204e9800998ecf8427e'} even though there's no such suffix dir any more. ("d41d8cd98f00b204e9800998ecf8427e" is the MD5 of the empty string.) Then, a *subsequent* get_hashes() call would omit 3d0 from the resulting hash, so then hashes.pkl would no longer contain 3d0. This difference would result in a little useless replication traffic while nodes without a particular part/suffix pair, but who disagreed on how that showed up in hashes.pkl, tried to push their version of nothing to one another. Now, an empty suffix dir doesn't appear in hashes.pkl at all, whether it's for replication or EC, or whether it's for the get_hashes() call that reaped the suffix dirs or not. Co-Author: Samuel Merritt Change-Id: Ie1bfb1cc56d0fc030c6db42f97b55d140695cf1f --- swift/obj/diskfile.py | 12 +++++----- test/unit/obj/test_diskfile.py | 40 ++++++++++------------------------ 2 files changed, 18 insertions(+), 34 deletions(-) diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index c11b16dcaa..51158a0fe4 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -626,16 +626,18 @@ class BaseDiskFileManager(object): os.rmdir(hsh_path) except OSError: pass - # we just deleted this hsh_path, why are we waiting - # until the next suffix hash to raise PathNotDir so that - # this suffix will get del'd from the suffix hashes? for filename in files: key, value = mapper(filename) hashes[key].update(value) try: os.rmdir(path) - except OSError: - pass + except OSError as e: + if e.errno == errno.ENOENT: + raise PathNotDir() + else: + # if we remove it, pretend like it wasn't there to begin with so + # that the suffix key gets removed + raise PathNotDir() return hashes def _hash_suffix(self, path, reclaim_age): diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index 8a6ae0bee6..4f64eb8aba 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -4244,19 +4244,14 @@ class TestSuffixHashes(unittest.TestCase): df_mgr = self.df_router[policy] df = df_mgr.get_diskfile( 'sda1', '0', 'a', 'c', 'o', policy=policy) - suffix = os.path.basename(os.path.dirname(df._datadir)) # scale back this tests manager's reclaim age a bit df_mgr.reclaim_age = 1000 # write a tombstone that's just a *little* older old_time = time() - 1001 timestamp = Timestamp(old_time) df.delete(timestamp.internal) - expected = { - REPL_POLICY: {suffix: EMPTY_ETAG}, - EC_POLICY: {suffix: {}}, - }[policy.policy_type] hashes = df_mgr.get_hashes('sda1', '0', [], policy) - self.assertEqual(hashes, expected) + self.assertEqual(hashes, {}) def test_hash_suffix_one_datafile(self): for policy in self.iter_policies(): @@ -4432,20 +4427,17 @@ class TestSuffixHashes(unittest.TestCase): hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy) # suffix dir cleaned up by get_hashes self.assertFalse(os.path.exists(suffix_path)) - expected = { - EC_POLICY: {'123': {}}, - REPL_POLICY: {'123': EMPTY_ETAG}, - }[policy.policy_type] - msg = 'expected %r != %r for policy %r' % (expected, hashes, - policy) + expected = {} + msg = 'expected %r != %r for policy %r' % ( + expected, hashes, policy) self.assertEqual(hashes, expected, msg) # now make the suffix path a file open(suffix_path, 'w').close() hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy) expected = {} - msg = 'expected %r != %r for policy %r' % (expected, hashes, - policy) + msg = 'expected %r != %r for policy %r' % ( + expected, hashes, policy) self.assertEqual(hashes, expected, msg) def test_hash_suffix_listdir_enoent(self): @@ -4493,11 +4485,7 @@ class TestSuffixHashes(unittest.TestCase): df_mgr = self.df_router[policy] hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix], policy) - expected = { - REPL_POLICY: {suffix: EMPTY_ETAG}, - EC_POLICY: {suffix: {}}, - }[policy.policy_type] - self.assertEqual(hashes, expected) + self.assertEqual(hashes, {}) # and hash path is quarantined self.assertFalse(os.path.exists(df._datadir)) # each device a quarantined directory @@ -4705,12 +4693,9 @@ class TestSuffixHashes(unittest.TestCase): self.assertNotEqual(new_hashes, hashes) # and the empty suffix path is removed self.assertFalse(os.path.exists(suffix_path)) - # ... but is hashed as "empty" - expected = { - EC_POLICY: {}, - REPL_POLICY: md5().hexdigest(), - }[policy.policy_type] - self.assertEqual({suffix: expected}, hashes) + # ... and the suffix key is removed + expected = {} + self.assertEqual(expected, hashes) def test_get_hashes_multi_file_multi_suffix(self): paths, suffix = find_paths_with_matching_suffixes(needed_matches=2, @@ -4887,10 +4872,7 @@ class TestSuffixHashes(unittest.TestCase): self.assertTrue(os.path.exists(suffix_path)) # sanity hashes = df_mgr.get_hashes(self.existing_device, '0', [suffix], policy) - expected = { - EC_POLICY: {'123': {}}, - REPL_POLICY: {'123': EMPTY_ETAG}, - }[policy.policy_type] + expected = {} msg = 'expected %r != %r for policy %r' % (expected, hashes, policy) self.assertEqual(hashes, expected, msg)