diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index 37fe51de54..abc51dcc82 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -283,23 +283,11 @@ def consolidate_hashes(partition_dir):
     :param suffix_dir: absolute path to partition dir containing hashes.pkl
                        and hashes.invalid
 
-    :returns: the hashes, or None if there's no hashes.pkl.
+    :returns: a dict, the suffix hashes (if any), the key 'valid' will be False
+              if hashes.pkl is corrupt, cannot be read or does not exist
     """
-    hashes_file = join(partition_dir, HASH_FILE)
     invalidations_file = join(partition_dir, HASH_INVALIDATIONS_FILE)
 
-    if not os.path.exists(hashes_file):
-        if os.path.exists(invalidations_file):
-            # no hashes at all -> everything's invalid, so empty the file with
-            # the invalid suffixes in it, if it exists
-            try:
-                with open(invalidations_file, 'wb'):
-                    pass
-            except OSError as e:
-                if e.errno != errno.ENOENT:
-                    raise
-        return None
-
     with lock_path(partition_dir):
         hashes = read_hashes(partition_dir)
 
@@ -1069,9 +1057,6 @@ class BaseDiskFileManager(object):
             self.logger.warning('Unable to read %r', hashes_file,
                                 exc_info=True)
 
-        if orig_hashes is None:
-            # consolidate_hashes returns None if hashes.pkl does not exist
-            orig_hashes = {'valid': False}
         if not orig_hashes['valid']:
             # This is the only path to a valid hashes from invalid read (e.g.
             # does not exist, corrupt, etc.). Moreover, in order to write this
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index d2b95e0cd7..14f6aad939 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -6232,7 +6232,7 @@ class TestSuffixHashes(unittest.TestCase):
             self.assertIn(suffix, hashes)
             self.assertIn(suffix2, hashes)
 
-    def test_hash_invalidations_survive_racing_get_hashes_same_suffix(self):
+    def _check_hash_invalidations_race_get_hashes_same_suffix(self, existing):
        # verify that when two processes concurrently call get_hashes, then any
        # concurrent hash invalidation will survive and be consolidated on a
        # subsequent call to get_hashes (i.e. ensure first get_hashes process
@@ -6243,8 +6243,9 @@ class TestSuffixHashes(unittest.TestCase):
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
            orig_hash_suffix = df_mgr._hash_suffix
-            # create hashes.pkl
-            df_mgr.get_hashes('sda1', '0', [], policy)
+            if existing:
+                # create hashes.pkl
+                df_mgr.get_hashes('sda1', '0', [], policy)
 
            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                     policy=policy)
@@ -6276,7 +6277,10 @@ class TestSuffixHashes(unittest.TestCase):
                # simulate another process calling get_hashes but failing
                # after hash invalidation have been consolidated
                hashes = df_mgr.consolidate_hashes(part_dir)
-                self.assertTrue(hashes['valid'])
+                if existing:
+                    self.assertTrue(hashes['valid'])
+                else:
+                    self.assertFalse(hashes['valid'])
                # get the updated suffix hash...
                non_local['hash'] = orig_hash_suffix(suffix_dir)
                return result
@@ -6295,6 +6299,12 @@ class TestSuffixHashes(unittest.TestCase):
            # so hashes should have the latest suffix hash...
            self.assertEqual(hashes[suffix], non_local['hash'])
 
+    def test_hash_invalidations_race_get_hashes_same_suffix_new(self):
+        self._check_hash_invalidations_race_get_hashes_same_suffix(False)
+
+    def test_hash_invalidations_race_get_hashes_same_suffix_existing(self):
+        self._check_hash_invalidations_race_get_hashes_same_suffix(True)
+
    def _check_unpickle_error_and_get_hashes_failure(self, existing):
        for policy in self.iter_policies():
            df_mgr = self.df_router[policy]
diff --git a/test/unit/obj/test_reconstructor.py b/test/unit/obj/test_reconstructor.py
index f6d040fa74..fcdf7a6386 100644
--- a/test/unit/obj/test_reconstructor.py
+++ b/test/unit/obj/test_reconstructor.py
@@ -810,9 +810,12 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase):
 
        self.assertFalse(os.path.exists(pol_1_part_1_path))
        warnings = self.reconstructor.logger.get_lines_for_level('warning')
-        self.assertEqual(1, len(warnings))
-        self.assertIn(pol_1_part_1_path, warnings[0])
-        self.assertIn('not a directory', warnings[0].lower())
+        self.assertEqual(2, len(warnings))
+        # first warning is due to get_hashes failing to take lock on non-dir
+        self.assertIn(pol_1_part_1_path + '/hashes.pkl', warnings[0])
+        self.assertIn('unable to read', warnings[0].lower())
+        self.assertIn(pol_1_part_1_path, warnings[1])
+        self.assertIn('not a directory', warnings[1].lower())
 
    def test_ignores_status_file(self):
        # Following fd86d5a, the auditor will leave status files on each device
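
Reviewer note: below is a minimal, self-contained sketch of the caller contract the
diskfile.py hunks establish. It is not Swift's actual implementation: the names
fake_consolidate_hashes() and get_hashes_sketch() are hypothetical stand-ins, and the
real consolidate_hashes() additionally takes a partition lock and merges
hashes.invalid before returning. The point it illustrates is that the function now
always returns a dict carrying a 'valid' key, so callers branch on hashes['valid']
instead of first checking for None.

    import os
    import pickle


    def fake_consolidate_hashes(partition_dir):
        # Hypothetical stand-in: per the patched docstring, 'valid' is False
        # when hashes.pkl is missing, unreadable or corrupt.
        hashes = {'valid': False}
        try:
            with open(os.path.join(partition_dir, 'hashes.pkl'), 'rb') as fp:
                loaded = pickle.load(fp)
                if isinstance(loaded, dict):
                    hashes = dict(loaded)
                    hashes.setdefault('valid', True)
        except (IOError, OSError, EOFError, pickle.UnpicklingError):
            pass  # fall through with {'valid': False}
        return hashes


    def get_hashes_sketch(partition_dir):
        # Hypothetical caller mirroring the simplified BaseDiskFileManager
        # path: no None check, just the 'valid' flag.
        hashes = fake_consolidate_hashes(partition_dir)
        if not hashes['valid']:
            # the only path from an invalid read (missing, corrupt, etc.)
            # back to a valid hashes.pkl: rehash every suffix on disk
            hashes = {'valid': True}  # plus freshly computed suffix hashes
        return hashes

The sketch mirrors the design choice in the patch: the old unlocked
os.path.exists() fast path, which truncated hashes.invalid and returned None, is
removed, so a missing hashes.pkl becomes just another 'valid': False case handled
under the partition lock. The new *_new/_existing test variants and the extra
'unable to read' warning asserted in test_reconstructor.py both follow from
consolidate_hashes() now always being invoked and attempting the lock.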