diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index fb88df3590..e8f7acf06f 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -273,9 +273,11 @@ def consolidate_hashes(partition_dir):
                 hashes = None
 
         modified = False
+        found_invalidation_entry = False
         try:
             with open(invalidations_file, 'rb') as inv_fh:
                 for line in inv_fh:
+                    found_invalidation_entry = True
                     suffix = line.strip()
                     if hashes is not None and \
                             hashes.get(suffix, '') is not None:
@@ -290,12 +292,9 @@
 
         # Now that all the invalidations are reflected in hashes.pkl, it's
         # safe to clear out the invalidations file.
-        try:
+        if found_invalidation_entry:
             with open(invalidations_file, 'wb') as inv_fh:
                 pass
-        except OSError as e:
-            if e.errno != errno.ENOENT:
-                raise
 
         return hashes
 
@@ -1048,6 +1047,8 @@ class BaseDiskFileManager(object):
         try:
             hashes = self.consolidate_hashes(partition_path)
         except Exception:
+            self.logger.warning('Unable to read %r', hashes_file,
+                                exc_info=True)
             do_listdir = True
             force_rewrite = True
         else:
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index 56f67119c4..c046cde500 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -27,6 +27,7 @@ import tempfile
 import uuid
 import xattr
 import re
+import six
 from collections import defaultdict
 from random import shuffle, randint
 from shutil import rmtree
@@ -6081,6 +6082,47 @@ class TestSuffixHashes(unittest.TestCase):
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
             self.assertIn(suffix, hashes)  # sanity
 
+    def test_invalidate_hash_file_not_truncated_when_empty(self):
+        orig_open = open
+
+        def watch_open(*args, **kargs):
+            name = os.path.basename(args[0])
+            open_log[name].append(args[1])
+            return orig_open(*args, **kargs)
+
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            inv_file = os.path.join(
+                part_path, diskfile.HASH_INVALIDATIONS_FILE)
+            hash_file = os.path.join(
+                part_path, diskfile.HASH_FILE)
+
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertEqual(hashes, {})
+            self.assertTrue(os.path.exists(hash_file))
+            # create something to hash
+            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
+                                     policy=policy)
+            df.delete(self.ts())
+            self.assertTrue(os.path.exists(inv_file))
+            # invalidation file created, let's consolidate it
+            df_mgr.get_hashes('sda1', '0', [], policy)
+
+            open_log = defaultdict(list)
+            open_loc = '__builtin__.open' if six.PY2 else 'builtins.open'
+            with mock.patch(open_loc, watch_open):
+                self.assertTrue(os.path.exists(inv_file))
+                # no new suffixes get invalidated... so no write IOP
+                df_mgr.get_hashes('sda1', '0', [], policy)
+            # each file is opened once to read
+            expected = {
+                'hashes.pkl': ['rb'],
+                'hashes.invalid': ['rb'],
+            }
+            self.assertEqual(open_log, expected)
+
     def test_invalidate_hash_consolidation(self):
         def assert_consolidation(suffixes):
             # verify that suffixes are invalidated after consolidation
@@ -6166,6 +6208,7 @@ class TestSuffixHashes(unittest.TestCase):
         # verify that if consolidate_hashes raises an exception then suffixes
         # are rehashed and a hashes.pkl is written
         for policy in self.iter_policies():
+            self.logger.clear()
             df_mgr = self.df_router[policy]
             # create something to hash
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@@ -6189,6 +6232,10 @@ class TestSuffixHashes(unittest.TestCase):
             with open(hashes_file, 'rb') as f:
                 self.assertEqual(hashes, pickle.load(f))
 
+            # sanity check log warning
+            warnings = self.logger.get_lines_for_level('warning')
+            self.assertEqual(warnings, ["Unable to read %r" % hashes_file])
+
             # repeat with pre-existing hashes.pkl
             with mock.patch.object(df_mgr, '_hash_suffix',
                                    return_value='new fake hash'):
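
For reference, a minimal standalone sketch of the behaviour the diskfile.py hunks introduce (simplified helper name and data handling are assumptions for illustration, not the actual Swift module): the invalidations file is still read on every consolidation, but it is only truncated when at least one entry was actually found, so the common no-op rehash path costs a read but no write IOP.

    # Hypothetical, simplified sketch of the patched consolidation logic.
    import errno

    def consolidate(invalidations_file, hashes):
        found_invalidation_entry = False
        try:
            with open(invalidations_file, 'rb') as inv_fh:
                for line in inv_fh:
                    found_invalidation_entry = True
                    suffix = line.strip()
                    # mark the suffix as needing a rehash (simplified; the
                    # real code also checks the existing hashes mapping)
                    hashes[suffix] = None
        except (IOError, OSError) as e:
            if e.errno != errno.ENOENT:
                raise

        if found_invalidation_entry:
            # only truncate when something was consolidated; an empty file
            # is simply left alone, avoiding a pointless write
            with open(invalidations_file, 'wb'):
                pass
        return hashes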