From 95c7c0109b97479ecadc9a74864a8b56c4f5e985 Mon Sep 17 00:00:00 2001
From: Clay Gerrard
Date: Tue, 10 Jan 2017 18:52:10 -0800
Subject: [PATCH] Optimize noop case for suffix rehash
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

REPLICATE calls where everything is in sync and no suffixes have been
invalidated are supposed to be pretty common and fairly cheap.

If the invalidations file is empty there's no need to perform a
truncation write operation, which will presumably at some point have to
flush.

Everyone owes Pavel a quarter for the one billion fewer write ops in
prod ... and Matt a nickel for the 20ms of sleeping we skip on every
unittest run.

As a drive-by I remove a crufty, confusing OSError exception handler
around an open call that would be using O_CREAT in a directory it
either just created or just opened a file from - it wasn't going to
raise ENOENT. Similarly, rather than lose sleep trying to reason about
all the crazy exceptions that actually *could* pop anywhere in this
method, I instead improve the logging where any such exception would be
caught. This way we get the details we need and can focus only on
errors that actually happen.

Author: Pavel Kvasnička
Co-Author: Matthew Oliver
Related-Change-Id: I64cadb1a3feb4d819d545137eecfc295389794f0
Change-Id: If712e4431322df5c3e84808ab2d815fd06c76426
---
 swift/obj/diskfile.py          |  9 ++++---
 test/unit/obj/test_diskfile.py | 47 ++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index d6aec48d30..68ec5823e2 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -272,9 +272,11 @@ def consolidate_hashes(partition_dir):
                 hashes = None
 
         modified = False
+        found_invalidation_entry = False
         try:
             with open(invalidations_file, 'rb') as inv_fh:
                 for line in inv_fh:
+                    found_invalidation_entry = True
                     suffix = line.strip()
                     if hashes is not None and \
                             hashes.get(suffix, '') is not None:
@@ -289,12 +291,9 @@
 
         # Now that all the invalidations are reflected in hashes.pkl, it's
         # safe to clear out the invalidations file.
-        try:
+        if found_invalidation_entry:
             with open(invalidations_file, 'wb') as inv_fh:
                 pass
-        except OSError as e:
-            if e.errno != errno.ENOENT:
-                raise
 
         return hashes
 
@@ -1053,6 +1052,8 @@ class BaseDiskFileManager(object):
         try:
             hashes = self.consolidate_hashes(partition_path)
         except Exception:
+            self.logger.warning('Unable to read %r', hashes_file,
+                                exc_info=True)
             do_listdir = True
             force_rewrite = True
         else:
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index 8fd67edaff..216386c0f0 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -27,6 +27,7 @@ import tempfile
 import uuid
 import xattr
 import re
+import six
 from collections import defaultdict
 from random import shuffle, randint
 from shutil import rmtree
@@ -6056,6 +6057,47 @@ class TestSuffixHashes(unittest.TestCase):
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
             self.assertIn(suffix, hashes)  # sanity
 
+    def test_invalidate_hash_file_not_truncated_when_empty(self):
+        orig_open = open
+
+        def watch_open(*args, **kargs):
+            name = os.path.basename(args[0])
+            open_log[name].append(args[1])
+            return orig_open(*args, **kargs)
+
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            inv_file = os.path.join(
+                part_path, diskfile.HASH_INVALIDATIONS_FILE)
+            hash_file = os.path.join(
+                part_path, diskfile.HASH_FILE)
+
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertEqual(hashes, {})
+            self.assertTrue(os.path.exists(hash_file))
+            # create something to hash
+            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
+                                     policy=policy)
+            df.delete(self.ts())
+            self.assertTrue(os.path.exists(inv_file))
+            # invalidation file created, let's consolidate it
+            df_mgr.get_hashes('sda1', '0', [], policy)
+
+            open_log = defaultdict(list)
+            open_loc = '__builtin__.open' if six.PY2 else 'builtins.open'
+            with mock.patch(open_loc, watch_open):
+                self.assertTrue(os.path.exists(inv_file))
+                # no new suffixes get invalidated... so no write iop
+                df_mgr.get_hashes('sda1', '0', [], policy)
+            # each file is opened once to read
+            expected = {
+                'hashes.pkl': ['rb'],
+                'hashes.invalid': ['rb'],
+            }
+            self.assertEqual(open_log, expected)
+
     def test_invalidate_hash_consolidation(self):
         def assert_consolidation(suffixes):
             # verify that suffixes are invalidated after consolidation
@@ -6141,6 +6183,7 @@ class TestSuffixHashes(unittest.TestCase):
         # verify that if consolidate_hashes raises an exception then suffixes
         # are rehashed and a hashes.pkl is written
         for policy in self.iter_policies():
+            self.logger.clear()
             df_mgr = self.df_router[policy]
             # create something to hash
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@@ -6164,6 +6207,10 @@
             with open(hashes_file, 'rb') as f:
                 self.assertEqual(hashes, pickle.load(f))
 
+            # sanity check log warning
+            warnings = self.logger.get_lines_for_level('warning')
+            self.assertEqual(warnings, ["Unable to read %r" % hashes_file])
+
             # repeat with pre-existing hashes.pkl
             with mock.patch.object(df_mgr, '_hash_suffix',
                                    return_value='new fake hash'):
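
The change above comes down to one pattern: remember whether anything was
actually read from hashes.invalid and only truncate the file when it was, so
the common all-in-sync REPLICATE path stays read-only. The following is a
minimal standalone sketch of that pattern; it is not Swift's real
consolidate_hashes(), and the consolidate() helper, its arguments, and the
simplified error handling are illustrative assumptions only. The file name
'hashes.invalid' matches Swift's HASH_INVALIDATIONS_FILE.

    # Standalone sketch of the noop-friendly consolidation pattern.
    import os

    def consolidate(partition_dir, hashes):
        # illustrative helper, not Swift's consolidate_hashes()
        invalidations_file = os.path.join(partition_dir, 'hashes.invalid')
        found_invalidation_entry = False
        try:
            with open(invalidations_file, 'rb') as inv_fh:
                for line in inv_fh:
                    found_invalidation_entry = True
                    # mark the suffix as needing a rehash
                    hashes[line.strip().decode('ascii')] = None
        except (IOError, OSError):
            pass  # no invalidations file at all -> nothing to fold in
        if found_invalidation_entry:
            # the only write op, skipped entirely on the common noop path
            with open(invalidations_file, 'wb'):
                pass
        return hashes

On the noop path the file is opened once with 'rb' and never with 'wb', which
is exactly what test_invalidate_hash_file_not_truncated_when_empty asserts via
its open() log.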