Optimize noop case for suffix rehash

REPLICATE calls where everything is in sync and no suffixes have been
invalidated are supposed to be pretty common and fairly cheap.  If the
invalidations file is empty there's no need to perform a truncating
write, which would presumably have to get flushed at some point.
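
The guard is tiny; a rough sketch of the idea (hypothetical helper name,
but the same variable names as the hunks below):

    def consume_invalidations(invalidations_file):
        found_invalidation_entry = False
        with open(invalidations_file, 'rb') as inv_fh:
            for line in inv_fh:
                found_invalidation_entry = True
                # ... fold each invalidated suffix into the in-memory hashes
        # leave an already-empty hashes.invalid alone: the common in-sync
        # REPLICATE path then does no truncating write and nothing has to
        # flush later
        if found_invalidation_entry:
            with open(invalidations_file, 'wb'):
                pass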

Everyone owes Pavel a quarter for the one billion fewer write ops in prod

... and Matt a nickel for the 20ms of sleep we get back on every unittest run

As a drive-by I remove a crufty, confusing OSError exception handler
around an open call that uses O_CREAT in a directory it either just
created or just opened a file from - it wasn't going to raise ENOENT.
Similarly, rather than lose sleep trying to reason about all the crazy
exceptions that actually *could* pop anywhere in this method, I improve
the logging where any such exception would be caught.  This way we get
the details we need and can focus only on errors that actually happen.
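
For illustration, the shape of that handler (a fragment of the method,
as in the hunk below; self, hashes_file and the flags come from the
surrounding code): the important part is exc_info=True, which attaches
the real traceback to the warning.

    try:
        hashes = self.consolidate_hashes(partition_path)
    except Exception:
        # whatever actually blew up, log it with its traceback and fall
        # back to a full listdir + rehash
        self.logger.warning('Unable to read %r', hashes_file, exc_info=True)
        do_listdir = True
        force_rewrite = True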

Author: Pavel Kvasnička <pavel.kvasnicka@firma.seznam.cz>
Co-Author: Matthew Oliver <matt@oliver.net.au>

Related-Change-Id: I64cadb1a3feb4d819d545137eecfc295389794f0

Change-Id: If712e4431322df5c3e84808ab2d815fd06c76426
commit 95c7c0109b (parent e772cf95c6)
Clay Gerrard, 2017-01-10 18:52:10 -08:00
2 changed files with 52 additions and 4 deletions

diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py

@@ -272,9 +272,11 @@ def consolidate_hashes(partition_dir):
                 hashes = None

         modified = False
+        found_invalidation_entry = False
         try:
             with open(invalidations_file, 'rb') as inv_fh:
                 for line in inv_fh:
+                    found_invalidation_entry = True
                     suffix = line.strip()
                     if hashes is not None and \
                             hashes.get(suffix, '') is not None:
@@ -289,12 +291,9 @@ def consolidate_hashes(partition_dir):

         # Now that all the invalidations are reflected in hashes.pkl, it's
         # safe to clear out the invalidations file.
-        try:
+        if found_invalidation_entry:
             with open(invalidations_file, 'wb') as inv_fh:
                 pass
-        except OSError as e:
-            if e.errno != errno.ENOENT:
-                raise

         return hashes
@@ -1053,6 +1052,8 @@ class BaseDiskFileManager(object):
         try:
             hashes = self.consolidate_hashes(partition_path)
         except Exception:
+            self.logger.warning('Unable to read %r', hashes_file,
+                                exc_info=True)
             do_listdir = True
             force_rewrite = True
         else:

diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py

@@ -27,6 +27,7 @@ import tempfile
 import uuid
 import xattr
 import re
+import six
 from collections import defaultdict
 from random import shuffle, randint
 from shutil import rmtree
@@ -6056,6 +6057,47 @@ class TestSuffixHashes(unittest.TestCase):
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
             self.assertIn(suffix, hashes)  # sanity

+    def test_invalidate_hash_file_not_truncated_when_empty(self):
+        orig_open = open
+
+        def watch_open(*args, **kargs):
+            name = os.path.basename(args[0])
+            open_log[name].append(args[1])
+            return orig_open(*args, **kargs)
+
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            inv_file = os.path.join(
+                part_path, diskfile.HASH_INVALIDATIONS_FILE)
+            hash_file = os.path.join(
+                part_path, diskfile.HASH_FILE)
+
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertEqual(hashes, {})
+            self.assertTrue(os.path.exists(hash_file))
+            # create something to hash
+            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
+                                     policy=policy)
+            df.delete(self.ts())
+            self.assertTrue(os.path.exists(inv_file))
+            # invalidation file created, lets consolidate it
+            df_mgr.get_hashes('sda1', '0', [], policy)
+
+            open_log = defaultdict(list)
+            open_loc = '__builtin__.open' if six.PY2 else 'builtins.open'
+            with mock.patch(open_loc, watch_open):
+                self.assertTrue(os.path.exists(inv_file))
+                # no new suffixes get invalided... so no write iop
+                df_mgr.get_hashes('sda1', '0', [], policy)
+            # each file is opened once to read
+            expected = {
+                'hashes.pkl': ['rb'],
+                'hashes.invalid': ['rb'],
+            }
+            self.assertEqual(open_log, expected)
+
     def test_invalidate_hash_consolidation(self):
         def assert_consolidation(suffixes):
             # verify that suffixes are invalidated after consolidation
@@ -6141,6 +6183,7 @@ class TestSuffixHashes(unittest.TestCase):
         # verify that if consolidate_hashes raises an exception then suffixes
         # are rehashed and a hashes.pkl is written
         for policy in self.iter_policies():
+            self.logger.clear()
             df_mgr = self.df_router[policy]
             # create something to hash
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@@ -6164,6 +6207,10 @@ class TestSuffixHashes(unittest.TestCase):
             with open(hashes_file, 'rb') as f:
                 self.assertEqual(hashes, pickle.load(f))

+            # sanity check log warning
+            warnings = self.logger.get_lines_for_level('warning')
+            self.assertEqual(warnings, ["Unable to read %r" % hashes_file])
+
             # repeat with pre-existing hashes.pkl
             with mock.patch.object(df_mgr, '_hash_suffix',
                                    return_value='new fake hash'):