Optimize noop case for suffix rehash
REPLICATE calls where everything is in sync and no suffixes have been invalidated are supposed to be pretty common and fairly cheap. If an invalidations file is empty, there's no need to perform a truncation write operation that will presumably, at some point, have to flush. Everyone owes Pavel a quarter for the one billion fewer write ops in prod ... and Matt a nickel for the 20ms handed back to every unittest run.

As a drive-by, I remove a crufty, confusing OSError exception handler around an open call that would be using O_CREAT in a directory that it either just created or had opened a file from - it wasn't going to raise ENOENT. Similarly, rather than lose sleep trying to reason about all the crazy exceptions that actually *could* pop anywhere in this method, I instead improve the logging where any such exception would be caught. This way we get the details we need and can focus on only the errors that actually happen.

Author: Pavel Kvasnička <pavel.kvasnicka@firma.seznam.cz>
Co-Author: Matthew Oliver <matt@oliver.net.au>
Related-Change-Id: I64cadb1a3feb4d819d545137eecfc295389794f0
Change-Id: If712e4431322df5c3e84808ab2d815fd06c76426
parent e772cf95c6
commit 95c7c0109b
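In outline, the change guards the truncation behind a flag that records whether the invalidations file actually yielded any entries. A minimal standalone sketch of the pattern (simplified names and paths, not the exact Swift code):

    import os

    def consolidate(partition_dir):
        # hashes.invalid is the per-partition invalidations file
        invalidations_file = os.path.join(partition_dir, 'hashes.invalid')
        found_invalidation_entry = False
        try:
            with open(invalidations_file, 'rb') as inv_fh:
                for line in inv_fh:
                    found_invalidation_entry = True
                    suffix = line.strip()
                    # ... mark `suffix` invalid in the in-memory hashes ...
        except (IOError, OSError):
            pass  # no invalidations file: nothing to consolidate
        # Previously the file was truncated unconditionally; skipping the
        # truncation when nothing was read avoids a write op that would
        # eventually have to flush.
        if found_invalidation_entry:
            with open(invalidations_file, 'wb'):
                pass  # opening in 'wb' mode truncates the file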
swift/obj/diskfile.py

@@ -272,9 +272,11 @@ def consolidate_hashes(partition_dir):
         hashes = None

         modified = False
+        found_invalidation_entry = False
         try:
             with open(invalidations_file, 'rb') as inv_fh:
                 for line in inv_fh:
+                    found_invalidation_entry = True
                     suffix = line.strip()
                     if hashes is not None and \
                             hashes.get(suffix, '') is not None:
@@ -289,12 +291,9 @@ def consolidate_hashes(partition_dir):

         # Now that all the invalidations are reflected in hashes.pkl, it's
         # safe to clear out the invalidations file.
-        try:
+        if found_invalidation_entry:
             with open(invalidations_file, 'wb') as inv_fh:
                 pass
-        except OSError as e:
-            if e.errno != errno.ENOENT:
-                raise

         return hashes

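The truncation being skipped looks like a no-op, but opening a file in 'wb' mode implies O_TRUNC, which dirties the inode and must be flushed like any other write. A quick standalone illustration:

    import os
    import tempfile

    path = os.path.join(tempfile.mkdtemp(), 'hashes.invalid')
    with open(path, 'wb') as f:
        f.write(b'a83\n')          # simulate one invalidated suffix
    assert os.path.getsize(path) == 4
    with open(path, 'wb'):
        pass                       # no write() call, yet ...
    assert os.path.getsize(path) == 0  # ... the open itself emptied the file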
@@ -1053,6 +1052,8 @@ class BaseDiskFileManager(object):
         try:
             hashes = self.consolidate_hashes(partition_path)
         except Exception:
+            self.logger.warning('Unable to read %r', hashes_file,
+                                exc_info=True)
             do_listdir = True
             force_rewrite = True
         else:
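The logging half of the change relies on exc_info=True, which makes the stdlib logger append the active traceback to the message, so whatever unexpected exception escaped is recorded in full. A self-contained sketch (illustrative logger name and path):

    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger('swift.obj.diskfile')
    hashes_file = '/srv/node/sda1/objects/0/hashes.pkl'  # illustrative path

    try:
        raise ValueError('corrupt pickle')  # stand-in for any unexpected error
    except Exception:
        # emits "Unable to read '...'" followed by the full traceback
        logger.warning('Unable to read %r', hashes_file, exc_info=True)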
test/unit/obj/test_diskfile.py

@@ -27,6 +27,7 @@ import tempfile
 import uuid
 import xattr
 import re
 import six
+from collections import defaultdict
 from random import shuffle, randint
 from shutil import rmtree
@@ -6056,6 +6057,47 @@ class TestSuffixHashes(unittest.TestCase):
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
             self.assertIn(suffix, hashes)  # sanity

+    def test_invalidate_hash_file_not_truncated_when_empty(self):
+        orig_open = open
+
+        def watch_open(*args, **kargs):
+            name = os.path.basename(args[0])
+            open_log[name].append(args[1])
+            return orig_open(*args, **kargs)
+
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            part_path = os.path.join(self.devices, 'sda1',
+                                     diskfile.get_data_dir(policy), '0')
+            inv_file = os.path.join(
+                part_path, diskfile.HASH_INVALIDATIONS_FILE)
+            hash_file = os.path.join(
+                part_path, diskfile.HASH_FILE)
+
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertEqual(hashes, {})
+            self.assertTrue(os.path.exists(hash_file))
+            # create something to hash
+            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
+                                     policy=policy)
+            df.delete(self.ts())
+            self.assertTrue(os.path.exists(inv_file))
+            # invalidation file created, let's consolidate it
+            df_mgr.get_hashes('sda1', '0', [], policy)
+
+            open_log = defaultdict(list)
+            open_loc = '__builtin__.open' if six.PY2 else 'builtins.open'
+            with mock.patch(open_loc, watch_open):
+                self.assertTrue(os.path.exists(inv_file))
+                # no new suffixes get invalidated... so no write iop
+                df_mgr.get_hashes('sda1', '0', [], policy)
+            # each file is opened once to read
+            expected = {
+                'hashes.pkl': ['rb'],
+                'hashes.invalid': ['rb'],
+            }
+            self.assertEqual(open_log, expected)
+
     def test_invalidate_hash_consolidation(self):
         def assert_consolidation(suffixes):
             # verify that suffixes are invalidated after consolidation
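The open-counting trick in the new test generalizes: wrap the real builtin, record (basename, mode) for every call, and patch it at whichever location the running interpreter resolves. A standalone sketch (assumes the mode is always passed positionally, as the test does):

    import os
    from collections import defaultdict
    from unittest import mock  # on py2 this was the external `mock` package

    import six

    open_log = defaultdict(list)
    orig_open = open

    def watch_open(*args, **kwargs):
        open_log[os.path.basename(args[0])].append(args[1])
        return orig_open(*args, **kwargs)

    open_loc = '__builtin__.open' if six.PY2 else 'builtins.open'
    with mock.patch(open_loc, watch_open):
        with open('/etc/hostname', 'rb') as f:  # goes through the wrapper
            f.read()
    print(dict(open_log))  # e.g. {'hostname': ['rb']}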
@@ -6141,6 +6183,7 @@ class TestSuffixHashes(unittest.TestCase):
         # verify that if consolidate_hashes raises an exception then suffixes
         # are rehashed and a hashes.pkl is written
         for policy in self.iter_policies():
+            self.logger.clear()
             df_mgr = self.df_router[policy]
             # create something to hash
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
@@ -6164,6 +6207,10 @@ class TestSuffixHashes(unittest.TestCase):
             with open(hashes_file, 'rb') as f:
                 self.assertEqual(hashes, pickle.load(f))

+            # sanity check log warning
+            warnings = self.logger.get_lines_for_level('warning')
+            self.assertEqual(warnings, ["Unable to read %r" % hashes_file])
+
             # repeat with pre-existing hashes.pkl
             with mock.patch.object(df_mgr, '_hash_suffix',
                                    return_value='new fake hash'):
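The warning assertion above uses Swift's test FakeLogger helper (get_lines_for_level). With stock unittest on py3, roughly the same check can be written with assertLogs:

    import logging
    import unittest

    class TestWarningLine(unittest.TestCase):
        def test_unable_to_read_warning(self):
            logger = logging.getLogger('swift.obj.diskfile')
            hashes_file = '/srv/node/sda1/objects/0/hashes.pkl'  # illustrative
            with self.assertLogs(logger, level='WARNING') as cm:
                logger.warning('Unable to read %r', hashes_file)
            self.assertIn('Unable to read', cm.output[0])

    if __name__ == '__main__':
        unittest.main()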