Merge "Refactor to share on-disk layout with hash cleanup"

This commit is contained in:
Jenkins 2014-01-09 17:59:28 +00:00 committed by Gerrit Code Review
commit a877785cf2

View File

@ -130,6 +130,53 @@ def quarantine_renamer(device_path, corrupted_file_path):
return to_dir
def get_ondisk_files(files, datadir):
    """
    Select the files that represent an object from a directory listing.

    The names are examined from highest to lowest sort order, and the scan
    stops at the first tombstone (``.ts``) or data (``.data``) file found.
    The first ``.meta`` file seen before a data file is kept; it is
    discarded again if a tombstone is reached instead.

    NOTE: ``files`` is sorted in place (descending) as a side effect.

    :param files: list of file names; sorted in place by this call
    :param datadir: directory name that is joined onto each selected name
    :returns: a tuple of (data_file, meta_file, ts_file) in one of these
              states:

              * all three are None
                    no tombstone or data file was found
              * ts_file is not None, data_file is None, meta_file is None
                    object is considered deleted
              * data_file is not None, ts_file is None
                    object exists, and optionally has fast-POST metadata
    :raises AssertionError: if the result matches none of the states above,
                            e.g. a .meta file with no accompanying .data
                            file (only when assertions are enabled)
    """
    files.sort(reverse=True)

    data_file = None
    meta_file = None
    ts_file = None

    for name in files:
        # Both terminal cases below leave the loop immediately, so reaching
        # the top of the loop with either one set means the scan is broken.
        assert ts_file is None, (
            "On-disk file search loop"
            " continuing after tombstone, %s, encountered" % ts_file)
        assert data_file is None, (
            "On-disk file search loop"
            " continuing after data file, %s, encountered" % data_file)

        if name.endswith('.ts'):
            # A tombstone overrides any metadata file seen before it.
            meta_file = None
            ts_file = join(datadir, name)
            break

        if name.endswith('.data'):
            data_file = join(datadir, name)
            break

        if not meta_file and name.endswith('.meta'):
            # Keep scanning: a fast-POST only writes .meta files, so the
            # matching data file sorts after this entry.
            meta_file = join(datadir, name)

    nothing_found = (
        data_file is None and meta_file is None and ts_file is None)
    tombstone_only = (
        ts_file is not None and data_file is None and meta_file is None)
    data_present = data_file is not None and ts_file is None

    assert nothing_found or tombstone_only or data_present, (
        "On-disk file search algorithm contract is broken: data_file:"
        " %s, meta_file: %s, ts_file: %s" % (data_file, meta_file, ts_file))

    return data_file, meta_file, ts_file
def hash_cleanup_listdir(hsh_path, reclaim_age=ONE_WEEK):
"""
List contents of a hash directory and clean up any old files.
@ -148,18 +195,13 @@ def hash_cleanup_listdir(hsh_path, reclaim_age=ONE_WEEK):
files.remove(files[0])
elif files:
files.sort(reverse=True)
meta = data = tomb = None
data_file, meta_file, ts_file = get_ondisk_files(files, '')
newest_file = data_file or ts_file
for filename in list(files):
if not meta and filename.endswith('.meta'):
meta = filename
if not data and filename.endswith('.data'):
data = filename
if not tomb and filename.endswith('.ts'):
tomb = filename
if (filename < tomb or # any file older than tomb
filename < data or # any file older than data
(filename.endswith('.meta') and
filename < meta)): # old meta
if ((filename < newest_file)
or (meta_file
and filename.endswith('.meta')
and filename < meta_file)):
os.unlink(join(hsh_path, filename))
files.remove(filename)
return files
@ -1043,9 +1085,8 @@ class DiskFile(object):
object exists, and optionally has fast-POST metadata
"""
data_file = meta_file = ts_file = None
try:
files = sorted(os.listdir(self._datadir), reverse=True)
files = os.listdir(self._datadir)
except OSError as err:
if err.errno == errno.ENOTDIR:
# If there's a file here instead of a directory, quarantine
@ -1060,33 +1101,10 @@ class DiskFile(object):
raise DiskFileError(
"Error listing directory %s: %s" % (self._datadir, err))
# The data directory does not exist, so the object cannot exist.
fileset = (None, None, None)
else:
for afile in files:
assert ts_file is None, "On-disk file search loop" \
" continuing after tombstone, %s, encountered" % ts_file
assert data_file is None, "On-disk file search loop" \
" continuing after data file, %s, encountered" % data_file
if afile.endswith('.ts'):
meta_file = None
ts_file = join(self._datadir, afile)
break
if afile.endswith('.meta') and not meta_file:
meta_file = join(self._datadir, afile)
# NOTE: this does not exit this loop, since a fast-POST
# operation just updates metadata, writing one or more
# .meta files, the data file will have an older timestamp,
# so we keep looking.
continue
if afile.endswith('.data'):
data_file = join(self._datadir, afile)
break
assert ((data_file is None and meta_file is None and ts_file is None)
or (ts_file is not None and data_file is None
and meta_file is None)
or (data_file is not None and ts_file is None)), \
"On-disk file search algorithm contract is broken: data_file:" \
" %s, meta_file: %s, ts_file: %s" % (data_file, meta_file, ts_file)
return data_file, meta_file, ts_file
fileset = get_ondisk_files(files, self._datadir)
return fileset
def _construct_exception_from_ts_file(self, ts_file):
"""