Merge "Refactor DiskFile init logic into multiple methods"

Jenkins 2013-08-30 02:20:34 +00:00 committed by Gerrit Code Review
commit 4f0e99c185
2 changed files with 218 additions and 63 deletions


@@ -389,29 +389,109 @@ class DiskFile(object):
self.keep_cache = False
self.suppress_file_closing = False
self.threadpool = threadpool or ThreadPool(nthreads=0)
if not exists(self.datadir):
return
files = sorted(os.listdir(self.datadir), reverse=True)
meta_file = None
for afile in files:
if afile.endswith('.ts'):
self.data_file = None
with open(join(self.datadir, afile)) as mfp:
self.metadata = read_metadata(mfp)
self.metadata['deleted'] = True
break
if afile.endswith('.meta') and not meta_file:
meta_file = join(self.datadir, afile)
if afile.endswith('.data') and not self.data_file:
self.data_file = join(self.datadir, afile)
break
if not self.data_file:
return
self.fp = open(self.data_file, 'rb')
datafile_metadata = read_metadata(self.fp)
if not keep_data_fp:
self.close(verify_file=False)
data_file, meta_file, ts_file = self._get_ondisk_file()
if not data_file:
if ts_file:
self._construct_from_ts_file(ts_file)
else:
fp = self._construct_from_data_file(data_file, meta_file)
if keep_data_fp:
self.fp = fp
else:
fp.close()

    def _get_ondisk_file(self):
        """
        Do the work to figure out if the data directory exists, and if so,
        determine the on-disk files to use.

        :returns: a tuple of data, meta and ts (tombstone) files, in one of
                  three states:

                  1. all three are None

                     data directory does not exist, or there are no files in
                     that directory

                  2. ts_file is not None, data_file is None, meta_file is None

                     object is considered deleted

                  3. data_file is not None, ts_file is None

                     object exists, and optionally has fast-POST metadata
        """
data_file = meta_file = ts_file = None
try:
files = sorted(os.listdir(self.datadir), reverse=True)
except OSError as err:
if err.errno != errno.ENOENT:
raise
# The data directory does not exist, so the object cannot exist.
else:
for afile in files:
assert ts_file is None, "On-disk file search loop" \
" continuing after tombstone, %s, encountered" % ts_file
assert data_file is None, "On-disk file search loop" \
" continuing after data file, %s, encountered" % data_file
if afile.endswith('.ts'):
meta_file = None
ts_file = join(self.datadir, afile)
break
if afile.endswith('.meta') and not meta_file:
meta_file = join(self.datadir, afile)
                    # NOTE: this does not exit the loop; a fast-POST
                    # operation only updates metadata, writing one or more
                    # .meta files, so the data file will have an older
                    # timestamp and we must keep looking.
continue
if afile.endswith('.data'):
data_file = join(self.datadir, afile)
break
assert ((data_file is None and meta_file is None and ts_file is None)
or (ts_file is not None and data_file is None
and meta_file is None)
or (data_file is not None and ts_file is None)), \
"On-disk file search algorithm contract is broken: data_file:" \
" %s, meta_file: %s, ts_file: %s" % (data_file, meta_file, ts_file)
return data_file, meta_file, ts_file

    def _construct_from_ts_file(self, ts_file):
        """
        A tombstone means the object is considered deleted. We just need to
        pull the metadata from the tombstone file, which has the timestamp.
        """
with open(ts_file) as fp:
self.metadata = read_metadata(fp)
self.metadata['deleted'] = True

    def _verify_name(self):
"""
Verify the metadata's name value matches what we think the object is
named.
"""
try:
mname = self.metadata['name']
except KeyError:
pass
else:
if mname != self.name:
self.logger.error(_('Client path %(client)s does not match '
'path stored in object metadata %(meta)s'),
{'client': self.name, 'meta': mname})
raise DiskFileCollision('Client path does not match path '
'stored in object metadata')

    def _construct_from_data_file(self, data_file, meta_file):
        """
        Open the data file to fetch its metadata, and fetch the metadata from
        the fast-POST .meta file as well if it exists, merging them properly.

        :returns: the opened data file pointer
        """
fp = open(data_file, 'rb')
datafile_metadata = read_metadata(fp)
if meta_file:
with open(meta_file) as mfp:
self.metadata = read_metadata(mfp)
@@ -421,15 +501,9 @@ class DiskFile(object):
self.metadata.update(sys_metadata)
else:
self.metadata = datafile_metadata
if 'name' in self.metadata:
if self.metadata['name'] != self.name:
self.logger.error(_('Client path %(client)s does not match '
'path stored in object metadata %(meta)s'),
{'client': self.name,
'meta': self.metadata['name']})
raise DiskFileCollision('Client path does not match path '
'stored in object metadata')
self._verify_name()
self.data_file = data_file
return fp

    def __iter__(self):
"""Returns an iterator over the data file."""


@@ -20,6 +20,7 @@ from __future__ import with_statement
import cPickle as pickle
import os
import errno
import mock
import unittest
import email
@@ -363,16 +364,22 @@ class TestDiskFile(unittest.TestCase):
rmtree(os.path.dirname(self.testdir))
tpool.execute = self._orig_tpool_exc

    def _create_test_file(self, data, keep_data_fp=True):
def _create_ondisk_file(self, df, data, ts, ext='.data'):
mkdirs(df.datadir)
ts = normalize_timestamp(ts)
data_file = os.path.join(df.datadir, ts + ext)
with open(data_file, 'wb') as f:
f.write(data)
md = {'X-Timestamp': ts}
setxattr(f.fileno(), diskfile.METADATA_KEY,
pickle.dumps(md, diskfile.PICKLE_PROTOCOL))

    def _create_test_file(self, data, keep_data_fp=True, ts=None):
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
mkdirs(df.datadir)
f = open(os.path.join(df.datadir,
normalize_timestamp(time()) + '.data'), 'wb')
f.write(data)
setxattr(f.fileno(), diskfile.METADATA_KEY,
pickle.dumps({}, diskfile.PICKLE_PROTOCOL))
f.close()
if ts is None:
ts = time()
self._create_ondisk_file(df, data, ts)
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger(), keep_data_fp=keep_data_fp)
return df
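
All the new helper needs to fake an object on disk is a file named <timestamp><ext> whose extended attributes carry the pickled metadata. A rough standalone equivalent, for illustration only; write_fake_object is my name, and the literal key and protocol number stand in for diskfile.METADATA_KEY and diskfile.PICKLE_PROTOCOL:

    import os
    import cPickle as pickle
    from xattr import setxattr  # the same setxattr the tests use

    METADATA_KEY = 'user.swift.metadata'  # assumed value of diskfile.METADATA_KEY

    def write_fake_object(datadir, ts, data='', ext='.data'):
        # Mirrors _create_ondisk_file: one file per timestamp, metadata in xattrs.
        if not os.path.exists(datadir):
            os.makedirs(datadir)
        path = os.path.join(datadir, ts + ext)
        with open(path, 'wb') as f:
            f.write(data)
            md = {'X-Timestamp': ts}
            setxattr(f.fileno(), METADATA_KEY, pickle.dumps(md, 2))
        return path

Called repeatedly with descending timestamps, this lays down exactly the layering the search-loop tests below rely on: only the newest file counts.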
@@ -492,15 +499,7 @@ class TestDiskFile(unittest.TestCase):
self.assertEquals(hook_call_count[0], 9)

    def test_quarantine(self):
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
mkdirs(df.datadir)
f = open(os.path.join(df.datadir,
normalize_timestamp(time()) + '.data'), 'wb')
setxattr(f.fileno(), diskfile.METADATA_KEY,
pickle.dumps({}, diskfile.PICKLE_PROTOCOL))
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
df = self._create_test_file('empty')
df.quarantine()
quar_dir = os.path.join(self.testdir, 'sda1', 'quarantined',
'objects', os.path.basename(os.path.dirname(
@@ -508,15 +507,7 @@ class TestDiskFile(unittest.TestCase):
self.assert_(os.path.isdir(quar_dir))

    def test_quarantine_same_file(self):
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
mkdirs(df.datadir)
f = open(os.path.join(df.datadir,
normalize_timestamp(time()) + '.data'), 'wb')
setxattr(f.fileno(), diskfile.METADATA_KEY,
pickle.dumps({}, diskfile.PICKLE_PROTOCOL))
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
df = self._create_test_file('empty')
new_dir = df.quarantine()
quar_dir = os.path.join(self.testdir, 'sda1', 'quarantined',
'objects', os.path.basename(os.path.dirname(
@@ -524,12 +515,7 @@ class TestDiskFile(unittest.TestCase):
self.assert_(os.path.isdir(quar_dir))
self.assertEquals(quar_dir, new_dir)
# have to remake the datadir and file
mkdirs(df.datadir)
f = open(os.path.join(df.datadir,
normalize_timestamp(time()) + '.data'), 'wb')
setxattr(f.fileno(), diskfile.METADATA_KEY,
pickle.dumps({}, diskfile.PICKLE_PROTOCOL))
self._create_ondisk_file(df, 'still empty', time())
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger(), keep_data_fp=True)
double_uuid_path = df.quarantine()
@@ -711,3 +697,98 @@ class TestDiskFile(unittest.TestCase):
with mock.patch("os.path.ismount", _mock_ismount):
self.assertRaises(DiskFileDeviceUnavailable, self._get_disk_file,
mount_check=True)

    def test_ondisk_search_loop_ts_meta_data(self):
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self._create_ondisk_file(df, '', ext='.ts', ts=10)
self._create_ondisk_file(df, '', ext='.ts', ts=9)
self._create_ondisk_file(df, '', ext='.meta', ts=8)
self._create_ondisk_file(df, '', ext='.meta', ts=7)
self._create_ondisk_file(df, 'B', ext='.data', ts=6)
self._create_ondisk_file(df, 'A', ext='.data', ts=5)
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self.assertTrue('X-Timestamp' in df.metadata)
self.assertEquals(df.metadata['X-Timestamp'], normalize_timestamp(10))
self.assertTrue('deleted' in df.metadata)
self.assertTrue(df.metadata['deleted'])

    def test_ondisk_search_loop_meta_ts_data(self):
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self._create_ondisk_file(df, '', ext='.meta', ts=10)
self._create_ondisk_file(df, '', ext='.meta', ts=9)
self._create_ondisk_file(df, '', ext='.ts', ts=8)
self._create_ondisk_file(df, '', ext='.ts', ts=7)
self._create_ondisk_file(df, 'B', ext='.data', ts=6)
self._create_ondisk_file(df, 'A', ext='.data', ts=5)
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self.assertTrue('X-Timestamp' in df.metadata)
self.assertEquals(df.metadata['X-Timestamp'], normalize_timestamp(8))
self.assertTrue('deleted' in df.metadata)

    def test_ondisk_search_loop_meta_data_ts(self):
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self._create_ondisk_file(df, '', ext='.meta', ts=10)
self._create_ondisk_file(df, '', ext='.meta', ts=9)
self._create_ondisk_file(df, 'B', ext='.data', ts=8)
self._create_ondisk_file(df, 'A', ext='.data', ts=7)
self._create_ondisk_file(df, '', ext='.ts', ts=6)
self._create_ondisk_file(df, '', ext='.ts', ts=5)
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self.assertTrue('X-Timestamp' in df.metadata)
self.assertEquals(df.metadata['X-Timestamp'], normalize_timestamp(10))
self.assertTrue('deleted' not in df.metadata)

    def test_ondisk_search_loop_data_meta_ts(self):
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self._create_ondisk_file(df, 'B', ext='.data', ts=10)
self._create_ondisk_file(df, 'A', ext='.data', ts=9)
self._create_ondisk_file(df, '', ext='.ts', ts=8)
self._create_ondisk_file(df, '', ext='.ts', ts=7)
self._create_ondisk_file(df, '', ext='.meta', ts=6)
self._create_ondisk_file(df, '', ext='.meta', ts=5)
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self.assertTrue('X-Timestamp' in df.metadata)
self.assertEquals(df.metadata['X-Timestamp'], normalize_timestamp(10))
self.assertTrue('deleted' not in df.metadata)

    def test_ondisk_search_loop_wayward_files_ignored(self):
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self._create_ondisk_file(df, 'X', ext='.bar', ts=11)
self._create_ondisk_file(df, 'B', ext='.data', ts=10)
self._create_ondisk_file(df, 'A', ext='.data', ts=9)
self._create_ondisk_file(df, '', ext='.ts', ts=8)
self._create_ondisk_file(df, '', ext='.ts', ts=7)
self._create_ondisk_file(df, '', ext='.meta', ts=6)
self._create_ondisk_file(df, '', ext='.meta', ts=5)
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())
self.assertTrue('X-Timestamp' in df.metadata)
self.assertEquals(df.metadata['X-Timestamp'], normalize_timestamp(10))
self.assertTrue('deleted' not in df.metadata)

    def test_ondisk_search_loop_listdir_error(self):
df = diskfile.DiskFile(self.testdir, 'sda1', '0', 'a', 'c', 'o',
FakeLogger())

        def mock_listdir_exp(*args, **kwargs):
            raise OSError(errno.EACCES, os.strerror(errno.EACCES))

with mock.patch("os.listdir", mock_listdir_exp):
self._create_ondisk_file(df, 'X', ext='.bar', ts=11)
self._create_ondisk_file(df, 'B', ext='.data', ts=10)
self._create_ondisk_file(df, 'A', ext='.data', ts=9)
self._create_ondisk_file(df, '', ext='.ts', ts=8)
self._create_ondisk_file(df, '', ext='.ts', ts=7)
self._create_ondisk_file(df, '', ext='.meta', ts=6)
self._create_ondisk_file(df, '', ext='.meta', ts=5)
self.assertRaises(OSError, diskfile.DiskFile, self.testdir, 'sda1',
'0', 'a', 'c', 'o', FakeLogger())
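
One wrinkle in that last test: _get_ondisk_file only swallows ENOENT, so the EACCES injected via mock.patch must surface as an OSError from the constructor. A hedged sketch of that contract in isolation; the list_or_none helper is illustrative and not in the patch:

    import errno
    import os
    import mock

    def list_or_none(datadir):
        # The ENOENT-only swallow from _get_ondisk_file, pulled out on its own.
        try:
            return sorted(os.listdir(datadir), reverse=True)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise                 # anything else bubbles up to the caller
            return []                 # missing directory: object cannot exist

    with mock.patch('os.listdir',
                    side_effect=OSError(errno.ENOENT, 'no such dir')):
        assert list_or_none('/gone') == []       # swallowed

    with mock.patch('os.listdir',
                    side_effect=OSError(errno.EACCES, 'denied')):
        try:
            list_or_none('/forbidden')
        except OSError as err:
            assert err.errno == errno.EACCES     # propagated, as the test expects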