Merge "Add metadata checksums to old objects in auditor."

This commit is contained in:
Zuul 2017-11-13 22:50:53 +00:00 committed by Gerrit Code Review
commit 5731fab90b
4 changed files with 100 additions and 11 deletions

View File

@ -240,7 +240,7 @@ class AuditorWorker(object):
df = diskfile_mgr.get_diskfile_from_audit_location(location)
reader = None
try:
with df.open():
with df.open(modernize=True):
metadata = df.get_metadata()
obj_size = int(metadata['Content-Length'])
if self.stats_sizes:

View File

@ -131,11 +131,12 @@ def _encode_metadata(metadata):
return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items()))
def read_metadata(fd):
def read_metadata(fd, add_missing_checksum=False):
"""
Helper function to read the pickled metadata from an object file.
:param fd: file descriptor or filename to load the metadata from
:param add_missing_checksum: if set and checksum is missing, add it
:returns: dictionary of metadata
"""
@ -159,12 +160,17 @@ def read_metadata(fd):
metadata_checksum = None
try:
metadata_checksum = xattr.getxattr(fd, METADATA_CHECKSUM_KEY)
except (IOError, OSError) as e:
except (IOError, OSError):
# All the interesting errors were handled above; the only thing left
# here is ENODATA / ENOATTR to indicate that this attribute doesn't
# exist. This is fine; it just means that this object predates the
# introduction of metadata checksums.
pass
if add_missing_checksum:
new_checksum = hashlib.md5(metadata).hexdigest()
try:
xattr.setxattr(fd, METADATA_CHECKSUM_KEY, new_checksum)
except (IOError, OSError) as e:
logging.error("Error adding metadata: %s" % e)
if metadata_checksum:
computed_checksum = hashlib.md5(metadata).hexdigest()
@ -2182,7 +2188,7 @@ class BaseDiskFile(object):
return cls(mgr, device_path, None, partition, _datadir=hash_dir_path,
policy=policy)
def open(self):
def open(self, modernize=False):
"""
Open the object.
@ -2190,6 +2196,10 @@ class BaseDiskFile(object):
the associated metadata in the extended attributes, additionally
combining metadata from fast-POST `.meta` files.
:param modernize: if set, update this diskfile to the latest format.
Currently, this means adding metadata checksums if none are
present.
.. note::
An implementation is allowed to raise any of the following
@ -2228,7 +2238,8 @@ class BaseDiskFile(object):
self._data_file = file_info.get('data_file')
if not self._data_file:
raise self._construct_exception_from_ts_file(**file_info)
self._fp = self._construct_from_data_file(**file_info)
self._fp = self._construct_from_data_file(
modernize=modernize, **file_info)
# This method must populate the internal _metadata attribute.
self._metadata = self._metadata or {}
return self
@ -2395,7 +2406,8 @@ class BaseDiskFile(object):
self._content_length = obj_size
return obj_size
def _failsafe_read_metadata(self, source, quarantine_filename=None):
def _failsafe_read_metadata(self, source, quarantine_filename=None,
add_missing_checksum=False):
"""
Read metadata from source object file. In case of failure, quarantine
the file.
@ -2405,9 +2417,11 @@ class BaseDiskFile(object):
:param source: file descriptor or filename to load the metadata from
:param quarantine_filename: full path of file to load the metadata from
:param add_missing_checksum: if True and no metadata checksum is
present, generate one and write it down
"""
try:
return read_metadata(source)
return read_metadata(source, add_missing_checksum)
except (DiskFileXattrNotSupported, DiskFileNotExist):
raise
except DiskFileBadMetadataChecksum as err:
@ -2437,6 +2451,7 @@ class BaseDiskFile(object):
ctypefile_metadata.get('Content-Type-Timestamp')
def _construct_from_data_file(self, data_file, meta_file, ctype_file,
modernize=False,
**kwargs):
"""
Open the `.data` file to fetch its metadata, and fetch the metadata
@ -2447,16 +2462,21 @@ class BaseDiskFile(object):
:param meta_file: on-disk fast-POST `.meta` file being considered
:param ctype_file: on-disk fast-POST `.meta` file being considered that
contains content-type and content-type timestamp
:param modernize: whether to update the on-disk files to the newest
format
:returns: an opened data file pointer
:raises DiskFileError: various exceptions from
:func:`swift.obj.diskfile.DiskFile._verify_data_file`
"""
fp = open(data_file, 'rb')
self._datafile_metadata = self._failsafe_read_metadata(fp, data_file)
self._datafile_metadata = self._failsafe_read_metadata(
fp, data_file,
add_missing_checksum=modernize)
self._metadata = {}
if meta_file:
self._metafile_metadata = self._failsafe_read_metadata(
meta_file, meta_file)
meta_file, meta_file,
add_missing_checksum=modernize)
if ctype_file and ctype_file != meta_file:
self._merge_content_type_metadata(ctype_file)
sys_metadata = dict(

View File

@ -273,7 +273,7 @@ class DiskFile(object):
self._filesystem = fs
self.fragments = None
def open(self):
def open(self, modernize=False):
"""
Open the file and read the metadata.

View File

@ -19,6 +19,7 @@ import mock
import os
import time
import string
import xattr
from shutil import rmtree
from hashlib import md5
from tempfile import mkdtemp
@ -188,6 +189,74 @@ class TestAuditor(unittest.TestCase):
run_tests(self.disk_file_p1)
run_tests(self.disk_file_ec)
def test_object_audit_adds_metadata_checksums(self):
disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o-md',
policy=POLICIES.legacy)
# simulate a PUT
now = time.time()
data = b'boots and cats and ' * 1024
hasher = md5()
with disk_file.create() as writer:
writer.write(data)
hasher.update(data)
etag = hasher.hexdigest()
metadata = {
'ETag': etag,
'X-Timestamp': str(normalize_timestamp(now)),
'Content-Length': len(data),
'Content-Type': 'the old type',
}
writer.put(metadata)
writer.commit(Timestamp(now))
# simulate a subsequent POST
post_metadata = metadata.copy()
post_metadata['Content-Type'] = 'the new type'
post_metadata['X-Object-Meta-Biff'] = 'buff'
post_metadata['X-Timestamp'] = str(normalize_timestamp(now + 1))
disk_file.write_metadata(post_metadata)
file_paths = [os.path.join(disk_file._datadir, fname)
for fname in os.listdir(disk_file._datadir)
if fname not in ('.', '..')]
file_paths.sort()
# sanity check: make sure we have a .data and a .meta file
self.assertEqual(len(file_paths), 2)
self.assertTrue(file_paths[0].endswith(".data"))
self.assertTrue(file_paths[1].endswith(".meta"))
# Go remove the xattr "user.swift.metadata_checksum" as if this
# object were written before Swift supported metadata checksums.
for file_path in file_paths:
xattr.removexattr(file_path, "user.swift.metadata_checksum")
# Run the auditor...
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
self.rcache, self.devices)
auditor_worker.object_audit(
AuditLocation(disk_file._datadir, 'sda', '0',
policy=disk_file.policy))
self.assertEqual(auditor_worker.quarantines, 0) # sanity
# ...and the checksums are back
for file_path in file_paths:
metadata = xattr.getxattr(file_path, "user.swift.metadata")
i = 1
while True:
try:
metadata += xattr.getxattr(
file_path, "user.swift.metadata%d" % i)
i += 1
except (IOError, OSError):
break
checksum = xattr.getxattr(
file_path, "user.swift.metadata_checksum")
self.assertEqual(checksum, md5(metadata).hexdigest())
def test_object_audit_diff_data(self):
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
self.rcache, self.devices)