Add metadata checksums to old objects in auditor.

When the object auditor examines an object, it will now add any
missing metadata checksums. This goes for both .data and .meta files,
but not .ts files, as tombstones don't live very long anyway.

Change-Id: I9417a8b0cc5099470845c0504c834746188d89e8
This commit is contained in:
Samuel Merritt 2017-10-19 17:13:00 -07:00
parent 27398c3573
commit 62d21eceb7
4 changed files with 100 additions and 11 deletions

View File

@ -240,7 +240,7 @@ class AuditorWorker(object):
df = diskfile_mgr.get_diskfile_from_audit_location(location)
reader = None
try:
with df.open():
with df.open(modernize=True):
metadata = df.get_metadata()
obj_size = int(metadata['Content-Length'])
if self.stats_sizes:

View File

@ -131,11 +131,12 @@ def _encode_metadata(metadata):
return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items()))
def read_metadata(fd):
def read_metadata(fd, add_missing_checksum=False):
"""
Helper function to read the pickled metadata from an object file.
:param fd: file descriptor or filename to load the metadata from
:param add_missing_checksum: if set and checksum is missing, add it
:returns: dictionary of metadata
"""
@ -159,12 +160,17 @@ def read_metadata(fd):
metadata_checksum = None
try:
metadata_checksum = xattr.getxattr(fd, METADATA_CHECKSUM_KEY)
except (IOError, OSError) as e:
except (IOError, OSError):
# All the interesting errors were handled above; the only thing left
# here is ENODATA / ENOATTR to indicate that this attribute doesn't
# exist. This is fine; it just means that this object predates the
# introduction of metadata checksums.
pass
if add_missing_checksum:
new_checksum = hashlib.md5(metadata).hexdigest()
try:
xattr.setxattr(fd, METADATA_CHECKSUM_KEY, new_checksum)
except (IOError, OSError) as e:
logging.error("Error adding metadata: %s" % e)
if metadata_checksum:
computed_checksum = hashlib.md5(metadata).hexdigest()
@ -2182,7 +2188,7 @@ class BaseDiskFile(object):
return cls(mgr, device_path, None, partition, _datadir=hash_dir_path,
policy=policy)
def open(self):
def open(self, modernize=False):
"""
Open the object.
@ -2190,6 +2196,10 @@ class BaseDiskFile(object):
the associated metadata in the extended attributes, additionally
combining metadata from fast-POST `.meta` files.
:param modernize: if set, update this diskfile to the latest format.
Currently, this means adding metadata checksums if none are
present.
.. note::
An implementation is allowed to raise any of the following
@ -2228,7 +2238,8 @@ class BaseDiskFile(object):
self._data_file = file_info.get('data_file')
if not self._data_file:
raise self._construct_exception_from_ts_file(**file_info)
self._fp = self._construct_from_data_file(**file_info)
self._fp = self._construct_from_data_file(
modernize=modernize, **file_info)
# This method must populate the internal _metadata attribute.
self._metadata = self._metadata or {}
return self
@ -2395,7 +2406,8 @@ class BaseDiskFile(object):
self._content_length = obj_size
return obj_size
def _failsafe_read_metadata(self, source, quarantine_filename=None):
def _failsafe_read_metadata(self, source, quarantine_filename=None,
add_missing_checksum=False):
"""
Read metadata from source object file. In case of failure, quarantine
the file.
@ -2405,9 +2417,11 @@ class BaseDiskFile(object):
:param source: file descriptor or filename to load the metadata from
:param quarantine_filename: full path of file to load the metadata from
:param add_missing_checksum: if True and no metadata checksum is
present, generate one and write it down
"""
try:
return read_metadata(source)
return read_metadata(source, add_missing_checksum)
except (DiskFileXattrNotSupported, DiskFileNotExist):
raise
except DiskFileBadMetadataChecksum as err:
@ -2437,6 +2451,7 @@ class BaseDiskFile(object):
ctypefile_metadata.get('Content-Type-Timestamp')
def _construct_from_data_file(self, data_file, meta_file, ctype_file,
modernize=False,
**kwargs):
"""
Open the `.data` file to fetch its metadata, and fetch the metadata
@ -2447,16 +2462,21 @@ class BaseDiskFile(object):
:param meta_file: on-disk fast-POST `.meta` file being considered
:param ctype_file: on-disk fast-POST `.meta` file being considered that
contains content-type and content-type timestamp
:param modernize: whether to update the on-disk files to the newest
format
:returns: an opened data file pointer
:raises DiskFileError: various exceptions from
:func:`swift.obj.diskfile.DiskFile._verify_data_file`
"""
fp = open(data_file, 'rb')
self._datafile_metadata = self._failsafe_read_metadata(fp, data_file)
self._datafile_metadata = self._failsafe_read_metadata(
fp, data_file,
add_missing_checksum=modernize)
self._metadata = {}
if meta_file:
self._metafile_metadata = self._failsafe_read_metadata(
meta_file, meta_file)
meta_file, meta_file,
add_missing_checksum=modernize)
if ctype_file and ctype_file != meta_file:
self._merge_content_type_metadata(ctype_file)
sys_metadata = dict(

View File

@ -273,7 +273,7 @@ class DiskFile(object):
self._filesystem = fs
self.fragments = None
def open(self):
def open(self, modernize=False):
"""
Open the file and read the metadata.

View File

@ -19,6 +19,7 @@ import mock
import os
import time
import string
import xattr
from shutil import rmtree
from hashlib import md5
from tempfile import mkdtemp
@ -188,6 +189,74 @@ class TestAuditor(unittest.TestCase):
run_tests(self.disk_file_p1)
run_tests(self.disk_file_ec)
def test_object_audit_adds_metadata_checksums(self):
disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o-md',
policy=POLICIES.legacy)
# simulate a PUT
now = time.time()
data = b'boots and cats and ' * 1024
hasher = md5()
with disk_file.create() as writer:
writer.write(data)
hasher.update(data)
etag = hasher.hexdigest()
metadata = {
'ETag': etag,
'X-Timestamp': str(normalize_timestamp(now)),
'Content-Length': len(data),
'Content-Type': 'the old type',
}
writer.put(metadata)
writer.commit(Timestamp(now))
# simulate a subsequent POST
post_metadata = metadata.copy()
post_metadata['Content-Type'] = 'the new type'
post_metadata['X-Object-Meta-Biff'] = 'buff'
post_metadata['X-Timestamp'] = str(normalize_timestamp(now + 1))
disk_file.write_metadata(post_metadata)
file_paths = [os.path.join(disk_file._datadir, fname)
for fname in os.listdir(disk_file._datadir)
if fname not in ('.', '..')]
file_paths.sort()
# sanity check: make sure we have a .data and a .meta file
self.assertEqual(len(file_paths), 2)
self.assertTrue(file_paths[0].endswith(".data"))
self.assertTrue(file_paths[1].endswith(".meta"))
# Go remove the xattr "user.swift.metadata_checksum" as if this
# object were written before Swift supported metadata checksums.
for file_path in file_paths:
xattr.removexattr(file_path, "user.swift.metadata_checksum")
# Run the auditor...
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
self.rcache, self.devices)
auditor_worker.object_audit(
AuditLocation(disk_file._datadir, 'sda', '0',
policy=disk_file.policy))
self.assertEqual(auditor_worker.quarantines, 0) # sanity
# ...and the checksums are back
for file_path in file_paths:
metadata = xattr.getxattr(file_path, "user.swift.metadata")
i = 1
while True:
try:
metadata += xattr.getxattr(
file_path, "user.swift.metadata%d" % i)
i += 1
except (IOError, OSError):
break
checksum = xattr.getxattr(
file_path, "user.swift.metadata_checksum")
self.assertEqual(checksum, md5(metadata).hexdigest())
def test_object_audit_diff_data(self):
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
self.rcache, self.devices)