Merge "Add metadata checksums to old objects in auditor."
This commit is contained in:
commit
5731fab90b
@ -240,7 +240,7 @@ class AuditorWorker(object):
|
||||
df = diskfile_mgr.get_diskfile_from_audit_location(location)
|
||||
reader = None
|
||||
try:
|
||||
with df.open():
|
||||
with df.open(modernize=True):
|
||||
metadata = df.get_metadata()
|
||||
obj_size = int(metadata['Content-Length'])
|
||||
if self.stats_sizes:
|
||||
|
@ -131,11 +131,12 @@ def _encode_metadata(metadata):
|
||||
return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items()))
|
||||
|
||||
|
||||
def read_metadata(fd):
|
||||
def read_metadata(fd, add_missing_checksum=False):
|
||||
"""
|
||||
Helper function to read the pickled metadata from an object file.
|
||||
|
||||
:param fd: file descriptor or filename to load the metadata from
|
||||
:param add_missing_checksum: if set and checksum is missing, add it
|
||||
|
||||
:returns: dictionary of metadata
|
||||
"""
|
||||
@ -159,12 +160,17 @@ def read_metadata(fd):
|
||||
metadata_checksum = None
|
||||
try:
|
||||
metadata_checksum = xattr.getxattr(fd, METADATA_CHECKSUM_KEY)
|
||||
except (IOError, OSError) as e:
|
||||
except (IOError, OSError):
|
||||
# All the interesting errors were handled above; the only thing left
|
||||
# here is ENODATA / ENOATTR to indicate that this attribute doesn't
|
||||
# exist. This is fine; it just means that this object predates the
|
||||
# introduction of metadata checksums.
|
||||
pass
|
||||
if add_missing_checksum:
|
||||
new_checksum = hashlib.md5(metadata).hexdigest()
|
||||
try:
|
||||
xattr.setxattr(fd, METADATA_CHECKSUM_KEY, new_checksum)
|
||||
except (IOError, OSError) as e:
|
||||
logging.error("Error adding metadata: %s" % e)
|
||||
|
||||
if metadata_checksum:
|
||||
computed_checksum = hashlib.md5(metadata).hexdigest()
|
||||
@ -2182,7 +2188,7 @@ class BaseDiskFile(object):
|
||||
return cls(mgr, device_path, None, partition, _datadir=hash_dir_path,
|
||||
policy=policy)
|
||||
|
||||
def open(self):
|
||||
def open(self, modernize=False):
|
||||
"""
|
||||
Open the object.
|
||||
|
||||
@ -2190,6 +2196,10 @@ class BaseDiskFile(object):
|
||||
the associated metadata in the extended attributes, additionally
|
||||
combining metadata from fast-POST `.meta` files.
|
||||
|
||||
:param modernize: if set, update this diskfile to the latest format.
|
||||
Currently, this means adding metadata checksums if none are
|
||||
present.
|
||||
|
||||
.. note::
|
||||
|
||||
An implementation is allowed to raise any of the following
|
||||
@ -2228,7 +2238,8 @@ class BaseDiskFile(object):
|
||||
self._data_file = file_info.get('data_file')
|
||||
if not self._data_file:
|
||||
raise self._construct_exception_from_ts_file(**file_info)
|
||||
self._fp = self._construct_from_data_file(**file_info)
|
||||
self._fp = self._construct_from_data_file(
|
||||
modernize=modernize, **file_info)
|
||||
# This method must populate the internal _metadata attribute.
|
||||
self._metadata = self._metadata or {}
|
||||
return self
|
||||
@ -2395,7 +2406,8 @@ class BaseDiskFile(object):
|
||||
self._content_length = obj_size
|
||||
return obj_size
|
||||
|
||||
def _failsafe_read_metadata(self, source, quarantine_filename=None):
|
||||
def _failsafe_read_metadata(self, source, quarantine_filename=None,
|
||||
add_missing_checksum=False):
|
||||
"""
|
||||
Read metadata from source object file. In case of failure, quarantine
|
||||
the file.
|
||||
@ -2405,9 +2417,11 @@ class BaseDiskFile(object):
|
||||
|
||||
:param source: file descriptor or filename to load the metadata from
|
||||
:param quarantine_filename: full path of file to load the metadata from
|
||||
:param add_missing_checksum: if True and no metadata checksum is
|
||||
present, generate one and write it down
|
||||
"""
|
||||
try:
|
||||
return read_metadata(source)
|
||||
return read_metadata(source, add_missing_checksum)
|
||||
except (DiskFileXattrNotSupported, DiskFileNotExist):
|
||||
raise
|
||||
except DiskFileBadMetadataChecksum as err:
|
||||
@ -2437,6 +2451,7 @@ class BaseDiskFile(object):
|
||||
ctypefile_metadata.get('Content-Type-Timestamp')
|
||||
|
||||
def _construct_from_data_file(self, data_file, meta_file, ctype_file,
|
||||
modernize=False,
|
||||
**kwargs):
|
||||
"""
|
||||
Open the `.data` file to fetch its metadata, and fetch the metadata
|
||||
@ -2447,16 +2462,21 @@ class BaseDiskFile(object):
|
||||
:param meta_file: on-disk fast-POST `.meta` file being considered
|
||||
:param ctype_file: on-disk fast-POST `.meta` file being considered that
|
||||
contains content-type and content-type timestamp
|
||||
:param modernize: whether to update the on-disk files to the newest
|
||||
format
|
||||
:returns: an opened data file pointer
|
||||
:raises DiskFileError: various exceptions from
|
||||
:func:`swift.obj.diskfile.DiskFile._verify_data_file`
|
||||
"""
|
||||
fp = open(data_file, 'rb')
|
||||
self._datafile_metadata = self._failsafe_read_metadata(fp, data_file)
|
||||
self._datafile_metadata = self._failsafe_read_metadata(
|
||||
fp, data_file,
|
||||
add_missing_checksum=modernize)
|
||||
self._metadata = {}
|
||||
if meta_file:
|
||||
self._metafile_metadata = self._failsafe_read_metadata(
|
||||
meta_file, meta_file)
|
||||
meta_file, meta_file,
|
||||
add_missing_checksum=modernize)
|
||||
if ctype_file and ctype_file != meta_file:
|
||||
self._merge_content_type_metadata(ctype_file)
|
||||
sys_metadata = dict(
|
||||
|
@ -273,7 +273,7 @@ class DiskFile(object):
|
||||
self._filesystem = fs
|
||||
self.fragments = None
|
||||
|
||||
def open(self):
|
||||
def open(self, modernize=False):
|
||||
"""
|
||||
Open the file and read the metadata.
|
||||
|
||||
|
@ -19,6 +19,7 @@ import mock
|
||||
import os
|
||||
import time
|
||||
import string
|
||||
import xattr
|
||||
from shutil import rmtree
|
||||
from hashlib import md5
|
||||
from tempfile import mkdtemp
|
||||
@ -188,6 +189,74 @@ class TestAuditor(unittest.TestCase):
|
||||
run_tests(self.disk_file_p1)
|
||||
run_tests(self.disk_file_ec)
|
||||
|
||||
def test_object_audit_adds_metadata_checksums(self):
|
||||
disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o-md',
|
||||
policy=POLICIES.legacy)
|
||||
|
||||
# simulate a PUT
|
||||
now = time.time()
|
||||
data = b'boots and cats and ' * 1024
|
||||
hasher = md5()
|
||||
with disk_file.create() as writer:
|
||||
writer.write(data)
|
||||
hasher.update(data)
|
||||
etag = hasher.hexdigest()
|
||||
metadata = {
|
||||
'ETag': etag,
|
||||
'X-Timestamp': str(normalize_timestamp(now)),
|
||||
'Content-Length': len(data),
|
||||
'Content-Type': 'the old type',
|
||||
}
|
||||
writer.put(metadata)
|
||||
writer.commit(Timestamp(now))
|
||||
|
||||
# simulate a subsequent POST
|
||||
post_metadata = metadata.copy()
|
||||
post_metadata['Content-Type'] = 'the new type'
|
||||
post_metadata['X-Object-Meta-Biff'] = 'buff'
|
||||
post_metadata['X-Timestamp'] = str(normalize_timestamp(now + 1))
|
||||
disk_file.write_metadata(post_metadata)
|
||||
|
||||
file_paths = [os.path.join(disk_file._datadir, fname)
|
||||
for fname in os.listdir(disk_file._datadir)
|
||||
if fname not in ('.', '..')]
|
||||
file_paths.sort()
|
||||
|
||||
# sanity check: make sure we have a .data and a .meta file
|
||||
self.assertEqual(len(file_paths), 2)
|
||||
self.assertTrue(file_paths[0].endswith(".data"))
|
||||
self.assertTrue(file_paths[1].endswith(".meta"))
|
||||
|
||||
# Go remove the xattr "user.swift.metadata_checksum" as if this
|
||||
# object were written before Swift supported metadata checksums.
|
||||
for file_path in file_paths:
|
||||
xattr.removexattr(file_path, "user.swift.metadata_checksum")
|
||||
|
||||
# Run the auditor...
|
||||
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
|
||||
self.rcache, self.devices)
|
||||
auditor_worker.object_audit(
|
||||
AuditLocation(disk_file._datadir, 'sda', '0',
|
||||
policy=disk_file.policy))
|
||||
self.assertEqual(auditor_worker.quarantines, 0) # sanity
|
||||
|
||||
# ...and the checksums are back
|
||||
for file_path in file_paths:
|
||||
metadata = xattr.getxattr(file_path, "user.swift.metadata")
|
||||
i = 1
|
||||
while True:
|
||||
try:
|
||||
metadata += xattr.getxattr(
|
||||
file_path, "user.swift.metadata%d" % i)
|
||||
i += 1
|
||||
except (IOError, OSError):
|
||||
break
|
||||
|
||||
checksum = xattr.getxattr(
|
||||
file_path, "user.swift.metadata_checksum")
|
||||
|
||||
self.assertEqual(checksum, md5(metadata).hexdigest())
|
||||
|
||||
def test_object_audit_diff_data(self):
|
||||
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
|
||||
self.rcache, self.devices)
|
||||
|
Loading…
x
Reference in New Issue
Block a user