Add metadata checksums to old objects in auditor.
When the object auditor examines an object, it will now add any missing metadata checksums. This goes for both .data and .meta files, but not .ts files, as tombstones don't live very long anyway. Change-Id: I9417a8b0cc5099470845c0504c834746188d89e8
This commit is contained in:
parent
27398c3573
commit
62d21eceb7
@ -240,7 +240,7 @@ class AuditorWorker(object):
|
|||||||
df = diskfile_mgr.get_diskfile_from_audit_location(location)
|
df = diskfile_mgr.get_diskfile_from_audit_location(location)
|
||||||
reader = None
|
reader = None
|
||||||
try:
|
try:
|
||||||
with df.open():
|
with df.open(modernize=True):
|
||||||
metadata = df.get_metadata()
|
metadata = df.get_metadata()
|
||||||
obj_size = int(metadata['Content-Length'])
|
obj_size = int(metadata['Content-Length'])
|
||||||
if self.stats_sizes:
|
if self.stats_sizes:
|
||||||
|
@ -131,11 +131,12 @@ def _encode_metadata(metadata):
|
|||||||
return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items()))
|
return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items()))
|
||||||
|
|
||||||
|
|
||||||
def read_metadata(fd):
|
def read_metadata(fd, add_missing_checksum=False):
|
||||||
"""
|
"""
|
||||||
Helper function to read the pickled metadata from an object file.
|
Helper function to read the pickled metadata from an object file.
|
||||||
|
|
||||||
:param fd: file descriptor or filename to load the metadata from
|
:param fd: file descriptor or filename to load the metadata from
|
||||||
|
:param add_missing_checksum: if set and checksum is missing, add it
|
||||||
|
|
||||||
:returns: dictionary of metadata
|
:returns: dictionary of metadata
|
||||||
"""
|
"""
|
||||||
@ -159,12 +160,17 @@ def read_metadata(fd):
|
|||||||
metadata_checksum = None
|
metadata_checksum = None
|
||||||
try:
|
try:
|
||||||
metadata_checksum = xattr.getxattr(fd, METADATA_CHECKSUM_KEY)
|
metadata_checksum = xattr.getxattr(fd, METADATA_CHECKSUM_KEY)
|
||||||
except (IOError, OSError) as e:
|
except (IOError, OSError):
|
||||||
# All the interesting errors were handled above; the only thing left
|
# All the interesting errors were handled above; the only thing left
|
||||||
# here is ENODATA / ENOATTR to indicate that this attribute doesn't
|
# here is ENODATA / ENOATTR to indicate that this attribute doesn't
|
||||||
# exist. This is fine; it just means that this object predates the
|
# exist. This is fine; it just means that this object predates the
|
||||||
# introduction of metadata checksums.
|
# introduction of metadata checksums.
|
||||||
pass
|
if add_missing_checksum:
|
||||||
|
new_checksum = hashlib.md5(metadata).hexdigest()
|
||||||
|
try:
|
||||||
|
xattr.setxattr(fd, METADATA_CHECKSUM_KEY, new_checksum)
|
||||||
|
except (IOError, OSError) as e:
|
||||||
|
logging.error("Error adding metadata: %s" % e)
|
||||||
|
|
||||||
if metadata_checksum:
|
if metadata_checksum:
|
||||||
computed_checksum = hashlib.md5(metadata).hexdigest()
|
computed_checksum = hashlib.md5(metadata).hexdigest()
|
||||||
@ -2182,7 +2188,7 @@ class BaseDiskFile(object):
|
|||||||
return cls(mgr, device_path, None, partition, _datadir=hash_dir_path,
|
return cls(mgr, device_path, None, partition, _datadir=hash_dir_path,
|
||||||
policy=policy)
|
policy=policy)
|
||||||
|
|
||||||
def open(self):
|
def open(self, modernize=False):
|
||||||
"""
|
"""
|
||||||
Open the object.
|
Open the object.
|
||||||
|
|
||||||
@ -2190,6 +2196,10 @@ class BaseDiskFile(object):
|
|||||||
the associated metadata in the extended attributes, additionally
|
the associated metadata in the extended attributes, additionally
|
||||||
combining metadata from fast-POST `.meta` files.
|
combining metadata from fast-POST `.meta` files.
|
||||||
|
|
||||||
|
:param modernize: if set, update this diskfile to the latest format.
|
||||||
|
Currently, this means adding metadata checksums if none are
|
||||||
|
present.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
An implementation is allowed to raise any of the following
|
An implementation is allowed to raise any of the following
|
||||||
@ -2228,7 +2238,8 @@ class BaseDiskFile(object):
|
|||||||
self._data_file = file_info.get('data_file')
|
self._data_file = file_info.get('data_file')
|
||||||
if not self._data_file:
|
if not self._data_file:
|
||||||
raise self._construct_exception_from_ts_file(**file_info)
|
raise self._construct_exception_from_ts_file(**file_info)
|
||||||
self._fp = self._construct_from_data_file(**file_info)
|
self._fp = self._construct_from_data_file(
|
||||||
|
modernize=modernize, **file_info)
|
||||||
# This method must populate the internal _metadata attribute.
|
# This method must populate the internal _metadata attribute.
|
||||||
self._metadata = self._metadata or {}
|
self._metadata = self._metadata or {}
|
||||||
return self
|
return self
|
||||||
@ -2395,7 +2406,8 @@ class BaseDiskFile(object):
|
|||||||
self._content_length = obj_size
|
self._content_length = obj_size
|
||||||
return obj_size
|
return obj_size
|
||||||
|
|
||||||
def _failsafe_read_metadata(self, source, quarantine_filename=None):
|
def _failsafe_read_metadata(self, source, quarantine_filename=None,
|
||||||
|
add_missing_checksum=False):
|
||||||
"""
|
"""
|
||||||
Read metadata from source object file. In case of failure, quarantine
|
Read metadata from source object file. In case of failure, quarantine
|
||||||
the file.
|
the file.
|
||||||
@ -2405,9 +2417,11 @@ class BaseDiskFile(object):
|
|||||||
|
|
||||||
:param source: file descriptor or filename to load the metadata from
|
:param source: file descriptor or filename to load the metadata from
|
||||||
:param quarantine_filename: full path of file to load the metadata from
|
:param quarantine_filename: full path of file to load the metadata from
|
||||||
|
:param add_missing_checksum: if True and no metadata checksum is
|
||||||
|
present, generate one and write it down
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return read_metadata(source)
|
return read_metadata(source, add_missing_checksum)
|
||||||
except (DiskFileXattrNotSupported, DiskFileNotExist):
|
except (DiskFileXattrNotSupported, DiskFileNotExist):
|
||||||
raise
|
raise
|
||||||
except DiskFileBadMetadataChecksum as err:
|
except DiskFileBadMetadataChecksum as err:
|
||||||
@ -2437,6 +2451,7 @@ class BaseDiskFile(object):
|
|||||||
ctypefile_metadata.get('Content-Type-Timestamp')
|
ctypefile_metadata.get('Content-Type-Timestamp')
|
||||||
|
|
||||||
def _construct_from_data_file(self, data_file, meta_file, ctype_file,
|
def _construct_from_data_file(self, data_file, meta_file, ctype_file,
|
||||||
|
modernize=False,
|
||||||
**kwargs):
|
**kwargs):
|
||||||
"""
|
"""
|
||||||
Open the `.data` file to fetch its metadata, and fetch the metadata
|
Open the `.data` file to fetch its metadata, and fetch the metadata
|
||||||
@ -2447,16 +2462,21 @@ class BaseDiskFile(object):
|
|||||||
:param meta_file: on-disk fast-POST `.meta` file being considered
|
:param meta_file: on-disk fast-POST `.meta` file being considered
|
||||||
:param ctype_file: on-disk fast-POST `.meta` file being considered that
|
:param ctype_file: on-disk fast-POST `.meta` file being considered that
|
||||||
contains content-type and content-type timestamp
|
contains content-type and content-type timestamp
|
||||||
|
:param modernize: whether to update the on-disk files to the newest
|
||||||
|
format
|
||||||
:returns: an opened data file pointer
|
:returns: an opened data file pointer
|
||||||
:raises DiskFileError: various exceptions from
|
:raises DiskFileError: various exceptions from
|
||||||
:func:`swift.obj.diskfile.DiskFile._verify_data_file`
|
:func:`swift.obj.diskfile.DiskFile._verify_data_file`
|
||||||
"""
|
"""
|
||||||
fp = open(data_file, 'rb')
|
fp = open(data_file, 'rb')
|
||||||
self._datafile_metadata = self._failsafe_read_metadata(fp, data_file)
|
self._datafile_metadata = self._failsafe_read_metadata(
|
||||||
|
fp, data_file,
|
||||||
|
add_missing_checksum=modernize)
|
||||||
self._metadata = {}
|
self._metadata = {}
|
||||||
if meta_file:
|
if meta_file:
|
||||||
self._metafile_metadata = self._failsafe_read_metadata(
|
self._metafile_metadata = self._failsafe_read_metadata(
|
||||||
meta_file, meta_file)
|
meta_file, meta_file,
|
||||||
|
add_missing_checksum=modernize)
|
||||||
if ctype_file and ctype_file != meta_file:
|
if ctype_file and ctype_file != meta_file:
|
||||||
self._merge_content_type_metadata(ctype_file)
|
self._merge_content_type_metadata(ctype_file)
|
||||||
sys_metadata = dict(
|
sys_metadata = dict(
|
||||||
|
@ -273,7 +273,7 @@ class DiskFile(object):
|
|||||||
self._filesystem = fs
|
self._filesystem = fs
|
||||||
self.fragments = None
|
self.fragments = None
|
||||||
|
|
||||||
def open(self):
|
def open(self, modernize=False):
|
||||||
"""
|
"""
|
||||||
Open the file and read the metadata.
|
Open the file and read the metadata.
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ import mock
|
|||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import string
|
import string
|
||||||
|
import xattr
|
||||||
from shutil import rmtree
|
from shutil import rmtree
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
from tempfile import mkdtemp
|
from tempfile import mkdtemp
|
||||||
@ -188,6 +189,74 @@ class TestAuditor(unittest.TestCase):
|
|||||||
run_tests(self.disk_file_p1)
|
run_tests(self.disk_file_p1)
|
||||||
run_tests(self.disk_file_ec)
|
run_tests(self.disk_file_ec)
|
||||||
|
|
||||||
|
def test_object_audit_adds_metadata_checksums(self):
|
||||||
|
disk_file = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o-md',
|
||||||
|
policy=POLICIES.legacy)
|
||||||
|
|
||||||
|
# simulate a PUT
|
||||||
|
now = time.time()
|
||||||
|
data = b'boots and cats and ' * 1024
|
||||||
|
hasher = md5()
|
||||||
|
with disk_file.create() as writer:
|
||||||
|
writer.write(data)
|
||||||
|
hasher.update(data)
|
||||||
|
etag = hasher.hexdigest()
|
||||||
|
metadata = {
|
||||||
|
'ETag': etag,
|
||||||
|
'X-Timestamp': str(normalize_timestamp(now)),
|
||||||
|
'Content-Length': len(data),
|
||||||
|
'Content-Type': 'the old type',
|
||||||
|
}
|
||||||
|
writer.put(metadata)
|
||||||
|
writer.commit(Timestamp(now))
|
||||||
|
|
||||||
|
# simulate a subsequent POST
|
||||||
|
post_metadata = metadata.copy()
|
||||||
|
post_metadata['Content-Type'] = 'the new type'
|
||||||
|
post_metadata['X-Object-Meta-Biff'] = 'buff'
|
||||||
|
post_metadata['X-Timestamp'] = str(normalize_timestamp(now + 1))
|
||||||
|
disk_file.write_metadata(post_metadata)
|
||||||
|
|
||||||
|
file_paths = [os.path.join(disk_file._datadir, fname)
|
||||||
|
for fname in os.listdir(disk_file._datadir)
|
||||||
|
if fname not in ('.', '..')]
|
||||||
|
file_paths.sort()
|
||||||
|
|
||||||
|
# sanity check: make sure we have a .data and a .meta file
|
||||||
|
self.assertEqual(len(file_paths), 2)
|
||||||
|
self.assertTrue(file_paths[0].endswith(".data"))
|
||||||
|
self.assertTrue(file_paths[1].endswith(".meta"))
|
||||||
|
|
||||||
|
# Go remove the xattr "user.swift.metadata_checksum" as if this
|
||||||
|
# object were written before Swift supported metadata checksums.
|
||||||
|
for file_path in file_paths:
|
||||||
|
xattr.removexattr(file_path, "user.swift.metadata_checksum")
|
||||||
|
|
||||||
|
# Run the auditor...
|
||||||
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
|
||||||
|
self.rcache, self.devices)
|
||||||
|
auditor_worker.object_audit(
|
||||||
|
AuditLocation(disk_file._datadir, 'sda', '0',
|
||||||
|
policy=disk_file.policy))
|
||||||
|
self.assertEqual(auditor_worker.quarantines, 0) # sanity
|
||||||
|
|
||||||
|
# ...and the checksums are back
|
||||||
|
for file_path in file_paths:
|
||||||
|
metadata = xattr.getxattr(file_path, "user.swift.metadata")
|
||||||
|
i = 1
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
metadata += xattr.getxattr(
|
||||||
|
file_path, "user.swift.metadata%d" % i)
|
||||||
|
i += 1
|
||||||
|
except (IOError, OSError):
|
||||||
|
break
|
||||||
|
|
||||||
|
checksum = xattr.getxattr(
|
||||||
|
file_path, "user.swift.metadata_checksum")
|
||||||
|
|
||||||
|
self.assertEqual(checksum, md5(metadata).hexdigest())
|
||||||
|
|
||||||
def test_object_audit_diff_data(self):
|
def test_object_audit_diff_data(self):
|
||||||
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
|
auditor_worker = auditor.AuditorWorker(self.conf, self.logger,
|
||||||
self.rcache, self.devices)
|
self.rcache, self.devices)
|
||||||
|
Loading…
Reference in New Issue
Block a user