Detect file change when file size remains same
There was a corner case where Etag returned would be incorrect. This happens when the object is changed from file interface but with object size remaining the same. This change introduces an additional metadata that stores the mtime of object during PUT. This stored mtime is compared with actual mtime during GET to determine if the file has changed or not. Etag is recalculated if the file has changed. The scope of this fix in addressing the above mentioned corner case is limited to new objects only. Also, refactoring the code further by moving some methods from utils.py to classes in diskfile.py should prevent some redundant (f)stat syscalls. These minor optimizations will be addressed in a separate change. Change-Id: If724697ef2b17a3c569b60bd81ebf98dab283da6 Signed-off-by: Prashanth Pai <ppai@redhat.com>
This commit is contained in:
parent
3083d14aff
commit
da53693c9b
@ -34,6 +34,7 @@ X_TIMESTAMP = 'X-Timestamp'
|
||||
X_TYPE = 'X-Type'
|
||||
X_ETAG = 'ETag'
|
||||
X_OBJECT_TYPE = 'X-Object-Type'
|
||||
X_MTIME = 'X-Object-PUT-Mtime'
|
||||
DIR_TYPE = 'application/directory'
|
||||
METADATA_KEY = 'user.swift.metadata'
|
||||
MAX_XATTR_SIZE = 65536
|
||||
@ -164,7 +165,7 @@ def clean_metadata(path_or_fd):
|
||||
key += 1
|
||||
|
||||
|
||||
def validate_object(metadata, stat=None):
|
||||
def validate_object(metadata, statinfo=None):
|
||||
if not metadata:
|
||||
return False
|
||||
|
||||
@ -176,11 +177,17 @@ def validate_object(metadata, stat=None):
|
||||
X_OBJECT_TYPE not in metadata.keys():
|
||||
return False
|
||||
|
||||
if stat and (int(metadata[X_CONTENT_LENGTH]) != stat.st_size):
|
||||
# File length has changed.
|
||||
# TODO: Handle case where file content has changed but the length
|
||||
# remains the same.
|
||||
return False
|
||||
if statinfo and stat.S_ISREG(statinfo.st_mode):
|
||||
|
||||
# File length has changed
|
||||
if int(metadata[X_CONTENT_LENGTH]) != statinfo.st_size:
|
||||
return False
|
||||
|
||||
# File might have changed with length being the same.
|
||||
if X_MTIME in metadata and \
|
||||
normalize_timestamp(metadata[X_MTIME]) != \
|
||||
normalize_timestamp(statinfo.st_mtime):
|
||||
return False
|
||||
|
||||
if metadata[X_TYPE] == OBJECT:
|
||||
return True
|
||||
@ -255,6 +262,7 @@ def get_object_metadata(obj_path_or_fd):
|
||||
X_CONTENT_TYPE: DIR_TYPE if is_dir else FILE_TYPE,
|
||||
X_OBJECT_TYPE: DIR_NON_OBJECT if is_dir else FILE,
|
||||
X_CONTENT_LENGTH: 0 if is_dir else stats.st_size,
|
||||
X_MTIME: 0 if is_dir else normalize_timestamp(stats.st_mtime),
|
||||
X_ETAG: md5().hexdigest() if is_dir else _get_etag(obj_path_or_fd)}
|
||||
return metadata
|
||||
|
||||
|
@ -44,7 +44,7 @@ from swiftonfile.swift.common.utils import read_metadata, write_metadata, \
|
||||
from swiftonfile.swift.common.utils import X_CONTENT_TYPE, \
|
||||
X_TIMESTAMP, X_TYPE, X_OBJECT_TYPE, FILE, OBJECT, DIR_TYPE, \
|
||||
FILE_TYPE, DEFAULT_UID, DEFAULT_GID, DIR_NON_OBJECT, DIR_OBJECT, \
|
||||
X_ETAG, X_CONTENT_LENGTH
|
||||
X_ETAG, X_CONTENT_LENGTH, X_MTIME
|
||||
from swift.obj.diskfile import DiskFileManager as SwiftDiskFileManager
|
||||
from swift.obj.diskfile import get_async_dir
|
||||
|
||||
@ -169,7 +169,7 @@ def make_directory(full_path, uid, gid, metadata=None):
|
||||
return True, metadata
|
||||
|
||||
|
||||
def _adjust_metadata(metadata):
|
||||
def _adjust_metadata(fd, metadata):
|
||||
# Fix up the metadata to ensure it has a proper value for the
|
||||
# Content-Type metadata, as well as an X_TYPE and X_OBJECT_TYPE
|
||||
# metadata values.
|
||||
@ -189,6 +189,12 @@ def _adjust_metadata(metadata):
|
||||
else:
|
||||
metadata[X_OBJECT_TYPE] = FILE
|
||||
|
||||
# stat.st_mtime does not change after last write(). We set this to later
|
||||
# detect if the object was changed from filesystem interface (non Swift)
|
||||
statinfo = do_fstat(fd)
|
||||
if stat.S_ISREG(statinfo.st_mode):
|
||||
metadata[X_MTIME] = normalize_timestamp(statinfo.st_mtime)
|
||||
|
||||
metadata[X_TYPE] = OBJECT
|
||||
return metadata
|
||||
|
||||
@ -385,7 +391,7 @@ class DiskFileWriter(object):
|
||||
name
|
||||
"""
|
||||
assert self._tmppath is not None
|
||||
metadata = _adjust_metadata(metadata)
|
||||
metadata = _adjust_metadata(self._fd, metadata)
|
||||
df = self._disk_file
|
||||
|
||||
if dir_is_object(metadata):
|
||||
|
@ -226,3 +226,17 @@ class TestSwiftOnFile(Base):
|
||||
self.assert_status(200)
|
||||
self.assertEqual(new_data_hash, metadata['etag'])
|
||||
self.assertEqual(len(new_data), int(metadata['content_length']))
|
||||
|
||||
# Modify the file but let the length remain same
|
||||
new_data = "I am Antman"
|
||||
new_data_hash = hashlib.md5(new_data).hexdigest()
|
||||
with open(file_path, 'w') as f:
|
||||
f.write(new_data)
|
||||
# Make sure GET works
|
||||
self.assertEqual(new_data, object_item.read())
|
||||
self.assert_status(200)
|
||||
# Check Etag and content-length is right
|
||||
metadata = object_item.info()
|
||||
self.assert_status(200)
|
||||
self.assertEqual(new_data_hash, metadata['etag'])
|
||||
self.assertEqual(len(new_data), int(metadata['content_length']))
|
||||
|
@ -469,9 +469,9 @@ class TestUtils(unittest.TestCase):
|
||||
utils.X_CONTENT_LENGTH: '12345',
|
||||
utils.X_TYPE: utils.OBJECT,
|
||||
utils.X_OBJECT_TYPE: 'na'}
|
||||
fake_stat = Mock(st_size=12346)
|
||||
fake_stat = Mock(st_size=12346, st_mode=33188)
|
||||
self.assertFalse(utils.validate_object(md, fake_stat))
|
||||
fake_stat = Mock(st_size=12345)
|
||||
fake_stat = Mock(st_size=12345, st_mode=33188)
|
||||
self.assertTrue(utils.validate_object(md, fake_stat))
|
||||
|
||||
def test_write_pickle(self):
|
||||
|
@ -188,6 +188,7 @@ class TestDiskFile(unittest.TestCase):
|
||||
'Content-Length': 4,
|
||||
'ETag': etag,
|
||||
'X-Timestamp': ts,
|
||||
'X-Object-PUT-Mtime': normalize_timestamp(stats.st_mtime),
|
||||
'Content-Type': 'application/octet-stream'}
|
||||
gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z")
|
||||
assert gdf._obj == "z"
|
||||
|
Loading…
Reference in New Issue
Block a user