From da53693c9b710d66cad4e6ee2b8c86bc6897121d Mon Sep 17 00:00:00 2001 From: Prashanth Pai Date: Wed, 7 Oct 2015 19:41:12 +0530 Subject: [PATCH] Detect file change when file size remains same There was a corner case where Etag returned would be incorrect. This happens when the object is changed from file interface but with object size remaining the same. This change introduces an additional metadata that stores the mtime of object during PUT. This stored mtime is compared with actual mtime during GET to determine if the file has changed or not. Etag is recalculated if the file has changed. The scope of this fix in addressing the above mentioned corner case is limited to new objects only. Also, refactoring the code further by moving some methods from utils.py to classes in diskfile.py should prevent some redundant (f)stat syscalls. These minor optimizations will be addressed in a separate change. Change-Id: If724697ef2b17a3c569b60bd81ebf98dab283da6 Signed-off-by: Prashanth Pai --- swiftonfile/swift/common/utils.py | 20 ++++++++++++++------ swiftonfile/swift/obj/diskfile.py | 12 +++++++++--- test/functional/swift_on_file_tests.py | 14 ++++++++++++++ test/unit/common/test_utils.py | 4 ++-- test/unit/obj/test_diskfile.py | 1 + 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/swiftonfile/swift/common/utils.py b/swiftonfile/swift/common/utils.py index 8e53505..f92c7eb 100644 --- a/swiftonfile/swift/common/utils.py +++ b/swiftonfile/swift/common/utils.py @@ -34,6 +34,7 @@ X_TIMESTAMP = 'X-Timestamp' X_TYPE = 'X-Type' X_ETAG = 'ETag' X_OBJECT_TYPE = 'X-Object-Type' +X_MTIME = 'X-Object-PUT-Mtime' DIR_TYPE = 'application/directory' METADATA_KEY = 'user.swift.metadata' MAX_XATTR_SIZE = 65536 @@ -164,7 +165,7 @@ def clean_metadata(path_or_fd): key += 1 -def validate_object(metadata, stat=None): +def validate_object(metadata, statinfo=None): if not metadata: return False @@ -176,11 +177,17 @@ def validate_object(metadata, stat=None): X_OBJECT_TYPE not in metadata.keys(): return False - if stat and (int(metadata[X_CONTENT_LENGTH]) != stat.st_size): - # File length has changed. - # TODO: Handle case where file content has changed but the length - # remains the same. - return False + if statinfo and stat.S_ISREG(statinfo.st_mode): + + # File length has changed + if int(metadata[X_CONTENT_LENGTH]) != statinfo.st_size: + return False + + # File might have changed with length being the same. + if X_MTIME in metadata and \ + normalize_timestamp(metadata[X_MTIME]) != \ + normalize_timestamp(statinfo.st_mtime): + return False if metadata[X_TYPE] == OBJECT: return True @@ -255,6 +262,7 @@ def get_object_metadata(obj_path_or_fd): X_CONTENT_TYPE: DIR_TYPE if is_dir else FILE_TYPE, X_OBJECT_TYPE: DIR_NON_OBJECT if is_dir else FILE, X_CONTENT_LENGTH: 0 if is_dir else stats.st_size, + X_MTIME: 0 if is_dir else normalize_timestamp(stats.st_mtime), X_ETAG: md5().hexdigest() if is_dir else _get_etag(obj_path_or_fd)} return metadata diff --git a/swiftonfile/swift/obj/diskfile.py b/swiftonfile/swift/obj/diskfile.py index 14f48ed..9f9d15e 100644 --- a/swiftonfile/swift/obj/diskfile.py +++ b/swiftonfile/swift/obj/diskfile.py @@ -44,7 +44,7 @@ from swiftonfile.swift.common.utils import read_metadata, write_metadata, \ from swiftonfile.swift.common.utils import X_CONTENT_TYPE, \ X_TIMESTAMP, X_TYPE, X_OBJECT_TYPE, FILE, OBJECT, DIR_TYPE, \ FILE_TYPE, DEFAULT_UID, DEFAULT_GID, DIR_NON_OBJECT, DIR_OBJECT, \ - X_ETAG, X_CONTENT_LENGTH + X_ETAG, X_CONTENT_LENGTH, X_MTIME from swift.obj.diskfile import DiskFileManager as SwiftDiskFileManager from swift.obj.diskfile import get_async_dir @@ -169,7 +169,7 @@ def make_directory(full_path, uid, gid, metadata=None): return True, metadata -def _adjust_metadata(metadata): +def _adjust_metadata(fd, metadata): # Fix up the metadata to ensure it has a proper value for the # Content-Type metadata, as well as an X_TYPE and X_OBJECT_TYPE # metadata values. @@ -189,6 +189,12 @@ def _adjust_metadata(metadata): else: metadata[X_OBJECT_TYPE] = FILE + # stat.st_mtime does not change after last write(). We set this to later + # detect if the object was changed from filesystem interface (non Swift) + statinfo = do_fstat(fd) + if stat.S_ISREG(statinfo.st_mode): + metadata[X_MTIME] = normalize_timestamp(statinfo.st_mtime) + metadata[X_TYPE] = OBJECT return metadata @@ -385,7 +391,7 @@ class DiskFileWriter(object): name """ assert self._tmppath is not None - metadata = _adjust_metadata(metadata) + metadata = _adjust_metadata(self._fd, metadata) df = self._disk_file if dir_is_object(metadata): diff --git a/test/functional/swift_on_file_tests.py b/test/functional/swift_on_file_tests.py index acead57..26c045c 100644 --- a/test/functional/swift_on_file_tests.py +++ b/test/functional/swift_on_file_tests.py @@ -226,3 +226,17 @@ class TestSwiftOnFile(Base): self.assert_status(200) self.assertEqual(new_data_hash, metadata['etag']) self.assertEqual(len(new_data), int(metadata['content_length'])) + + # Modify the file but let the length remain same + new_data = "I am Antman" + new_data_hash = hashlib.md5(new_data).hexdigest() + with open(file_path, 'w') as f: + f.write(new_data) + # Make sure GET works + self.assertEqual(new_data, object_item.read()) + self.assert_status(200) + # Check Etag and content-length is right + metadata = object_item.info() + self.assert_status(200) + self.assertEqual(new_data_hash, metadata['etag']) + self.assertEqual(len(new_data), int(metadata['content_length'])) diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index b81d5d3..74788ad 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -469,9 +469,9 @@ class TestUtils(unittest.TestCase): utils.X_CONTENT_LENGTH: '12345', utils.X_TYPE: utils.OBJECT, utils.X_OBJECT_TYPE: 'na'} - fake_stat = Mock(st_size=12346) + fake_stat = Mock(st_size=12346, st_mode=33188) self.assertFalse(utils.validate_object(md, fake_stat)) - fake_stat = Mock(st_size=12345) + fake_stat = Mock(st_size=12345, st_mode=33188) self.assertTrue(utils.validate_object(md, fake_stat)) def test_write_pickle(self): diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index f1272f8..25d1de3 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -188,6 +188,7 @@ class TestDiskFile(unittest.TestCase): 'Content-Length': 4, 'ETag': etag, 'X-Timestamp': ts, + 'X-Object-PUT-Mtime': normalize_timestamp(stats.st_mtime), 'Content-Type': 'application/octet-stream'} gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z") assert gdf._obj == "z"