Merge "Detect file change when file size remains same"

This commit is contained in:
Jenkins 2015-11-10 19:42:01 +00:00 committed by Gerrit Code Review
commit 3115575d5d
5 changed files with 40 additions and 11 deletions

View File

@ -38,6 +38,7 @@ X_TIMESTAMP = 'X-Timestamp'
X_TYPE = 'X-Type' X_TYPE = 'X-Type'
X_ETAG = 'ETag' X_ETAG = 'ETag'
X_OBJECT_TYPE = 'X-Object-Type' X_OBJECT_TYPE = 'X-Object-Type'
X_MTIME = 'X-Object-PUT-Mtime'
DIR_TYPE = 'application/directory' DIR_TYPE = 'application/directory'
METADATA_KEY = 'user.swift.metadata' METADATA_KEY = 'user.swift.metadata'
MAX_XATTR_SIZE = 65536 MAX_XATTR_SIZE = 65536
@ -217,7 +218,7 @@ def clean_metadata(path_or_fd):
key += 1 key += 1
def validate_object(metadata, stat=None): def validate_object(metadata, statinfo=None):
if not metadata: if not metadata:
return False return False
@ -229,11 +230,17 @@ def validate_object(metadata, stat=None):
X_OBJECT_TYPE not in metadata.keys(): X_OBJECT_TYPE not in metadata.keys():
return False return False
if stat and (int(metadata[X_CONTENT_LENGTH]) != stat.st_size): if statinfo and stat.S_ISREG(statinfo.st_mode):
# File length has changed.
# TODO: Handle case where file content has changed but the length # File length has changed
# remains the same. if int(metadata[X_CONTENT_LENGTH]) != statinfo.st_size:
return False return False
# File content might have changed even though the length is the same.
if X_MTIME in metadata and \
normalize_timestamp(metadata[X_MTIME]) != \
normalize_timestamp(statinfo.st_mtime):
return False
if metadata[X_TYPE] == OBJECT: if metadata[X_TYPE] == OBJECT:
return True return True
@ -308,6 +315,7 @@ def get_object_metadata(obj_path_or_fd):
X_CONTENT_TYPE: DIR_TYPE if is_dir else FILE_TYPE, X_CONTENT_TYPE: DIR_TYPE if is_dir else FILE_TYPE,
X_OBJECT_TYPE: DIR_NON_OBJECT if is_dir else FILE, X_OBJECT_TYPE: DIR_NON_OBJECT if is_dir else FILE,
X_CONTENT_LENGTH: 0 if is_dir else stats.st_size, X_CONTENT_LENGTH: 0 if is_dir else stats.st_size,
X_MTIME: 0 if is_dir else normalize_timestamp(stats.st_mtime),
X_ETAG: md5().hexdigest() if is_dir else _get_etag(obj_path_or_fd)} X_ETAG: md5().hexdigest() if is_dir else _get_etag(obj_path_or_fd)}
return metadata return metadata

View File

@ -44,7 +44,7 @@ from swiftonfile.swift.common.utils import read_metadata, write_metadata, \
from swiftonfile.swift.common.utils import X_CONTENT_TYPE, \ from swiftonfile.swift.common.utils import X_CONTENT_TYPE, \
X_TIMESTAMP, X_TYPE, X_OBJECT_TYPE, FILE, OBJECT, DIR_TYPE, \ X_TIMESTAMP, X_TYPE, X_OBJECT_TYPE, FILE, OBJECT, DIR_TYPE, \
FILE_TYPE, DEFAULT_UID, DEFAULT_GID, DIR_NON_OBJECT, DIR_OBJECT, \ FILE_TYPE, DEFAULT_UID, DEFAULT_GID, DIR_NON_OBJECT, DIR_OBJECT, \
X_ETAG, X_CONTENT_LENGTH X_ETAG, X_CONTENT_LENGTH, X_MTIME
from swift.obj.diskfile import DiskFileManager as SwiftDiskFileManager from swift.obj.diskfile import DiskFileManager as SwiftDiskFileManager
from swift.obj.diskfile import get_async_dir from swift.obj.diskfile import get_async_dir
@ -169,7 +169,7 @@ def make_directory(full_path, uid, gid, metadata=None):
return True, metadata return True, metadata
def _adjust_metadata(metadata): def _adjust_metadata(fd, metadata):
# Fix up the metadata to ensure it has a proper value for the # Fix up the metadata to ensure it has a proper value for the
# Content-Type metadata, as well as an X_TYPE and X_OBJECT_TYPE # Content-Type metadata, as well as an X_TYPE and X_OBJECT_TYPE
# metadata values. # metadata values.
@ -189,6 +189,12 @@ def _adjust_metadata(metadata):
else: else:
metadata[X_OBJECT_TYPE] = FILE metadata[X_OBJECT_TYPE] = FILE
# stat.st_mtime does not change after the last write(). We record it here so
# that we can later detect whether the object was changed through the
# filesystem interface (i.e. not through Swift).
statinfo = do_fstat(fd)
if stat.S_ISREG(statinfo.st_mode):
metadata[X_MTIME] = normalize_timestamp(statinfo.st_mtime)
metadata[X_TYPE] = OBJECT metadata[X_TYPE] = OBJECT
return metadata return metadata
@ -385,7 +391,7 @@ class DiskFileWriter(object):
name name
""" """
assert self._tmppath is not None assert self._tmppath is not None
metadata = _adjust_metadata(metadata) metadata = _adjust_metadata(self._fd, metadata)
df = self._disk_file df = self._disk_file
if dir_is_object(metadata): if dir_is_object(metadata):

View File

@ -226,3 +226,17 @@ class TestSwiftOnFile(Base):
self.assert_status(200) self.assert_status(200)
self.assertEqual(new_data_hash, metadata['etag']) self.assertEqual(new_data_hash, metadata['etag'])
self.assertEqual(len(new_data), int(metadata['content_length'])) self.assertEqual(len(new_data), int(metadata['content_length']))
# Modify the file but let the length remain the same
new_data = "I am Antman"
new_data_hash = hashlib.md5(new_data).hexdigest()
with open(file_path, 'w') as f:
f.write(new_data)
# Make sure GET works
self.assertEqual(new_data, object_item.read())
self.assert_status(200)
# Check that the ETag and content-length are right
metadata = object_item.info()
self.assert_status(200)
self.assertEqual(new_data_hash, metadata['etag'])
self.assertEqual(len(new_data), int(metadata['content_length']))

View File

@ -533,9 +533,9 @@ class TestUtils(unittest.TestCase):
utils.X_CONTENT_LENGTH: '12345', utils.X_CONTENT_LENGTH: '12345',
utils.X_TYPE: utils.OBJECT, utils.X_TYPE: utils.OBJECT,
utils.X_OBJECT_TYPE: 'na'} utils.X_OBJECT_TYPE: 'na'}
fake_stat = Mock(st_size=12346) fake_stat = Mock(st_size=12346, st_mode=33188)
self.assertFalse(utils.validate_object(md, fake_stat)) self.assertFalse(utils.validate_object(md, fake_stat))
fake_stat = Mock(st_size=12345) fake_stat = Mock(st_size=12345, st_mode=33188)
self.assertTrue(utils.validate_object(md, fake_stat)) self.assertTrue(utils.validate_object(md, fake_stat))
def test_write_pickle(self): def test_write_pickle(self):

View File

@ -188,6 +188,7 @@ class TestDiskFile(unittest.TestCase):
'Content-Length': 4, 'Content-Length': 4,
'ETag': etag, 'ETag': etag,
'X-Timestamp': ts, 'X-Timestamp': ts,
'X-Object-PUT-Mtime': normalize_timestamp(stats.st_mtime),
'Content-Type': 'application/octet-stream'} 'Content-Type': 'application/octet-stream'}
gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z") gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z")
assert gdf._obj == "z" assert gdf._obj == "z"