Fix inconsistent data being returned on GET

This is a manual forward-port of the following change merged into
icehouse branch: https://review.openstack.org/215119

When content of an object is modified from filesystem interface, a GET
on the object will return inconsistent or incomplete content because the
content length originally stored as metadata no longer reflects the
actual length of the file after modification.

The complete fix will have two parts:
(1) Return the entire content of object as is to the client
(2) The Etag returned should reflect the actual md5sum of object content

This change only fixes (1) mentioned above. This means, the client will
always get the complete content of the file.

Fix (2) is not part of this change. This means, if content length of the
object remains same even after modification, the Etag returned would be
incorrect. Fixing (2) involves more invasive changes in the code. So
that is deferred for now and will be sent as a separate change later.

Reference:
https://bugs.launchpad.net/swiftonfile/+bug/1416720
https://review.openstack.org/151897

Change-Id: I28d0ec33c59eb520be7d15a60adb968692226e3e
Closes-Bug: #1416720
Signed-off-by: Prashanth Pai <ppai@redhat.com>
This commit is contained in:
Prashanth Pai 2015-09-29 15:41:02 +05:30
parent a55740493e
commit 3083d14aff
5 changed files with 74 additions and 9 deletions

View File

@ -164,7 +164,7 @@ def clean_metadata(path_or_fd):
key += 1
def validate_object(metadata):
def validate_object(metadata, stat=None):
if not metadata:
return False
@ -176,6 +176,12 @@ def validate_object(metadata):
X_OBJECT_TYPE not in metadata.keys():
return False
if stat and (int(metadata[X_CONTENT_LENGTH]) != stat.st_size):
# File length has changed.
# TODO: Handle case where file content has changed but the length
# remains the same.
return False
if metadata[X_TYPE] == OBJECT:
return True

View File

@ -572,6 +572,7 @@ class DiskFile(object):
self._is_dir = False
self._metadata = None
self._fd = None
self._stat = None
# Don't store a value for data_file until we know it exists.
self._data_file = None
@ -623,12 +624,12 @@ class DiskFile(object):
raise DiskFileNotExist
raise
try:
stats = do_fstat(self._fd)
self._is_dir = stat.S_ISDIR(stats.st_mode)
obj_size = stats.st_size
self._stat = do_fstat(self._fd)
self._is_dir = stat.S_ISDIR(self._stat.st_mode)
obj_size = self._stat.st_size
self._metadata = read_metadata(self._fd)
if not validate_object(self._metadata):
if not validate_object(self._metadata, self._stat):
create_object_metadata(self._fd)
self._metadata = read_metadata(self._fd)
assert self._metadata is not None

View File

@ -178,4 +178,51 @@ class TestSwiftOnFile(Base):
for object_name in invalid_object_names:
file_item = self.env.container.file(object_name)
self.assertRaises(ResponseError, file_item.write) # 503 or 400
self.assertRaises(ResponseError, file_item.write) # 503 or 400
def testObjectMetadataWhenFileModified(self):
data = "I'm whatever Gotham needs me to be "
data_hash = hashlib.md5(data).hexdigest()
# Create an object through object interface
object_name = Utils.create_name()
object_item = self.env.container.file(object_name)
object_item.write(data)
# Make sure GET works
self.assertEqual(data, object_item.read())
self.assert_status(200)
# Check Etag is right
self.assertEqual(data_hash, object_item.info()['etag'])
self.assert_status(200)
# Extend/append more data to file from filesystem interface
file_path = os.path.join(self.env.root_dir,
'AUTH_' + self.env.account.name,
self.env.container.name,
object_name)
more_data = "- Batman"
with open(file_path, 'a') as f:
f.write(more_data)
total_data = data + more_data
total_data_hash = hashlib.md5(total_data).hexdigest()
# Make sure GET works
self.assertEqual(total_data, object_item.read())
self.assert_status(200)
# Check Etag and content-length is right
metadata = object_item.info()
self.assert_status(200)
self.assertEqual(total_data_hash, metadata['etag'])
self.assertEqual(len(total_data), int(metadata['content_length']))
# Re-write the file to be shorter
new_data = "I am Batman"
new_data_hash = hashlib.md5(new_data).hexdigest()
with open(file_path, 'w') as f:
f.write(new_data)
# Make sure GET works
self.assertEqual(new_data, object_item.read())
self.assert_status(200)
# Check Etag and content-length is right
metadata = object_item.info()
self.assert_status(200)
self.assertEqual(new_data_hash, metadata['etag'])
self.assertEqual(len(new_data), int(metadata['content_length']))

View File

@ -25,7 +25,7 @@ import hashlib
import tarfile
import shutil
from collections import defaultdict
from mock import patch
from mock import patch, Mock
from swiftonfile.swift.common import utils
from swiftonfile.swift.common.exceptions import SwiftOnFileSystemOSError, \
SwiftOnFileSystemIOError
@ -462,6 +462,18 @@ class TestUtils(unittest.TestCase):
ret = utils.validate_object(md)
assert ret
def test_validate_object_with_stat(self):
md = {utils.X_TIMESTAMP: 'na',
utils.X_CONTENT_TYPE: 'na',
utils.X_ETAG: 'bad',
utils.X_CONTENT_LENGTH: '12345',
utils.X_TYPE: utils.OBJECT,
utils.X_OBJECT_TYPE: 'na'}
fake_stat = Mock(st_size=12346)
self.assertFalse(utils.validate_object(md, fake_stat))
fake_stat = Mock(st_size=12345)
self.assertTrue(utils.validate_object(md, fake_stat))
def test_write_pickle(self):
td = tempfile.mkdtemp()
try:

View File

@ -222,7 +222,7 @@ class TestDiskFile(unittest.TestCase):
ini_md = {
'X-Type': 'Object',
'X-Object-Type': 'file',
'Content-Length': 5,
'Content-Length': 4,
'ETag': 'etag',
'X-Timestamp': 'ts',
'Content-Type': 'application/loctet-stream'}
@ -282,7 +282,6 @@ class TestDiskFile(unittest.TestCase):
with gdf.open():
assert gdf._is_dir
assert gdf._data_file == the_dir
assert gdf._metadata == exp_md
def _create_and_get_diskfile(self, dev, par, acc, con, obj, fsize=256):
# FIXME: assumes account === volume