Fix encoding issue in ssync_sender.send_put()
EC object metadata can currently have a mixture of bytestrings and unicode. The ssync_sender.send_put() method raises an UnicodeDecodeError when it attempts to concatenate the metadata values, if any bytestring has non-ascii characters. The root cause of this issue is that the object server uses unicode for the keys of some object metadata items that are received in the footer of an EC PUT request, whereas all other object metadata keys and values are persisted as bytestrings. This patch fixes the bug by changing diskfile write_metadata() function to encode all unicode metadata keys and values as utf8 encoded bytes before writing to disk. To cope with existing objects that have a mixture of unicode and bytestring metadata, the diskfile read_metadata() function is also changed so that all returned unicode metadata keys and values are utf8 encoded. This ensures that ssync_sender.send_put() (and any other caller of diskfile read_metadata) only reads bytestrings from object metadata. Closes-Bug: #1678018 Co-Authored-By: Alistair Coles <alistairncoles@gmail.com> Change-Id: Ic23c55754ee142f6f5388dcda592a3afc9845c39
This commit is contained in:
parent
83750cf79c
commit
091157fc7f
@ -114,6 +114,20 @@ def _get_filename(fd):
|
||||
return fd
|
||||
|
||||
|
||||
def _encode_metadata(metadata):
|
||||
"""
|
||||
UTF8 encode any unicode keys or values in given metadata dict.
|
||||
|
||||
:param metadata: a dict
|
||||
"""
|
||||
def encode_str(item):
|
||||
if isinstance(item, six.text_type):
|
||||
return item.encode('utf8')
|
||||
return item
|
||||
|
||||
return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items()))
|
||||
|
||||
|
||||
def read_metadata(fd):
|
||||
"""
|
||||
Helper function to read the pickled metadata from an object file.
|
||||
@ -140,7 +154,10 @@ def read_metadata(fd):
|
||||
raise DiskFileNotExist()
|
||||
# TODO: we might want to re-raise errors that don't denote a missing
|
||||
# xattr here. Seems to be ENODATA on linux and ENOATTR on BSD/OSX.
|
||||
return pickle.loads(metadata)
|
||||
# strings are utf-8 encoded when written, but have not always been
|
||||
# (see https://bugs.launchpad.net/swift/+bug/1678018) so encode them again
|
||||
# when read
|
||||
return _encode_metadata(pickle.loads(metadata))
|
||||
|
||||
|
||||
def write_metadata(fd, metadata, xattr_size=65536):
|
||||
@ -150,7 +167,7 @@ def write_metadata(fd, metadata, xattr_size=65536):
|
||||
:param fd: file descriptor or filename to write the metadata
|
||||
:param metadata: metadata to write
|
||||
"""
|
||||
metastr = pickle.dumps(metadata, PICKLE_PROTOCOL)
|
||||
metastr = pickle.dumps(_encode_metadata(metadata), PICKLE_PROTOCOL)
|
||||
key = 0
|
||||
while metastr:
|
||||
try:
|
||||
|
@ -81,8 +81,13 @@ class TestReconstructorRebuild(ECProbeTest):
|
||||
|
||||
# PUT object and POST some metadata
|
||||
contents = Body()
|
||||
headers = {'x-object-meta-foo': 'meta-foo'}
|
||||
self.headers_post = {'x-object-meta-bar': 'meta-bar'}
|
||||
headers = {
|
||||
self._make_name('x-object-meta-').decode('utf8'):
|
||||
self._make_name('meta-foo-').decode('utf8'),
|
||||
}
|
||||
self.headers_post = {
|
||||
self._make_name('x-object-meta-').decode('utf8'):
|
||||
self._make_name('meta-bar-').decode('utf8')}
|
||||
|
||||
self.etag = client.put_object(self.url, self.token,
|
||||
self.container_name,
|
||||
|
@ -286,6 +286,58 @@ class TestDiskFileModuleMethods(unittest.TestCase):
|
||||
exp_dir = '/srv/node/sdb5/objects-1/123'
|
||||
self.assertEqual(part_dir, exp_dir)
|
||||
|
||||
def test_write_read_metadata(self):
|
||||
path = os.path.join(self.testdir, str(uuid.uuid4()))
|
||||
metadata = {'name': '/a/c/o',
|
||||
'Content-Length': 99,
|
||||
u'X-Object-Sysmeta-Ec-Frag-Index': 4,
|
||||
u'X-Object-Meta-Strange': u'should be bytes',
|
||||
b'X-Object-Meta-x\xff': b'not utf8 \xff',
|
||||
u'X-Object-Meta-y\xe8': u'not ascii \xe8'}
|
||||
expected = {b'name': b'/a/c/o',
|
||||
b'Content-Length': 99,
|
||||
b'X-Object-Sysmeta-Ec-Frag-Index': 4,
|
||||
b'X-Object-Meta-Strange': b'should be bytes',
|
||||
b'X-Object-Meta-x\xff': b'not utf8 \xff',
|
||||
b'X-Object-Meta-y\xc3\xa8': b'not ascii \xc3\xa8'}
|
||||
|
||||
def check_metadata():
|
||||
with open(path, 'rb') as fd:
|
||||
actual = diskfile.read_metadata(fd)
|
||||
self.assertEqual(expected, actual)
|
||||
for k in actual.keys():
|
||||
self.assertIsInstance(k, six.binary_type)
|
||||
for k in (b'name',
|
||||
b'X-Object-Meta-Strange',
|
||||
b'X-Object-Meta-x\xff',
|
||||
b'X-Object-Meta-y\xc3\xa8'):
|
||||
self.assertIsInstance(actual[k], six.binary_type)
|
||||
|
||||
with open(path, 'wb') as fd:
|
||||
diskfile.write_metadata(fd, metadata)
|
||||
check_metadata()
|
||||
|
||||
# mock the read path to check the write path encoded persisted metadata
|
||||
with mock.patch.object(diskfile, '_encode_metadata', lambda x: x):
|
||||
check_metadata()
|
||||
|
||||
# simulate a legacy diskfile that might have persisted unicode metadata
|
||||
with mock.patch.object(diskfile, '_encode_metadata', lambda x: x):
|
||||
with open(path, 'wb') as fd:
|
||||
diskfile.write_metadata(fd, metadata)
|
||||
# sanity check, while still mocked, that we did persist unicode
|
||||
with open(path, 'rb') as fd:
|
||||
actual = diskfile.read_metadata(fd)
|
||||
for k, v in actual.items():
|
||||
if k == u'X-Object-Meta-Strange':
|
||||
self.assertIsInstance(k, six.text_type)
|
||||
self.assertIsInstance(v, six.text_type)
|
||||
break
|
||||
else:
|
||||
self.fail('Did not find X-Object-Meta-Strange')
|
||||
# check that read_metadata converts binary_type
|
||||
check_metadata()
|
||||
|
||||
|
||||
@patch_policies
|
||||
class TestObjectAuditLocationGenerator(unittest.TestCase):
|
||||
|
@ -7255,6 +7255,19 @@ class TestObjectServer(unittest.TestCase):
|
||||
obj_datafile = found_files['.data'][0]
|
||||
self.assertEqual("%s#2#d.data" % put_timestamp.internal,
|
||||
os.path.basename(obj_datafile))
|
||||
|
||||
with open(obj_datafile) as fd:
|
||||
actual_meta = diskfile.read_metadata(fd)
|
||||
expected_meta = {'Content-Length': '82',
|
||||
'name': '/a/c/o',
|
||||
'X-Object-Sysmeta-Ec-Frag-Index': '2',
|
||||
'X-Timestamp': put_timestamp.normal,
|
||||
'Content-Type': 'text/plain'}
|
||||
for k, v in actual_meta.items():
|
||||
self.assertIsInstance(k, six.binary_type)
|
||||
self.assertIsInstance(v, six.binary_type)
|
||||
self.assertIsNotNone(actual_meta.pop('ETag', None))
|
||||
self.assertEqual(expected_meta, actual_meta)
|
||||
# but no other files
|
||||
self.assertFalse(found_files['.data'][1:])
|
||||
found_files.pop('.data')
|
||||
|
@ -1452,62 +1452,86 @@ class TestSender(BaseTest):
|
||||
exc = err
|
||||
self.assertEqual(str(exc), '0.01 seconds: send_put chunk')
|
||||
|
||||
def test_send_put(self):
|
||||
def _check_send_put(self, obj_name, meta_value):
|
||||
ts_iter = make_timestamp_iter()
|
||||
t1 = next(ts_iter)
|
||||
body = 'test'
|
||||
extra_metadata = {'Some-Other-Header': 'value'}
|
||||
df = self._make_open_diskfile(body=body, timestamp=t1,
|
||||
extra_metadata = {'Some-Other-Header': 'value',
|
||||
u'Unicode-Meta-Name': meta_value}
|
||||
df = self._make_open_diskfile(obj=obj_name, body=body,
|
||||
timestamp=t1,
|
||||
extra_metadata=extra_metadata)
|
||||
expected = dict(df.get_metadata())
|
||||
expected['body'] = body
|
||||
expected['chunk_size'] = len(body)
|
||||
expected['meta'] = meta_value
|
||||
path = six.moves.urllib.parse.quote(expected['name'])
|
||||
expected['path'] = path
|
||||
expected['length'] = format(145 + len(path) + len(meta_value), 'x')
|
||||
# .meta file metadata is not included in expected for data only PUT
|
||||
t2 = next(ts_iter)
|
||||
metadata = {'X-Timestamp': t2.internal, 'X-Object-Meta-Fruit': 'kiwi'}
|
||||
df.write_metadata(metadata)
|
||||
df.open()
|
||||
self.sender.connection = FakeConnection()
|
||||
self.sender.send_put('/a/c/o', df)
|
||||
self.sender.send_put(path, df)
|
||||
self.assertEqual(
|
||||
''.join(self.sender.connection.sent),
|
||||
'82\r\n'
|
||||
'PUT /a/c/o\r\n'
|
||||
'%(length)s\r\n'
|
||||
'PUT %(path)s\r\n'
|
||||
'Content-Length: %(Content-Length)s\r\n'
|
||||
'ETag: %(ETag)s\r\n'
|
||||
'Some-Other-Header: value\r\n'
|
||||
'Unicode-Meta-Name: %(meta)s\r\n'
|
||||
'X-Timestamp: %(X-Timestamp)s\r\n'
|
||||
'\r\n'
|
||||
'\r\n'
|
||||
'%(chunk_size)s\r\n'
|
||||
'%(body)s\r\n' % expected)
|
||||
|
||||
def test_send_post(self):
|
||||
def test_send_put(self):
|
||||
self._check_send_put('o', 'meta')
|
||||
|
||||
def test_send_put_unicode(self):
|
||||
self._check_send_put(
|
||||
'o_with_caract\xc3\xa8res_like_in_french', 'm\xc3\xa8ta')
|
||||
|
||||
def _check_send_post(self, obj_name, meta_value):
|
||||
ts_iter = make_timestamp_iter()
|
||||
# create .data file
|
||||
extra_metadata = {'X-Object-Meta-Foo': 'old_value',
|
||||
'X-Object-Sysmeta-Test': 'test_sysmeta',
|
||||
'Content-Type': 'test_content_type'}
|
||||
ts_0 = next(ts_iter)
|
||||
df = self._make_open_diskfile(extra_metadata=extra_metadata,
|
||||
df = self._make_open_diskfile(obj=obj_name,
|
||||
extra_metadata=extra_metadata,
|
||||
timestamp=ts_0)
|
||||
# create .meta file
|
||||
ts_1 = next(ts_iter)
|
||||
newer_metadata = {'X-Object-Meta-Foo': 'new_value',
|
||||
newer_metadata = {u'X-Object-Meta-Foo': meta_value,
|
||||
'X-Timestamp': ts_1.internal}
|
||||
df.write_metadata(newer_metadata)
|
||||
path = six.moves.urllib.parse.quote(df.read_metadata()['name'])
|
||||
length = format(61 + len(path) + len(meta_value), 'x')
|
||||
|
||||
self.sender.connection = FakeConnection()
|
||||
with df.open():
|
||||
self.sender.send_post('/a/c/o', df)
|
||||
self.sender.send_post(path, df)
|
||||
self.assertEqual(
|
||||
''.join(self.sender.connection.sent),
|
||||
'4c\r\n'
|
||||
'POST /a/c/o\r\n'
|
||||
'X-Object-Meta-Foo: new_value\r\n'
|
||||
'%s\r\n'
|
||||
'POST %s\r\n'
|
||||
'X-Object-Meta-Foo: %s\r\n'
|
||||
'X-Timestamp: %s\r\n'
|
||||
'\r\n'
|
||||
'\r\n' % ts_1.internal)
|
||||
'\r\n' % (length, path, meta_value, ts_1.internal))
|
||||
|
||||
def test_send_post(self):
|
||||
self._check_send_post('o', 'meta')
|
||||
|
||||
def test_send_post_unicode(self):
|
||||
self._check_send_post(
|
||||
'o_with_caract\xc3\xa8res_like_in_french', 'm\xc3\xa8ta')
|
||||
|
||||
def test_disconnect_timeout(self):
|
||||
self.sender.connection = FakeConnection()
|
||||
|
Loading…
Reference in New Issue
Block a user