From 5f25e1cc776eaf90c36487016f3b2bec282c86e7 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Tue, 25 Jan 2022 11:35:42 -0800 Subject: [PATCH] s3api: Fix non-ascii MPUs Previous problems included: - returning wsgi strings quoted assuming UTF-8 on py3 when initiating or completing multipart uploads - trying to str() some unicode on py2 when listing parts, leading to UnicodeEncodeErrors Change-Id: Ibc1d42c8deffe41c557350a574ae80751e9bd565 --- .../s3api/controllers/multi_upload.py | 22 ++++++++++++------- swift/common/middleware/s3api/s3request.py | 1 + test/functional/s3api/s3_test_client.py | 7 ++++++ test/functional/s3api/test_multi_upload.py | 21 +++++++++++++----- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/swift/common/middleware/s3api/controllers/multi_upload.py b/swift/common/middleware/s3api/controllers/multi_upload.py index 4561507dca..8ed6feaacf 100644 --- a/swift/common/middleware/s3api/controllers/multi_upload.py +++ b/swift/common/middleware/s3api/controllers/multi_upload.py @@ -144,7 +144,7 @@ def _make_complete_body(req, s3_etag, yielded_anything): SubElement(result_elem, 'Location').text = host_url + req.path SubElement(result_elem, 'Bucket').text = req.container_name - SubElement(result_elem, 'Key').text = req.object_name + SubElement(result_elem, 'Key').text = wsgi_to_str(req.object_name) SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag body = tostring(result_elem, xml_declaration=not yielded_anything) if yielded_anything: @@ -452,7 +452,7 @@ class UploadsController(Controller): result_elem = Element('InitiateMultipartUploadResult') SubElement(result_elem, 'Bucket').text = req.container_name - SubElement(result_elem, 'Key').text = req.object_name + SubElement(result_elem, 'Key').text = wsgi_to_str(req.object_name) SubElement(result_elem, 'UploadId').text = upload_id body = tostring(result_elem) @@ -498,9 +498,10 @@ class UploadController(Controller): part_num_marker = req.get_validated_param( 'part-number-marker', 0) + object_name = wsgi_to_str(req.object_name) query = { 'format': 'json', - 'prefix': '%s/%s/' % (req.object_name, upload_id), + 'prefix': '%s/%s/' % (object_name, upload_id), 'delimiter': '/', 'marker': '', } @@ -516,7 +517,10 @@ class UploadController(Controller): if not new_objects: break objects.extend(new_objects) - query['marker'] = new_objects[-1]['name'] + if six.PY2: + query['marker'] = new_objects[-1]['name'].encode('utf-8') + else: + query['marker'] = new_objects[-1]['name'] last_part = 0 @@ -542,10 +546,9 @@ class UploadController(Controller): result_elem = Element('ListPartsResult') SubElement(result_elem, 'Bucket').text = req.container_name - name = req.object_name if encoding_type == 'url': - name = quote(name) - SubElement(result_elem, 'Key').text = name + object_name = quote(object_name) + SubElement(result_elem, 'Key').text = object_name SubElement(result_elem, 'UploadId').text = upload_id initiator_elem = SubElement(result_elem, 'Initiator') @@ -613,7 +616,10 @@ class UploadController(Controller): container = req.container_name + MULTIUPLOAD_SUFFIX obj = bytes_to_wsgi(o['name'].encode('utf-8')) req.get_response(self.app, container=container, obj=obj) - query['marker'] = objects[-1]['name'] + if six.PY2: + query['marker'] = objects[-1]['name'].encode('utf-8') + else: + query['marker'] = objects[-1]['name'] resp = req.get_response(self.app, 'GET', container, '', query=query) objects = json.loads(resp.body) diff --git a/swift/common/middleware/s3api/s3request.py b/swift/common/middleware/s3api/s3request.py index d4c33512de..d324cfc482 100644 --- a/swift/common/middleware/s3api/s3request.py +++ b/swift/common/middleware/s3api/s3request.py @@ -626,6 +626,7 @@ class S3Request(swob.Request): return None def _parse_uri(self): + # NB: returns WSGI strings if not check_utf8(swob.wsgi_to_str(self.environ['PATH_INFO'])): raise InvalidURI(self.path) diff --git a/test/functional/s3api/s3_test_client.py b/test/functional/s3api/s3_test_client.py index 70166b5313..a963317dbb 100644 --- a/test/functional/s3api/s3_test_client.py +++ b/test/functional/s3api/s3_test_client.py @@ -98,6 +98,13 @@ class Connection(object): upload.cancel_upload() for obj in bucket.list_versions(): + if six.PY2: + if not isinstance(obj.name, bytes): + obj.name = obj.name.encode('utf-8') + if obj.version_id is not None and \ + not isinstance(obj.version_id, bytes): + obj.version_id = \ + obj.version_id.encode('utf-8') bucket.delete_key( obj.name, version_id=obj.version_id) diff --git a/test/functional/s3api/test_multi_upload.py b/test/functional/s3api/test_multi_upload.py index cc948cdd4c..8038a92398 100644 --- a/test/functional/s3api/test_multi_upload.py +++ b/test/functional/s3api/test_multi_upload.py @@ -23,6 +23,7 @@ import boto # pylint: disable-msg=E0611,F0401 from distutils.version import StrictVersion +import six from six.moves import zip, zip_longest import test.functional as tf @@ -111,7 +112,7 @@ class TestS3ApiMultiUpload(S3ApiBase): def test_object_multi_upload(self): bucket = 'bucket' - keys = ['obj1', 'obj2', 'obj3'] + keys = [u'obj1\N{SNOWMAN}', 'obj2', 'obj3'] bad_content_md5 = base64.b64encode(b'a' * 16).strip().decode('ascii') headers = [{'Content-Type': 'foo/bar', 'x-amz-meta-baz': 'quux'}, {'Content-MD5': bad_content_md5}, @@ -133,6 +134,8 @@ class TestS3ApiMultiUpload(S3ApiBase): elem = fromstring(body, 'InitiateMultipartUploadResult') self.assertEqual(elem.find('Bucket').text, bucket) key = elem.find('Key').text + if six.PY2: + expected_key = expected_key.encode('utf-8') self.assertEqual(expected_key, key) upload_id = elem.find('UploadId').text self.assertIsNotNone(upload_id) @@ -200,7 +203,9 @@ class TestS3ApiMultiUpload(S3ApiBase): # prepare src obj self.conn.make_request('PUT', src_bucket) - self.conn.make_request('PUT', src_bucket, src_obj, body=src_content) + with self.quiet_boto_logging(): + self.conn.make_request('PUT', src_bucket, src_obj, + body=src_content) _, headers, _ = self.conn.make_request('HEAD', src_bucket, src_obj) self.assertCommonResponseHeaders(headers) @@ -305,7 +310,8 @@ class TestS3ApiMultiUpload(S3ApiBase): self.assertTrue(lines[0].endswith(b'?>'), body) elem = fromstring(body, 'CompleteMultipartUploadResult') self.assertEqual( - '%s/bucket/obj1' % tf.config['s3_storage_url'].rstrip('/'), + '%s/bucket/obj1%%E2%%98%%83' % + tf.config['s3_storage_url'].rstrip('/'), elem.find('Location').text) self.assertEqual(elem.find('Bucket').text, bucket) self.assertEqual(elem.find('Key').text, key) @@ -346,7 +352,8 @@ class TestS3ApiMultiUpload(S3ApiBase): self.assertTrue(lines[0].endswith(b'?>'), body) elem = fromstring(body, 'CompleteMultipartUploadResult') self.assertEqual( - '%s/bucket/obj1' % tf.config['s3_storage_url'].rstrip('/'), + '%s/bucket/obj1%%E2%%98%%83' % + tf.config['s3_storage_url'].rstrip('/'), elem.find('Location').text) self.assertEqual(elem.find('Bucket').text, bucket) self.assertEqual(elem.find('Key').text, key) @@ -448,7 +455,11 @@ class TestS3ApiMultiUpload(S3ApiBase): resp_objects = list(elem.findall('./Contents')) self.assertEqual(len(resp_objects), 1) o = resp_objects[0] - self.assertEqual(o.find('Key').text, keys[0]) + if six.PY2: + expected_key = keys[0].encode('utf-8') + else: + expected_key = keys[0] + self.assertEqual(o.find('Key').text, expected_key) self.assertIsNotNone(o.find('LastModified').text) self.assertRegexpMatches( o.find('LastModified').text,