s3api: Fix prefix/delimiter/marker quoting

And stop sending WSGI strings on py3.

Change-Id: I9b769e496aa7c8ed5862c2d7310f643838328084
Closes-Bug: #1853654
This commit is contained in:
Tim Burke 2019-11-22 15:11:51 -08:00
parent f0b8790c12
commit 10c24e951c
4 changed files with 161 additions and 42 deletions

View File

@ -79,6 +79,7 @@ ceph_s3:
s3tests_boto3.functional.test_s3.test_bucket_header_acl_grants: {status: KNOWN} s3tests_boto3.functional.test_s3.test_bucket_header_acl_grants: {status: KNOWN}
s3tests_boto3.functional.test_s3.test_bucket_list_objects_anonymous: {status: KNOWN} s3tests_boto3.functional.test_s3.test_bucket_list_objects_anonymous: {status: KNOWN}
s3tests_boto3.functional.test_s3.test_bucket_list_objects_anonymous_fail: {status: KNOWN} s3tests_boto3.functional.test_s3.test_bucket_list_objects_anonymous_fail: {status: KNOWN}
s3tests_boto3.functional.test_s3.test_bucket_list_prefix_unreadable: {status: KNOWN}
s3tests_boto3.functional.test_s3.test_bucket_list_return_data: {status: KNOWN} s3tests_boto3.functional.test_s3.test_bucket_list_return_data: {status: KNOWN}
s3tests_boto3.functional.test_s3.test_bucket_list_return_data_versioning: {status: KNOWN} s3tests_boto3.functional.test_s3.test_bucket_list_return_data_versioning: {status: KNOWN}
s3tests_boto3.functional.test_s3.test_bucket_list_unordered: {status: KNOWN} s3tests_boto3.functional.test_s3.test_bucket_list_unordered: {status: KNOWN}

View File

@ -109,35 +109,35 @@ class BucketController(Controller):
'limit': max_keys + 1, 'limit': max_keys + 1,
} }
if 'prefix' in req.params: if 'prefix' in req.params:
query['prefix'] = req.params['prefix'] query['prefix'] = swob.wsgi_to_str(req.params['prefix'])
if 'delimiter' in req.params: if 'delimiter' in req.params:
query['delimiter'] = req.params['delimiter'] query['delimiter'] = swob.wsgi_to_str(req.params['delimiter'])
fetch_owner = False fetch_owner = False
if 'versions' in req.params: if 'versions' in req.params:
query['versions'] = req.params['versions'] query['versions'] = swob.wsgi_to_str(req.params['versions'])
listing_type = 'object-versions' listing_type = 'object-versions'
version_marker = swob.wsgi_to_str(req.params.get(
'version-id-marker'))
if 'key-marker' in req.params: if 'key-marker' in req.params:
query['marker'] = req.params['key-marker'] query['marker'] = swob.wsgi_to_str(req.params['key-marker'])
version_marker = req.params.get('version-id-marker')
if version_marker is not None: if version_marker is not None:
if version_marker != 'null': if version_marker != 'null':
try: try:
Timestamp(version_marker) Timestamp(version_marker)
except ValueError: except ValueError:
raise InvalidArgument( raise InvalidArgument(
'version-id-marker', 'version-id-marker', version_marker,
req.params['version-id-marker'],
'Invalid version id specified') 'Invalid version id specified')
query['version_marker'] = version_marker query['version_marker'] = version_marker
elif 'version-id-marker' in req.params: elif version_marker is not None:
err_msg = ('A version-id marker cannot be specified without ' err_msg = ('A version-id marker cannot be specified without '
'a key marker.') 'a key marker.')
raise InvalidArgument('version-id-marker', raise InvalidArgument('version-id-marker',
req.params['version-id-marker'], err_msg) version_marker, err_msg)
elif int(req.params.get('list-type', '1')) == 2: elif int(req.params.get('list-type', '1')) == 2:
listing_type = 'version-2' listing_type = 'version-2'
if 'start-after' in req.params: if 'start-after' in req.params:
query['marker'] = req.params['start-after'] query['marker'] = swob.wsgi_to_str(req.params['start-after'])
# continuation-token overrides start-after # continuation-token overrides start-after
if 'continuation-token' in req.params: if 'continuation-token' in req.params:
decoded = b64decode(req.params['continuation-token']) decoded = b64decode(req.params['continuation-token'])
@ -149,7 +149,7 @@ class BucketController(Controller):
else: else:
listing_type = 'version-1' listing_type = 'version-1'
if 'marker' in req.params: if 'marker' in req.params:
query['marker'] = req.params['marker'] query['marker'] = swob.wsgi_to_str(req.params['marker'])
return encoding_type, query, listing_type, fetch_owner return encoding_type, query, listing_type, fetch_owner
@ -157,10 +157,16 @@ class BucketController(Controller):
tag_max_keys, is_truncated): tag_max_keys, is_truncated):
elem = Element('ListVersionsResult') elem = Element('ListVersionsResult')
SubElement(elem, 'Name').text = req.container_name SubElement(elem, 'Name').text = req.container_name
SubElement(elem, 'Prefix').text = req.params.get('prefix') prefix = swob.wsgi_to_str(req.params.get('prefix'))
SubElement(elem, 'KeyMarker').text = req.params.get('key-marker') if prefix and encoding_type == 'url':
SubElement(elem, 'VersionIdMarker').text = req.params.get( prefix = quote(prefix)
'version-id-marker') SubElement(elem, 'Prefix').text = prefix
key_marker = swob.wsgi_to_str(req.params.get('key-marker'))
if key_marker and encoding_type == 'url':
key_marker = quote(key_marker)
SubElement(elem, 'KeyMarker').text = key_marker
SubElement(elem, 'VersionIdMarker').text = swob.wsgi_to_str(
req.params.get('version-id-marker'))
if is_truncated: if is_truncated:
if 'name' in objects[-1]: if 'name' in objects[-1]:
SubElement(elem, 'NextKeyMarker').text = \ SubElement(elem, 'NextKeyMarker').text = \
@ -172,24 +178,33 @@ class BucketController(Controller):
objects[-1]['subdir'] objects[-1]['subdir']
SubElement(elem, 'NextVersionIdMarker').text = 'null' SubElement(elem, 'NextVersionIdMarker').text = 'null'
SubElement(elem, 'MaxKeys').text = str(tag_max_keys) SubElement(elem, 'MaxKeys').text = str(tag_max_keys)
if 'delimiter' in req.params: delimiter = swob.wsgi_to_str(req.params.get('delimiter'))
SubElement(elem, 'Delimiter').text = req.params['delimiter'] if delimiter is not None:
if encoding_type == 'url':
delimiter = quote(delimiter)
SubElement(elem, 'Delimiter').text = delimiter
if encoding_type == 'url': if encoding_type == 'url':
SubElement(elem, 'EncodingType').text = encoding_type SubElement(elem, 'EncodingType').text = encoding_type
SubElement(elem, 'IsTruncated').text = \ SubElement(elem, 'IsTruncated').text = \
'true' if is_truncated else 'false' 'true' if is_truncated else 'false'
return elem return elem
def _build_base_listing_element(self, req): def _build_base_listing_element(self, req, encoding_type):
elem = Element('ListBucketResult') elem = Element('ListBucketResult')
SubElement(elem, 'Name').text = req.container_name SubElement(elem, 'Name').text = req.container_name
SubElement(elem, 'Prefix').text = req.params.get('prefix') prefix = swob.wsgi_to_str(req.params.get('prefix'))
if prefix and encoding_type == 'url':
prefix = quote(prefix)
SubElement(elem, 'Prefix').text = prefix
return elem return elem
def _build_list_bucket_result_type_one(self, req, objects, encoding_type, def _build_list_bucket_result_type_one(self, req, objects, encoding_type,
tag_max_keys, is_truncated): tag_max_keys, is_truncated):
elem = self._build_base_listing_element(req) elem = self._build_base_listing_element(req, encoding_type)
SubElement(elem, 'Marker').text = req.params.get('marker') marker = swob.wsgi_to_str(req.params.get('marker'))
if marker and encoding_type == 'url':
marker = quote(marker)
SubElement(elem, 'Marker').text = marker
if is_truncated and 'delimiter' in req.params: if is_truncated and 'delimiter' in req.params:
if 'name' in objects[-1]: if 'name' in objects[-1]:
name = objects[-1]['name'] name = objects[-1]['name']
@ -200,8 +215,10 @@ class BucketController(Controller):
SubElement(elem, 'NextMarker').text = name SubElement(elem, 'NextMarker').text = name
# XXX: really? no NextMarker when no delimiter?? # XXX: really? no NextMarker when no delimiter??
SubElement(elem, 'MaxKeys').text = str(tag_max_keys) SubElement(elem, 'MaxKeys').text = str(tag_max_keys)
delimiter = req.params.get('delimiter') delimiter = swob.wsgi_to_str(req.params.get('delimiter'))
if delimiter: if delimiter:
if encoding_type == 'url':
delimiter = quote(delimiter)
SubElement(elem, 'Delimiter').text = delimiter SubElement(elem, 'Delimiter').text = delimiter
if encoding_type == 'url': if encoding_type == 'url':
SubElement(elem, 'EncodingType').text = encoding_type SubElement(elem, 'EncodingType').text = encoding_type
@ -211,7 +228,7 @@ class BucketController(Controller):
def _build_list_bucket_result_type_two(self, req, objects, encoding_type, def _build_list_bucket_result_type_two(self, req, objects, encoding_type,
tag_max_keys, is_truncated): tag_max_keys, is_truncated):
elem = self._build_base_listing_element(req) elem = self._build_base_listing_element(req, encoding_type)
if is_truncated: if is_truncated:
if 'name' in objects[-1]: if 'name' in objects[-1]:
SubElement(elem, 'NextContinuationToken').text = \ SubElement(elem, 'NextContinuationToken').text = \
@ -221,14 +238,18 @@ class BucketController(Controller):
b64encode(objects[-1]['subdir'].encode('utf8')) b64encode(objects[-1]['subdir'].encode('utf8'))
if 'continuation-token' in req.params: if 'continuation-token' in req.params:
SubElement(elem, 'ContinuationToken').text = \ SubElement(elem, 'ContinuationToken').text = \
req.params['continuation-token'] swob.wsgi_to_str(req.params['continuation-token'])
if 'start-after' in req.params: start_after = swob.wsgi_to_str(req.params.get('start-after'))
SubElement(elem, 'StartAfter').text = \ if start_after is not None:
req.params['start-after'] if encoding_type == 'url':
start_after = quote(start_after)
SubElement(elem, 'StartAfter').text = start_after
SubElement(elem, 'KeyCount').text = str(len(objects)) SubElement(elem, 'KeyCount').text = str(len(objects))
SubElement(elem, 'MaxKeys').text = str(tag_max_keys) SubElement(elem, 'MaxKeys').text = str(tag_max_keys)
delimiter = req.params.get('delimiter') delimiter = swob.wsgi_to_str(req.params.get('delimiter'))
if delimiter: if delimiter:
if encoding_type == 'url':
delimiter = quote(delimiter)
SubElement(elem, 'Delimiter').text = delimiter SubElement(elem, 'Delimiter').text = delimiter
if encoding_type == 'url': if encoding_type == 'url':
SubElement(elem, 'EncodingType').text = encoding_type SubElement(elem, 'EncodingType').text = encoding_type

View File

@ -285,6 +285,49 @@ class TestS3ApiBucket(S3ApiBaseBoto3):
resp_prefixes, resp_prefixes,
[{'Prefix': p} for p in expect_prefixes]) [{'Prefix': p} for p in expect_prefixes])
def test_get_bucket_with_non_ascii_delimiter(self):
    # Exercise bucket listings whose delimiter (and then prefix) is a
    # non-ASCII character, checking the values echoed back in the response
    # as well as the returned object names and common prefixes.
    container = 'bucket'
    self._prepare_test_get_bucket(container, (
        'bar',
        'foo',
        u'foobar\N{SNOWMAN}baz',
        u'foo\N{SNOWMAN}bar',
        u'foo\N{SNOWMAN}bar\N{SNOWMAN}baz',
    ))

    # boto3 doesn't always unquote everything it should; see
    # https://github.com/boto/botocore/pull/1901
    # Fortunately, we can just drop the encoding-type=url param, which is
    # done by unregistering the botocore handler that sets it.
    events = self.conn.meta.events
    events.unregister(
        'before-parameter-build.s3.ListObjects',
        botocore.handlers.set_list_objects_encoding_type_url)

    snowman = u'\N{SNOWMAN}'

    # First listing: delimiter only.
    listing = self.conn.list_objects(Bucket=container, Delimiter=snowman)
    self.assertEqual(200, listing['ResponseMetadata']['HTTPStatusCode'])
    self.assertEqual(listing['Delimiter'], snowman)
    self._validate_object_listing(listing['Contents'], ('bar', 'foo'))
    self.assertEqual(
        listing['CommonPrefixes'],
        [{'Prefix': u'foobar\N{SNOWMAN}'},
         {'Prefix': u'foo\N{SNOWMAN}'}])

    # Second listing: same delimiter, plus a prefix that ends with it.
    key_prefix = u'foo\N{SNOWMAN}'
    listing = self.conn.list_objects(
        Bucket=container, Delimiter=snowman, Prefix=key_prefix)
    self.assertEqual(200, listing['ResponseMetadata']['HTTPStatusCode'])
    self.assertEqual(listing['Delimiter'], snowman)
    self.assertEqual(listing['Prefix'], key_prefix)
    self._validate_object_listing(
        listing['Contents'], (u'foo\N{SNOWMAN}bar',))
    self.assertEqual(
        listing['CommonPrefixes'],
        [{'Prefix': u'foo\N{SNOWMAN}bar\N{SNOWMAN}'}])
def test_get_bucket_with_encoding_type(self): def test_get_bucket_with_encoding_type(self):
bucket = 'bucket' bucket = 'bucket'
put_objects = ('object', 'object2') put_objects = ('object', 'object2')

View File

@ -467,15 +467,42 @@ class TestS3ApiBucket(S3ApiTestCase):
'Date': self.get_date_header()}) 'Date': self.get_date_header()})
status, headers, body = self.call_s3api(req) status, headers, body = self.call_s3api(req)
elem = fromstring(body, 'ListBucketResult') elem = fromstring(body, 'ListBucketResult')
self.assertEqual(elem.find('./Prefix').text, '\xef\xbc\xa3') self.assertEqual(elem.find('./Prefix').text,
self.assertEqual(elem.find('./Marker').text, '\xef\xbc\xa2') swob.wsgi_to_str('\xef\xbc\xa3'))
self.assertEqual(elem.find('./Delimiter').text, '\xef\xbc\xa1') self.assertEqual(elem.find('./Marker').text,
swob.wsgi_to_str('\xef\xbc\xa2'))
self.assertEqual(elem.find('./Delimiter').text,
swob.wsgi_to_str('\xef\xbc\xa1'))
_, path = self.swift.calls[-1] _, path = self.swift.calls[-1]
_, query_string = path.split('?') _, query_string = path.split('?')
args = dict(parse_qsl(query_string)) args = [part.partition('=')[::2] for part in query_string.split('&')]
self.assertEqual(args['delimiter'], '\xef\xbc\xa1') self.assertEqual(sorted(args), [
self.assertEqual(args['marker'], '\xef\xbc\xa2') ('delimiter', '%EF%BC%A1'),
self.assertEqual(args['prefix'], '\xef\xbc\xa3') ('limit', '1001'),
('marker', '%EF%BC%A2'),
('prefix', '%EF%BC%A3'),
])
req = Request.blank(
'/%s?delimiter=\xef\xbc\xa1&marker=\xef\xbc\xa2&'
'prefix=\xef\xbc\xa3&encoding-type=url' % bucket_name,
environ={'REQUEST_METHOD': 'GET'},
headers={'Authorization': 'AWS test:tester:hmac',
'Date': self.get_date_header()})
status, headers, body = self.call_s3api(req)
elem = fromstring(body, 'ListBucketResult')
self.assertEqual(elem.find('./Prefix').text, '%EF%BC%A3')
self.assertEqual(elem.find('./Marker').text, '%EF%BC%A2')
self.assertEqual(elem.find('./Delimiter').text, '%EF%BC%A1')
_, path = self.swift.calls[-1]
_, query_string = path.split('?')
args = [part.partition('=')[::2] for part in query_string.split('&')]
self.assertEqual(sorted(args), [
('delimiter', '%EF%BC%A1'),
('limit', '1001'),
('marker', '%EF%BC%A2'),
('prefix', '%EF%BC%A3'),
])
def test_bucket_GET_v2_with_nonascii_queries(self): def test_bucket_GET_v2_with_nonascii_queries(self):
bucket_name = 'junk' bucket_name = 'junk'
@ -487,15 +514,42 @@ class TestS3ApiBucket(S3ApiTestCase):
'Date': self.get_date_header()}) 'Date': self.get_date_header()})
status, headers, body = self.call_s3api(req) status, headers, body = self.call_s3api(req)
elem = fromstring(body, 'ListBucketResult') elem = fromstring(body, 'ListBucketResult')
self.assertEqual(elem.find('./Prefix').text, '\xef\xbc\xa3') self.assertEqual(elem.find('./Prefix').text,
self.assertEqual(elem.find('./StartAfter').text, '\xef\xbc\xa2') swob.wsgi_to_str('\xef\xbc\xa3'))
self.assertEqual(elem.find('./Delimiter').text, '\xef\xbc\xa1') self.assertEqual(elem.find('./StartAfter').text,
swob.wsgi_to_str('\xef\xbc\xa2'))
self.assertEqual(elem.find('./Delimiter').text,
swob.wsgi_to_str('\xef\xbc\xa1'))
_, path = self.swift.calls[-1] _, path = self.swift.calls[-1]
_, query_string = path.split('?') _, query_string = path.split('?')
args = dict(parse_qsl(query_string)) args = [part.partition('=')[::2] for part in query_string.split('&')]
self.assertEqual(args['delimiter'], '\xef\xbc\xa1') self.assertEqual(sorted(args), [
self.assertEqual(args['marker'], '\xef\xbc\xa2') ('delimiter', '%EF%BC%A1'),
self.assertEqual(args['prefix'], '\xef\xbc\xa3') ('limit', '1001'),
('marker', '%EF%BC%A2'),
('prefix', '%EF%BC%A3'),
])
req = Request.blank(
'/%s?list-type=2&delimiter=\xef\xbc\xa1&start-after=\xef\xbc\xa2&'
'prefix=\xef\xbc\xa3&encoding-type=url' % bucket_name,
environ={'REQUEST_METHOD': 'GET'},
headers={'Authorization': 'AWS test:tester:hmac',
'Date': self.get_date_header()})
status, headers, body = self.call_s3api(req)
elem = fromstring(body, 'ListBucketResult')
self.assertEqual(elem.find('./Prefix').text, '%EF%BC%A3')
self.assertEqual(elem.find('./StartAfter').text, '%EF%BC%A2')
self.assertEqual(elem.find('./Delimiter').text, '%EF%BC%A1')
_, path = self.swift.calls[-1]
_, query_string = path.split('?')
args = [part.partition('=')[::2] for part in query_string.split('&')]
self.assertEqual(sorted(args), [
('delimiter', '%EF%BC%A1'),
('limit', '1001'),
('marker', '%EF%BC%A2'),
('prefix', '%EF%BC%A3'),
])
def test_bucket_GET_with_delimiter_max_keys(self): def test_bucket_GET_with_delimiter_max_keys(self):
bucket_name = 'junk' bucket_name = 'junk'