S3Api: handle non-ASCII markers in v1 listings.

Added a test for S3 v1 listings that use URL encoding and have non-ASCII characters. In the process, we discovered that the XML schema for ListBucketResult had a small problem: Delimiter and EncodingType needed to be reordered.

Change-Id: Ib3124ea079a73a577b86de97657603a64b16f965
2019-05-07 16:15:59 -07:00 · 2019-05-07 16:15:59 -07:00 · dade632b0f
commit dade632b0f
parent e85dda286e
4 changed files with 47 additions and 8 deletions
--- a/doc/s3api/rnc/list_bucket_result.rnc
+++ b/doc/s3api/rnc/list_bucket_result.rnc
@ -16,8 +16,8 @@ start =
      )
    ),
    element MaxKeys { xsd:int },
    element EncodingType { xsd:string }?,
    element Delimiter { xsd:string }?,
    element EncodingType { xsd:string }?,
    element IsTruncated { xsd:boolean },
    element Contents {
      element Key { xsd:string },
--- a/swift/common/middleware/s3api/controllers/bucket.py
+++ b/swift/common/middleware/s3api/controllers/bucket.py
@ -179,16 +179,16 @@ class BucketController(Controller):
                    else:
                        name = objects[-1]['subdir']
                    if encoding_type == 'url':
-                        name = quote(name)
+                        name = quote(name.encode('utf-8'))
                    SubElement(elem, 'NextMarker').text = name
            elif listing_type == 'version-2':
                if is_truncated:
                    if 'name' in objects[-1]:
                        SubElement(elem, 'NextContinuationToken').text = \
-                            b64encode(objects[-1]['name'].encode('utf8'))
+                            b64encode(objects[-1]['name'].encode('utf-8'))
                    if 'subdir' in objects[-1]:
                        SubElement(elem, 'NextContinuationToken').text = \
-                            b64encode(objects[-1]['subdir'].encode('utf8'))
+                            b64encode(objects[-1]['subdir'].encode('utf-8'))
                if 'continuation-token' in req.params:
                    SubElement(elem, 'ContinuationToken').text = \
                        req.params['continuation-token']
--- a/swift/common/middleware/s3api/schema/list_bucket_result.rng
+++ b/swift/common/middleware/s3api/schema/list_bucket_result.rng
@ -45,12 +45,12 @@
        <data type="int"/>
      </element>
      <optional>
-        <element name="EncodingType">
+        <element name="Delimiter">
          <data type="string"/>
        </element>
      </optional>
      <optional>
-        <element name="Delimiter">
+        <element name="EncodingType">
          <data type="string"/>
        </element>
      </optional>
--- a/test/unit/common/middleware/s3api/test_bucket.py
+++ b/test/unit/common/middleware/s3api/test_bucket.py
@ -97,7 +97,7 @@ class TestS3ApiBucket(S3ApiTestCase):
            '/v1/AUTH_test/subdirs?delimiter=/&format=json&limit=3',
            swob.HTTPOk, {}, json.dumps([
                {'subdir': 'nothing/'},
-                {'subdir': 'but/'},
+                {'subdir': u'but-\u062a/'},
                {'subdir': 'subdirs/'},
            ]))
@ -245,7 +245,46 @@ class TestS3ApiBucket(S3ApiTestCase):
        status, headers, body = self.call_s3api(req)
        elem = fromstring(body, 'ListBucketResult')
        self.assertEqual(elem.find('./IsTruncated').text, 'true')
-        self.assertEqual(elem.find('./NextMarker').text, 'but/')
+        if six.PY2:
            self.assertEqual(elem.find('./NextMarker').text,
                             u'but-\u062a/'.encode('utf-8'))
        else:
            self.assertEqual(elem.find('./NextMarker').text,
                             u'but-\u062a/')
    def test_bucket_GET_is_truncated_url_encoded(self):
        """Check IsTruncated and NextMarker when encoding-type=url is used.

        Three listings are requested, all with ``encoding-type=url``:

        * ``max-keys`` equal to ``len(self.objects)`` must report
          ``IsTruncated`` as ``false``;
        * ``max-keys`` one less than that must report ``true``;
        * a delimiter listing of ``/subdirs`` with ``max-keys=2`` must
          report ``true`` and return a ``NextMarker`` equal to the
          URL-quoted UTF-8 bytes of the non-ASCII subdir name.
        """
        def fetch_listing(path):
            # Send a signed GET for ``path`` and parse the response body
            # as a ListBucketResult document.
            request = Request.blank(
                path,
                environ={'REQUEST_METHOD': 'GET'},
                headers={'Authorization': 'AWS test:tester:hmac',
                         'Date': self.get_date_header()})
            _status, _headers, payload = self.call_s3api(request)
            return fromstring(payload, 'ListBucketResult')

        bucket_name = 'junk'
        path_template = '/%s?encoding-type=url&max-keys=%d'

        # max-keys matches the number of objects: nothing is cut off.
        listing = fetch_listing(
            path_template % (bucket_name, len(self.objects)))
        self.assertEqual(listing.find('./IsTruncated').text, 'false')

        # max-keys is one short: the listing has to be truncated.
        listing = fetch_listing(
            path_template % (bucket_name, len(self.objects) - 1))
        self.assertEqual(listing.find('./IsTruncated').text, 'true')

        # Delimiter listing whose last returned entry is a non-ASCII
        # subdir; the marker must come back percent-encoded.
        listing = fetch_listing('/subdirs?encoding-type=url&delimiter=/&'
                                'max-keys=2')
        self.assertEqual(listing.find('./IsTruncated').text, 'true')
        expected_marker = quote(u'but-\u062a/'.encode('utf-8'))
        self.assertEqual(listing.find('./NextMarker').text, expected_marker)
    def test_bucket_GET_v2_is_truncated(self):
        bucket_name = 'junk'