Add support for more characters in header keys

Some S3 clients rely on AWS's ability to preserve underscores in
header names. This doesn't mesh well with WSGI, which folds dashes
into underscores when building its HTTP_* environ keys, so the two
can no longer be told apart. Starting in eventlet 0.19.0, however,
the raw headers from the client are available in a `headers_raw`
environment key. If available, use it; otherwise fall back to the
regular WSGI headers.

Change-Id: If96fc2f0713e3ec18764766225446a16a7c07f94
This commit is contained in:
Tim Burke 2016-07-21 23:12:36 +00:00
parent 5f9516b58c
commit f3a933aad1
4 changed files with 108 additions and 37 deletions

View File

@ -14,6 +14,7 @@
# limitations under the License.
import base64
from collections import defaultdict
from email.header import Header
from hashlib import sha1, sha256, md5
import hmac
@ -265,9 +266,18 @@ class SigV4Mixin(object):
:return : dict of headers to sign, the keys are all lower case
"""
headers_lower_dict = dict(
(k.lower().strip(), ' '.join(_header_strip(v or '').split()))
for (k, v) in six.iteritems(self.headers))
if 'headers_raw' in self.environ: # eventlet >= 0.19.0
# See https://github.com/eventlet/eventlet/commit/67ec999
headers_lower_dict = defaultdict(list)
for key, value in self.environ['headers_raw']:
headers_lower_dict[key.lower().strip()].append(
' '.join(_header_strip(value or '').split()))
headers_lower_dict = {k: ','.join(v)
for k, v in headers_lower_dict.items()}
else: # mostly-functional fallback
headers_lower_dict = dict(
(k.lower().strip(), ' '.join(_header_strip(v or '').split()))
for (k, v) in six.iteritems(self.headers))
if 'host' in headers_lower_dict and re.match(
'Boto/2.[0-9].[0-2]',
@ -279,7 +289,7 @@ class SigV4Mixin(object):
headers_lower_dict['host'].split(':')[0]
headers_to_sign = [
(key, value) for key, value in headers_lower_dict.items()
(key, value) for key, value in sorted(headers_lower_dict.items())
if key in self._signed_headers]
if len(headers_to_sign) != len(self._signed_headers):
@ -290,7 +300,7 @@ class SigV4Mixin(object):
# process.
raise SignatureDoesNotMatch()
return dict(headers_to_sign)
return headers_to_sign
def _canonical_uri(self):
"""
@ -328,13 +338,12 @@ class SigV4Mixin(object):
# host:iam.amazonaws.com
# x-amz-date:20150830T123600Z
headers_to_sign = self._headers_to_sign()
cr.append('\n'.join(
['%s:%s' % (key, value) for key, value in
sorted(headers_to_sign.items())]) + '\n')
cr.append(''.join('%s:%s\n' % (key, value)
for key, value in headers_to_sign))
# 5. Add signed headers into canonical request like
# content-type;host;x-amz-date
cr.append(';'.join(sorted(headers_to_sign)))
cr.append(';'.join(k for k, v in headers_to_sign))
# 6. Add payload string at the tail
if 'X-Amz-Credential' in self.params:
@ -780,9 +789,20 @@ class Request(swob.Request):
_header_strip(self.headers.get('Content-MD5')) or '',
_header_strip(self.headers.get('Content-Type')) or '']
for amz_header in sorted((key.lower() for key in self.headers
if key.lower().startswith('x-amz-'))):
amz_headers[amz_header] = self.headers[amz_header]
if 'headers_raw' in self.environ: # eventlet >= 0.19.0
# See https://github.com/eventlet/eventlet/commit/67ec999
amz_headers = defaultdict(list)
for key, value in self.environ['headers_raw']:
key = key.lower()
if not key.startswith('x-amz-'):
continue
amz_headers[key.strip()].append(value.strip())
amz_headers = dict((key, ','.join(value))
for key, value in amz_headers.items())
else: # mostly-functional fallback
amz_headers = dict((key.lower(), value)
for key, value in self.headers.items()
if key.lower().startswith('x-amz-'))
if self._is_header_auth:
if 'x-amz-date' in amz_headers:
@ -796,8 +816,8 @@ class Request(swob.Request):
# but as a sanity check...
raise AccessDenied()
for k in sorted(key.lower() for key in amz_headers):
buf.append("%s:%s" % (k, amz_headers[k]))
for key, value in sorted(amz_headers.items()):
buf.append("%s:%s" % (key, value))
path = self._canonical_uri()
if self.query_string:
@ -883,15 +903,48 @@ class Request(swob.Request):
env = self.environ.copy()
for key in self.environ:
if key.startswith('HTTP_X_AMZ_META_'):
if not(set(env[key]).issubset(string.printable)):
env[key] = Header(env[key], 'UTF-8').encode()
if env[key].startswith('=?utf-8?q?'):
env[key] = '=?UTF-8?Q?' + env[key][10:]
elif env[key].startswith('=?utf-8?b?'):
env[key] = '=?UTF-8?B?' + env[key][10:]
env['HTTP_X_OBJECT_META_' + key[16:]] = env[key]
def sanitize(value):
    """Return a header-safe rendering of a metadata value.

    A value made up solely of ``string.printable`` characters is handed
    back untouched.  Anything else is encoded as a UTF-8
    ``email.header.Header``; if the encoded text starts with a lower-case
    ``=?utf-8?q?`` or ``=?utf-8?b?`` marker, that marker is swapped for
    its upper-case spelling and the rest is left as produced.
    """
    if not set(value).issubset(string.printable):
        encoded = Header(value, 'UTF-8').encode()
        for lower, upper in (('=?utf-8?q?', '=?UTF-8?Q?'),
                             ('=?utf-8?b?', '=?UTF-8?B?')):
            if encoded.startswith(lower):
                # Only the leading marker changes case; the payload that
                # follows it is kept exactly as encoded.
                return upper + encoded[len(lower):]
        return encoded
    return value
if 'headers_raw' in env: # eventlet >= 0.19.0
# See https://github.com/eventlet/eventlet/commit/67ec999
for key, value in env['headers_raw']:
if not key.lower().startswith('x-amz-meta-'):
continue
# AWS ignores user-defined headers with these characters
if any(c in key for c in ' "),/;<=>?@[\\]{}'):
# NB: apparently, '(' *is* allowed
continue
# Note that this may have already been deleted, e.g. if the
# client sent multiple headers with the same name, or both
# x-amz-meta-foo-bar and x-amz-meta-foo_bar
env.pop('HTTP_' + key.replace('-', '_').upper(), None)
# Need to preserve underscores. Since we know '=' can't be
# present, quoted-printable seems appropriate.
key = key.replace('_', '=5F').replace('-', '_').upper()
key = 'HTTP_X_OBJECT_META_' + key[11:]
if key in env:
env[key] += ',' + sanitize(value)
else:
env[key] = sanitize(value)
else: # mostly-functional fallback
for key in self.environ:
if not key.startswith('HTTP_X_AMZ_META_'):
continue
# AWS ignores user-defined headers with these characters
if any(c in key for c in ' "),/;<=>?@[\\]{}'):
# NB: apparently, '(' *is* allowed
continue
env['HTTP_X_OBJECT_META_' + key[16:]] = sanitize(env[key])
del env[key]
if 'HTTP_X_AMZ_COPY_SOURCE' in env:

View File

@ -100,7 +100,10 @@ class Response(ResponseBase, swob.Response):
_key = key.lower()
if _key.startswith('x-object-meta-'):
headers['x-amz-meta-' + _key[14:]] = val
# Note that AWS ignores user-defined headers with '=' in the
# header name. We translated underscores to '=5F' on the way
# in, though.
headers['x-amz-meta-' + _key[14:].replace('=5f', '_')] = val
elif _key in ('content-length', 'content-type',
'content-range', 'content-encoding',
'content-disposition', 'content-language',

View File

@ -320,7 +320,9 @@ class TestSwift3Object(Swift3FunctionalTestCase):
self.assertCommonResponseHeaders(headers)
self._assertObjectEtag(self.bucket, obj, etag)
def _test_put_object_headers(self, req_headers):
def _test_put_object_headers(self, req_headers, expected_headers=None):
if expected_headers is None:
expected_headers = req_headers
obj = 'object'
content = 'abcdefghij'
etag = md5(content).hexdigest()
@ -330,7 +332,7 @@ class TestSwift3Object(Swift3FunctionalTestCase):
self.assertEqual(status, 200)
status, headers, body = \
self.conn.make_request('HEAD', self.bucket, obj)
for header, value in req_headers.items():
for header, value in expected_headers.items():
self.assertIn(header.lower(), headers)
self.assertEqual(headers[header.lower()], value)
self.assertCommonResponseHeaders(headers)
@ -341,6 +343,21 @@ class TestSwift3Object(Swift3FunctionalTestCase):
'X-Amz-Meta-Bar': 'foo',
'X-Amz-Meta-Bar2': 'foo2'})
def test_put_object_weird_metadata(self):
    # One x-amz-meta-<c> header is sent per punctuation character, with
    # the character itself as the value.
    sent_chars = '!"#$%&\'()*+-./<=>?@[\\]^`{|}~'
    # Only this subset is expected to come back on the follow-up HEAD.
    kept_chars = '!#$%&\'(*+-.^`|~'
    req_headers = {'x-amz-meta-' + c: c for c in sent_chars}
    exp_headers = {'x-amz-meta-' + c: c for c in kept_chars}
    self._test_put_object_headers(req_headers, exp_headers)
def test_put_object_underscore_in_metadata(self):
    # Break this out separately for ease of testing pre-0.19.0 eventlet.
    # The two header names differ only in '-' versus '_', and each
    # carries its own value.
    headers = {
        'X-Amz-Meta-Foo-Bar': 'baz',
        'X-Amz-Meta-Foo_Bar': 'also baz',
    }
    self._test_put_object_headers(headers)
def test_put_object_content_headers(self):
self._test_put_object_headers({
'Content-Type': 'foo/bar',

View File

@ -391,8 +391,8 @@ class TestRequest(Swift3TestCase):
'Authorization':
'AWS4-HMAC-SHA256 '
'Credential=test/20130524/US/s3/aws4_request, '
'SignedHeaders=host;%s,'
'Signature=X' % included_header,
'SignedHeaders=%s,'
'Signature=X' % ';'.join(sorted(['host', included_header])),
'X-Amz-Content-SHA256': '0123456789'}
headers.update(date_header)
@ -551,11 +551,10 @@ class TestRequest(Swift3TestCase):
sigv4_req = SigV4Request(req.environ)
headers_to_sign = sigv4_req._headers_to_sign()
self.assertEqual(['host', 'x-amz-content-sha256', 'x-amz-date'],
sorted(headers_to_sign.keys()))
self.assertEqual(headers_to_sign['host'], 'localhost:80')
self.assertEqual(headers_to_sign['x-amz-date'], x_amz_date)
self.assertEqual(headers_to_sign['x-amz-content-sha256'], '0123456789')
self.assertEqual(headers_to_sign, [
('host', 'localhost:80'),
('x-amz-content-sha256', '0123456789'),
('x-amz-date', x_amz_date)])
# no x-amz-date
headers = {
@ -571,10 +570,9 @@ class TestRequest(Swift3TestCase):
sigv4_req = SigV4Request(req.environ)
headers_to_sign = sigv4_req._headers_to_sign()
self.assertEqual(['host', 'x-amz-content-sha256'],
sorted(headers_to_sign.keys()))
self.assertEqual(headers_to_sign['host'], 'localhost:80')
self.assertEqual(headers_to_sign['x-amz-content-sha256'], '0123456789')
self.assertEqual(headers_to_sign, [
('host', 'localhost:80'),
('x-amz-content-sha256', '0123456789')])
# SignedHeaders says, host and x-amz-date included but there is not
# X-Amz-Date header