Fix proxy-server's support for chunked transferring in GET object

The proxy-server currently requires Content-Length in the response
headers when getting an object, and does not support chunked transfer
with "Transfer-Encoding: chunked".

This doesn't matter in normal Swift, but it prevents us from adding
any middleware that does something like streaming processing of
objects, because such middleware can't calculate the length of its
response body before it starts to send the response.

Change-Id: I60fc6c86338d734e39b7e5f1e48a2647995045ef
This commit is contained in:
Takashi Kajinami 2015-11-17 16:15:59 +09:00
parent 3927bf1f17
commit 8e4347afd5
10 changed files with 290 additions and 146 deletions

View File

@ -33,10 +33,12 @@ from swift.common.storage_policy import POLICIES
from swift.common.constraints import FORMAT2CONTENT_TYPE
from swift.common.exceptions import ListingIterError, SegmentError
from swift.common.http import is_success
from swift.common.swob import (HTTPBadRequest, HTTPNotAcceptable,
HTTPServiceUnavailable, Range)
from swift.common.swob import HTTPBadRequest, HTTPNotAcceptable, \
HTTPServiceUnavailable, Range, is_chunked
from swift.common.utils import split_path, validate_device_partition, \
close_if_possible, maybe_multipart_byteranges_to_document_iters
close_if_possible, maybe_multipart_byteranges_to_document_iters, \
multipart_byteranges_to_document_iters, parse_content_type, \
parse_content_range
from swift.common.wsgi import make_subrequest
@ -500,3 +502,45 @@ class SegmentedIterable(object):
backend server is closed.
"""
close_if_possible(self.app_iter)
def http_response_to_document_iters(response, read_chunk_size=4096):
    """
    Turn a successful object-GET HTTP response into an iterator of
    (first-byte, last-byte, length, headers, body-file) 5-tuples.

    Only 200 and 206 responses are meaningful here; anything else (a 204,
    for example) will probably not work.

    For a 200 sent with "Transfer-Encoding: chunked" the object length is
    not known up front, so last-byte and length are both None.

    :param response: HTTP response, like from bufferedhttp.http_connect(),
                     not a swob.Response.
    :param read_chunk_size: forwarded to
                            multipart_byteranges_to_document_iters() when
                            the response is multipart/byteranges.
    :returns: an iterator of 5-tuples as described above.
    """
    # Evaluated before the status check, so a bad Transfer-Encoding header
    # raises regardless of the response status.
    chunked = is_chunked(dict(response.getheaders()))

    if response.status == 200:
        # Single "range" that's the whole object.
        if chunked:
            first, last, total = 0, None, None
        else:
            total = int(response.getheader('Content-Length'))
            first, last = 0, total - 1
        return iter([(first, last, total, response.getheaders(), response)])

    content_type, params_list = parse_content_type(
        response.getheader('Content-Type'))

    if content_type == 'multipart/byteranges':
        # Multiple ranges; the body is a MIME document that has to be
        # parsed using the boundary taken from the Content-Type header.
        boundary = dict(params_list)['boundary']
        return multipart_byteranges_to_document_iters(
            response, boundary, read_chunk_size)

    # Single range; no MIME framing, just the bytes. The start and end
    # byte indices come from the Content-Range header.
    first, last, total = parse_content_range(
        response.getheader('Content-Range'))
    return iter([(first, last, total, response.getheaders(), response)])

View File

@ -804,6 +804,27 @@ def _host_url_property():
return property(getter, doc="Get url for request/response up to path")
def is_chunked(headers):
    """
    Report whether the given headers declare chunked transfer encoding.

    The Transfer-Encoding header name is matched case-insensitively. If
    several keys differ only by case, the last one iterated is used.
    Whitespace around the header value is ignored.

    :param headers: dict-like object mapping header names to values; it
                    must support iteration over keys and ``.get()``.
    :returns: True if Transfer-Encoding is exactly "chunked"
              (case-insensitive), False if the header is absent or empty.
    :raises AttributeError: if the header lists more than one
                            transfer-coding
    :raises ValueError: if the single transfer-coding is not "chunked"
    """
    te = None
    for key in headers:
        if key.lower() == 'transfer-encoding':
            te = headers.get(key)
    if not te:
        return False

    # Optional whitespace is allowed around list elements, so trim each
    # coding before inspecting it.
    encodings = [coding.strip() for coding in te.split(',')]
    if len(encodings) > 1:
        # RFC 2616 Sec. 3.6 allows a list of codings whose last entry must
        # be chunked, but only a lone "chunked" is supported here, so any
        # list is rejected.
        raise AttributeError('Unsupported Transfer-Coding header'
                             ' value specified in Transfer-Encoding'
                             ' header')
    if encodings[-1].lower() == 'chunked':
        return True
    raise ValueError('Invalid Transfer-Encoding header value')
class Request(object):
"""
WSGI Request object.
@ -955,7 +976,7 @@ class Request(object):
@property
def is_chunked(self):
return 'chunked' in self.headers.get('transfer-encoding', '')
return is_chunked(self.headers)
@property
def url(self):
@ -1061,22 +1082,7 @@ class Request(object):
:raises AttributeError: if the last value of the transfer-encoding
header is not "chunked"
"""
te = self.headers.get('transfer-encoding')
if te:
encodings = te.split(',')
if len(encodings) > 1:
raise AttributeError('Unsupported Transfer-Coding header'
' value specified in Transfer-Encoding'
' header')
# If there are more than one transfer encoding value, the last
# one must be chunked, see RFC 2616 Sec. 3.6
if encodings[-1].lower() == 'chunked':
chunked = True
else:
raise ValueError('Invalid Transfer-Encoding header value')
else:
chunked = False
if not chunked:
if not is_chunked(self.headers):
# Because we are not using chunked transfer encoding we can pay
# attention to the content-length header.
fsize = self.headers.get('content-length', None)

View File

@ -3627,8 +3627,8 @@ def document_iters_to_http_response_body(ranges_iter, boundary, multipart,
HTTP response body, whether that's multipart/byteranges or not.
This is almost, but not quite, the inverse of
http_response_to_document_iters(). This function only yields chunks of
the body, not any headers.
request_helpers.http_response_to_document_iters(). This function only
yields chunks of the body, not any headers.
:param ranges_iter: an iterator of dictionaries, one per range.
Each dictionary must contain at least the following key:
@ -3703,41 +3703,6 @@ def multipart_byteranges_to_document_iters(input_file, boundary,
yield (first_byte, last_byte, length, headers.items(), body)
def http_response_to_document_iters(response, read_chunk_size=4096):
    """
    Turn a successful object-GET HTTP response into an iterator of
    (first-byte, last-byte, length, headers, body-file) 5-tuples.

    The response is expected to be a 200 or a 206; other statuses (a 204,
    say) will probably not work.

    :param response: HTTP response, like from bufferedhttp.http_connect(),
                     not a swob.Response.
    :param read_chunk_size: forwarded to
                            multipart_byteranges_to_document_iters() when
                            the response is multipart/byteranges.
    :returns: an iterator of 5-tuples as described above.
    """
    if response.status == 200:
        # Single "range" that's the whole object; its extent comes from
        # the Content-Length header.
        total = int(response.getheader('Content-Length'))
        whole_object = (0, total - 1, total, response.getheaders(), response)
        return iter([whole_object])

    content_type, params_list = parse_content_type(
        response.getheader('Content-Type'))

    if content_type == 'multipart/byteranges':
        # Multiple ranges; parse the MIME document using the boundary
        # extracted from the Content-Type header.
        boundary = dict(params_list)['boundary']
        return multipart_byteranges_to_document_iters(
            response, boundary, read_chunk_size)

    # Single range; no MIME framing, just the bytes. Start and end byte
    # indices come from the Content-Range header.
    first, last, total = parse_content_range(
        response.getheader('Content-Range'))
    return iter([(first, last, total, response.getheaders(), response)])
#: Regular expression to match form attributes.
ATTRIBUTES_RE = re.compile(r'(\w+)=(".*?"|[^";]+)(; ?|$)')

View File

@ -43,7 +43,7 @@ from swift.common.wsgi import make_pre_authed_env
from swift.common.utils import Timestamp, config_true_value, \
public, split_path, list_from_csv, GreenthreadSafeIterator, \
GreenAsyncPile, quorum_size, parse_content_type, \
http_response_to_document_iters, document_iters_to_http_response_body
document_iters_to_http_response_body
from swift.common.bufferedhttp import http_connect
from swift.common.exceptions import ChunkReadTimeout, ChunkWriteTimeout, \
ConnectionTimeout, RangeAlreadyComplete
@ -55,7 +55,8 @@ from swift.common.swob import Request, Response, HeaderKeyDict, Range, \
HTTPException, HTTPRequestedRangeNotSatisfiable, HTTPServiceUnavailable, \
status_map
from swift.common.request_helpers import strip_sys_meta_prefix, \
strip_user_meta_prefix, is_user_meta, is_sys_meta, is_sys_or_user_meta
strip_user_meta_prefix, is_user_meta, is_sys_meta, is_sys_or_user_meta, \
http_response_to_document_iters
from swift.common.storage_policy import POLICIES

View File

@ -16,13 +16,16 @@
"""Tests for swift.common.request_helpers"""
import unittest
from swift.common.swob import Request, HTTPException
from swift.common.swob import Request, HTTPException, HeaderKeyDict
from swift.common.storage_policy import POLICIES, EC_POLICY, REPL_POLICY
from swift.common.request_helpers import is_sys_meta, is_user_meta, \
is_sys_or_user_meta, strip_sys_meta_prefix, strip_user_meta_prefix, \
remove_items, copy_header_subset, get_name_and_placement
remove_items, copy_header_subset, get_name_and_placement, \
http_response_to_document_iters
from test.unit import patch_policies
from test.unit.common.test_utils import FakeResponse
server_types = ['account', 'container', 'object']
@ -158,3 +161,115 @@ class TestRequestHelpers(unittest.TestCase):
self.assertEqual(suffix_parts, '') # still false-y
self.assertEqual(policy, POLICIES[1])
self.assertEqual(policy.policy_type, REPL_POLICY)
class TestHTTPResponseToDocumentIters(unittest.TestCase):
    """Tests for http_response_to_document_iters() on 200/206 responses."""

    def _assert_next_document(self, doc_iters, first_byte, last_byte, length,
                              expected_headers, expected_body):
        """
        Pull the next 5-tuple from doc_iters and verify every element.

        :param doc_iters: iterator under test
        :param first_byte: expected first-byte index
        :param last_byte: expected last-byte index, or None to assert None
        :param length: expected total length, or None to assert None
        :param expected_headers: list of (name, value) pairs to look up
        :param expected_body: expected result of reading the body file
        """
        got_first, got_last, got_length, headers, body = next(doc_iters)
        self.assertEqual(got_first, first_byte)
        for actual, expected in ((got_last, last_byte), (got_length, length)):
            if expected is None:
                self.assertIsNone(actual)
            else:
                self.assertEqual(actual, expected)
        header_dict = HeaderKeyDict(headers)
        for name, value in expected_headers:
            self.assertEqual(header_dict.get(name), value)
        self.assertEqual(body.read(), expected_body)

    def test_200(self):
        fr = FakeResponse(
            200,
            {'Content-Length': '10', 'Content-Type': 'application/lunch'},
            'sandwiches')
        doc_iters = http_response_to_document_iters(fr)
        self._assert_next_document(
            doc_iters, 0, 9, 10,
            [('Content-Length', '10'),
             ('Content-Type', 'application/lunch')],
            'sandwiches')
        self.assertRaises(StopIteration, next, doc_iters)

        # Chunked response: the length is unknown, so last byte and
        # length are expected to be None.
        fr = FakeResponse(
            200,
            {'Transfer-Encoding': 'chunked',
             'Content-Type': 'application/lunch'},
            'sandwiches')
        doc_iters = http_response_to_document_iters(fr)
        self._assert_next_document(
            doc_iters, 0, None, None,
            [('Transfer-Encoding', 'chunked'),
             ('Content-Type', 'application/lunch')],
            'sandwiches')
        self.assertRaises(StopIteration, next, doc_iters)

    def test_206_single_range(self):
        fr = FakeResponse(
            206,
            {'Content-Length': '8', 'Content-Type': 'application/lunch',
             'Content-Range': 'bytes 1-8/10'},
            'andwiche')
        doc_iters = http_response_to_document_iters(fr)
        self._assert_next_document(
            doc_iters, 1, 8, 10,
            [('Content-Length', '8'),
             ('Content-Type', 'application/lunch')],
            'andwiche')
        self.assertRaises(StopIteration, next, doc_iters)

        # Chunked response should be treated in the same way as non-chunked
        # one
        fr = FakeResponse(
            206,
            {'Transfer-Encoding': 'chunked',
             'Content-Type': 'application/lunch',
             'Content-Range': 'bytes 1-8/10'},
            'andwiche')
        doc_iters = http_response_to_document_iters(fr)
        self._assert_next_document(
            doc_iters, 1, 8, 10,
            [('Content-Type', 'application/lunch')],
            'andwiche')
        self.assertRaises(StopIteration, next, doc_iters)

    def test_206_multiple_ranges(self):
        mime_body = ("--asdfasdfasdf\r\n"
                     "Content-Type: application/lunch\r\n"
                     "Content-Range: bytes 0-3/10\r\n"
                     "\r\n"
                     "sand\r\n"
                     "--asdfasdfasdf\r\n"
                     "Content-Type: application/lunch\r\n"
                     "Content-Range: bytes 6-9/10\r\n"
                     "\r\n"
                     "ches\r\n"
                     "--asdfasdfasdf--")
        fr = FakeResponse(
            206,
            {'Content-Type': 'multipart/byteranges; boundary=asdfasdfasdf'},
            mime_body)
        doc_iters = http_response_to_document_iters(fr)

        self._assert_next_document(
            doc_iters, 0, 3, 10,
            [('Content-Type', 'application/lunch')],
            'sand')
        self._assert_next_document(
            doc_iters, 6, 9, 10,
            [('Content-Type', 'application/lunch')],
            'ches')
        self.assertRaises(StopIteration, next, doc_iters)

View File

@ -339,6 +339,41 @@ class TestMatch(unittest.TestCase):
self.assertTrue('c' not in match)
class TestTransferEncoding(unittest.TestCase):
    """Tests for swift.common.swob.is_chunked()."""

    def _assert_rejected(self, value, exc_type, expected_msg, fail_msg):
        """
        Assert that is_chunked() raises exc_type for a Transfer-Encoding
        header of ``value`` and that the exception text is expected_msg.
        fail_msg is reported if nothing is raised.
        """
        headers = {'Transfer-Encoding': value}
        try:
            swift.common.swob.is_chunked(headers)
        except exc_type as e:
            self.assertEqual(str(e), expected_msg)
        else:
            self.fail(fail_msg)

    def test_is_chunked(self):
        unsupported_msg = ("Unsupported Transfer-Coding header"
                           " value specified in Transfer-Encoding header")

        headers = {}
        self.assertFalse(swift.common.swob.is_chunked(headers))

        headers['Transfer-Encoding'] = 'chunked'
        self.assertTrue(swift.common.swob.is_chunked(headers))

        # Any list of codings is refused, even one ending in "chunked".
        self._assert_rejected(
            'gzip,chunked', AttributeError, unsupported_msg,
            "Expected an AttributeError raised for 'gzip'")

        # A single coding other than "chunked" is invalid.
        self._assert_rejected(
            'gzip', ValueError,
            "Invalid Transfer-Encoding header value",
            "Expected a ValueError raised for 'gzip'")

        self._assert_rejected(
            'gzip,identity', AttributeError, unsupported_msg,
            "Expected an AttributeError raised for 'gzip,identity'")
class TestAccept(unittest.TestCase):
def test_accept_json(self):
for accept in ('application/json', 'application/json;q=1.0,*/*;q=0.9',

View File

@ -55,10 +55,9 @@ from netifaces import AF_INET6
from mock import MagicMock, patch
from six.moves.configparser import NoSectionError, NoOptionError
from swift.common.exceptions import (Timeout, MessageTimeout,
ConnectionTimeout, LockTimeout,
ReplicationLockTimeout,
MimeInvalid, ThreadPoolDead)
from swift.common.exceptions import Timeout, MessageTimeout, \
ConnectionTimeout, LockTimeout, ReplicationLockTimeout, \
MimeInvalid, ThreadPoolDead
from swift.common import utils
from swift.common.container_sync_realms import ContainerSyncRealms
from swift.common.swob import Request, Response, HeaderKeyDict
@ -5230,81 +5229,6 @@ class FakeResponse(object):
return self.body.readline(length)
class TestHTTPResponseToDocumentIters(unittest.TestCase):
    """Tests for utils.http_response_to_document_iters()."""

    def _check_documents(self, response, expected_docs):
        """
        Run utils.http_response_to_document_iters() on response and compare
        each yielded 5-tuple with the matching entry of expected_docs, then
        verify that the iterator is exhausted.

        :param response: fake HTTP response to convert
        :param expected_docs: list of (first_byte, last_byte, length,
                              header_pairs, body) tuples, where header_pairs
                              is a list of (name, value) pairs to look up
        """
        doc_iters = utils.http_response_to_document_iters(response)
        for want_first, want_last, want_length, want_headers, want_body \
                in expected_docs:
            first_byte, last_byte, length, headers, body = next(doc_iters)
            self.assertEqual(first_byte, want_first)
            self.assertEqual(last_byte, want_last)
            self.assertEqual(length, want_length)
            header_dict = HeaderKeyDict(headers)
            for name, value in want_headers:
                self.assertEqual(header_dict.get(name), value)
            self.assertEqual(body.read(), want_body)
        self.assertRaises(StopIteration, next, doc_iters)

    def test_200(self):
        fr = FakeResponse(
            200,
            {'Content-Length': '10', 'Content-Type': 'application/lunch'},
            'sandwiches')
        self._check_documents(fr, [
            (0, 9, 10,
             [('Content-Length', '10'),
              ('Content-Type', 'application/lunch')],
             'sandwiches'),
        ])

    def test_206_single_range(self):
        fr = FakeResponse(
            206,
            {'Content-Length': '8', 'Content-Type': 'application/lunch',
             'Content-Range': 'bytes 1-8/10'},
            'andwiche')
        self._check_documents(fr, [
            (1, 8, 10,
             [('Content-Length', '8'),
              ('Content-Type', 'application/lunch')],
             'andwiche'),
        ])

    def test_206_multiple_ranges(self):
        fr = FakeResponse(
            206,
            {'Content-Type': 'multipart/byteranges; boundary=asdfasdfasdf'},
            ("--asdfasdfasdf\r\n"
             "Content-Type: application/lunch\r\n"
             "Content-Range: bytes 0-3/10\r\n"
             "\r\n"
             "sand\r\n"
             "--asdfasdfasdf\r\n"
             "Content-Type: application/lunch\r\n"
             "Content-Range: bytes 6-9/10\r\n"
             "\r\n"
             "ches\r\n"
             "--asdfasdfasdf--"))
        self._check_documents(fr, [
            (0, 3, 10, [('Content-Type', 'application/lunch')], 'sand'),
            (6, 9, 10, [('Content-Type', 'application/lunch')], 'ches'),
        ])
class TestDocumentItersToHTTPResponseBody(unittest.TestCase):
def test_no_parts(self):
body = utils.document_iters_to_http_response_body(

View File

@ -801,6 +801,46 @@ class TestFuncs(unittest.TestCase):
client_chunks = list(app_iter)
self.assertEqual(client_chunks, ['abcd1234', 'efgh5678'])
def test_client_chunk_size_resuming_chunked(self):
    """
    With client_chunk_size=8 and a chunked first source that times out
    part-way through, the app iter is expected to yield exactly
    ['abcd1234', 'efgh5678'] after switching to the second source.
    """

    class TestChunkedSource(object):
        """Stub backend response that reports chunked transfer encoding."""

        def __init__(self, chunks):
            self.chunks = list(chunks)
            self.status = 200
            self.headers = {'transfer-encoding': 'chunked',
                            'content-type': 'text/plain'}

        def read(self, _read_size):
            # An exhausted source reads as empty; a None entry stands for
            # a read timeout.
            if not self.chunks:
                return ''
            chunk = self.chunks.pop(0)
            if chunk is None:
                raise exceptions.ChunkReadTimeout()
            return chunk

        def getheader(self, header):
            return self.headers.get(header.lower())

        def getheaders(self):
            return self.headers

    node = {'ip': '1.2.3.4', 'port': 6000, 'device': 'sda'}

    # The first source times out after a partial 'abc'; the second
    # supplies the data the handler resumes with.
    source1 = TestChunkedSource(['abcd', '1234', 'abc', None])
    source2 = TestChunkedSource(['efgh5678'])
    req = Request.blank('/v1/a/c/o')
    handler = GetOrHeadHandler(
        self.app, req, 'Object', None, None, None, {},
        client_chunk_size=8)

    app_iter = handler._make_app_iter(req, node, source1)

    def fake_get_source_and_node():
        return source2, node

    with patch.object(handler, '_get_source_and_node',
                      fake_get_source_and_node):
        client_chunks = list(app_iter)
    self.assertEqual(client_chunks, ['abcd1234', 'efgh5678'])
def test_bytes_to_skip(self):
# if you start at the beginning, skip nothing
self.assertEqual(bytes_to_skip(1024, 0), 0)

View File

@ -712,6 +712,13 @@ class TestReplicatedObjController(BaseObjectControllerMixin,
self.assertEqual(resp.status_int, 200)
self.assertIn('Accept-Ranges', resp.headers)
def test_GET_transfer_encoding_chunked(self):
    """
    A 200 backend response carrying Transfer-Encoding: chunked should
    yield a 200 whose headers still include Transfer-Encoding: chunked.
    """
    backend_headers = {'transfer-encoding': 'chunked'}
    req = swift.common.swob.Request.blank('/v1/a/c/o')
    with set_http_connect(200, headers=backend_headers):
        resp = req.get_response(self.app)
    self.assertEqual(resp.status_int, 200)
    self.assertEqual(resp.headers['Transfer-Encoding'], 'chunked')
def test_GET_error(self):
req = swift.common.swob.Request.blank('/v1/a/c/o')
with set_http_connect(503, 200):

View File

@ -79,6 +79,7 @@ from swift.common.swob import Request, Response, HTTPUnauthorized, \
from swift.common import storage_policy
from swift.common.storage_policy import StoragePolicy, ECStoragePolicy, \
StoragePolicyCollection, POLICIES
import swift.common.request_helpers
from swift.common.request_helpers import get_sys_meta_prefix
# mocks
@ -1604,7 +1605,8 @@ class TestObjectController(unittest.TestCase):
bytes_before_timeout[0] -= len(result)
return result
orig_hrtdi = proxy_base.http_response_to_document_iters
orig_hrtdi = swift.common.request_helpers. \
http_response_to_document_iters
# Use this to mock out http_response_to_document_iters. On the first
# call, the result will be sabotaged to blow up with
@ -1635,7 +1637,8 @@ class TestObjectController(unittest.TestCase):
# do is mock out stuff so the proxy thinks it only read a certain
# number of bytes before it got a timeout.
bytes_before_timeout[0] = 300
with mock.patch.object(proxy_base, 'http_response_to_document_iters',
with mock.patch.object(proxy_base,
'http_response_to_document_iters',
single_sabotage_hrtdi):
req = Request.blank(
path,
@ -1660,7 +1663,8 @@ class TestObjectController(unittest.TestCase):
kaboomed[0] = 0
sabotaged[0] = False
prosrv._error_limiting = {} # clear out errors
with mock.patch.object(proxy_base, 'http_response_to_document_iters',
with mock.patch.object(proxy_base,
'http_response_to_document_iters',
sabotaged_hrtdi): # perma-broken
req = Request.blank(
path,
@ -1697,7 +1701,8 @@ class TestObjectController(unittest.TestCase):
kaboomed[0] = 0
sabotaged[0] = False
prosrv._error_limiting = {} # clear out errors
with mock.patch.object(proxy_base, 'http_response_to_document_iters',
with mock.patch.object(proxy_base,
'http_response_to_document_iters',
single_sabotage_hrtdi):
req = Request.blank(
path,
@ -1734,7 +1739,8 @@ class TestObjectController(unittest.TestCase):
kaboomed[0] = 0
sabotaged[0] = False
prosrv._error_limiting = {} # clear out errors
with mock.patch.object(proxy_base, 'http_response_to_document_iters',
with mock.patch.object(proxy_base,
'http_response_to_document_iters',
single_sabotage_hrtdi):
req = Request.blank(
path,
@ -1771,7 +1777,8 @@ class TestObjectController(unittest.TestCase):
kaboomed[0] = 0
sabotaged[0] = False
prosrv._error_limiting = {} # clear out errors
with mock.patch.object(proxy_base, 'http_response_to_document_iters',
with mock.patch.object(proxy_base,
'http_response_to_document_iters',
single_sabotage_hrtdi):
req = Request.blank(
path,