py3: Fix unicode-header-name handling in bufferedhttp

We need to parse additional headers earlier, before stdlib tries to
establish message framing.

Now, TestReconstructorRebuildUTF8 can pass on py3.

Closes-Bug: #2097030
Co-Authored-By: Alistair Coles <alistairncoles@gmail.com>
Related-Change: https://review.opendev.org/c/openstack/swift/+/662546
Change-Id: I6aa16fda9285c9fc3816da6fbff2615bd14a020c
This commit is contained in:
Tim Burke 2025-01-07 17:39:42 -08:00
parent e4cc228ed0
commit 0a8ecbc554
3 changed files with 171 additions and 33 deletions

View File

@ -67,7 +67,7 @@ class BufferedHTTPResponse(HTTPResponse):
self.strict = strict
self._method = method
self.headers = self.msg = None
self._headers = self.msg = None
# from the Status-Line of the response
self.version = _UNKNOWN # HTTP-Version
@ -80,23 +80,36 @@ class BufferedHTTPResponse(HTTPResponse):
self.will_close = _UNKNOWN # conn will close at end of response
self._readline_buffer = b''
def begin(self):
HTTPResponse.begin(self)
header_payload = self.headers.get_payload()
if isinstance(header_payload, list) and len(header_payload) == 1:
header_payload = header_payload[0].get_payload()
if header_payload:
# This shouldn't be here. We must've bumped up against
# https://bugs.python.org/issue37093
for line in header_payload.rstrip('\r\n').split('\n'):
if ':' not in line or line[:1] in ' \t':
# Well, we're no more broken than we were before...
# Should we support line folding?
# How can/should we handle a bad header line?
break
header, value = line.split(':', 1)
value = value.strip(' \t\n\r')
self.headers.add_header(header, value)
@property
def headers(self):
return self._headers
@headers.setter
def headers(self, hdrs):
try:
header_payload = hdrs.get_payload()
except AttributeError:
pass
else:
if isinstance(header_payload, list) and len(header_payload) == 1:
header_payload = header_payload[0].get_payload()
if header_payload:
# This shouldn't be here. We must've bumped up against
# https://bugs.python.org/issue37093
for line in header_payload.rstrip('\r\n').split('\n'):
if ':' not in line or line[:1] in ' \t':
# Well, we're no more broken than we were before...
# Should we support line folding?
# How can/should we handle a bad header line?
break
header, value = line.split(':', 1)
value = value.strip(' \t\n\r')
hdrs.add_header(header, value)
# Clear the payload now that all headers are present.
# Otherwise, we may double-up the headers parsed here
# if/when repeatedly setting the headers property.
hdrs.set_payload(None)
self._headers = hdrs
def expect_response(self):
if self.fp:

View File

@ -21,7 +21,10 @@ import random
import time
from swift.common.direct_client import DirectClientException
from swift.common.header_key_dict import HeaderKeyDict
from swift.common.internal_client import UnexpectedResponse
from swift.common.manager import Manager
from swift.common.swob import wsgi_to_str, str_to_wsgi
from swift.common.utils import md5
from swift.obj.reconstructor import ObjectReconstructor
from test.probe.common import ECProbeTest
@ -58,6 +61,7 @@ class TestReconstructorRebuild(ECProbeTest):
def setUp(self):
super(TestReconstructorRebuild, self).setUp()
self.int_client = self.make_internal_client()
# create EC container
headers = {'X-Storage-Policy': self.policy.name}
client.put_container(self.url, self.token, self.container_name,
@ -82,6 +86,19 @@ class TestReconstructorRebuild(ECProbeTest):
'X-Backend-Durable-Timestamp', hdrs,
'Missing durable timestamp in %r' % self.frag_headers)
def proxy_get(self):
# Use internal-client instead of python-swiftclient, since we can't
# handle UTF-8 headers properly w/ swiftclient.
# Still a proxy-server tho!
status, headers, body = self.int_client.get_object(
self.account,
self.container_name.decode('utf-8'),
self.object_name.decode('utf-8'))
resp_checksum = md5(usedforsecurity=False)
for chunk in body:
resp_checksum.update(chunk)
return HeaderKeyDict(headers), resp_checksum.hexdigest()
def _format_node(self, node):
return '%s#%s' % (node['device'], node['index'])
@ -130,8 +147,12 @@ class TestReconstructorRebuild(ECProbeTest):
headers, etag = self.proxy_get()
self.assertEqual(self.etag, etag)
for key in self.headers_post:
self.assertIn(key, headers)
self.assertEqual(self.headers_post[key], headers[key])
# Since we use internal_client for the GET, headers come back
# as WSGI strings
wsgi_key = str_to_wsgi(key)
self.assertIn(wsgi_key, headers)
self.assertEqual(self.headers_post[key],
wsgi_to_str(headers[wsgi_key]))
# fire up reconstructor
for i in range(reconstructor_cycles):
@ -142,8 +163,10 @@ class TestReconstructorRebuild(ECProbeTest):
headers, etag = self.proxy_get()
self.assertEqual(self.etag, etag)
for key in self.headers_post:
self.assertIn(key, headers)
self.assertEqual(self.headers_post[key], headers[key])
wsgi_key = str_to_wsgi(key)
self.assertIn(wsgi_key, headers)
self.assertEqual(self.headers_post[key],
wsgi_to_str(headers[wsgi_key]))
# check all frags are intact, durable and have expected metadata
with self._annotate_failure_with_scenario(failed, non_durable):
frag_headers, frag_etags = self._assert_all_nodes_have_frag()
@ -268,8 +291,8 @@ class TestReconstructorRebuild(ECProbeTest):
while time.time() < timeout:
try:
self.proxy_get()
except ClientException as e:
if e.http_status == 404:
except UnexpectedResponse as e:
if e.resp.status_int == 404:
break
else:
raise
@ -408,12 +431,19 @@ class TestReconstructorRebuild(ECProbeTest):
client.get_object(self.url, self.token, self.container_name,
self.object_name)
self.assertEqual(503, cm.exception.http_status)
# ... but client GET succeeds
headers = client.head_object(self.url, self.token, self.container_name,
self.object_name)
# ... but client HEAD succeeds
path = self.int_client.make_path(
self.account,
self.container_name.decode('utf-8'),
self.object_name.decode('utf-8'))
resp = self.int_client.make_request(
'HEAD', path, {}, acceptable_statuses=(2,))
for key in self.headers_post:
self.assertIn(key, headers)
self.assertEqual(self.headers_post[key], headers[key])
wsgi_key = str_to_wsgi(key)
self.assertIn(wsgi_key, resp.headers)
self.assertEqual(self.headers_post[key],
wsgi_to_str(resp.headers[wsgi_key]))
# run the reconstructor without quarantine_threshold set
error_lines = []
@ -525,10 +555,6 @@ class TestReconstructorRebuild(ECProbeTest):
class TestReconstructorRebuildUTF8(TestReconstructorRebuild):
def _make_name(self, prefix):
# The non-ASCII chars in metadata cause test hangs in
# _assert_all_nodes_have_frag because of
# https://bugs.python.org/issue37093
raise unittest.SkipTest('This never got fixed on py3')
return b'%s\xc3\xa8-%s' % (
prefix.encode(), str(uuid.uuid4()).encode())

View File

@ -13,6 +13,9 @@
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
from http.client import parse_headers
import mock
import unittest
import socket
@ -136,6 +139,57 @@ class TestBufferedHTTP(unittest.TestCase):
server.wait()
self.assertEqual(request[0], b'GET /path HTTP/1.1\r\n')
def test_get_with_non_ascii(self):
bindsock = listen_zero()
request = []
def accept():
with Timeout(3):
sock, addr = bindsock.accept()
fp = sock.makefile('rwb')
request.append(fp.readline())
# Ignore request headers
while fp.readline() != b'\r\n':
pass
fp.write(b'HTTP/1.1 100 Continue\r\n\r\n')
fp.flush()
fp.write(b'\r\n'.join([
b'HTTP/1.1 200 OK',
b'X-Non-Ascii-M\xc3\xa9ta: \xe1\x88\xb4',
b'Content-Length: 8',
b'',
b'RESPONSE']))
fp.flush()
# Server can look for pipelined requests
request.append(fp.readline())
server = spawn(accept)
try:
address = '%s:%s' % ('127.0.0.1', bindsock.getsockname()[1])
conn = bufferedhttp.BufferedHTTPConnection(address)
conn.putrequest('GET', '/path')
conn.endheaders()
resp = conn.getexpect()
self.assertIsInstance(resp, bufferedhttp.BufferedHTTPResponse)
self.assertEqual(resp.status, 100)
self.assertEqual(resp.version, 11)
self.assertEqual(resp.reason, 'Continue')
# I don't think you're supposed to "read" a continue response
self.assertRaises(AssertionError, resp.read)
resp = conn.getresponse()
self.assertIsInstance(resp, bufferedhttp.BufferedHTTPResponse)
self.assertEqual(resp.length, 8)
self.assertEqual(resp.read(), b'RESPONSE')
self.assertEqual(resp.read(), b'')
self.assertEqual(resp.headers['X-Non-Ascii-M\xc3\xa9ta'],
'\xe1\x88\xb4')
# it's all HTTP/1.1 so we *could* pipeline, but we won't
conn.close()
finally:
server.wait()
self.assertEqual(request, [b'GET /path HTTP/1.1\r\n', b''])
def test_closed_response(self):
resp = bufferedhttp.BufferedHTTPResponse(None)
self.assertEqual(resp.status, 'UNKNOWN')
@ -172,6 +226,51 @@ class TestBufferedHTTP(unittest.TestCase):
'Exception %r for device=%r path=%r header=%r'
% (e, dev, path, header))
def test_headers_setter_with_dict(self):
resp = bufferedhttp.BufferedHTTPResponse(None)
resp.headers = {'a': 'b', 'c': 'd'}
self.assertEqual('b', resp.headers.get('a'))
self.assertEqual('d', resp.headers.get('c'))
resp.headers = {'a': 'b', 'c': 'd'}
self.assertEqual('b', resp.headers.get('a'))
self.assertEqual('d', resp.headers.get('c'))
# XXX: AttributeError: 'dict' object has no attribute 'get_all'
# self.assertEqual(['b'], resp.headers.get_all('a'))
def test_headers_setter_with_message(self):
msg = parse_headers(io.BytesIO(b'a: b\na: bb\nc: d\n\n'))
self.assertEqual('', msg.get_payload())
resp = bufferedhttp.BufferedHTTPResponse(None)
resp.headers = msg
self.assertEqual('b', resp.headers.get('a'))
self.assertEqual(['b', 'bb'], resp.headers.get_all('a'))
self.assertEqual('d', resp.headers.get('c'))
self.assertEqual([('a', 'b'), ('a', 'bb'), ('c', 'd')],
resp.headers.items())
resp.headers = msg
self.assertEqual([('a', 'b'), ('a', 'bb'), ('c', 'd')],
resp.headers.items())
def test_headers_setter_with_message_with_payload(self):
msg = parse_headers(io.BytesIO(b'\xc3: b\n\xc3: bb\nc: d\n\n'))
self.assertEqual('Ã: b\nÃ: bb\nc: d\n\n', msg.get_payload())
resp = bufferedhttp.BufferedHTTPResponse(None)
resp.headers = resp.msg = msg
self.assertEqual('b', resp.headers.get('\xc3'))
self.assertEqual(['b', 'bb'], resp.headers.get_all('\xc3'))
self.assertEqual('d', resp.headers.get('c'))
self.assertEqual([('\xc3', 'b'), ('\xc3', 'bb'), ('c', 'd')],
resp.headers.items())
resp.headers = msg
self.assertEqual('b', resp.headers.get('\xc3'))
self.assertEqual(['b', 'bb'], resp.headers.get_all('\xc3'))
self.assertEqual('d', resp.headers.get('c'))
self.assertEqual([('\xc3', 'b'), ('\xc3', 'bb'), ('c', 'd')],
resp.headers.items())
self.assertIs(resp.headers, resp.msg)
self.assertIs(resp._headers, resp.headers)
if __name__ == '__main__':
unittest.main()