diff --git a/swift/common/middleware/formpost.py b/swift/common/middleware/formpost.py index 24b79a13ae..a30fa1a2f7 100644 --- a/swift/common/middleware/formpost.py +++ b/swift/common/middleware/formpost.py @@ -113,7 +113,6 @@ the file are simply ignored). __all__ = ['FormPost', 'filter_factory', 'READ_CHUNK_SIZE', 'MAX_VALUE_LENGTH'] import hmac -import rfc822 from hashlib import sha1 from time import time from urllib import quote @@ -121,7 +120,8 @@ from urllib import quote from swift.common.exceptions import MimeInvalid from swift.common.middleware.tempurl import get_tempurl_keys_from_metadata from swift.common.utils import streq_const_time, register_swift_info, \ - parse_content_disposition, iter_multipart_mime_documents + parse_content_disposition, parse_mime_headers, \ + iter_multipart_mime_documents from swift.common.wsgi import make_pre_authed_env from swift.common.swob import HTTPUnauthorized from swift.proxy.controllers.base import get_account_info, get_container_info @@ -254,9 +254,9 @@ class FormPost(object): file_count = 0 for fp in iter_multipart_mime_documents( env['wsgi.input'], boundary, read_chunk_size=READ_CHUNK_SIZE): - hdrs = rfc822.Message(fp, 0) + hdrs = parse_mime_headers(fp) disp, attrs = parse_content_disposition( - hdrs.getheader('Content-Disposition', '')) + hdrs.get('Content-Disposition', '')) if disp == 'form-data' and attrs.get('filename'): file_count += 1 try: diff --git a/swift/common/utils.py b/swift/common/utils.py index 2020908d82..da057e2215 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -25,13 +25,13 @@ import operator import os import pwd import re -import rfc822 import sys import threading as stdlib_threading import time import uuid import functools import weakref +import email.parser from hashlib import md5, sha1 from random import random, shuffle from urllib import quote as _quote @@ -3446,6 +3446,29 @@ def iter_multipart_mime_documents(wsgi_input, boundary, read_chunk_size=4096): input_buffer = it.input_buffer +def parse_mime_headers(doc_file): + """ + Takes a file-like object containing a MIME document and returns a + HeaderKeyDict containing the headers. The body of the message is not + consumed: the position in doc_file is left at the beginning of the body. + + This function was inspired by the Python standard library's + http.client.parse_headers. + + :param doc_file: binary file-like object containing a MIME document + :returns: a swift.common.swob.HeaderKeyDict containing the headers + """ + from swift.common.swob import HeaderKeyDict # avoid circular import + headers = [] + while True: + line = doc_file.readline() + headers.append(line) + if line in (b'\r\n', b'\n', b''): + break + header_string = b''.join(headers) + return HeaderKeyDict(email.parser.Parser().parsestr(header_string)) + + def mime_to_document_iters(input_file, boundary, read_chunk_size=4096): """ Takes a file-like object containing a multipart MIME document and @@ -3460,7 +3483,7 @@ def mime_to_document_iters(input_file, boundary, read_chunk_size=4096): read_chunk_size) for i, doc_file in enumerate(doc_files): # this consumes the headers and leaves just the body in doc_file - headers = rfc822.Message(doc_file, 0) + headers = parse_mime_headers(doc_file) yield (headers, doc_file) @@ -3596,7 +3619,7 @@ def multipart_byteranges_to_document_iters(input_file, boundary, for headers, body in mime_to_document_iters(input_file, boundary, read_chunk_size): first_byte, last_byte, length = parse_content_range( - headers.getheader('content-range')) + headers.get('content-range')) yield (first_byte, last_byte, length, headers.items(), body) diff --git a/swift/obj/server.py b/swift/obj/server.py index 5bc76edd5d..65993327d9 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -21,7 +21,6 @@ import os import multiprocessing import time import traceback -import rfc822 import socket import math from swift import gettext_ as _ @@ -33,7 +32,8 @@ from eventlet.greenthread import spawn from swift.common.utils import public, get_logger, \ config_true_value, timing_stats, replication, \ normalize_delete_at_timestamp, get_log_line, Timestamp, \ - get_expirer_container, iter_multipart_mime_documents + get_expirer_container, parse_mime_headers, \ + iter_multipart_mime_documents from swift.common.bufferedhttp import http_connect from swift.common.constraints import check_object_creation, \ valid_timestamp, check_utf8 @@ -60,7 +60,7 @@ def iter_mime_headers_and_bodies(wsgi_input, mime_boundary, read_chunk_size): wsgi_input, mime_boundary, read_chunk_size) for file_like in mime_documents_iter: - hdrs = HeaderKeyDict(rfc822.Message(file_like, 0)) + hdrs = parse_mime_headers(file_like) yield (hdrs, file_like) diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 3683f288ef..02f8191fdc 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -34,7 +34,7 @@ import sys import json import math -from six import StringIO +from six import BytesIO, StringIO from six.moves.queue import Queue, Empty from six.moves import range from textwrap import dedent @@ -4927,6 +4927,36 @@ class TestIterMultipartMimeDocuments(unittest.TestCase): self.assertTrue(exc is not None) +class TestParseMimeHeaders(unittest.TestCase): + + def test_parse_mime_headers(self): + doc_file = BytesIO(b"""Content-Disposition: form-data; name="file_size" +Foo: Bar +NOT-title-cAsED: quux +Connexion: =?iso8859-1?q?r=E9initialis=E9e_par_l=27homologue?= +Status: =?utf-8?b?5byA5aeL6YCa6L+H5a+56LGh5aSN5Yi2?= +Latin-1: Resincronizaci\xf3n realizada con \xe9xito +Utf-8: \xd0\xba\xd0\xbe\xd0\xbd\xd1\x82\xd0\xb5\xd0\xb9\xd0\xbd\xd0\xb5\xd1\x80 + +This is the body +""") + headers = utils.parse_mime_headers(doc_file) + expected_headers = { + 'Content-Disposition': 'form-data; name="file_size"', + 'Foo': "Bar", + 'Not-Title-Cased': "quux", + # Encoded-word or non-ASCII values are treated just like any other + # bytestring (at least for now) + 'Connexion': "=?iso8859-1?q?r=E9initialis=E9e_par_l=27homologue?=", + 'Status': "=?utf-8?b?5byA5aeL6YCa6L+H5a+56LGh5aSN5Yi2?=", + 'Latin-1': "Resincronizaci\xf3n realizada con \xe9xito", + 'Utf-8': ("\xd0\xba\xd0\xbe\xd0\xbd\xd1\x82\xd0\xb5\xd0\xb9\xd0" + "\xbd\xd0\xb5\xd1\x80") + } + self.assertEqual(expected_headers, headers) + self.assertEqual(b"This is the body\n", doc_file.read()) + + class FakeResponse(object): def __init__(self, status, headers, body): self.status = status diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 94613f6ea6..1c8d346dd7 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -20,7 +20,6 @@ import logging import math import os import pickle -import rfc822 import sys import unittest from contextlib import closing, contextmanager, nested @@ -48,7 +47,8 @@ from six import BytesIO from six import StringIO from six.moves import range from swift.common.utils import hash_path, json, storage_directory, \ - parse_content_type, iter_multipart_mime_documents, public + parse_content_type, parse_mime_headers, \ + iter_multipart_mime_documents, public from test.unit import ( connect_tcp, readuntil2crlfs, FakeLogger, fake_http_connect, FakeRing, @@ -1438,7 +1438,7 @@ class TestObjectController(unittest.TestCase): got_mime_docs = [] for mime_doc_fh in iter_multipart_mime_documents(StringIO(res.body), boundary): - headers = HeaderKeyDict(rfc822.Message(mime_doc_fh, 0).items()) + headers = parse_mime_headers(mime_doc_fh) body = mime_doc_fh.read() got_mime_docs.append((headers, body)) self.assertEqual(len(got_mime_docs), 3) @@ -1635,7 +1635,7 @@ class TestObjectController(unittest.TestCase): got_byteranges = [] for mime_doc_fh in iter_multipart_mime_documents(StringIO(body), boundary): - rfc822.Message(mime_doc_fh, 0) + parse_mime_headers(mime_doc_fh) body = mime_doc_fh.read() got_byteranges.append(body) @@ -1667,7 +1667,7 @@ class TestObjectController(unittest.TestCase): got_byteranges = [] for mime_doc_fh in iter_multipart_mime_documents(StringIO(body), boundary): - rfc822.Message(mime_doc_fh, 0) + parse_mime_headers(mime_doc_fh) body = mime_doc_fh.read() got_byteranges.append(body) @@ -1704,7 +1704,7 @@ class TestObjectController(unittest.TestCase): got_byteranges = [] for mime_doc_fh in iter_multipart_mime_documents(StringIO(body), boundary): - rfc822.Message(mime_doc_fh, 0) + parse_mime_headers(mime_doc_fh) body = mime_doc_fh.read() got_byteranges.append(body) @@ -1741,7 +1741,7 @@ class TestObjectController(unittest.TestCase): got_byteranges = [] for mime_doc_fh in iter_multipart_mime_documents(StringIO(body), boundary): - rfc822.Message(mime_doc_fh, 0) + parse_mime_headers(mime_doc_fh) body = mime_doc_fh.read() got_byteranges.append(body)