From 71f83963c2b701bdc6ec67e97429dac661e8ffc6 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Wed, 24 Jul 2019 16:59:08 -0700 Subject: [PATCH] py3: fix non-ascii metadata handling in account-server Previously, we were storing the WSGI-style UTF-8-bytes-decoded-as-Latin-1 strings in the JSON field, and sending them back to eventlet directly. If running in a mixed py2/py3 cluster, replication would eventually get that back to the py2 server, and worse, the native-string version would get back to the py3 server! Then on GET or HEAD, eventlet would barf if any characters were outside the Latin-1 range. Closes-Bug: #1837805 Change-Id: I31d942e72fd7bfbb1db4dbb1dd522dff69969e5d --- swift/account/server.py | 36 +++++++---------------- swift/account/utils.py | 6 ++-- swift/container/server.py | 11 ++++--- test/unit/account/test_server.py | 49 +++++++++++++++++++++++++++++++- 4 files changed, 66 insertions(+), 36 deletions(-) diff --git a/swift/account/server.py b/swift/account/server.py index f01829032a..3b076054e6 100644 --- a/swift/account/server.py +++ b/swift/account/server.py @@ -21,8 +21,6 @@ from swift import gettext_ as _ from eventlet import Timeout -import six - import swift.common.db from swift.account.backend import AccountBroker, DATADIR from swift.account.utils import account_listing_response, get_response_headers @@ -110,6 +108,14 @@ class AccountController(BaseStorageServer): broker.delete_db(req_timestamp.internal) return self._deleted_response(broker, req, HTTPNoContent) + def _update_metadata(self, req, broker, req_timestamp): + metadata = { + wsgi_to_str(key): (wsgi_to_str(value), req_timestamp.internal) + for key, value in req.headers.items() + if is_sys_or_user_meta('account', key)} + if metadata: + broker.update_metadata(metadata, validate_metadata=True) + @public @timing_stats() def PUT(self, req): @@ -169,24 +175,7 @@ class AccountController(BaseStorageServer): broker.update_put_timestamp(timestamp.internal) if broker.is_deleted(): return HTTPConflict(request=req) - metadata = {} - if six.PY2: - metadata.update((key, (value, timestamp.internal)) - for key, value in req.headers.items() - if is_sys_or_user_meta('account', key)) - else: - for key, value in req.headers.items(): - if is_sys_or_user_meta('account', key): - # Cast to native strings, so that json inside - # updata_metadata eats the data. - try: - value = value.encode('latin-1').decode('utf-8') - except UnicodeDecodeError: - raise HTTPBadRequest( - 'Metadata must be valid UTF-8') - metadata[key] = (value, timestamp.internal) - if metadata: - broker.update_metadata(metadata, validate_metadata=True) + self._update_metadata(req, broker, timestamp) if created: return HTTPCreated(request=req) else: @@ -287,12 +276,7 @@ class AccountController(BaseStorageServer): broker = self._get_account_broker(drive, part, account) if broker.is_deleted(): return self._deleted_response(broker, req, HTTPNotFound) - metadata = {} - metadata.update((key, (value, req_timestamp.internal)) - for key, value in req.headers.items() - if is_sys_or_user_meta('account', key)) - if metadata: - broker.update_metadata(metadata, validate_metadata=True) + self._update_metadata(req, broker, req_timestamp) return HTTPNoContent(request=req) def __call__(self, env, start_response): diff --git a/swift/account/utils.py b/swift/account/utils.py index c6f6e2b877..4e5ebe1add 100644 --- a/swift/account/utils.py +++ b/swift/account/utils.py @@ -18,7 +18,7 @@ import json import six from swift.common.middleware import listing_formats -from swift.common.swob import HTTPOk, HTTPNoContent +from swift.common.swob import HTTPOk, HTTPNoContent, str_to_wsgi from swift.common.utils import Timestamp from swift.common.storage_policy import POLICIES @@ -64,8 +64,8 @@ def get_response_headers(broker): for key, value in stats.items(): header_name = header_prefix % key.replace('_', '-') resp_headers[header_name] = value - resp_headers.update((key, value) - for key, (value, timestamp) in + resp_headers.update((str_to_wsgi(key), str_to_wsgi(value)) + for key, (value, _timestamp) in broker.metadata.items() if value != '') return resp_headers diff --git a/swift/container/server.py b/swift/container/server.py index e89e040dbd..0ae348d0a0 100644 --- a/swift/container/server.py +++ b/swift/container/server.py @@ -415,12 +415,11 @@ class ContainerController(BaseStorageServer): return created def _update_metadata(self, req, broker, req_timestamp, method): - metadata = {} - metadata.update( - (wsgi_to_str(key), (wsgi_to_str(value), req_timestamp.internal)) + metadata = { + wsgi_to_str(key): (wsgi_to_str(value), req_timestamp.internal) for key, value in req.headers.items() - if key.lower() in self.save_headers or - is_sys_or_user_meta('container', key)) + if key.lower() in self.save_headers + or is_sys_or_user_meta('container', key)} if metadata: if 'X-Container-Sync-To' in metadata: if 'X-Container-Sync-To' not in broker.metadata or \ @@ -706,7 +705,7 @@ class ContainerController(BaseStorageServer): def create_listing(self, req, out_content_type, info, resp_headers, metadata, container_list, container): - for key, (value, timestamp) in metadata.items(): + for key, (value, _timestamp) in metadata.items(): if value and (key.lower() in self.save_headers or is_sys_or_user_meta('container', key)): resp_headers[str_to_wsgi(key)] = str_to_wsgi(value) diff --git a/test/unit/account/test_server.py b/test/unit/account/test_server.py index a5e4d2306d..31359140c6 100644 --- a/test/unit/account/test_server.py +++ b/test/unit/account/test_server.py @@ -32,6 +32,7 @@ import xml.dom.minidom from swift import __version__ as swift_version from swift.common.swob import (Request, WsgiBytesIO, HTTPNoContent) from swift.common import constraints +from swift.account.backend import AccountBroker from swift.account.server import AccountController from swift.common.utils import (normalize_timestamp, replication, public, mkdirs, storage_directory, Timestamp) @@ -49,7 +50,8 @@ class TestAccountController(unittest.TestCase): self.testdir = os.path.join(self.testdir_base, 'account_server') mkdirs(os.path.join(self.testdir, 'sda1')) self.controller = AccountController( - {'devices': self.testdir, 'mount_check': 'false'}) + {'devices': self.testdir, 'mount_check': 'false'}, + logger=debug_logger()) def tearDown(self): """Tear down for testing swift.account.server.AccountController""" @@ -522,6 +524,51 @@ class TestAccountController(unittest.TestCase): resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 202) + def test_utf8_metadata(self): + ts_str = normalize_timestamp(1) + + def get_test_meta(method, headers): + # Set metadata header + headers.setdefault('X-Timestamp', ts_str) + req = Request.blank( + '/sda1/p/a', environ={'REQUEST_METHOD': method}, + headers=headers) + resp = req.get_response(self.controller) + self.assertIn(resp.status_int, (201, 202, 204)) + db_path = os.path.join(*next( + (dir_name, file_name) + for dir_name, _, files in os.walk(self.testdir) + for file_name in files if file_name.endswith('.db') + )) + broker = AccountBroker(db_path) + # Why not use broker.metadata, you ask? Because we want to get + # as close to the on-disk format as is reasonable. + result = json.loads(broker.get_raw_metadata()) + # Clear it out for the next run + with broker.get() as conn: + conn.execute("UPDATE account_stat SET metadata=''") + conn.commit() + return result + + wsgi_str = '\xf0\x9f\x91\x8d' + uni_str = u'\U0001f44d' + + self.assertEqual( + get_test_meta('PUT', {'x-account-sysmeta-' + wsgi_str: wsgi_str}), + {u'X-Account-Sysmeta-' + uni_str: [uni_str, ts_str]}) + + self.assertEqual( + get_test_meta('PUT', {'x-account-meta-' + wsgi_str: wsgi_str}), + {u'X-Account-Meta-' + uni_str: [uni_str, ts_str]}) + + self.assertEqual( + get_test_meta('POST', {'x-account-sysmeta-' + wsgi_str: wsgi_str}), + {u'X-Account-Sysmeta-' + uni_str: [uni_str, ts_str]}) + + self.assertEqual( + get_test_meta('POST', {'x-account-meta-' + wsgi_str: wsgi_str}), + {u'X-Account-Meta-' + uni_str: [uni_str, ts_str]}) + def test_PUT_GET_metadata(self): # Set metadata header req = Request.blank(