py3: fix non-ascii metadata handling in account-server

Previously, we were storing the WSGI-style UTF-8-bytes-decoded-as-Latin-1
strings in the JSON field, and sending them back to eventlet directly.
If running in a mixed py2/py3 cluster, replication would eventually get
that back to the py2 server, and worse, the native-string version would
get back to the py3 server! Then on GET or HEAD, eventlet would barf
if any characters were outside the Latin-1 range.

Closes-Bug: #1837805
Change-Id: I31d942e72fd7bfbb1db4dbb1dd522dff69969e5d
This commit is contained in:
Tim Burke 2019-07-24 16:59:08 -07:00
parent d6e790d1b7
commit 71f83963c2
4 changed files with 66 additions and 36 deletions

View File

@ -21,8 +21,6 @@ from swift import gettext_ as _
from eventlet import Timeout from eventlet import Timeout
import six
import swift.common.db import swift.common.db
from swift.account.backend import AccountBroker, DATADIR from swift.account.backend import AccountBroker, DATADIR
from swift.account.utils import account_listing_response, get_response_headers from swift.account.utils import account_listing_response, get_response_headers
@ -110,6 +108,14 @@ class AccountController(BaseStorageServer):
broker.delete_db(req_timestamp.internal) broker.delete_db(req_timestamp.internal)
return self._deleted_response(broker, req, HTTPNoContent) return self._deleted_response(broker, req, HTTPNoContent)
def _update_metadata(self, req, broker, req_timestamp):
metadata = {
wsgi_to_str(key): (wsgi_to_str(value), req_timestamp.internal)
for key, value in req.headers.items()
if is_sys_or_user_meta('account', key)}
if metadata:
broker.update_metadata(metadata, validate_metadata=True)
@public @public
@timing_stats() @timing_stats()
def PUT(self, req): def PUT(self, req):
@ -169,24 +175,7 @@ class AccountController(BaseStorageServer):
broker.update_put_timestamp(timestamp.internal) broker.update_put_timestamp(timestamp.internal)
if broker.is_deleted(): if broker.is_deleted():
return HTTPConflict(request=req) return HTTPConflict(request=req)
metadata = {} self._update_metadata(req, broker, timestamp)
if six.PY2:
metadata.update((key, (value, timestamp.internal))
for key, value in req.headers.items()
if is_sys_or_user_meta('account', key))
else:
for key, value in req.headers.items():
if is_sys_or_user_meta('account', key):
# Cast to native strings, so that json inside
# updata_metadata eats the data.
try:
value = value.encode('latin-1').decode('utf-8')
except UnicodeDecodeError:
raise HTTPBadRequest(
'Metadata must be valid UTF-8')
metadata[key] = (value, timestamp.internal)
if metadata:
broker.update_metadata(metadata, validate_metadata=True)
if created: if created:
return HTTPCreated(request=req) return HTTPCreated(request=req)
else: else:
@ -287,12 +276,7 @@ class AccountController(BaseStorageServer):
broker = self._get_account_broker(drive, part, account) broker = self._get_account_broker(drive, part, account)
if broker.is_deleted(): if broker.is_deleted():
return self._deleted_response(broker, req, HTTPNotFound) return self._deleted_response(broker, req, HTTPNotFound)
metadata = {} self._update_metadata(req, broker, req_timestamp)
metadata.update((key, (value, req_timestamp.internal))
for key, value in req.headers.items()
if is_sys_or_user_meta('account', key))
if metadata:
broker.update_metadata(metadata, validate_metadata=True)
return HTTPNoContent(request=req) return HTTPNoContent(request=req)
def __call__(self, env, start_response): def __call__(self, env, start_response):

View File

@ -18,7 +18,7 @@ import json
import six import six
from swift.common.middleware import listing_formats from swift.common.middleware import listing_formats
from swift.common.swob import HTTPOk, HTTPNoContent from swift.common.swob import HTTPOk, HTTPNoContent, str_to_wsgi
from swift.common.utils import Timestamp from swift.common.utils import Timestamp
from swift.common.storage_policy import POLICIES from swift.common.storage_policy import POLICIES
@ -64,8 +64,8 @@ def get_response_headers(broker):
for key, value in stats.items(): for key, value in stats.items():
header_name = header_prefix % key.replace('_', '-') header_name = header_prefix % key.replace('_', '-')
resp_headers[header_name] = value resp_headers[header_name] = value
resp_headers.update((key, value) resp_headers.update((str_to_wsgi(key), str_to_wsgi(value))
for key, (value, timestamp) in for key, (value, _timestamp) in
broker.metadata.items() if value != '') broker.metadata.items() if value != '')
return resp_headers return resp_headers

View File

@ -415,12 +415,11 @@ class ContainerController(BaseStorageServer):
return created return created
def _update_metadata(self, req, broker, req_timestamp, method): def _update_metadata(self, req, broker, req_timestamp, method):
metadata = {} metadata = {
metadata.update( wsgi_to_str(key): (wsgi_to_str(value), req_timestamp.internal)
(wsgi_to_str(key), (wsgi_to_str(value), req_timestamp.internal))
for key, value in req.headers.items() for key, value in req.headers.items()
if key.lower() in self.save_headers or if key.lower() in self.save_headers
is_sys_or_user_meta('container', key)) or is_sys_or_user_meta('container', key)}
if metadata: if metadata:
if 'X-Container-Sync-To' in metadata: if 'X-Container-Sync-To' in metadata:
if 'X-Container-Sync-To' not in broker.metadata or \ if 'X-Container-Sync-To' not in broker.metadata or \
@ -706,7 +705,7 @@ class ContainerController(BaseStorageServer):
def create_listing(self, req, out_content_type, info, resp_headers, def create_listing(self, req, out_content_type, info, resp_headers,
metadata, container_list, container): metadata, container_list, container):
for key, (value, timestamp) in metadata.items(): for key, (value, _timestamp) in metadata.items():
if value and (key.lower() in self.save_headers or if value and (key.lower() in self.save_headers or
is_sys_or_user_meta('container', key)): is_sys_or_user_meta('container', key)):
resp_headers[str_to_wsgi(key)] = str_to_wsgi(value) resp_headers[str_to_wsgi(key)] = str_to_wsgi(value)

View File

@ -32,6 +32,7 @@ import xml.dom.minidom
from swift import __version__ as swift_version from swift import __version__ as swift_version
from swift.common.swob import (Request, WsgiBytesIO, HTTPNoContent) from swift.common.swob import (Request, WsgiBytesIO, HTTPNoContent)
from swift.common import constraints from swift.common import constraints
from swift.account.backend import AccountBroker
from swift.account.server import AccountController from swift.account.server import AccountController
from swift.common.utils import (normalize_timestamp, replication, public, from swift.common.utils import (normalize_timestamp, replication, public,
mkdirs, storage_directory, Timestamp) mkdirs, storage_directory, Timestamp)
@ -49,7 +50,8 @@ class TestAccountController(unittest.TestCase):
self.testdir = os.path.join(self.testdir_base, 'account_server') self.testdir = os.path.join(self.testdir_base, 'account_server')
mkdirs(os.path.join(self.testdir, 'sda1')) mkdirs(os.path.join(self.testdir, 'sda1'))
self.controller = AccountController( self.controller = AccountController(
{'devices': self.testdir, 'mount_check': 'false'}) {'devices': self.testdir, 'mount_check': 'false'},
logger=debug_logger())
def tearDown(self): def tearDown(self):
"""Tear down for testing swift.account.server.AccountController""" """Tear down for testing swift.account.server.AccountController"""
@ -522,6 +524,51 @@ class TestAccountController(unittest.TestCase):
resp = req.get_response(self.controller) resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 202) self.assertEqual(resp.status_int, 202)
def test_utf8_metadata(self):
ts_str = normalize_timestamp(1)
def get_test_meta(method, headers):
# Set metadata header
headers.setdefault('X-Timestamp', ts_str)
req = Request.blank(
'/sda1/p/a', environ={'REQUEST_METHOD': method},
headers=headers)
resp = req.get_response(self.controller)
self.assertIn(resp.status_int, (201, 202, 204))
db_path = os.path.join(*next(
(dir_name, file_name)
for dir_name, _, files in os.walk(self.testdir)
for file_name in files if file_name.endswith('.db')
))
broker = AccountBroker(db_path)
# Why not use broker.metadata, you ask? Because we want to get
# as close to the on-disk format as is reasonable.
result = json.loads(broker.get_raw_metadata())
# Clear it out for the next run
with broker.get() as conn:
conn.execute("UPDATE account_stat SET metadata=''")
conn.commit()
return result
wsgi_str = '\xf0\x9f\x91\x8d'
uni_str = u'\U0001f44d'
self.assertEqual(
get_test_meta('PUT', {'x-account-sysmeta-' + wsgi_str: wsgi_str}),
{u'X-Account-Sysmeta-' + uni_str: [uni_str, ts_str]})
self.assertEqual(
get_test_meta('PUT', {'x-account-meta-' + wsgi_str: wsgi_str}),
{u'X-Account-Meta-' + uni_str: [uni_str, ts_str]})
self.assertEqual(
get_test_meta('POST', {'x-account-sysmeta-' + wsgi_str: wsgi_str}),
{u'X-Account-Sysmeta-' + uni_str: [uni_str, ts_str]})
self.assertEqual(
get_test_meta('POST', {'x-account-meta-' + wsgi_str: wsgi_str}),
{u'X-Account-Meta-' + uni_str: [uni_str, ts_str]})
def test_PUT_GET_metadata(self): def test_PUT_GET_metadata(self):
# Set metadata header # Set metadata header
req = Request.blank( req = Request.blank(