Filter Etag key from ssync replication-headers

ssync rx sends a header X-Backend-Replication-Headers whose value is a
list of headers that the source object has. This list extends the list
of allowed headers on the target object server, so that the target
object metadata is faithfully reconstructed to match the source.

Unfortunately the combination of lower() and title() operations on
header keys results in the source 'ETag' value being added to the target
metadata under the key 'Etag' in addition to the 'ETag' key that the
receiving server adds (note different capitilization), both having
the same value.

The spurious 'Etag' metadata is potentially confusing for humans
inspecting the object metadata and complicates tests that wish to
assert the equality of two object metadata dicts. See for example the
test in test_ssync_sender.py that this patch cleans up.

Furthermore, the possibility of having both Etag and ETag keys has
required a workaround in the EC reconstructor [1].

[1] reconstructor fix change id: Ie59ad93a67a7f439c9a84cd9cff31540f97f334a

Change-Id: I0c89cf7924a4471bb6d268b5ef3884e2d2cb4286
This commit is contained in:
Alistair Coles 2015-04-15 16:17:23 +01:00 committed by Clay Gerrard
parent 2d39102b66
commit 6f89f71f9b
4 changed files with 144 additions and 5 deletions

View File

@ -319,7 +319,11 @@ class Receiver(object):
header = header.strip().lower()
value = value.strip()
subreq.headers[header] = value
replication_headers.append(header)
if header != 'etag':
# make sure ssync doesn't cause 'Etag' to be added to
# obj metadata in addition to 'ETag' which object server
# sets (note capitalization)
replication_headers.append(header)
if header == 'content-length':
content_length = int(value)
# Establish subrequest body, if needed.

View File

@ -1197,6 +1197,81 @@ class TestObjectController(unittest.TestCase):
resp = req.get_response(self.object_controller)
check_response(resp)
def test_PUT_with_replication_headers(self):
# check that otherwise disallowed headers are accepted when specified
# by X-Backend-Replication-Headers
# first PUT object
timestamp1 = normalize_timestamp(time())
req = Request.blank(
'/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'},
headers={'X-Timestamp': timestamp1,
'Content-Type': 'text/plain',
'Content-Length': '14',
'Etag': '1000d172764c9dbc3a5798a67ec5bb76',
'Custom-Header': 'custom1',
'X-Object-Meta-1': 'meta1',
'X-Static-Large-Object': 'False'})
req.body = 'VERIFY SYSMETA'
# restrict set of allowed headers on this server
with mock.patch.object(self.object_controller, 'allowed_headers',
['Custom-Header']):
resp = req.get_response(self.object_controller)
self.assertEquals(resp.status_int, 201)
objfile = os.path.join(
self.testdir, 'sda1',
storage_directory(diskfile.get_data_dir(0), 'p',
hash_path('a', 'c', 'o')),
timestamp1 + '.data')
# X-Static-Large-Object is disallowed.
self.assertEquals(diskfile.read_metadata(objfile),
{'X-Timestamp': timestamp1,
'Content-Type': 'text/plain',
'Content-Length': '14',
'ETag': '1000d172764c9dbc3a5798a67ec5bb76',
'name': '/a/c/o',
'Custom-Header': 'custom1',
'X-Object-Meta-1': 'meta1'})
# PUT object again with X-Backend-Replication-Headers
timestamp2 = normalize_timestamp(time())
req = Request.blank(
'/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'},
headers={'X-Timestamp': timestamp2,
'Content-Type': 'text/plain',
'Content-Length': '14',
'Etag': '1000d172764c9dbc3a5798a67ec5bb76',
'Custom-Header': 'custom1',
'X-Object-Meta-1': 'meta1',
'X-Static-Large-Object': 'False',
'X-Backend-Replication-Headers':
'X-Static-Large-Object'})
req.body = 'VERIFY SYSMETA'
with mock.patch.object(self.object_controller, 'allowed_headers',
['Custom-Header']):
resp = req.get_response(self.object_controller)
self.assertEquals(resp.status_int, 201)
objfile = os.path.join(
self.testdir, 'sda1',
storage_directory(diskfile.get_data_dir(0), 'p',
hash_path('a', 'c', 'o')),
timestamp2 + '.data')
# X-Static-Large-Object should be copied since it is now allowed by
# replication headers.
self.assertEquals(diskfile.read_metadata(objfile),
{'X-Timestamp': timestamp2,
'Content-Type': 'text/plain',
'Content-Length': '14',
'ETag': '1000d172764c9dbc3a5798a67ec5bb76',
'name': '/a/c/o',
'Custom-Header': 'custom1',
'X-Object-Meta-1': 'meta1',
'X-Static-Large-Object': 'False'})
def test_PUT_container_connection(self):
def mock_http_connect(response, with_exc=False):

View File

@ -1190,6 +1190,7 @@ class TestReceiver(unittest.TestCase):
':UPDATES: START\r\n'
'PUT /a/c/o\r\n'
'Content-Length: 1\r\n'
'Etag: c4ca4238a0b923820dcc509a6f75849b\r\n'
'X-Timestamp: 1364456113.12344\r\n'
'X-Object-Meta-Test1: one\r\n'
'Content-Encoding: gzip\r\n'
@ -1209,6 +1210,7 @@ class TestReceiver(unittest.TestCase):
self.assertEqual(req.path, '/device/partition/a/c/o')
self.assertEqual(req.content_length, 1)
self.assertEqual(req.headers, {
'Etag': 'c4ca4238a0b923820dcc509a6f75849b',
'Content-Length': '1',
'X-Timestamp': '1364456113.12344',
'X-Object-Meta-Test1': 'one',
@ -1220,7 +1222,68 @@ class TestReceiver(unittest.TestCase):
'X-Backend-Replication-Headers': (
'content-length x-timestamp x-object-meta-test1 '
'content-encoding specialty-header')})
self.assertEqual(req.read_body, '1')
def test_UPDATES_PUT_replication_headers(self):
self.controller.logger = mock.MagicMock()
# sanity check - regular PUT will not persist Specialty-Header
req = swob.Request.blank(
'/sda1/0/a/c/o1', body='1',
environ={'REQUEST_METHOD': 'PUT'},
headers={'Content-Length': '1',
'Content-Type': 'text/plain',
'Etag': 'c4ca4238a0b923820dcc509a6f75849b',
'X-Timestamp': '1364456113.12344',
'X-Object-Meta-Test1': 'one',
'Content-Encoding': 'gzip',
'Specialty-Header': 'value'})
resp = req.get_response(self.controller)
self.assertEqual(resp.status_int, 201)
df = self.controller.get_diskfile(
'sda1', '0', 'a', 'c', 'o1', POLICIES.default)
df.open()
self.assertFalse('Specialty-Header' in df.get_metadata())
# an SSYNC request can override PUT header filtering...
req = swob.Request.blank(
'/sda1/0',
environ={'REQUEST_METHOD': 'SSYNC'},
body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n'
':UPDATES: START\r\n'
'PUT /a/c/o2\r\n'
'Content-Length: 1\r\n'
'Content-Type: text/plain\r\n'
'Etag: c4ca4238a0b923820dcc509a6f75849b\r\n'
'X-Timestamp: 1364456113.12344\r\n'
'X-Object-Meta-Test1: one\r\n'
'Content-Encoding: gzip\r\n'
'Specialty-Header: value\r\n'
'\r\n'
'1')
resp = req.get_response(self.controller)
self.assertEqual(
self.body_lines(resp.body),
[':MISSING_CHECK: START', ':MISSING_CHECK: END',
':UPDATES: START', ':UPDATES: END'])
self.assertEqual(resp.status_int, 200)
# verify diskfile has metadata permitted by replication headers
# including Specialty-Header
df = self.controller.get_diskfile(
'sda1', '0', 'a', 'c', 'o2', POLICIES.default)
df.open()
for chunk in df.reader():
self.assertEqual('1', chunk)
expected = {'ETag': 'c4ca4238a0b923820dcc509a6f75849b',
'Content-Length': '1',
'Content-Type': 'text/plain',
'X-Timestamp': '1364456113.12344',
'X-Object-Meta-Test1': 'one',
'Content-Encoding': 'gzip',
'Specialty-Header': 'value',
'name': '/a/c/o2'}
actual = df.get_metadata()
self.assertEqual(expected, actual)
def test_UPDATES_with_storage_policy(self):
# update router post policy patch

View File

@ -1453,9 +1453,6 @@ class TestBaseSsync(BaseTestSender):
continue
else:
self.assertEqual(v, rx_metadata.pop(k), k)
# ugh, ssync duplicates ETag with Etag so have to clear it out here
if 'Etag' in rx_metadata:
rx_metadata.pop('Etag')
self.assertFalse(rx_metadata)
expected_body = '%s___%s' % (tx_df._name, frag_index)
actual_body = ''.join([chunk for chunk in rx_df.reader()])