From 6f89f71f9ba11f7b4126b80076a0f13792332e1e Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Wed, 15 Apr 2015 16:17:23 +0100 Subject: [PATCH] Filter Etag key from ssync replication-headers ssync rx sends a header X-Backend-Replication-Headers whose value is a list of headers that the source object has. This list extends the list of allowed headers on the target object server, so that the target object metadata is faithfully reconstructed to match the source. Unfortunately the combination of lower() and title() operations on header keys results in the source 'ETag' value being added to the target metadata under the key 'Etag' in addition to the 'ETag' key that the receiving server adds (note different capitilization), both having the same value. The spurious 'Etag' metadata is potentially confusing for humans inspecting the object metadata and complicates tests that wish to assert the equality of two object metadata dicts. See for example the test in test_ssync_sender.py that this patch cleans up. Furthermore, the possibility of having both Etag and ETag keys has required a workaround in the EC reconstructor [1]. [1] reconstructor fix change id: Ie59ad93a67a7f439c9a84cd9cff31540f97f334a Change-Id: I0c89cf7924a4471bb6d268b5ef3884e2d2cb4286 --- swift/obj/ssync_receiver.py | 6 ++- test/unit/obj/test_server.py | 75 ++++++++++++++++++++++++++++ test/unit/obj/test_ssync_receiver.py | 65 +++++++++++++++++++++++- test/unit/obj/test_ssync_sender.py | 3 -- 4 files changed, 144 insertions(+), 5 deletions(-) diff --git a/swift/obj/ssync_receiver.py b/swift/obj/ssync_receiver.py index 6aeb4c401f..8bff25c8ba 100644 --- a/swift/obj/ssync_receiver.py +++ b/swift/obj/ssync_receiver.py @@ -319,7 +319,11 @@ class Receiver(object): header = header.strip().lower() value = value.strip() subreq.headers[header] = value - replication_headers.append(header) + if header != 'etag': + # make sure ssync doesn't cause 'Etag' to be added to + # obj metadata in addition to 'ETag' which object server + # sets (note capitalization) + replication_headers.append(header) if header == 'content-length': content_length = int(value) # Establish subrequest body, if needed. diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index df3dc3d6ab..9301d9f8d6 100755 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -1197,6 +1197,81 @@ class TestObjectController(unittest.TestCase): resp = req.get_response(self.object_controller) check_response(resp) + def test_PUT_with_replication_headers(self): + # check that otherwise disallowed headers are accepted when specified + # by X-Backend-Replication-Headers + + # first PUT object + timestamp1 = normalize_timestamp(time()) + req = Request.blank( + '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': timestamp1, + 'Content-Type': 'text/plain', + 'Content-Length': '14', + 'Etag': '1000d172764c9dbc3a5798a67ec5bb76', + 'Custom-Header': 'custom1', + 'X-Object-Meta-1': 'meta1', + 'X-Static-Large-Object': 'False'}) + req.body = 'VERIFY SYSMETA' + + # restrict set of allowed headers on this server + with mock.patch.object(self.object_controller, 'allowed_headers', + ['Custom-Header']): + resp = req.get_response(self.object_controller) + self.assertEquals(resp.status_int, 201) + + objfile = os.path.join( + self.testdir, 'sda1', + storage_directory(diskfile.get_data_dir(0), 'p', + hash_path('a', 'c', 'o')), + timestamp1 + '.data') + # X-Static-Large-Object is disallowed. + self.assertEquals(diskfile.read_metadata(objfile), + {'X-Timestamp': timestamp1, + 'Content-Type': 'text/plain', + 'Content-Length': '14', + 'ETag': '1000d172764c9dbc3a5798a67ec5bb76', + 'name': '/a/c/o', + 'Custom-Header': 'custom1', + 'X-Object-Meta-1': 'meta1'}) + + # PUT object again with X-Backend-Replication-Headers + timestamp2 = normalize_timestamp(time()) + req = Request.blank( + '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': timestamp2, + 'Content-Type': 'text/plain', + 'Content-Length': '14', + 'Etag': '1000d172764c9dbc3a5798a67ec5bb76', + 'Custom-Header': 'custom1', + 'X-Object-Meta-1': 'meta1', + 'X-Static-Large-Object': 'False', + 'X-Backend-Replication-Headers': + 'X-Static-Large-Object'}) + req.body = 'VERIFY SYSMETA' + + with mock.patch.object(self.object_controller, 'allowed_headers', + ['Custom-Header']): + resp = req.get_response(self.object_controller) + self.assertEquals(resp.status_int, 201) + + objfile = os.path.join( + self.testdir, 'sda1', + storage_directory(diskfile.get_data_dir(0), 'p', + hash_path('a', 'c', 'o')), + timestamp2 + '.data') + # X-Static-Large-Object should be copied since it is now allowed by + # replication headers. + self.assertEquals(diskfile.read_metadata(objfile), + {'X-Timestamp': timestamp2, + 'Content-Type': 'text/plain', + 'Content-Length': '14', + 'ETag': '1000d172764c9dbc3a5798a67ec5bb76', + 'name': '/a/c/o', + 'Custom-Header': 'custom1', + 'X-Object-Meta-1': 'meta1', + 'X-Static-Large-Object': 'False'}) + def test_PUT_container_connection(self): def mock_http_connect(response, with_exc=False): diff --git a/test/unit/obj/test_ssync_receiver.py b/test/unit/obj/test_ssync_receiver.py index f2341fc7a0..30a47a662b 100644 --- a/test/unit/obj/test_ssync_receiver.py +++ b/test/unit/obj/test_ssync_receiver.py @@ -1190,6 +1190,7 @@ class TestReceiver(unittest.TestCase): ':UPDATES: START\r\n' 'PUT /a/c/o\r\n' 'Content-Length: 1\r\n' + 'Etag: c4ca4238a0b923820dcc509a6f75849b\r\n' 'X-Timestamp: 1364456113.12344\r\n' 'X-Object-Meta-Test1: one\r\n' 'Content-Encoding: gzip\r\n' @@ -1209,6 +1210,7 @@ class TestReceiver(unittest.TestCase): self.assertEqual(req.path, '/device/partition/a/c/o') self.assertEqual(req.content_length, 1) self.assertEqual(req.headers, { + 'Etag': 'c4ca4238a0b923820dcc509a6f75849b', 'Content-Length': '1', 'X-Timestamp': '1364456113.12344', 'X-Object-Meta-Test1': 'one', @@ -1220,7 +1222,68 @@ class TestReceiver(unittest.TestCase): 'X-Backend-Replication-Headers': ( 'content-length x-timestamp x-object-meta-test1 ' 'content-encoding specialty-header')}) - self.assertEqual(req.read_body, '1') + + def test_UPDATES_PUT_replication_headers(self): + self.controller.logger = mock.MagicMock() + + # sanity check - regular PUT will not persist Specialty-Header + req = swob.Request.blank( + '/sda1/0/a/c/o1', body='1', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Content-Length': '1', + 'Content-Type': 'text/plain', + 'Etag': 'c4ca4238a0b923820dcc509a6f75849b', + 'X-Timestamp': '1364456113.12344', + 'X-Object-Meta-Test1': 'one', + 'Content-Encoding': 'gzip', + 'Specialty-Header': 'value'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 201) + df = self.controller.get_diskfile( + 'sda1', '0', 'a', 'c', 'o1', POLICIES.default) + df.open() + self.assertFalse('Specialty-Header' in df.get_metadata()) + + # an SSYNC request can override PUT header filtering... + req = swob.Request.blank( + '/sda1/0', + environ={'REQUEST_METHOD': 'SSYNC'}, + body=':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' + ':UPDATES: START\r\n' + 'PUT /a/c/o2\r\n' + 'Content-Length: 1\r\n' + 'Content-Type: text/plain\r\n' + 'Etag: c4ca4238a0b923820dcc509a6f75849b\r\n' + 'X-Timestamp: 1364456113.12344\r\n' + 'X-Object-Meta-Test1: one\r\n' + 'Content-Encoding: gzip\r\n' + 'Specialty-Header: value\r\n' + '\r\n' + '1') + resp = req.get_response(self.controller) + self.assertEqual( + self.body_lines(resp.body), + [':MISSING_CHECK: START', ':MISSING_CHECK: END', + ':UPDATES: START', ':UPDATES: END']) + self.assertEqual(resp.status_int, 200) + + # verify diskfile has metadata permitted by replication headers + # including Specialty-Header + df = self.controller.get_diskfile( + 'sda1', '0', 'a', 'c', 'o2', POLICIES.default) + df.open() + for chunk in df.reader(): + self.assertEqual('1', chunk) + expected = {'ETag': 'c4ca4238a0b923820dcc509a6f75849b', + 'Content-Length': '1', + 'Content-Type': 'text/plain', + 'X-Timestamp': '1364456113.12344', + 'X-Object-Meta-Test1': 'one', + 'Content-Encoding': 'gzip', + 'Specialty-Header': 'value', + 'name': '/a/c/o2'} + actual = df.get_metadata() + self.assertEqual(expected, actual) def test_UPDATES_with_storage_policy(self): # update router post policy patch diff --git a/test/unit/obj/test_ssync_sender.py b/test/unit/obj/test_ssync_sender.py index dde96121fe..20960e83f1 100644 --- a/test/unit/obj/test_ssync_sender.py +++ b/test/unit/obj/test_ssync_sender.py @@ -1453,9 +1453,6 @@ class TestBaseSsync(BaseTestSender): continue else: self.assertEqual(v, rx_metadata.pop(k), k) - # ugh, ssync duplicates ETag with Etag so have to clear it out here - if 'Etag' in rx_metadata: - rx_metadata.pop('Etag') self.assertFalse(rx_metadata) expected_body = '%s___%s' % (tx_df._name, frag_index) actual_body = ''.join([chunk for chunk in rx_df.reader()])