From 843184f3fea18db367e40cb5f46fc0cb8f2432f6 Mon Sep 17 00:00:00 2001 From: Daisuke Morita Date: Wed, 6 Apr 2016 19:02:28 -0700 Subject: [PATCH] Sync metadata in 'rsync_then_merge' in db_replicator In previous 'rsync_then_merge' remote objects are merged with rsync'ed local objects, but remote metadata is not merged with local one. Account/Container replicator sometimes uses rsync for db sync if there is a big difference of record history in db files between 'local' and 'remote' servers. If replicator needs to rsync local db to remote but metadata in local db is older, older info of metadata can be distributed then some metadata values can be missing or go back to older. This patch fixes this problem by merging 'remote' metadata with rsync'ed local db file. Closes-Bug: #1570118 Change-Id: Icdf0a936fc456c5462471938cbc365bd012b05d4 --- swift/common/db_replicator.py | 1 + test/probe/test_db_replicator.py | 139 +++++++++++++++++++++++++ test/unit/common/test_db_replicator.py | 89 +++++++++++++++- 3 files changed, 226 insertions(+), 3 deletions(-) create mode 100755 test/probe/test_db_replicator.py diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index 30c6a35c12..4ac771bd01 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -839,6 +839,7 @@ class ReplicatorRpc(object): objects = existing_broker.get_items_since(point, 1000) sleep() new_broker.newid(args[0]) + new_broker.update_metadata(existing_broker.metadata) renamer(old_filename, db_file) return HTTPNoContent() diff --git a/test/probe/test_db_replicator.py b/test/probe/test_db_replicator.py new file mode 100755 index 0000000000..8bf66aef9a --- /dev/null +++ b/test/probe/test_db_replicator.py @@ -0,0 +1,139 @@ +# Copyright (c) 2010-2016 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import main +from uuid import uuid4 + +from swiftclient import client, ClientException + +from test.probe.common import kill_server, ReplProbeTest, start_server +from swift.common import direct_client, utils +from swift.common.manager import Server + + +class TestDbUsyncReplicator(ReplProbeTest): + object_puts = 1 # Overridden in subclass to force rsync + + def test_metadata_sync(self): + # Create container + container = 'container-%s' % uuid4() + client.put_container(self.url, self.token, container, + headers={'X-Storage-Policy': self.policy.name, + 'X-Container-Meta-A': '1', + 'X-Container-Meta-B': '1', + 'X-Container-Meta-C': '1'}) + + cpart, cnodes = self.container_ring.get_nodes(self.account, container) + cnode = cnodes.pop() + # 2 of 3 container servers are temporarily down + for node in cnodes: + kill_server((node['ip'], node['port']), + self.ipport2server) + + # Put some meta on the lone server, to make sure it's merged properly + # This will 503 (since we don't have a quorum), but we don't care (!) + try: + client.post_container(self.url, self.token, container, + headers={'X-Container-Meta-A': '2', + 'X-Container-Meta-B': '2', + 'X-Container-Meta-D': '2'}) + except ClientException: + pass + + # object updates come to only one container server + for _ in range(self.object_puts): + obj = 'object-%s' % uuid4() + client.put_object(self.url, self.token, container, obj, 'VERIFY') + + # 2 container servers make comeback + for node in cnodes: + start_server((node['ip'], node['port']), + self.ipport2server) + # But, container-server which got object updates is down + kill_server((cnode['ip'], cnode['port']), + self.ipport2server) + + # Metadata update will be applied to 2 container servers + # (equal to quorum) + client.post_container(self.url, self.token, container, + headers={'X-Container-Meta-B': '3', + 'X-Container-Meta-E': '3'}) + # container-server which got object updates makes comeback + start_server((cnode['ip'], cnode['port']), + self.ipport2server) + + # other nodes have no objects + for node in cnodes: + resp_headers = direct_client.direct_head_container( + node, cpart, self.account, container) + self.assertIn(resp_headers.get('x-container-object-count'), + (None, '0', 0)) + + # If container-replicator on the node which got the object updates + # runs in first, db file may be replicated by rsync to other + # containers. In that case, the db file does not information about + # metadata, so metadata should be synced before replication + crepl = Server('container-replicator') + crepl.spawn(self.configs['container-replicator'][cnode['id'] + 1], + once=True) + crepl.interact() + + expected_meta = { + 'x-container-meta-a': '2', + 'x-container-meta-b': '2', + 'x-container-meta-c': '1', + 'x-container-meta-d': '2', + } + + # node that got the object updates still doesn't have the meta + resp_headers = direct_client.direct_head_container( + cnode, cpart, self.account, container) + for header, value in expected_meta.items(): + self.assertIn(header, resp_headers) + self.assertEqual(value, resp_headers[header]) + self.assertNotIn(resp_headers.get('x-container-object-count'), + (None, '0', 0)) + + expected_meta = { + 'x-container-meta-a': '2', + 'x-container-meta-b': '3', + 'x-container-meta-c': '1', + 'x-container-meta-d': '2', + 'x-container-meta-e': '3', + } + + # other nodes still have the meta, as well as objects + for node in cnodes: + resp_headers = direct_client.direct_head_container( + node, cpart, self.account, container) + for header, value in expected_meta.items(): + self.assertIn(header, resp_headers) + self.assertEqual(value, resp_headers[header]) + self.assertNotIn(resp_headers.get('x-container-object-count'), + (None, '0', 0)) + + +class TestDbRsyncReplicator(TestDbUsyncReplicator): + def setUp(self): + super(TestDbRsyncReplicator, self).setUp() + cont_configs = [utils.readconf(p, 'container-replicator') + for p in self.configs['container-replicator'].values()] + # Do more than per_diff object PUTs, to force rsync instead of usync + self.object_puts = 1 + max(int(c.get('per_diff', '1000')) + for c in cont_configs) + + +if __name__ == '__main__': + main() diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py index 50f83c2d70..ba90e8e6d2 100644 --- a/test/unit/common/test_db_replicator.py +++ b/test/unit/common/test_db_replicator.py @@ -32,7 +32,7 @@ from six.moves import reload_module from swift.container.backend import DATADIR from swift.common import db_replicator from swift.common.utils import (normalize_timestamp, hash_path, - storage_directory) + storage_directory, Timestamp) from swift.common.exceptions import DriveNotMounted from swift.common.swob import HTTPException @@ -193,6 +193,7 @@ class FakeBroker(object): def __init__(self, *args, **kwargs): self.locked = False + self.metadata = {} return None @contextmanager @@ -1353,8 +1354,8 @@ class TestReplToNode(unittest.TestCase): self.fake_info = {'id': 'a', 'point': -1, 'max_row': 20, 'hash': 'b', 'created_at': 100, 'put_timestamp': 0, 'delete_timestamp': 0, 'count': 0, - 'metadata': { - 'Test': ('Value', normalize_timestamp(1))}} + 'metadata': json.dumps({ + 'Test': ('Value', normalize_timestamp(1))})} self.replicator.logger = mock.Mock() self.replicator._rsync_db = mock.Mock(return_value=True) self.replicator._usync_db = mock.Mock(return_value=True) @@ -1398,6 +1399,18 @@ class TestReplToNode(unittest.TestCase): self.assertEqual(self.replicator._rsync_db.call_count, 0) self.assertEqual(self.replicator._usync_db.call_count, 0) + def test_repl_to_node_metadata_update(self): + now = Timestamp(time.time()).internal + rmetadata = {"X-Container-Sysmeta-Test": ("XYZ", now)} + rinfo = {"id": 3, "point": -1, "max_row": 20, "hash": "b", + "metadata": json.dumps(rmetadata)} + self.http = ReplHttp(json.dumps(rinfo)) + self.broker.get_sync() + self.assertEqual(self.replicator._repl_to_node( + self.fake_node, self.broker, '0', self.fake_info), True) + metadata = self.broker.metadata + self.assertEqual({}, metadata) + def test_repl_to_node_not_found(self): self.http = ReplHttp('{"id": 3, "point": -1}', set_status=404) self.assertEqual(self.replicator._repl_to_node( @@ -1614,6 +1627,76 @@ class TestReplicatorSync(unittest.TestCase): parts = os.listdir(part_root) self.assertEqual(0, len(parts)) + def test_rsync_then_merge(self): + # setup current db (and broker) + broker = self._get_broker('a', 'c', node_index=0) + part, node = self._get_broker_part_node(broker) + part = str(part) + put_timestamp = normalize_timestamp(time.time()) + broker.initialize(put_timestamp) + put_metadata = {'example-meta': ['bah', put_timestamp]} + broker.update_metadata(put_metadata) + + # sanity (re-open, and the db keeps the metadata) + broker = self._get_broker('a', 'c', node_index=0) + self.assertEqual(put_metadata, broker.metadata) + + # create rsynced db in tmp dir + obj_hash = hash_path('a', 'c') + rsynced_db_broker = self.backend( + os.path.join(self.root, node['device'], 'tmp', obj_hash + '.db'), + account='a', container='b') + rsynced_db_broker.initialize(put_timestamp) + + # do rysnc_then_merge + rpc = db_replicator.ReplicatorRpc( + self.root, self.datadir, self.backend, False) + response = rpc.dispatch((node['device'], part, obj_hash), + ['rsync_then_merge', obj_hash + '.db', 'arg2']) + # sanity + self.assertEqual('204 No Content', response.status) + self.assertEqual(204, response.status_int) + + # re-open the db + broker = self._get_broker('a', 'c', node_index=0) + # keep the metadata in existing db + self.assertEqual(put_metadata, broker.metadata) + + def test_replicator_sync(self): + # setup current db (and broker) + broker = self._get_broker('a', 'c', node_index=0) + part, node = self._get_broker_part_node(broker) + part = str(part) + put_timestamp = normalize_timestamp(time.time()) + broker.initialize(put_timestamp) + put_metadata = {'example-meta': ['bah', put_timestamp]} + sync_local_metadata = { + "meta1": ["data1", put_timestamp], + "meta2": ["data2", put_timestamp]} + broker.update_metadata(put_metadata) + + # sanity (re-open, and the db keeps the metadata) + broker = self._get_broker('a', 'c', node_index=0) + self.assertEqual(put_metadata, broker.metadata) + + # do rysnc_then_merge + rpc = db_replicator.ReplicatorRpc( + self.root, self.datadir, ExampleBroker, False) + response = rpc.sync( + broker, (broker.get_sync('id_') + 1, 12345, 'id_', + put_timestamp, put_timestamp, '0', + json.dumps(sync_local_metadata))) + # sanity + self.assertEqual('200 OK', response.status) + self.assertEqual(200, response.status_int) + + # re-open the db + broker = self._get_broker('a', 'c', node_index=0) + # keep the both metadata in existing db and local db + expected = put_metadata.copy() + expected.update(sync_local_metadata) + self.assertEqual(expected, broker.metadata) + if __name__ == '__main__': unittest.main()