Sync metadata in 'rsync_then_merge' in db_replicator
Previously, 'rsync_then_merge' merged remote objects with the rsync'ed local objects, but remote metadata was not merged with the local metadata. The account/container replicator sometimes falls back to rsync for db sync when the record history in the 'local' and 'remote' db files has diverged too far. If the replicator has to rsync a local db to a remote node while the local db's metadata is older, that older metadata gets distributed, so some metadata values can go missing or revert to older values. This patch fixes the problem by merging the 'remote' (existing) metadata into the rsync'ed local db file.

Closes-Bug: #1570118
Change-Id: Icdf0a936fc456c5462471938cbc365bd012b05d4
parent 1d09a02481
commit 843184f3fe
swift/common/db_replicator.py

@@ -839,6 +839,7 @@ class ReplicatorRpc(object):
             objects = existing_broker.get_items_since(point, 1000)
             sleep()
         new_broker.newid(args[0])
+        new_broker.update_metadata(existing_broker.metadata)
         renamer(old_filename, db_file)
         return HTTPNoContent()
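For context: the account/container brokers keep metadata as a JSON mapping of header name to a (value, timestamp) pair, and update_metadata() keeps, for each key, whichever entry has the newer timestamp. Merging existing_broker.metadata into the rsync'ed db therefore preserves any newer values the receiving node already had, instead of letting the rsync'ed copy silently replace them. A minimal standalone sketch of that merge rule (illustrative only; merge_metadata and the sample values are hypothetical, not Swift code):

# Hypothetical sketch of the timestamp-per-key merge that the fix relies on:
# the newer timestamp wins, so an older rsync'ed copy can never clobber
# newer metadata already present on the receiving node.
def merge_metadata(rsynced, existing):
    merged = dict(rsynced)
    for key, (value, timestamp) in existing.items():
        if key not in merged or merged[key][1] < timestamp:
            merged[key] = (value, timestamp)
    return merged

rsynced = {'X-Container-Meta-A': ('1', '0000000001.00000')}   # older, from the sender
existing = {'X-Container-Meta-A': ('2', '0000000002.00000'),  # newer, on the receiver
            'X-Container-Meta-B': ('3', '0000000002.00000')}
assert merge_metadata(rsynced, existing) == existing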
test/probe/test_db_replicator.py (new executable file, 139 lines)

@@ -0,0 +1,139 @@
# Copyright (c) 2010-2016 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import main
from uuid import uuid4

from swiftclient import client, ClientException

from test.probe.common import kill_server, ReplProbeTest, start_server
from swift.common import direct_client, utils
from swift.common.manager import Server


class TestDbUsyncReplicator(ReplProbeTest):
    object_puts = 1  # Overridden in subclass to force rsync

    def test_metadata_sync(self):
        # Create container
        container = 'container-%s' % uuid4()
        client.put_container(self.url, self.token, container,
                             headers={'X-Storage-Policy': self.policy.name,
                                      'X-Container-Meta-A': '1',
                                      'X-Container-Meta-B': '1',
                                      'X-Container-Meta-C': '1'})

        cpart, cnodes = self.container_ring.get_nodes(self.account, container)
        cnode = cnodes.pop()
        # 2 of 3 container servers are temporarily down
        for node in cnodes:
            kill_server((node['ip'], node['port']),
                        self.ipport2server)

        # Put some meta on the lone server, to make sure it's merged properly
        # This will 503 (since we don't have a quorum), but we don't care (!)
        try:
            client.post_container(self.url, self.token, container,
                                  headers={'X-Container-Meta-A': '2',
                                           'X-Container-Meta-B': '2',
                                           'X-Container-Meta-D': '2'})
        except ClientException:
            pass

        # Object updates go to only one container server
        for _ in range(self.object_puts):
            obj = 'object-%s' % uuid4()
            client.put_object(self.url, self.token, container, obj, 'VERIFY')

        # The 2 down container servers come back up
        for node in cnodes:
            start_server((node['ip'], node['port']),
                         self.ipport2server)
        # ... but the container server that got the object updates goes down
        kill_server((cnode['ip'], cnode['port']),
                    self.ipport2server)

        # The metadata update is applied to 2 container servers
        # (equal to a quorum)
        client.post_container(self.url, self.token, container,
                              headers={'X-Container-Meta-B': '3',
                                       'X-Container-Meta-E': '3'})
        # The container server that got the object updates comes back up
        start_server((cnode['ip'], cnode['port']),
                     self.ipport2server)

        # The other nodes have no objects yet
        for node in cnodes:
            resp_headers = direct_client.direct_head_container(
                node, cpart, self.account, container)
            self.assertIn(resp_headers.get('x-container-object-count'),
                          (None, '0', 0))

        # If the container-replicator on the node that got the object updates
        # runs first, the db file may be replicated by rsync to the other
        # container servers. In that case the rsync'ed db file does not have
        # the metadata held by those servers, so metadata has to be merged
        # as part of replication.
        crepl = Server('container-replicator')
        crepl.spawn(self.configs['container-replicator'][cnode['id'] + 1],
                    once=True)
        crepl.interact()

        expected_meta = {
            'x-container-meta-a': '2',
            'x-container-meta-b': '2',
            'x-container-meta-c': '1',
            'x-container-meta-d': '2',
        }

        # The node that got the object updates still doesn't have the
        # latest meta values
        resp_headers = direct_client.direct_head_container(
            cnode, cpart, self.account, container)
        for header, value in expected_meta.items():
            self.assertIn(header, resp_headers)
            self.assertEqual(value, resp_headers[header])
        self.assertNotIn(resp_headers.get('x-container-object-count'),
                         (None, '0', 0))

        expected_meta = {
            'x-container-meta-a': '2',
            'x-container-meta-b': '3',
            'x-container-meta-c': '1',
            'x-container-meta-d': '2',
            'x-container-meta-e': '3',
        }

        # The other nodes still have their newer meta, merged with the
        # rsync'ed meta, as well as the objects
        for node in cnodes:
            resp_headers = direct_client.direct_head_container(
                node, cpart, self.account, container)
            for header, value in expected_meta.items():
                self.assertIn(header, resp_headers)
                self.assertEqual(value, resp_headers[header])
            self.assertNotIn(resp_headers.get('x-container-object-count'),
                             (None, '0', 0))


class TestDbRsyncReplicator(TestDbUsyncReplicator):
    def setUp(self):
        super(TestDbRsyncReplicator, self).setUp()
        cont_configs = [utils.readconf(p, 'container-replicator')
                        for p in self.configs['container-replicator'].values()]
        # Do more than per_diff object PUTs, to force rsync instead of usync
        self.object_puts = 1 + max(int(c.get('per_diff', '1000'))
                                   for c in cont_configs)


if __name__ == '__main__':
    main()
test/unit/common/test_db_replicator.py

@@ -32,7 +32,7 @@ from six.moves import reload_module
 from swift.container.backend import DATADIR
 from swift.common import db_replicator
 from swift.common.utils import (normalize_timestamp, hash_path,
-                                storage_directory)
+                                storage_directory, Timestamp)
 from swift.common.exceptions import DriveNotMounted
 from swift.common.swob import HTTPException
@@ -193,6 +193,7 @@ class FakeBroker(object):

     def __init__(self, *args, **kwargs):
         self.locked = False
+        self.metadata = {}
         return None

     @contextmanager
@@ -1353,8 +1354,8 @@ class TestReplToNode(unittest.TestCase):
         self.fake_info = {'id': 'a', 'point': -1, 'max_row': 20, 'hash': 'b',
                           'created_at': 100, 'put_timestamp': 0,
                           'delete_timestamp': 0, 'count': 0,
-                          'metadata': {
-                              'Test': ('Value', normalize_timestamp(1))}}
+                          'metadata': json.dumps({
+                              'Test': ('Value', normalize_timestamp(1))})}
         self.replicator.logger = mock.Mock()
         self.replicator._rsync_db = mock.Mock(return_value=True)
         self.replicator._usync_db = mock.Mock(return_value=True)
@@ -1398,6 +1399,18 @@ class TestReplToNode(unittest.TestCase):
         self.assertEqual(self.replicator._rsync_db.call_count, 0)
         self.assertEqual(self.replicator._usync_db.call_count, 0)

+    def test_repl_to_node_metadata_update(self):
+        now = Timestamp(time.time()).internal
+        rmetadata = {"X-Container-Sysmeta-Test": ("XYZ", now)}
+        rinfo = {"id": 3, "point": -1, "max_row": 20, "hash": "b",
+                 "metadata": json.dumps(rmetadata)}
+        self.http = ReplHttp(json.dumps(rinfo))
+        self.broker.get_sync()
+        self.assertEqual(self.replicator._repl_to_node(
+            self.fake_node, self.broker, '0', self.fake_info), True)
+        metadata = self.broker.metadata
+        self.assertEqual({}, metadata)
+
     def test_repl_to_node_not_found(self):
         self.http = ReplHttp('{"id": 3, "point": -1}', set_status=404)
         self.assertEqual(self.replicator._repl_to_node(
@@ -1614,6 +1627,76 @@ class TestReplicatorSync(unittest.TestCase):
         parts = os.listdir(part_root)
         self.assertEqual(0, len(parts))

+    def test_rsync_then_merge(self):
+        # setup the current db (and broker)
+        broker = self._get_broker('a', 'c', node_index=0)
+        part, node = self._get_broker_part_node(broker)
+        part = str(part)
+        put_timestamp = normalize_timestamp(time.time())
+        broker.initialize(put_timestamp)
+        put_metadata = {'example-meta': ['bah', put_timestamp]}
+        broker.update_metadata(put_metadata)
+
+        # sanity (re-open, and the db keeps the metadata)
+        broker = self._get_broker('a', 'c', node_index=0)
+        self.assertEqual(put_metadata, broker.metadata)
+
+        # create the rsync'ed db in the tmp dir
+        obj_hash = hash_path('a', 'c')
+        rsynced_db_broker = self.backend(
+            os.path.join(self.root, node['device'], 'tmp', obj_hash + '.db'),
+            account='a', container='b')
+        rsynced_db_broker.initialize(put_timestamp)
+
+        # do rsync_then_merge
+        rpc = db_replicator.ReplicatorRpc(
+            self.root, self.datadir, self.backend, False)
+        response = rpc.dispatch((node['device'], part, obj_hash),
+                                ['rsync_then_merge', obj_hash + '.db', 'arg2'])
+        # sanity
+        self.assertEqual('204 No Content', response.status)
+        self.assertEqual(204, response.status_int)
+
+        # re-open the db
+        broker = self._get_broker('a', 'c', node_index=0)
+        # the metadata from the existing db is kept
+        self.assertEqual(put_metadata, broker.metadata)
+
+    def test_replicator_sync(self):
+        # setup the current db (and broker)
+        broker = self._get_broker('a', 'c', node_index=0)
+        part, node = self._get_broker_part_node(broker)
+        part = str(part)
+        put_timestamp = normalize_timestamp(time.time())
+        broker.initialize(put_timestamp)
+        put_metadata = {'example-meta': ['bah', put_timestamp]}
+        sync_local_metadata = {
+            "meta1": ["data1", put_timestamp],
+            "meta2": ["data2", put_timestamp]}
+        broker.update_metadata(put_metadata)
+
+        # sanity (re-open, and the db keeps the metadata)
+        broker = self._get_broker('a', 'c', node_index=0)
+        self.assertEqual(put_metadata, broker.metadata)
+
+        # do a sync; the local metadata travels in the sync args
+        rpc = db_replicator.ReplicatorRpc(
+            self.root, self.datadir, ExampleBroker, False)
+        response = rpc.sync(
+            broker, (broker.get_sync('id_') + 1, 12345, 'id_',
+                     put_timestamp, put_timestamp, '0',
+                     json.dumps(sync_local_metadata)))
+        # sanity
+        self.assertEqual('200 OK', response.status)
+        self.assertEqual(200, response.status_int)
+
+        # re-open the db
+        broker = self._get_broker('a', 'c', node_index=0)
+        # both the existing db's metadata and the sync'ed local metadata are kept
+        expected = put_metadata.copy()
+        expected.update(sync_local_metadata)
+        self.assertEqual(expected, broker.metadata)
+

 if __name__ == '__main__':
     unittest.main()