swift/test/unit/container/test_replicator.py
Tim Burke f6a436aeda Only try to fetch or sync shard ranges if the remote supports sharding
Change-Id: I7231e8af310e268484f2075f0194b7783cf1c3ea
2018-06-14 16:58:29 -07:00

2375 lines
110 KiB
Python

# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import shutil
import itertools
import unittest
import mock
import random
import sqlite3
from swift.common import db_replicator
from swift.common.swob import HTTPServerError
from swift.container import replicator, backend, server, sync_store
from swift.container.reconciler import (
MISPLACED_OBJECTS_ACCOUNT, get_reconciler_container_name)
from swift.common.utils import Timestamp, encode_timestamps, ShardRange, \
get_db_files, make_db_file_path
from swift.common.storage_policy import POLICIES
from test.unit.common import test_db_replicator
from test.unit import patch_policies, make_timestamp_iter, mock_check_drive, \
debug_logger, EMPTY_ETAG, FakeLogger
from contextlib import contextmanager
from test.unit.common.test_db_replicator import attach_fake_replication_rpc
@patch_policies
class TestReplicatorSync(test_db_replicator.TestReplicatorSync):
backend = backend.ContainerBroker
datadir = server.DATADIR
replicator_daemon = replicator.ContainerReplicator
replicator_rpc = replicator.ContainerReplicatorRpc
def assertShardRangesEqual(self, x, y):
    """Assert that two iterables of shard ranges match on every attribute.

    ShardRange.__eq__ only compares lower and upper, so each shard range
    is turned into its dict representation before the lists are compared.
    """
    first = [dict(shard_range) for shard_range in x]
    second = [dict(shard_range) for shard_range in y]
    self.assertEqual(first, second)
def assertShardRangesNotEqual(self, x, y):
    """Assert that two iterables of shard ranges differ in some attribute.

    ShardRange.__eq__ only compares lower and upper, so each shard range
    is turned into its dict representation before the lists are compared.
    """
    first = [dict(shard_range) for shard_range in x]
    second = [dict(shard_range) for shard_range in y]
    self.assertNotEqual(first, second)
def test_report_up_to_date(self):
    """Check report_up_to_date() as stats and reported stats diverge.

    A freshly reported broker must be up to date; changing any one of the
    tracked stats must make it stale until the matching ``reported_*``
    value is brought in line again.
    """
    created_at = Timestamp(1).internal
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(created_at, int(POLICIES.default))
    stats = broker.get_info()
    broker.reported(stats['put_timestamp'],
                    stats['delete_timestamp'],
                    stats['object_count'],
                    stats['bytes_used'])
    full_info = broker.get_replication_info()
    expected_info = {
        'put_timestamp': created_at,
        'delete_timestamp': '0',
        'count': 0,
        'bytes_used': 0,
        'reported_put_timestamp': created_at,
        'reported_delete_timestamp': '0',
        'reported_object_count': 0,
        'reported_bytes_used': 0,
    }
    for key, value in expected_info.items():
        msg = 'expected value for %r, %r != %r' % (
            key, full_info[key], value)
        self.assertEqual(full_info[key], value, msg)

    repl = replicator.ContainerReplicator({})
    self.assertTrue(repl.report_up_to_date(full_info))

    # (current stat, reported stat, new value): bumping the current stat
    # makes the report stale, bumping the reported stat fixes it again
    transitions = (
        ('delete_timestamp', 'reported_delete_timestamp',
         Timestamp(2).internal),
        ('count', 'reported_object_count', 1),
        ('bytes_used', 'reported_bytes_used', 1),
        ('put_timestamp', 'reported_put_timestamp',
         Timestamp(3).internal),
    )
    for current_key, reported_key, new_value in transitions:
        full_info[current_key] = new_value
        self.assertFalse(repl.report_up_to_date(full_info))
        full_info[reported_key] = new_value
        self.assertTrue(repl.report_up_to_date(full_info))
def test_sync_remote_in_sync(self):
    """Replicating a container to its own node reports 'no_change'.

    The broker is replicated to the node returned for that same broker by
    ``_get_broker_part_node``, so there is nothing to transfer.
    """
    # setup a local container
    broker = self._get_broker('a', 'c', node_index=0)
    put_timestamp = time.time()
    broker.initialize(put_timestamp, POLICIES.default.idx)
    # "replicate" to same database; the node comes from the ring helper
    # (the hand-built node dict that used to be assigned here was never
    # used because it was overwritten before the replication call)
    daemon = replicator.ContainerReplicator({})
    part, node = self._get_broker_part_node(broker)
    info = broker.get_replication_info()
    success = daemon._repl_to_node(node, broker, part, info)
    # nothing to do
    self.assertTrue(success)
    self.assertEqual(1, daemon.stats['no_change'])
def test_sync_remote_with_timings(self):
    """Replicate in-sync containers and expect one debug timing per step.

    DEBUG_TIMINGS_THRESHOLD is patched to -1 for the replication call
    (presumably so that every timed step exceeds the threshold); the RPC
    logger must then hold exactly one debug line for each expected metric.
    """
    timestamps = make_timestamp_iter()
    # local container, carrying one metadata item
    broker = self._get_broker('a', 'c', node_index=0)
    local_put = next(timestamps)
    broker.initialize(local_put.internal, POLICIES.default.idx)
    broker.update_metadata(
        {'x-container-meta-test': ('foo', local_put.internal)})
    # remote container
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(
        next(timestamps).internal, POLICIES.default.idx)
    # the same object row goes into both databases
    object_ts = next(timestamps).internal
    for db in (broker, remote_broker):
        db.put_object(
            '/a/c/o', object_ts, 0, 'content-type', 'etag',
            storage_policy_index=db.storage_policy_index)
    # replicate local -> remote with timings forced on
    daemon = replicator.ContainerReplicator({})
    part, node = self._get_broker_part_node(remote_broker)
    info = broker.get_replication_info()
    with mock.patch.object(db_replicator, 'DEBUG_TIMINGS_THRESHOLD', -1):
        success = daemon._repl_to_node(node, broker, part, info)
    # nothing to do
    self.assertTrue(success)
    self.assertEqual(1, daemon.stats['no_change'])

    expected_timings = ('info', 'update_metadata', 'merge_timestamps',
                        'get_sync', 'merge_syncs')
    debug_lines = self.rpc.logger.logger.get_lines_for_level('debug')
    count_msg = 'Expected %s debug lines but only got %s: %s' % (
        len(expected_timings), len(debug_lines), debug_lines)
    self.assertEqual(len(expected_timings), len(debug_lines), count_msg)
    for metric in expected_timings:
        expected = 'replicator-rpc-sync time for %s:' % metric
        found = any(expected in line for line in debug_lines)
        self.assertTrue(found, 'debug timing %r was not in %r' % (
            expected, debug_lines))
def test_sync_remote_missing(self):
    """A container missing on the other nodes is rsynced to both of them.

    After one replication pass the databases at node indexes 1 and 2 must
    exist and report the same info as the local one, apart from 'id'.
    """
    broker = self._get_broker('a', 'c', node_index=0)
    put_timestamp = time.time()
    broker.initialize(put_timestamp, POLICIES.default.idx)
    # "replicate"
    part, node = self._get_broker_part_node(broker)
    daemon = self._run_once(node)
    # complete rsync to all other nodes
    self.assertEqual(2, daemon.stats['rsync'])
    for node_index in (1, 2):
        remote_broker = self._get_broker('a', 'c', node_index=node_index)
        self.assertTrue(os.path.exists(remote_broker.db_file))
        remote_info = remote_broker.get_info()
        local_info = self._get_broker(
            'a', 'c', node_index=0).get_info()
        for key, local_value in local_info.items():
            # every field but 'id' is compared
            if key != 'id':
                self.assertEqual(
                    remote_info[key], local_value,
                    "mismatch remote %s %r != %r" % (
                        key, remote_info[key], local_value))
def test_rsync_failure(self):
    """_repl_to_node() reports failure when the rsync step returns False."""
    broker = self._get_broker('a', 'c', node_index=0)
    put_timestamp = time.time()
    broker.initialize(put_timestamp, POLICIES.default.idx)
    # "replicate" to different device, with an rsync that always fails
    daemon = replicator.ContainerReplicator({})
    daemon._rsync_file = lambda *args, **kwargs: False
    # pick any ring device other than the one holding the local broker
    part, local_node = self._get_broker_part_node(broker)
    other_nodes = [dev for dev in self._ring.devs
                   if dev['id'] != local_node['id']]
    node = random.choice(other_nodes)
    info = broker.get_replication_info()
    with mock_check_drive(ismount=True):
        success = daemon._repl_to_node(node, broker, part, info)
    self.assertFalse(success)
def test_sync_remote_missing_most_rows(self):
    """A remote that lacks the local row is brought in sync via rsync.

    The daemon runs with ``per_diff`` set to 1 and a fake ``_rsync_file``
    that copies the database file under ``self.root``. The test expects
    the 'remote_merge' stat to be 1 and the remote info to match the local
    info afterwards, apart from 'id'.
    """
    put_timestamp = time.time()
    # create "local" broker
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(put_timestamp, POLICIES.default.idx)
    # create "remote" broker
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(put_timestamp, POLICIES.default.idx)
    # add a row to "local" db
    broker.put_object('/a/c/o', time.time(), 0, 'content-type', 'etag',
                      storage_policy_index=broker.storage_policy_index)
    # replicate
    daemon = replicator.ContainerReplicator({'per_diff': 1})

    def _rsync_file(db_file, remote_file, **kwargs):
        # only the part after the first '/' is used as the destination
        # path; the leading component is discarded
        _, remote_path = remote_file.split('/', 1)
        dest_path = os.path.join(self.root, remote_path)
        shutil.copy(db_file, dest_path)
        return True
    daemon._rsync_file = _rsync_file

    part, node = self._get_broker_part_node(remote_broker)
    info = broker.get_replication_info()
    success = daemon._repl_to_node(node, broker, part, info)
    self.assertTrue(success)
    # row merge
    self.assertEqual(1, daemon.stats['remote_merge'])
    local_info = self._get_broker(
        'a', 'c', node_index=0).get_info()
    remote_info = self._get_broker(
        'a', 'c', node_index=1).get_info()
    for k, v in local_info.items():
        if k == 'id':
            continue
        self.assertEqual(remote_info[k], v,
                         "mismatch remote %s %r != %r" % (
                             k, remote_info[k], v))
def test_sync_remote_missing_one_rows(self):
    """A single row missing on the remote is sent as one diff.

    Both databases share ten rows; the local one has one extra. After
    replicating, the 'diff' stat must be 1 and the remote info must match
    the local info, apart from 'id'.
    """
    created_at = time.time()
    # "local" and "remote" brokers start from the same put timestamp
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(created_at, POLICIES.default.idx)
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(created_at, POLICIES.default.idx)
    # rows present in both databases
    for i in range(10):
        row_ts = time.time()
        for db in (broker, remote_broker):
            db.put_object('/a/c/o_%s' % i, row_ts, 0,
                          'content-type', 'etag',
                          storage_policy_index=db.storage_policy_index)
    # one more row that only the "local" broker knows about
    broker.put_object('/a/c/o_missing', time.time(), 0,
                      'content-type', 'etag',
                      storage_policy_index=broker.storage_policy_index)
    # replicate
    daemon = replicator.ContainerReplicator({})
    part, node = self._get_broker_part_node(remote_broker)
    info = broker.get_replication_info()
    success = daemon._repl_to_node(node, broker, part, info)
    self.assertTrue(success)
    # row merge
    self.assertEqual(1, daemon.stats['diff'])
    local_info = self._get_broker('a', 'c', node_index=0).get_info()
    remote_info = self._get_broker('a', 'c', node_index=1).get_info()
    for key, local_value in local_info.items():
        if key != 'id':
            self.assertEqual(
                remote_info[key], local_value,
                "mismatch remote %s %r != %r" % (
                    key, remote_info[key], local_value))
def test_sync_remote_can_not_keep_up(self):
    """Replication is capped by max_diffs and succeeds on a quiet retry.

    A REPLICATE hook adds a new local row on every 'merge_items' call, so
    the first replication attempt is expected to fail. The hook is then
    removed and the retry must succeed, with the stats showing two diffs
    and one capped diff and the remote info matching the local info
    (apart from 'id').
    """
    put_timestamp = time.time()
    # create "local" broker
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(put_timestamp, POLICIES.default.idx)
    # create "remote" broker
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(put_timestamp, POLICIES.default.idx)
    # add some rows to both db's
    for i in range(10):
        put_timestamp = time.time()
        for db in (broker, remote_broker):
            obj_name = 'o_%s' % i
            db.put_object(obj_name, put_timestamp, 0,
                          'content-type', 'etag',
                          storage_policy_index=db.storage_policy_index)
    # setup REPLICATE callback to simulate adding rows during merge_items
    missing_counter = itertools.count()

    def put_more_objects(op, *args):
        if op != 'merge_items':
            return
        path = '/a/c/o_missing_%s' % next(missing_counter)
        # use the policy index of the broker being written to, rather than
        # whatever the leaked loop variable ``db`` happens to reference
        broker.put_object(path, time.time(), 0, 'content-type', 'etag',
                          storage_policy_index=broker.storage_policy_index)

    test_db_replicator.FakeReplConnection = \
        test_db_replicator.attach_fake_replication_rpc(
            self.rpc, replicate_hook=put_more_objects)
    db_replicator.ReplConnection = test_db_replicator.FakeReplConnection
    # and add one extra to local db to trigger merge_items
    put_more_objects('merge_items')
    # limit number of times we'll call merge_items
    daemon = replicator.ContainerReplicator({'max_diffs': 10})
    # replicate
    part, node = self._get_broker_part_node(remote_broker)
    info = broker.get_replication_info()
    success = daemon._repl_to_node(node, broker, part, info)
    self.assertFalse(success)
    # back off on the PUTs during replication...
    FakeReplConnection = test_db_replicator.attach_fake_replication_rpc(
        self.rpc, replicate_hook=None)
    db_replicator.ReplConnection = FakeReplConnection
    # retry replication
    info = broker.get_replication_info()
    success = daemon._repl_to_node(node, broker, part, info)
    self.assertTrue(success)
    # row merge
    self.assertEqual(2, daemon.stats['diff'])
    self.assertEqual(1, daemon.stats['diff_capped'])
    local_info = self._get_broker(
        'a', 'c', node_index=0).get_info()
    remote_info = self._get_broker(
        'a', 'c', node_index=1).get_info()
    for k, v in local_info.items():
        if k == 'id':
            continue
        self.assertEqual(remote_info[k], v,
                         "mismatch remote %s %r != %r" % (
                             k, remote_info[k], v))
def test_diff_capped_sync(self):
    """A capped diff is finished off by a second replication pass.

    The local database holds 50 old rows plus one row newer than anything
    on the remote; the remote holds 100 newer rows. With ``per_diff`` 10
    and ``max_diffs`` 3 the first run must be capped, the second run must
    not be, and afterwards the remote must list all 101 objects with the
    new content type.
    """
    ts = (Timestamp(t).internal for t in
          itertools.count(int(time.time())))
    put_timestamp = next(ts)
    # start off with a local db that is way behind
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(put_timestamp, POLICIES.default.idx)
    for i in range(50):
        broker.put_object(
            'o%s' % i, next(ts), 0, 'content-type-old', 'etag',
            storage_policy_index=broker.storage_policy_index)
    # remote primary db has all the new bits...
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(put_timestamp, POLICIES.default.idx)
    for i in range(100):
        remote_broker.put_object(
            'o%s' % i, next(ts), 0, 'content-type-new', 'etag',
            storage_policy_index=remote_broker.storage_policy_index)
    # except there's *one* tiny thing in our local broker that's newer
    broker.put_object(
        'o101', next(ts), 0, 'content-type-new', 'etag',
        storage_policy_index=broker.storage_policy_index)
    # setup daemon with smaller per_diff and max_diffs
    part, node = self._get_broker_part_node(broker)
    daemon = self._get_daemon(node, conf_updates={'per_diff': 10,
                                                  'max_diffs': 3})
    self.assertEqual(daemon.per_diff, 10)
    self.assertEqual(daemon.max_diffs, 3)
    # run once and verify diff capped
    self._run_once(node, daemon=daemon)
    self.assertEqual(1, daemon.stats['diff'])
    self.assertEqual(1, daemon.stats['diff_capped'])
    # run again and verify fully synced
    self._run_once(node, daemon=daemon)
    self.assertEqual(1, daemon.stats['diff'])
    self.assertEqual(0, daemon.stats['diff_capped'])
    # now that we're synced the new item should be in remote db
    remote_names = set()
    for item in remote_broker.list_objects_iter(500, '', '', '', ''):
        # distinct names keep the ``ts`` generator from being shadowed
        name, _created, _size, content_type, _etag = item
        remote_names.add(name)
        self.assertEqual(content_type, 'content-type-new')
    self.assertIn('o101', remote_names)
    self.assertEqual(len(remote_names), 101)
    self.assertEqual(remote_broker.get_info()['object_count'], 101)
def test_sync_status_change(self):
    """A local delete is propagated to the remote as a status change.

    After replicating the deleted local container, the remote must be
    deleted too, and its status_changed_at must be later than both its own
    put_timestamp and the local status_changed_at.
    """
    created_at = time.time()
    # local and remote containers share one put timestamp
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(created_at, POLICIES.default.idx)
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(created_at, POLICIES.default.idx)
    # delete local container
    broker.delete_db(time.time())
    # replicate
    daemon = replicator.ContainerReplicator({})
    part, node = self._get_broker_part_node(remote_broker)
    replication_info = broker.get_replication_info()
    success = daemon._repl_to_node(node, broker, part, replication_info)
    # nothing to do
    self.assertTrue(success)
    self.assertEqual(1, daemon.stats['no_change'])
    # status in sync
    self.assertTrue(remote_broker.is_deleted())
    info = broker.get_info()
    remote_info = remote_broker.get_info()
    remote_changed = remote_info['status_changed_at']

    remote_put = remote_info['put_timestamp']
    self.assertTrue(
        Timestamp(remote_changed) > Timestamp(remote_put),
        'remote status_changed_at (%s) is not '
        'greater than put_timestamp (%s)' % (remote_changed, remote_put))

    local_changed = info['status_changed_at']
    self.assertTrue(
        Timestamp(remote_changed) > Timestamp(local_changed),
        'remote status_changed_at (%s) is not '
        'greater than local status_changed_at (%s)' % (
            remote_changed, local_changed))
@contextmanager
def _wrap_merge_timestamps(self, broker, calls):
    """Temporarily record calls to ``broker.merge_timestamps``.

    While the context is active, the first positional argument of every
    call is appended to ``calls`` before the call is forwarded to the
    original method. The original attribute is put back on exit, even if
    the body raises.

    :param broker: object whose ``merge_timestamps`` attribute is replaced
    :param calls: list that collects the first argument of each call
    """
    original = broker.merge_timestamps

    def recording_merge_timestamps(*args, **kwargs):
        calls.append(args[0])
        original(*args, **kwargs)

    broker.merge_timestamps = recording_merge_timestamps
    try:
        yield True
    finally:
        broker.merge_timestamps = original
def test_sync_merge_timestamps(self):
    """Put timestamps are merged on the first sync and left alone after.

    The remote is created with a later put timestamp than the local
    container. The first replication must trigger merge_timestamps and
    leave both sides at the remote put timestamp; a second replication
    must not call merge_timestamps at all.
    """
    ts = (Timestamp(t).internal for t in
          itertools.count(int(time.time())))
    # setup a local container
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(next(ts), POLICIES.default.idx)
    # setup remote container
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_put_timestamp = next(ts)
    remote_broker.initialize(remote_put_timestamp, POLICIES.default.idx)

    daemon = replicator.ContainerReplicator({})
    part, node = self._get_broker_part_node(remote_broker)
    # first pass: one recorded merge_timestamps call per list;
    # second pass: nothing changed, so no calls are expected
    for expected_calls in (1, 0):
        info = broker.get_replication_info()
        local_calls = []
        remote_calls = []
        # NOTE(review): both recorders are installed on the local
        # ``broker``; confirm whether the second one was meant to observe
        # the remote side instead
        with self._wrap_merge_timestamps(broker, local_calls), \
                self._wrap_merge_timestamps(broker, remote_calls):
            success = daemon._repl_to_node(node, broker, part, info)
        self.assertTrue(success)
        self.assertEqual(expected_calls, len(remote_calls))
        self.assertEqual(expected_calls, len(local_calls))
        self.assertEqual(remote_put_timestamp,
                         broker.get_info()['put_timestamp'])
        self.assertEqual(remote_put_timestamp,
                         remote_broker.get_info()['put_timestamp'])
def test_sync_bogus_db_quarantines(self):
    """A local DB that fails like a corrupt one is replaced by a new file.

    ``ContainerBroker.get_info`` is patched to raise
    ``sqlite3.OperationalError`` for the local database file only. After
    the remote node replicates, a database must exist at the same path
    with a different inode, and the daemon must report no failures.
    """
    ts = (Timestamp(t).internal for t in
          itertools.count(int(time.time())))
    policy = random.choice(list(POLICIES))
    # create "local" broker
    local_broker = self._get_broker('a', 'c', node_index=0)
    local_broker.initialize(next(ts), policy.idx)
    # create "remote" broker
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(next(ts), policy.idx)

    db_path = local_broker.db_file
    self.assertTrue(os.path.exists(db_path))  # sanity check
    inode_before = os.stat(db_path).st_ino

    real_get_info = backend.ContainerBroker.get_info

    def fail_like_bad_db(broker):
        # only the local database file misbehaves; others work normally
        if broker.db_file != local_broker.db_file:
            return real_get_info(broker)
        raise sqlite3.OperationalError("no such table: container_info")

    part, node = self._get_broker_part_node(remote_broker)
    with mock.patch('swift.container.backend.ContainerBroker.get_info',
                    fail_like_bad_db):
        # Have the remote node replicate to local; local should see its
        # corrupt DB, quarantine it, and act like the DB wasn't ever there
        # in the first place.
        daemon = self._run_once(node)

    self.assertTrue(os.path.exists(db_path))
    # Make sure we didn't just keep the old DB, but quarantined it and
    # made a fresh copy.
    inode_after = os.stat(db_path).st_ino
    self.assertNotEqual(inode_before, inode_after)
    self.assertEqual(daemon.stats['failure'], 0)
def _replication_scenarios(self, *scenarios, **kwargs):
    """Generate policy-conflict setups and verify replication afterwards.

    For every scenario name this generator yields
    ``(ts, policy, remote_policy, broker, remote_broker)`` so the caller
    can initialize the two uninitialized brokers. When the caller resumes
    the generator, an object row is added to the brokers selected by the
    scenario, one replication pass is run from the local node, and both
    databases are checked to have ended up with the expected storage
    policy index. The test fixture is torn down and set up again between
    scenarios.

    :param scenarios: names out of 'no_row', 'local_row', 'remote_row'
                      and 'both_rows'; all four are used when none given
    :param remote_wins: keyword only; when true the remote policy index
                        is the expected result, otherwise the local one
    """
    remote_wins = kwargs.get('remote_wins', False)
    # these tests are duplicated because of the differences in replication
    # when row counts cause full rsync vs. merge
    if not scenarios:
        scenarios = ('no_row', 'local_row', 'remote_row', 'both_rows')
    for scenario_name in scenarios:
        ts = itertools.count(int(time.time()))
        policy = random.choice(list(POLICIES))
        other_policies = [p for p in POLICIES if p is not policy]
        remote_policy = random.choice(other_policies)
        broker = self._get_broker('a', 'c', node_index=0)
        remote_broker = self._get_broker('a', 'c', node_index=1)
        # hand control to the test so it can create the containers
        yield ts, policy, remote_policy, broker, remote_broker
        # which databases get an object row in this scenario
        row_targets = {
            'no_row': (),
            'local_row': (broker,),
            'remote_row': (remote_broker,),
            'both_rows': (broker, remote_broker),
        }[scenario_name]
        obj_ts = next(ts)
        for db in row_targets:
            db.put_object('/a/c/o', obj_ts, 0, 'content-type', 'etag',
                          storage_policy_index=db.storage_policy_index)
        # replicate
        part, node = self._get_broker_part_node(broker)
        daemon = self._run_once(node)
        self.assertEqual(0, daemon.stats['failure'])
        # in sync
        local_info = self._get_broker('a', 'c', node_index=0).get_info()
        remote_info = self._get_broker('a', 'c', node_index=1).get_info()
        if remote_wins:
            expected = remote_policy.idx
            problem = 'local policy did not change to match remote '
        else:
            expected = policy.idx
            problem = 'local policy changed to match remote '
        err = problem + (
            'for replication row scenario %s' % scenario_name)
        self.assertEqual(local_info['storage_policy_index'], expected, err)
        self.assertEqual(remote_info['storage_policy_index'],
                         local_info['storage_policy_index'])
        # start the next scenario from a clean fixture
        test_db_replicator.TestReplicatorSync.tearDown(self)
        test_db_replicator.TestReplicatorSync.setUp(self)
def test_sync_local_create_policy_over_newer_remote_create(self):
    """The local policy is kept when the remote was created afterwards."""
    scenarios = self._replication_scenarios()
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the "local" container is created first ...
        broker.initialize(next(ts), policy.idx)
        # ... and the "remote" one after it
        remote_broker.initialize(next(ts), remote_policy.idx)
def test_sync_local_create_policy_over_newer_remote_delete(self):
    """The local policy is kept over a later-created, deleted remote."""
    scenarios = self._replication_scenarios()
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the older "local" container comes first
        broker.initialize(next(ts), policy.idx)
        # the "remote" container is created afterwards ...
        remote_broker.initialize(next(ts), remote_policy.idx)
        # ... and then deleted
        remote_broker.delete_db(next(ts))
def test_sync_local_create_policy_over_older_remote_delete(self):
    """The local policy is kept when created after a remote delete.

    Only the 'no_row' and 'local_row' scenarios run here; remote_row &
    both_rows cases are covered by
    "test_sync_remote_half_delete_policy_over_newer_local_create".
    """
    scenarios = self._replication_scenarios('no_row', 'local_row')
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the older "remote" container is created ...
        remote_broker.initialize(next(ts), remote_policy.idx)
        # ... and deleted
        remote_broker.delete_db(next(ts))
        # before the "local" container is created
        broker.initialize(next(ts), policy.idx)
def test_sync_local_half_delete_policy_over_newer_remote_create(self):
    """A deleted local container keeps its policy over a newer remote.

    Only the 'local_row' and 'both_rows' scenarios run here; no_row &
    remote_row cases are covered by
    "test_sync_remote_create_policy_over_older_local_delete".
    """
    scenarios = self._replication_scenarios('local_row', 'both_rows')
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the older "local" container is created ...
        broker.initialize(next(ts), policy.idx)
        # ... and half deleted
        broker.delete_db(next(ts))
        # before the "remote" container is created
        remote_broker.initialize(next(ts), remote_policy.idx)
def test_sync_local_recreate_policy_over_newer_remote_create(self):
    """A recreated local container keeps its policy over a newer remote."""
    scenarios = self._replication_scenarios()
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # create the "local" container
        broker.initialize(next(ts), policy.idx)
        # delete it and recreate it, all before the remote exists
        broker.delete_db(next(ts))
        recreated_at = next(ts)
        broker.update_put_timestamp(recreated_at)
        broker.update_status_changed_at(recreated_at)
        # the "remote" container is created last
        remote_broker.initialize(next(ts), remote_policy.idx)
def test_sync_local_recreate_policy_over_older_remote_create(self):
    """A recreated local container keeps its policy over an older remote."""
    scenarios = self._replication_scenarios()
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the older "remote" container is created first
        remote_broker.initialize(next(ts), remote_policy.idx)
        # then the "local" container
        broker.initialize(next(ts), policy.idx)
        # which is deleted and recreated
        broker.delete_db(next(ts))
        recreated_at = next(ts)
        broker.update_put_timestamp(recreated_at)
        broker.update_status_changed_at(recreated_at)
def test_sync_local_recreate_policy_over_newer_remote_delete(self):
    """A recreated local container keeps its policy over a later delete."""
    scenarios = self._replication_scenarios()
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # create the "local" container, then the "remote" one
        broker.initialize(next(ts), policy.idx)
        remote_broker.initialize(next(ts), remote_policy.idx)
        # delete and recreate the "local" container
        broker.delete_db(next(ts))
        recreated_at = next(ts)
        broker.update_put_timestamp(recreated_at)
        broker.update_status_changed_at(recreated_at)
        # the "remote" container is deleted after the local recreate
        remote_broker.delete_db(next(ts))
def test_sync_local_recreate_policy_over_older_remote_delete(self):
    """A recreated local container keeps its policy over an older delete."""
    scenarios = self._replication_scenarios()
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # create the "local" container, then the "remote" one
        broker.initialize(next(ts), policy.idx)
        remote_broker.initialize(next(ts), remote_policy.idx)
        # the "remote" container is deleted first
        remote_broker.delete_db(next(ts))
        # afterwards the "local" container is deleted and recreated
        broker.delete_db(next(ts))
        recreated_at = next(ts)
        broker.update_put_timestamp(recreated_at)
        broker.update_status_changed_at(recreated_at)
def test_sync_local_recreate_policy_over_older_remote_recreate(self):
    """The local policy is kept when both sides were recreated.

    The remote container is created and recreated before the respective
    local steps.
    """
    scenarios = self._replication_scenarios()
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # create the "remote" container, then the "local" one
        remote_broker.initialize(next(ts), remote_policy.idx)
        broker.initialize(next(ts), policy.idx)
        # the "remote" container is deleted and recreated first
        remote_broker.delete_db(next(ts))
        remote_recreated_at = next(ts)
        remote_broker.update_put_timestamp(remote_recreated_at)
        remote_broker.update_status_changed_at(remote_recreated_at)
        # then the "local" container is deleted and recreated
        broker.delete_db(next(ts))
        local_recreated_at = next(ts)
        broker.update_put_timestamp(local_recreated_at)
        broker.update_status_changed_at(local_recreated_at)
def test_sync_remote_create_policy_over_newer_local_create(self):
    """The remote policy wins when the remote was created first."""
    scenarios = self._replication_scenarios(remote_wins=True)
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the older "remote" container is created first ...
        remote_broker.initialize(next(ts), remote_policy.idx)
        # ... and the "local" one after it
        broker.initialize(next(ts), policy.idx)
def test_sync_remote_create_policy_over_newer_local_delete(self):
    """The remote policy wins over a later-created, deleted local."""
    scenarios = self._replication_scenarios(remote_wins=True)
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the older "remote" container comes first
        remote_broker.initialize(next(ts), remote_policy.idx)
        # the "local" container is created afterwards ...
        broker.initialize(next(ts), policy.idx)
        # ... and then deleted
        broker.delete_db(next(ts))
def test_sync_remote_create_policy_over_older_local_delete(self):
    """The remote policy wins when created after a local delete.

    Only the 'no_row' and 'remote_row' scenarios run here; local_row &
    both_rows cases are covered by
    "test_sync_local_half_delete_policy_over_newer_remote_create".
    """
    scenarios = self._replication_scenarios(
        'no_row', 'remote_row', remote_wins=True)
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the older "local" container is created ...
        broker.initialize(next(ts), policy.idx)
        # ... and deleted
        broker.delete_db(next(ts))
        # before the "remote" container is created
        remote_broker.initialize(next(ts), remote_policy.idx)
def test_sync_remote_half_delete_policy_over_newer_local_create(self):
    """A deleted remote container's policy wins over a newer local one.

    Only the 'remote_row' and 'both_rows' scenarios run here; no_row &
    local_row cases are covered by
    "test_sync_local_create_policy_over_older_remote_delete".
    """
    scenarios = self._replication_scenarios(
        'remote_row', 'both_rows', remote_wins=True)
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the older "remote" container is created ...
        remote_broker.initialize(next(ts), remote_policy.idx)
        # ... and half deleted
        remote_broker.delete_db(next(ts))
        # before the "local" container is created
        broker.initialize(next(ts), policy.idx)
def test_sync_remote_recreate_policy_over_newer_local_create(self):
    """A recreated remote container's policy wins over a newer local."""
    scenarios = self._replication_scenarios(remote_wins=True)
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # create the "remote" container
        remote_broker.initialize(next(ts), remote_policy.idx)
        # delete it and recreate it, all before the local exists
        remote_broker.delete_db(next(ts))
        recreated_at = next(ts)
        remote_broker.update_put_timestamp(recreated_at)
        remote_broker.update_status_changed_at(recreated_at)
        # the "local" container is created last
        broker.initialize(next(ts), policy.idx)
def test_sync_remote_recreate_policy_over_older_local_create(self):
    """A recreated remote container's policy wins over an older local."""
    scenarios = self._replication_scenarios(remote_wins=True)
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # the older "local" container is created first
        broker.initialize(next(ts), policy.idx)
        # then the "remote" container
        remote_broker.initialize(next(ts), remote_policy.idx)
        # which is deleted and recreated
        remote_broker.delete_db(next(ts))
        recreated_at = next(ts)
        remote_broker.update_put_timestamp(recreated_at)
        remote_broker.update_status_changed_at(recreated_at)
def test_sync_remote_recreate_policy_over_newer_local_delete(self):
    """A recreated remote's policy wins over a later local delete."""
    scenarios = self._replication_scenarios(remote_wins=True)
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # create the "local" container, then the "remote" one
        broker.initialize(next(ts), policy.idx)
        remote_broker.initialize(next(ts), remote_policy.idx)
        # delete and recreate the "remote" container
        remote_broker.delete_db(next(ts))
        remote_recreated_at = next(ts)
        remote_broker.update_put_timestamp(remote_recreated_at)
        remote_broker.update_status_changed_at(remote_recreated_at)
        # the "local" container is deleted after the remote recreate
        broker.delete_db(next(ts))
def test_sync_remote_recreate_policy_over_older_local_delete(self):
    """A recreated remote's policy wins over an older local delete."""
    scenarios = self._replication_scenarios(remote_wins=True)
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # create the "local" container, then the "remote" one
        broker.initialize(next(ts), policy.idx)
        remote_broker.initialize(next(ts), remote_policy.idx)
        # the "local" container is deleted first
        broker.delete_db(next(ts))
        # afterwards the "remote" container is deleted and recreated
        remote_broker.delete_db(next(ts))
        remote_recreated_at = next(ts)
        remote_broker.update_put_timestamp(remote_recreated_at)
        remote_broker.update_status_changed_at(remote_recreated_at)
def test_sync_remote_recreate_policy_over_older_local_recreate(self):
    """The remote policy wins when both sides were recreated.

    The local container is created and recreated before the respective
    remote steps.
    """
    scenarios = self._replication_scenarios(remote_wins=True)
    for ts, policy, remote_policy, broker, remote_broker in scenarios:
        # create the older "local" container, then the "remote" one
        broker.initialize(next(ts), policy.idx)
        remote_broker.initialize(next(ts), remote_policy.idx)
        # the "local" container is deleted and recreated first
        broker.delete_db(next(ts))
        local_recreated_at = next(ts)
        broker.update_put_timestamp(local_recreated_at)
        broker.update_status_changed_at(local_recreated_at)
        # then the "remote" container is deleted and recreated
        remote_broker.delete_db(next(ts))
        remote_recreated_at = next(ts)
        remote_broker.update_put_timestamp(remote_recreated_at)
        remote_broker.update_status_changed_at(remote_recreated_at)
def test_sync_to_remote_with_misplaced(self):
    """A remote row becomes misplaced once the remote adopts our policy.

    The remote container is created later and with a different policy
    than the local one, and holds one object row stored under its own
    policy. After one replication pass from the local node, the remote
    and the third node must carry the local policy index with an object
    count of 0, and the remote must list its row as misplaced.
    """
    ts = (Timestamp(t).internal for t in
          itertools.count(int(time.time())))
    # create "local" broker
    policy = random.choice(list(POLICIES))
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(next(ts), policy.idx)
    # create "remote" broker with some other policy
    other_policies = [p for p in POLICIES if p is not policy]
    remote_policy = random.choice(other_policies)
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(next(ts), remote_policy.idx)
    # add a row to remote_broker that will become misplaced
    remote_broker.put_object(
        '/a/c/o', next(ts), 0, 'content-type', 'etag',
        storage_policy_index=remote_broker.storage_policy_index)
    # since this row matches the policy index of the remote, it shows up
    # in the count and is not yet misplaced
    self.assertEqual(remote_broker.get_info()['object_count'], 1)
    self.assertEqual([], remote_broker.get_misplaced_since(-1, 1))

    # replicate
    part, node = self._get_broker_part_node(broker)
    daemon = self._run_once(node)
    # since our local broker has no rows to push it logs as no_change
    self.assertEqual(1, daemon.stats['no_change'])
    self.assertEqual(0, broker.get_info()['object_count'])

    # remote broker updates its policy index; this makes the remote
    # broker's object count change
    expectations = {
        'object_count': 0,
        'storage_policy_index': policy.idx,
    }
    remote_info = remote_broker.get_info()
    for key, value in expectations.items():
        self.assertEqual(remote_info[key], value)
    # but it also knows those objects are misplaced now
    self.assertEqual(
        len(remote_broker.get_misplaced_since(-1, 100)), 1)

    # we also pushed out to node 3 with rsync
    self.assertEqual(1, daemon.stats['rsync'])
    third_info = self._get_broker('a', 'c', node_index=2).get_info()
    for key, value in expectations.items():
        self.assertEqual(third_info[key], value)
def test_misplaced_rows_replicate_and_enqueue(self):
    # Rows stored in the local db under a policy other than the db's own
    # are expected to be replicated to the remote as misplaced rows and to
    # be enqueued in a reconciler container.

    # force all timestamps to fall in same hour
    ts = (Timestamp(t) for t in
          itertools.count(int(time.time()) // 3600 * 3600))
    policy = random.choice(list(POLICIES))
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(next(ts).internal, policy.idx)
    # the remote db is initialized under a different policy
    remote_policy = random.choice([p for p in POLICIES if p is not
                                   policy])
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(next(ts).internal, remote_policy.idx)

    # add a misplaced row to *local* broker
    obj_put_timestamp = next(ts).internal
    broker.put_object(
        'o', obj_put_timestamp, 0, 'content-type',
        'etag', storage_policy_index=remote_policy.idx)
    misplaced = broker.get_misplaced_since(-1, 10)
    self.assertEqual(len(misplaced), 1)
    # since this row is misplaced it doesn't show up in count
    self.assertEqual(broker.get_info()['object_count'], 0)

    # add another misplaced row to *local* broker with composite timestamp
    ts_data = next(ts)
    ts_ctype = next(ts)
    ts_meta = next(ts)
    broker.put_object(
        'o2', ts_data.internal, 0, 'content-type',
        'etag', storage_policy_index=remote_policy.idx,
        ctype_timestamp=ts_ctype.internal, meta_timestamp=ts_meta.internal)
    misplaced = broker.get_misplaced_since(-1, 10)
    self.assertEqual(len(misplaced), 2)
    # since this row is misplaced it doesn't show up in count
    self.assertEqual(broker.get_info()['object_count'], 0)

    # replicate
    part, node = self._get_broker_part_node(broker)
    daemon = self._run_once(node)
    # push to remote, and third node was missing (also maybe reconciler)
    # NOTE(review): for an integer counter "2 < x <= 3" only admits 3,
    # although the comment above reads as if 2 were acceptable -- confirm
    # which is intended.
    self.assertTrue(2 < daemon.stats['rsync'] <= 3, daemon.stats['rsync'])

    # grab the rsynced instance of remote_broker
    remote_broker = self._get_broker('a', 'c', node_index=1)

    # remote has misplaced rows too now
    misplaced = remote_broker.get_misplaced_since(-1, 10)
    self.assertEqual(len(misplaced), 2)

    # and the correct policy_index and object_count
    info = remote_broker.get_info()
    expectations = {
        'object_count': 0,
        'storage_policy_index': policy.idx,
    }
    for key, value in expectations.items():
        self.assertEqual(info[key], value)

    # and we should have also enqueued these rows in a single reconciler,
    # since we forced the object timestamps to be in the same hour.
    reconciler = daemon.get_reconciler_broker(misplaced[0]['created_at'])
    # but it may not be on the same node as us anymore though...
    reconciler = self._get_broker(reconciler.account,
                                  reconciler.container, node_index=0)
    self.assertEqual(reconciler.get_info()['object_count'], 2)
    objects = reconciler.list_objects_iter(
        10, '', None, None, None, None, storage_policy_index=0)
    self.assertEqual(len(objects), 2)
    # the first listing entry is named "<policy index>:/a/c/o" and carries
    # the plain put timestamp in both timestamp fields
    expected = ('%s:/a/c/o' % remote_policy.idx, obj_put_timestamp, 0,
                'application/x-put', obj_put_timestamp)
    self.assertEqual(objects[0], expected)
    # the second object's listing has ts_meta as its last modified time
    # but its full composite timestamp is in the hash field.
    expected = ('%s:/a/c/o2' % remote_policy.idx, ts_meta.internal, 0,
                'application/x-put',
                encode_timestamps(ts_data, ts_ctype, ts_meta))
    self.assertEqual(objects[1], expected)

    # having safely enqueued to the reconciler we can advance
    # our sync pointer
    self.assertEqual(broker.get_reconciler_sync(), 2)
def test_multiple_out_sync_reconciler_enqueue_normalize(self):
    # Two dbs with different policies each hold one row per policy under
    # the same two object names.  After replicating in both directions
    # each db should keep only the newest row per name, and exactly one
    # row should have been enqueued for the reconciler.
    ts = (Timestamp(t).internal for t in
          itertools.count(int(time.time())))
    policy = random.choice(list(POLICIES))
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(next(ts), policy.idx)
    remote_policy = random.choice([p for p in POLICIES if p is not
                                   policy])
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(next(ts), remote_policy.idx)

    # add some rows to brokers
    for db in (broker, remote_broker):
        for p in (policy, remote_policy):
            db.put_object('o-%s' % p.name, next(ts), 0, 'content-type',
                          'etag', storage_policy_index=p.idx)
        db._commit_puts()

    expected_policy_stats = {
        policy.idx: {'object_count': 1, 'bytes_used': 0},
        remote_policy.idx: {'object_count': 1, 'bytes_used': 0},
    }
    for db in (broker, remote_broker):
        policy_stats = db.get_policy_stats()
        self.assertEqual(policy_stats, expected_policy_stats)

    # each db has 2 rows, 4 total
    all_items = set()
    for db in (broker, remote_broker):
        items = db.get_items_since(-1, 4)
        all_items.update(
            (item['name'], item['created_at']) for item in items)
    self.assertEqual(4, len(all_items))

    # replicate both ways
    part, node = self._get_broker_part_node(broker)
    self._run_once(node)
    part, node = self._get_broker_part_node(remote_broker)
    self._run_once(node)

    # only the latest timestamps should survive
    most_recent_items = {}
    for name, timestamp in all_items:
        # The timestamps are strings, so the "nothing seen yet" sentinel
        # must be a string too: '' sorts before every timestamp, whereas
        # an int sentinel makes max() raise TypeError on Python 3.
        most_recent_items[name] = max(
            timestamp, most_recent_items.get(name, ''))
    self.assertEqual(2, len(most_recent_items))

    for db in (broker, remote_broker):
        items = db.get_items_since(-1, 4)
        self.assertEqual(len(items), len(most_recent_items))
        for item in items:
            self.assertEqual(most_recent_items[item['name']],
                             item['created_at'])

    # and the reconciler also collapses updates
    reconciler_containers = set()
    for item in all_items:
        _name, timestamp = item
        reconciler_containers.add(
            get_reconciler_container_name(timestamp))

    # gather the reconciler rows from every node of every candidate
    # reconciler container
    reconciler_items = set()
    for reconciler_container in reconciler_containers:
        for node_index in range(3):
            reconciler = self._get_broker(MISPLACED_OBJECTS_ACCOUNT,
                                          reconciler_container,
                                          node_index=node_index)
            items = reconciler.get_items_since(-1, 4)
            reconciler_items.update(
                (item['name'], item['created_at']) for item in items)
    # they can't *both* be in the wrong policy ;)
    self.assertEqual(1, len(reconciler_items))
    for reconciler_name, timestamp in reconciler_items:
        # reconciler row names look like "<policy index>:/<a>/<c>/<o>"
        _policy_index, path = reconciler_name.split(':', 1)
        _account, _container, name = path.lstrip('/').split('/')
        self.assertEqual(most_recent_items[name], timestamp)
@contextmanager
def _wrap_update_reconciler_sync(self, broker, calls):
def wrapper_function(*args, **kwargs):
calls.append(args)
orig_function(*args, **kwargs)
orig_function = broker.update_reconciler_sync
broker.update_reconciler_sync = wrapper_function
try:
yield True
finally:
broker.update_reconciler_sync = orig_function
def test_post_replicate_hook(self):
    # The post-replicate hook should update the reconciler sync point the
    # first time it sees new rows and do nothing on an unchanged repeat.
    timestamps = (Timestamp(seconds).internal
                  for seconds in itertools.count(int(time.time())))
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(next(timestamps), 0)
    broker.put_object('foo', next(timestamps), 0, 'text/plain', 'xyz',
                      deleted=0, storage_policy_index=0)
    info = broker.get_replication_info()
    # sanity checks: one row exists and nothing has been reconciled yet
    self.assertEqual(1, info['max_row'])
    self.assertEqual(-1, broker.get_reconciler_sync())
    daemon = replicator.ContainerReplicator({})

    def sync_updates_during_hook():
        # run the hook once and report the recorded sync updates
        recorded = []
        with self._wrap_update_reconciler_sync(broker, recorded):
            daemon._post_replicate_hook(broker, info, [])
        return recorded

    self.assertEqual(1, len(sync_updates_during_hook()))
    # repeated call to _post_replicate_hook with no change to info
    # should not call update_reconciler_sync
    self.assertEqual(0, len(sync_updates_during_hook()))
def test_update_sync_store_exception(self):
    # An OSError raised by the sync store during the post-replicate hook
    # should be logged as a single error line.
    class BrokenSyncStore(object):
        def update_sync_store(self, broker):
            raise OSError(1, '1')

    timestamps = make_timestamp_iter()
    broker = self._get_broker('a', 'c', node_index=0)
    created = next(timestamps)
    broker.initialize(created.internal, POLICIES.default.idx)
    info = broker.get_replication_info()

    daemon = replicator.ContainerReplicator({}, logger=self.logger)
    daemon.sync_store = BrokenSyncStore()
    daemon._post_replicate_hook(broker, info, [])

    error_lines = self.logger.get_lines_for_level('error')
    self.assertEqual(1, len(error_lines))
    self.assertIn('Failed to update sync_store', error_lines[0])
def test_update_sync_store(self):
    # Walk one container through the sync metadata combinations and check
    # whether the post-replicate hook adds it to, or removes it from, the
    # sync store.
    klass = 'swift.container.sync_store.ContainerSyncStore'
    daemon = replicator.ContainerReplicator({})
    daemon.sync_store = sync_store.ContainerSyncStore(
        daemon.root, daemon.logger, daemon.mount_check)
    ts_iter = make_timestamp_iter()
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(next(ts_iter).internal, POLICIES.default.idx)
    info = broker.get_replication_info()

    def run_hook():
        # run the hook with both store operations patched out and hand
        # back the two mocks for inspection
        with mock.patch(klass + '.remove_synced_container') as removed, \
                mock.patch(klass + '.add_synced_container') as added:
            daemon._post_replicate_hook(broker, info, [])
        return added, removed

    def assert_removed_only():
        added, removed = run_hook()
        self.assertEqual(0, added.call_count)
        removed.assert_called_once_with(broker)

    # no sync metadata at all - store is left alone
    added, removed = run_hook()
    self.assertEqual(0, removed.call_count)
    self.assertEqual(0, added.call_count)

    # sync-to and sync-key empty - remove from store
    stamp = next(ts_iter).internal
    broker.update_metadata(
        {'X-Container-Sync-To': ('', stamp),
         'X-Container-Sync-Key': ('', stamp)})
    assert_removed_only()

    # sync-to is not empty sync-key is empty - remove from store
    stamp = next(ts_iter).internal
    broker.update_metadata(
        {'X-Container-Sync-To': ('a', stamp)})
    assert_removed_only()

    # sync-to is empty sync-key is not empty - remove from store
    stamp = next(ts_iter).internal
    broker.update_metadata(
        {'X-Container-Sync-To': ('', stamp),
         'X-Container-Sync-Key': ('secret', stamp)})
    assert_removed_only()

    # sync-to, sync-key both not empty - add to store
    stamp = next(ts_iter).internal
    broker.update_metadata(
        {'X-Container-Sync-To': ('a', stamp),
         'X-Container-Sync-Key': ('secret', stamp)})
    added, removed = run_hook()
    added.assert_called_once_with(broker)
    self.assertEqual(0, removed.call_count)

    # container is removed - need to remove from store
    stamp = next(ts_iter).internal
    broker.delete_db(stamp)
    broker.update_metadata(
        {'X-Container-Sync-To': ('a', stamp),
         'X-Container-Sync-Key': ('secret', stamp)})
    assert_removed_only()
def test_sync_triggers_sync_store_update(self):
    # Two containers are created:
    #   broker_1, which is never set up for sync
    #   broker_2, which is set up for sync and later unset
    # While replicating both we expect no sync store activity for
    # broker_1 and the matching add / remove activity for broker_2.
    klass = 'swift.container.sync_store.ContainerSyncStore'
    ts_iter = make_timestamp_iter()

    broker_1 = self._get_broker('a', 'c', node_index=0)
    broker_1.initialize(next(ts_iter).internal, POLICIES.default.idx)
    broker_2 = self._get_broker('b', 'd', node_index=0)
    broker_2.initialize(next(ts_iter).internal, POLICIES.default.idx)
    sync_to_stamp = next(ts_iter).internal
    sync_key_stamp = next(ts_iter).internal
    broker_2.update_metadata(
        {'X-Container-Sync-To': ('a', sync_to_stamp),
         'X-Container-Sync-Key': ('secret', sync_key_stamp)})

    def replicate_like(db):
        # run one pass on db's node with the store operations patched
        _part, node = self._get_broker_part_node(db)
        with mock.patch(klass + '.remove_synced_container') as removed, \
                mock.patch(klass + '.add_synced_container') as added:
            self._run_once(node)
        return added, removed

    # replicate once according to broker_1, relying on the fact that
    # FakeRing would place both in the same partition.
    added, removed = replicate_like(broker_1)
    self.assertEqual(1, added.call_count)
    self.assertEqual(broker_2.db_file, added.call_args[0][0].db_file)
    self.assertEqual(0, removed.call_count)

    # unset sync-to on broker_2
    broker_2.update_metadata(
        {'X-Container-Sync-To': ('', next(ts_iter).internal)})

    # replicate once more, this time according to broker_2, again
    # relying on FakeRing placing both in the same partition.
    added, removed = replicate_like(broker_2)
    self.assertEqual(0, added.call_count)
    self.assertEqual(1, removed.call_count)
    self.assertEqual(broker_2.db_file, removed.call_args[0][0].db_file)
def test_cleanup_post_replicate(self):
    # Drive a single db through several replication outcomes and sharding
    # states, checking after each call to cleanup_post_replicate whether
    # the db file still exists and which debug line was logged.
    broker = self._get_broker('a', 'c', node_index=0)
    put_timestamp = Timestamp.now()
    broker.initialize(put_timestamp.internal, POLICIES.default.idx)
    orig_info = broker.get_replication_info()
    daemon = replicator.ContainerReplicator({}, logger=self.logger)

    # db should not be here, replication ok, deleted
    res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
    self.assertTrue(res)
    self.assertFalse(os.path.exists(broker.db_file))
    self.assertEqual(['Successfully deleted db %s' % broker.db_file],
                     daemon.logger.get_lines_for_level('debug'))
    # reset captured log lines before the next scenario
    daemon.logger.clear()

    # failed replication, not deleted
    # (the db was deleted above, so it is initialized again first)
    broker.initialize(put_timestamp.internal, POLICIES.default.idx)
    orig_info = broker.get_replication_info()
    res = daemon.cleanup_post_replicate(broker, orig_info,
                                        [False, True, True])
    self.assertTrue(res)
    self.assertTrue(os.path.exists(broker.db_file))
    self.assertEqual(['Not deleting db %s (2/3 success)' % broker.db_file],
                     daemon.logger.get_lines_for_level('debug'))
    daemon.logger.clear()

    # db has shard ranges, not deleted
    broker.enable_sharding(Timestamp.now())
    broker.merge_shard_ranges(
        [ShardRange('.shards_a/c', Timestamp.now(), '', 'm')])
    self.assertTrue(broker.sharding_required())  # sanity check
    res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
    self.assertTrue(res)
    self.assertTrue(os.path.exists(broker.db_file))
    self.assertEqual(
        ['Not deleting db %s (requires sharding, state unsharded)' %
         broker.db_file],
        daemon.logger.get_lines_for_level('debug'))
    daemon.logger.clear()

    # db sharding, not deleted
    self._goto_sharding_state(broker, Timestamp.now())
    self.assertTrue(broker.sharding_required())  # sanity check
    orig_info = broker.get_replication_info()
    res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
    self.assertTrue(res)
    self.assertTrue(os.path.exists(broker.db_file))
    self.assertEqual(
        ['Not deleting db %s (requires sharding, state sharding)' %
         broker.db_file],
        daemon.logger.get_lines_for_level('debug'))
    daemon.logger.clear()

    # db sharded, should not be here, failed replication, not deleted
    self._goto_sharded_state(broker)
    self.assertFalse(broker.sharding_required())  # sanity check
    res = daemon.cleanup_post_replicate(broker, orig_info,
                                        [True, False, True])
    self.assertTrue(res)
    self.assertTrue(os.path.exists(broker.db_file))
    self.assertEqual(['Not deleting db %s (2/3 success)' %
                      broker.db_file],
                     daemon.logger.get_lines_for_level('debug'))
    daemon.logger.clear()

    # db sharded, should not be here, new shard ranges (e.g. from reverse
    # replication), deleted
    broker.merge_shard_ranges(
        [ShardRange('.shards_a/c', Timestamp.now(), '', 'm')])
    res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
    self.assertTrue(res)
    self.assertFalse(os.path.exists(broker.db_file))
    daemon.logger.clear()

    # db sharded, should not be here, replication ok, deleted
    # (re-create the db file first and check that it really exists)
    broker.initialize(put_timestamp.internal, POLICIES.default.idx)
    self.assertTrue(os.path.exists(broker.db_file))
    orig_info = broker.get_replication_info()
    res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3)
    self.assertTrue(res)
    self.assertFalse(os.path.exists(broker.db_file))
    self.assertEqual(['Successfully deleted db %s' % broker.db_file],
                     daemon.logger.get_lines_for_level('debug'))
    daemon.logger.clear()
def test_sync_shard_ranges(self):
    # Shard ranges should be synced between two existing dbs on every
    # replication attempt, both before and after sharding is enabled, and
    # in either replication direction.
    put_timestamp = Timestamp.now().internal
    # create "local" broker
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(put_timestamp, POLICIES.default.idx)
    # create "remote" broker
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(put_timestamp, POLICIES.default.idx)

    def check_replicate(expected_shard_ranges, from_broker, to_broker):
        # Replicate from_broker to to_broker's node with a fresh daemon,
        # then check the shard ranges held by to_broker and that the two
        # dbs' info agree on everything but 'id'.  Returns the daemon so
        # that callers can inspect its stats and logger.
        daemon = replicator.ContainerReplicator({}, logger=FakeLogger())
        part, node = self._get_broker_part_node(to_broker)
        # the replication info must describe the db that is actually
        # being replicated, which is not always the local broker
        info = from_broker.get_replication_info()
        success = daemon._repl_to_node(node, from_broker, part, info)
        self.assertTrue(success)
        self.assertEqual(
            expected_shard_ranges,
            to_broker.get_all_shard_range_data()
        )
        local_info = self._get_broker(
            'a', 'c', node_index=0).get_info()
        remote_info = self._get_broker(
            'a', 'c', node_index=1).get_info()
        for k, v in local_info.items():
            if k == 'id':
                continue
            self.assertEqual(remote_info[k], v,
                             "mismatch remote %s %r != %r" % (
                                 k, remote_info[k], v))
        return daemon

    bounds = (('', 'g'), ('g', 'r'), ('r', ''))
    shard_ranges = [
        ShardRange('.shards_a/sr-%s' % upper, Timestamp.now(), lower,
                   upper, i + 1, 10 * (i + 1))
        for i, (lower, upper) in enumerate(bounds)
    ]
    # add first two shard_ranges to both brokers
    for shard_range in shard_ranges[:2]:
        for db in (broker, remote_broker):
            db.merge_shard_ranges(shard_range)
    # now add a shard range and an object to the "local" broker only
    broker.merge_shard_ranges(shard_ranges[2])
    broker_ranges = broker.get_all_shard_range_data()
    self.assertShardRangesEqual(shard_ranges, broker_ranges)
    broker.put_object('obj', Timestamp.now().internal, 0, 'text/plain',
                      EMPTY_ETAG)
    # sharding not yet enabled so replication not deferred
    daemon = check_replicate(broker_ranges, broker, remote_broker)
    self.assertEqual(0, daemon.stats['deferred'])
    self.assertEqual(0, daemon.stats['no_change'])
    self.assertEqual(0, daemon.stats['rsync'])
    self.assertEqual(1, daemon.stats['diff'])
    self.assertEqual({'diffs': 1},
                     daemon.logger.get_increment_counts())

    # update one shard range
    shard_ranges[1].update_meta(50, 50)
    # sharding not yet enabled so replication not deferred, but the two
    # brokers' object tables are in sync so no rsync or usync either
    daemon = check_replicate(broker_ranges, broker, remote_broker)
    self.assertEqual(0, daemon.stats['deferred'])
    self.assertEqual(1, daemon.stats['no_change'])
    self.assertEqual(0, daemon.stats['rsync'])
    self.assertEqual(0, daemon.stats['diff'])
    self.assertEqual({'no_changes': 1},
                     daemon.logger.get_increment_counts())

    # now enable local broker for sharding
    own_sr = broker.enable_sharding(Timestamp.now())
    # update one shard range
    shard_ranges[1].update_meta(13, 123)
    broker.merge_shard_ranges(shard_ranges[1])
    broker_ranges = broker.get_all_shard_range_data()
    self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges)

    def check_stats(daemon):
        # from here on every replication is expected to be deferred
        self.assertEqual(1, daemon.stats['deferred'])
        self.assertEqual(0, daemon.stats['no_change'])
        self.assertEqual(0, daemon.stats['rsync'])
        self.assertEqual(0, daemon.stats['diff'])
        self.assertFalse(daemon.logger.get_increments())

    daemon = check_replicate(broker_ranges, broker, remote_broker)
    check_stats(daemon)

    # update one shard range
    shard_ranges[1].update_meta(99, 0)
    broker.merge_shard_ranges(shard_ranges[1])
    # sanity check
    broker_ranges = broker.get_all_shard_range_data()
    self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges)
    daemon = check_replicate(broker_ranges, broker, remote_broker)
    check_stats(daemon)

    # delete one shard range
    shard_ranges[0].deleted = 1
    shard_ranges[0].timestamp = Timestamp.now()
    broker.merge_shard_ranges(shard_ranges[0])
    # sanity check
    broker_ranges = broker.get_all_shard_range_data()
    self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges)
    daemon = check_replicate(broker_ranges, broker, remote_broker)
    check_stats(daemon)

    # put a shard range again
    shard_ranges[2].timestamp = Timestamp.now()
    shard_ranges[2].object_count = 0
    broker.merge_shard_ranges(shard_ranges[2])
    # sanity check
    broker_ranges = broker.get_all_shard_range_data()
    self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges)
    daemon = check_replicate(broker_ranges, broker, remote_broker)
    check_stats(daemon)

    # update same shard range on local and remote, remote later
    shard_ranges[-1].meta_timestamp = Timestamp.now()
    shard_ranges[-1].bytes_used += 1000
    broker.merge_shard_ranges(shard_ranges[-1])
    remote_shard_ranges = remote_broker.get_shard_ranges(
        include_deleted=True)
    remote_shard_ranges[-1].meta_timestamp = Timestamp.now()
    remote_shard_ranges[-1].bytes_used += 2000
    remote_broker.merge_shard_ranges(remote_shard_ranges[-1])
    # sanity check
    remote_broker_ranges = remote_broker.get_all_shard_range_data()
    self.assertShardRangesEqual(remote_shard_ranges + [own_sr],
                                remote_broker_ranges)
    self.assertShardRangesNotEqual(shard_ranges, remote_shard_ranges)
    daemon = check_replicate(remote_broker_ranges, broker, remote_broker)
    check_stats(daemon)

    # undelete shard range *on the remote*
    deleted_ranges = [sr for sr in remote_shard_ranges if sr.deleted]
    self.assertEqual([shard_ranges[0]], deleted_ranges)
    deleted_ranges[0].deleted = 0
    deleted_ranges[0].timestamp = Timestamp.now()
    remote_broker.merge_shard_ranges(deleted_ranges[0])
    # sanity check
    remote_broker_ranges = remote_broker.get_all_shard_range_data()
    self.assertShardRangesEqual(remote_shard_ranges + [own_sr],
                                remote_broker_ranges)
    self.assertShardRangesNotEqual(shard_ranges, remote_shard_ranges)
    daemon = check_replicate(remote_broker_ranges, broker, remote_broker)
    check_stats(daemon)

    # reverse replication direction and expect syncs to propagate
    daemon = check_replicate(remote_broker_ranges, remote_broker, broker)
    check_stats(daemon)
def test_sync_shard_ranges_error(self):
    # A failing merge_shard_ranges request must make the whole
    # replication attempt count as unsuccessful.
    created = Timestamp.now().internal
    default_policy = POLICIES.default.idx
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(created, default_policy)
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(created, default_policy)
    # the local broker gets one object...
    broker.put_object('obj', Timestamp.now().internal, 0, 'text/plain',
                      EMPTY_ETAG)
    # ...and an own shard range, which alone does not initiate sharding
    broker.enable_sharding(Timestamp.now())
    self.assertFalse(broker.sharding_initiated())

    # record every replication rpc and answer merge_shard_ranges with a
    # server error
    recorder = mock.MagicMock()
    db_replicator.ReplConnection = attach_fake_replication_rpc(
        self.rpc, errors={'merge_shard_ranges': [HTTPServerError()]},
        replicate_hook=recorder)

    daemon = replicator.ContainerReplicator({})
    daemon.logger = FakeLogger()
    part, node = self._get_broker_part_node(remote_broker)
    info = broker.get_replication_info()
    self.assertFalse(daemon._repl_to_node(node, broker, part, info))

    # broker only has its own shard range so expect objects to be sync'd
    observed_ops = [rpc[0][0] for rpc in recorder.call_args_list]
    self.assertEqual(
        ['sync', 'get_shard_ranges', 'merge_shard_ranges', 'merge_items',
         'merge_syncs'],
        observed_ops)
    # exactly one error line, reporting the 500 response
    error_lines = daemon.logger.get_lines_for_level('error')
    self.assertIn('Bad response 500', error_lines[0])
    self.assertFalse(error_lines[1:])
    self.assertEqual(1, daemon.stats['diff'])
    self.assertEqual(1, daemon.logger.get_increment_counts()['diffs'])
def test_sync_shard_ranges_none_to_sync(self):
    # When the local db holds no shard ranges, no merge_shard_ranges
    # request should be sent to the remote.
    created = Timestamp.now().internal
    default_policy = POLICIES.default.idx
    broker = self._get_broker('a', 'c', node_index=0)
    broker.initialize(created, default_policy)
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_broker.initialize(created, default_policy)
    # the local broker gets one object to push
    broker.put_object('obj', Timestamp.now().internal, 0, 'text/plain',
                      EMPTY_ETAG)

    # record every replication rpc that is issued
    recorder = mock.MagicMock()
    db_replicator.ReplConnection = attach_fake_replication_rpc(
        self.rpc, replicate_hook=recorder)

    daemon = replicator.ContainerReplicator({})
    part, node = self._get_broker_part_node(remote_broker)
    info = broker.get_replication_info()
    self.assertTrue(daemon._repl_to_node(node, broker, part, info))

    observed_ops = [rpc[0][0] for rpc in recorder.call_args_list]
    self.assertEqual(
        ['sync', 'get_shard_ranges', 'merge_items', 'merge_syncs'],
        observed_ops)
def test_sync_shard_ranges_with_rsync(self):
    # Only the node 0 db exists to begin with.  The first replication
    # pass is expected to rsync it to the other two nodes; a second pass
    # is expected to be deferred yet still sync the changed shard ranges.
    broker = self._get_broker('a', 'c', node_index=0)
    put_timestamp = time.time()
    broker.initialize(put_timestamp, POLICIES.default.idx)

    bounds = (('', 'g'), ('g', 'r'), ('r', ''))
    shard_ranges = [
        ShardRange('.shards_a/sr-%s' % upper, Timestamp.now(), lower,
                   upper, i + 1, 10 * (i + 1))
        for i, (lower, upper) in enumerate(bounds)
    ]
    # add first shard range
    own_sr = broker.enable_sharding(Timestamp.now())
    broker.merge_shard_ranges(shard_ranges[:1])

    # "replicate"
    part, node = self._get_broker_part_node(broker)
    daemon = self._run_once(node)
    self.assertEqual(2, daemon.stats['rsync'])

    # complete rsync to all other nodes
    def check_replicate(expected_ranges):
        # Check that the dbs on nodes 1 and 2 exist, hold the expected
        # shard ranges and agree with the node 0 db's info.
        for i in range(1, 3):
            remote_broker = self._get_broker('a', 'c', node_index=i)
            self.assertTrue(os.path.exists(remote_broker.db_file))
            self.assertShardRangesEqual(
                expected_ranges,
                remote_broker.get_shard_ranges(include_deleted=True,
                                               include_own=True)
            )
            remote_info = remote_broker.get_info()
            local_info = self._get_broker(
                'a', 'c', node_index=0).get_info()
            for k, v in local_info.items():
                # 'id' is skipped; 'hash' and 'object_count' are checked
                # against fixed values rather than the local db's values
                if k == 'id':
                    continue
                if k == 'hash':
                    self.assertEqual(remote_info[k], '0' * 32)
                    continue
                if k == 'object_count':
                    self.assertEqual(remote_info[k], 0)
                    continue
                self.assertEqual(remote_info[k], v,
                                 "mismatch remote %s %r != %r" % (
                                     k, remote_info[k], v))

    check_replicate([shard_ranges[0], own_sr])

    # delete and add some more shard ranges
    shard_ranges[0].deleted = 1
    shard_ranges[0].timestamp = Timestamp.now()
    for shard_range in shard_ranges:
        broker.merge_shard_ranges(shard_range)
    daemon = self._run_once(node)
    # both remote replications are deferred this time, but the shard
    # ranges are still expected to reach the other nodes
    self.assertEqual(2, daemon.stats['deferred'])
    check_replicate(shard_ranges + [own_sr])
def check_replicate(self, from_broker, remote_node_index, repl_conf=None,
                    expect_success=True, errors=None):
    """
    Replicate ``from_broker`` to a single remote node and check the
    outcome.

    Replication rpcs and rsync file transfers are captured by fakes.

    :param from_broker: broker of the db to replicate
    :param remote_node_index: ring index of the node to replicate to
    :param repl_conf: optional replicator conf dict
    :param expect_success: expected result of the replication attempt
    :param errors: passed through to attach_fake_replication_rpc
    :returns: a tuple of (daemon, repl_calls, rsync_calls); each repl_calls
              item is an (op, args) tuple
    """
    if not repl_conf:
        repl_conf = {}
    repl_calls = []
    rsync_calls = []

    def record_rpc(op, *sync_args):
        repl_calls.append((op, sync_args))

    db_replicator.ReplConnection = attach_fake_replication_rpc(
        self.rpc, replicate_hook=record_rpc, errors=errors)
    daemon = replicator.ContainerReplicator(
        repl_conf, logger=debug_logger())
    self._install_fake_rsync_file(daemon, rsync_calls)
    part, nodes = self._ring.get_nodes(from_broker.account,
                                       from_broker.container)

    # pick the first ring node that has the requested index
    candidates = [candidate for candidate in nodes
                  if candidate['index'] == remote_node_index]
    if not candidates:
        self.fail('Failed to find node index %s' % remote_node_index)
    remote_node = candidates[0]

    info = from_broker.get_replication_info()
    outcome = daemon._repl_to_node(remote_node, from_broker, part, info)
    self.assertEqual(expect_success, outcome)
    return daemon, repl_calls, rsync_calls
def assert_synced_shard_ranges(self, expected, synced_items):
    """
    Assert that the synced item dicts match the expected shard ranges.

    Note that both arguments are modified in place: ``expected`` is sorted
    by (lower, upper) and any 'record_type' key is removed from each of the
    ``synced_items``.

    :param expected: list of shard ranges, each convertible with dict()
    :param synced_items: list of dicts that were sent to the remote
    """
    def by_bounds(shard_range):
        return (shard_range.lower, shard_range.upper)

    expected.sort(key=by_bounds)
    for synced in synced_items:
        if 'record_type' in synced:
            del synced['record_type']
    expected_dicts = [dict(shard_range) for shard_range in expected]
    self.assertEqual(expected_dicts, synced_items)
def assert_info_synced(self, local, remote_node_index, mismatches=None):
    """
    Assert that the info of the db on a remote node matches the local db's.

    Every key of the local info must have an equal value in the remote
    info, except for 'id' and the keys named in ``mismatches``, which must
    differ.  All violations are collected and reported in one failure.

    :param local: the local broker
    :param remote_node_index: node index of the remote db to compare with
    :param mismatches: optional list of info keys that are expected to
                       differ; the list is not modified
    """
    # work on a copy so that the caller's list is left untouched
    expected_mismatches = list(mismatches or [])
    expected_mismatches.append('id')
    remote = self._get_broker(local.account, local.container,
                              node_index=remote_node_index)
    local_info = local.get_info()
    remote_info = remote.get_info()
    errors = []
    for k, v in local_info.items():
        # .get() so that a key missing from the remote info is reported
        # like any other difference instead of raising KeyError
        remote_value = remote_info.get(k)
        if remote_value == v:
            if k in expected_mismatches:
                errors.append(
                    "unexpected match remote %s %r == %r" % (
                        k, remote_value, v))
        elif k not in expected_mismatches:
            errors.append(
                "unexpected mismatch remote %s %r != %r" % (
                    k, remote_value, v))
    if errors:
        self.fail('Found sync errors:\n' + '\n'.join(errors))
def assert_shard_ranges_synced(self, local_broker, remote_broker):
    """
    Assert that two brokers hold equal shard ranges.

    Deleted shard ranges and each broker's own shard range are included in
    the comparison.
    """
    local_ranges = local_broker.get_shard_ranges(
        include_deleted=True, include_own=True)
    remote_ranges = remote_broker.get_shard_ranges(
        include_deleted=True, include_own=True)
    self.assertShardRangesEqual(local_ranges, remote_ranges)
def _setup_replication_test(self, node_index):
    """
    Create an initialized db on the given node plus test fixtures for it.

    The object and shard range fixtures are only constructed here; they
    are not merged into the db.

    :param node_index: index of the node on which the db is created
    :returns: a dict with keys 'broker', 'objects' (20 object row dicts)
              and 'shard_ranges' (4 ShardRange instances)
    """
    timestamps = make_timestamp_iter()
    default_policy = POLICIES.default.idx
    created = Timestamp.now().internal
    # create "local" broker
    broker = self._get_broker('a', 'c', node_index=node_index)
    broker.initialize(created, default_policy)

    objects = []
    for number in range(20):
        objects.append({
            'name': 'blah%03d' % number,
            'created_at': next(timestamps).internal,
            'size': number,
            'content_type': 'text/plain',
            'etag': 'etag%s' % number,
            'deleted': 0,
            'storage_policy_index': default_policy,
        })

    # contiguous ranges that together span the whole namespace
    shard_ranges = []
    for lower, upper in (('', 'a'), ('a', 'b'), ('b', 'c'), ('c', '')):
        shard_ranges.append(ShardRange(
            '.sharded_a/sr-%s' % upper, Timestamp.now(), lower, upper))

    return {'broker': broker,
            'objects': objects,
            'shard_ranges': shard_ranges}
def _merge_object(self, broker, objects, index, **kwargs):
if not isinstance(index, slice):
index = slice(index, index + 1)
objs = [dict(obj) for obj in objects[index]]
broker.merge_items(objs)
def _merge_shard_range(self, broker, shard_ranges, index, **kwargs):
broker.merge_shard_ranges(shard_ranges[index:index + 1])
def _goto_sharding_state(self, broker, epoch):
    """
    Enable sharding on the broker with the given epoch and move its db to
    the sharding state, asserting that the transition took effect.
    """
    broker.enable_sharding(epoch)
    transitioned = broker.set_sharding_state()
    self.assertTrue(transitioned)
    db_state = broker.get_db_state()
    self.assertEqual(backend.SHARDING, db_state)
def _goto_sharded_state(self, broker):
    """
    Move the broker's db to the sharded state, asserting that the
    transition took effect.
    """
    transitioned = broker.set_sharded_state()
    self.assertTrue(transitioned)
    db_state = broker.get_db_state()
    self.assertEqual(backend.SHARDED, db_state)
def _assert_local_sharded_in_sync(self, local_broker, local_id):
daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1)
self.assertEqual(['sync', 'get_shard_ranges', 'merge_shard_ranges'],
[call[0] for call in repl_calls])
self.assertEqual(1, daemon.stats['deferred'])
self.assertEqual(0, daemon.stats['rsync'])
self.assertEqual(0, daemon.stats['diff'])
self.assertFalse(rsync_calls)
# new db sync
self.assertEqual(local_id, repl_calls[0][1][2])
# ...but we still get a merge_shard_ranges for shard ranges
self.assert_synced_shard_ranges(
local_broker.get_shard_ranges(include_own=True),
repl_calls[2][1][0])
self.assertEqual(local_id, repl_calls[2][1][1])
def _check_only_shard_ranges_replicated(self, local_broker,
remote_node_index,
repl_conf,
expected_shard_ranges,
expect_success=True):
# expected_shard_ranges is expected final list of sync'd ranges
daemon, repl_calls, rsync_calls = self.check_replicate(
local_broker, remote_node_index, repl_conf,
expect_success=expect_success)
# we always expect only shard ranges to end in abort
self.assertEqual(1, daemon.stats['deferred'])
self.assertEqual(0, daemon.stats['diff'])
self.assertEqual(0, daemon.stats['rsync'])
self.assertEqual(['sync', 'get_shard_ranges', 'merge_shard_ranges'],
[call[0] for call in repl_calls])
self.assertFalse(rsync_calls)
# sync
local_id = local_broker.get_info()['id']
self.assertEqual(local_id, repl_calls[0][1][2])
# get_shard_ranges
self.assertEqual((), repl_calls[1][1])
# merge_shard_ranges for sending local shard ranges
self.assertShardRangesEqual(expected_shard_ranges, repl_calls[2][1][0])
self.assertEqual(local_id, repl_calls[2][1][1])
remote_broker = self._get_broker(
local_broker.account, local_broker.container, node_index=1)
self.assertNotEqual(local_id, remote_broker.get_info()['id'])
self.assert_shard_ranges_synced(remote_broker, local_broker)
def test_replication_local_unsharded_remote_missing(self):
    # No db exists on the remote node, so the local unsharded db is
    # expected to be rsync'd across as a whole.
    context = self._setup_replication_test(0)
    local_broker = context['broker']
    objs = context['objects']
    local_id = local_broker.get_info()['id']
    self._merge_object(index=0, **context)

    daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1)

    self.assert_info_synced(local_broker, 1)
    self.assertEqual(1, daemon.stats['rsync'])
    ops = [op_and_args[0] for op_and_args in repl_calls]
    self.assertEqual(['sync', 'complete_rsync'], ops)
    # complete_rsync names the local db id and the db file name
    complete_rsync_args = repl_calls[1][1]
    self.assertEqual(local_id, complete_rsync_args[0])
    self.assertEqual(os.path.basename(local_broker.db_file),
                     complete_rsync_args[1])
    # exactly one file was rsync'd: the local db, to a file named after
    # the local db id
    first_rsync = rsync_calls[0]
    self.assertEqual(local_broker.db_file, first_rsync[0])
    self.assertEqual(local_id, os.path.basename(first_rsync[1]))
    self.assertFalse(rsync_calls[1:])

    remote_broker = self._get_broker('a', 'c', node_index=1)
    self.assert_shard_ranges_synced(local_broker, remote_broker)
    self.assertTrue(os.path.exists(remote_broker._db_file))
    self.assertNotEqual(local_id, remote_broker.get_info()['id'])
    self.assertEqual(objs[:1], remote_broker.get_objects())
def _check_replication_local_unsharded_remote_sharded(self, repl_conf):
    # local db: unsharded, with objects 0-5 merged
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    local_id = local_broker.get_info()['id']
    self._merge_object(index=slice(0, 6), **local_context)

    # remote db: one object merged before sharding starts; an ACTIVE shard
    # range and one more object merged while sharding; then moved on to
    # the sharded state
    remote_context = self._setup_replication_test(1)
    self._merge_object(index=4, **remote_context)
    remote_broker = remote_context['broker']
    sharding_epoch = Timestamp.now()
    self._goto_sharding_state(remote_broker, epoch=sharding_epoch)
    remote_range = remote_context['shard_ranges'][0]
    remote_range.object_count = 101
    remote_range.bytes_used = 1010
    remote_range.state = ShardRange.ACTIVE
    self._merge_shard_range(index=0, **remote_context)
    self._merge_object(index=5, **remote_context)
    self._goto_sharded_state(remote_broker)
    self.assertEqual(backend.SHARDED, remote_broker.get_db_state())

    expected_ranges = remote_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges)

    # re-open the remote db: it is still sharded, has no old db file, has
    # kept its own id and only holds the object merged while sharding
    remote_broker = self._get_broker(
        local_broker.account, local_broker.container, node_index=1)
    self.assertEqual(backend.SHARDED, remote_broker.get_db_state())
    self.assertFalse(os.path.exists(remote_broker._db_file))
    remote_id = remote_broker.get_info()['id']
    self.assertNotEqual(local_id, remote_id)
    self.assertEqual(remote_context['objects'][5:6],
                     remote_broker.get_objects())

    # Now that we have shard ranges, we're never considered in-sync :-/
    expected_ranges = remote_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges)
def test_replication_local_unsharded_remote_sharded(self):
    # run the scenario with an empty replicator conf
    conf = {}
    self._check_replication_local_unsharded_remote_sharded(conf)
def test_replication_local_unsharded_remote_sharded_large_diff(self):
    # run the same scenario with per_diff set to 1
    conf = {'per_diff': 1}
    self._check_replication_local_unsharded_remote_sharded(conf)
def _check_replication_local_sharding_remote_missing(self, repl_conf):
    # local db: two objects merged before sharding starts, then a shard
    # range and six more objects merged while in the sharding state
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    self._merge_object(index=0, **local_context)
    self._merge_object(index=1, **local_context)
    epoch = Timestamp.now()
    self._goto_sharding_state(local_broker, epoch)
    self._merge_shard_range(index=0, **local_context)
    self._merge_object(index=slice(2, 8), **local_context)
    local_objects = local_context['objects']

    daemon, repl_calls, rsync_calls = self.check_replicate(
        local_broker, 1, repl_conf=repl_conf)
    rpc_ops = [rpc_call[0] for rpc_call in repl_calls]
    self.assertEqual(['sync', 'complete_rsync'], rpc_ops)
    stats = daemon.stats
    self.assertEqual(1, stats['rsync'])
    self.assertEqual(0, stats['deferred'])
    self.assertEqual(0, stats['diff'])

    # fresh db is sync'd first...
    fresh_id = local_broker.get_info()['id']
    sync_args = repl_calls[0][1]
    complete_rsync_args = repl_calls[1][1]
    self.assertEqual(fresh_id, sync_args[2])
    self.assertEqual(fresh_id, complete_rsync_args[0])

    # retired db is not sync'd at all: its id must not appear in the args
    # of any rpc call
    retired_broker = self.backend(
        local_broker._db_file, account=local_broker.account,
        container=local_broker.container, force_db_file=True)
    old_id = retired_broker.get_info()['id']
    bad_calls = [
        'old db id %r in %r call args %r' % (
            old_id, rpc_call[0], rpc_call[1])
        for rpc_call in repl_calls
        if old_id in rpc_call[1]]
    if bad_calls:
        self.fail('Found some bad calls:\n' + '\n'.join(bad_calls))

    # complete_rsync
    local_db_name = os.path.basename(local_broker.db_file)
    self.assertEqual(local_db_name, complete_rsync_args[1])
    first_rsync = rsync_calls[0]
    self.assertEqual(local_broker.db_file, first_rsync[0])
    self.assertEqual(fresh_id, os.path.basename(first_rsync[1]))
    self.assertFalse(rsync_calls[1:])

    # TODO: make these stats better; in sharding state local broker pulls
    # stats for 2 objects from old db, whereas remote thinks it's sharded
    # and has an empty shard range table
    self.assert_info_synced(local_broker, 1, mismatches=[
        'object_count', 'bytes_used', 'db_state'])

    # the remote has a single db file, named like the local fresh db and
    # with the same epoch, but with an id of its own
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_id = remote_broker.get_info()['id']
    self.assertNotEqual(old_id, remote_id)
    self.assertNotEqual(fresh_id, remote_id)
    remote_db_files = get_db_files(remote_broker.db_file)
    self.assertEqual([remote_broker.db_file], remote_db_files)
    self.assertEqual(os.path.basename(remote_broker.db_file),
                     os.path.basename(local_broker.db_file))
    self.assertEqual(epoch, remote_broker.db_epoch)
    # remote db has only the misplaced objects
    self.assertEqual(local_objects[2:8], remote_broker.get_objects())
    self.assert_shard_ranges_synced(local_broker, remote_broker)

    # replicate again, check asserts abort
    expected_ranges = local_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges)

    # sanity: the remote db is unchanged by the second replication
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_db_files = get_db_files(remote_broker.db_file)
    self.assertEqual([remote_broker.db_file], remote_db_files)
    self.assertEqual(os.path.basename(remote_broker.db_file),
                     os.path.basename(local_broker.db_file))
    self.assertEqual(local_objects[2:8], remote_broker.get_objects())
    self.assertEqual(epoch, remote_broker.db_epoch)
def test_replication_local_sharding_remote_missing(self):
    # run the scenario with an empty replicator conf
    conf = {}
    self._check_replication_local_sharding_remote_missing(conf)
def test_replication_local_sharding_remote_missing_large_diff(self):
    # with per_diff set to 1 the local shard db has a large diff with
    # respect to the old db
    conf = {'per_diff': 1}
    self._check_replication_local_sharding_remote_missing(conf)
def _check_replication_local_sharding_remote_unsharded(self, repl_conf):
    # local db: three objects merged before sharding starts, then a shard
    # range and eight more objects merged while in the sharding state
    local_context = self._setup_replication_test(0)
    self._merge_object(index=slice(0, 3), **local_context)
    local_broker = local_context['broker']
    epoch = Timestamp.now()
    self._goto_sharding_state(local_broker, epoch)
    self._merge_shard_range(index=0, **local_context)
    self._merge_object(index=slice(3, 11), **local_context)

    # remote db: a single object and no state change
    remote_context = self._setup_replication_test(1)
    self._merge_object(index=11, **remote_context)

    expected_ranges = local_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges)

    # the remote still has just its original db file and its own object
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_db_files = get_db_files(remote_broker.db_file)
    self.assertEqual([remote_broker._db_file], remote_db_files)
    self.assertEqual(remote_context['objects'][11:12],
                     remote_broker.get_objects())

    expected_mismatches = ['db_state', 'object_count', 'bytes_used',
                           'status_changed_at', 'hash']
    self.assert_info_synced(
        local_broker, 1, mismatches=expected_mismatches)

    # replicating again gives the same outcome
    expected_ranges = local_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges)
def test_replication_local_sharding_remote_unsharded(self):
    # run the scenario with an empty replicator conf
    conf = {}
    self._check_replication_local_sharding_remote_unsharded(conf)
def test_replication_local_sharding_remote_unsharded_large_diff(self):
    # run the same scenario with per_diff set to 1
    conf = {'per_diff': 1}
    self._check_replication_local_sharding_remote_unsharded(conf)
def _check_only_sync(self, local_broker, remote_node_index, repl_conf):
    # Replicate local_broker to the node at remote_node_index using
    # repl_conf and verify that nothing beyond the initial 'sync' rpc call
    # is attempted: replication is deferred, the two expected warnings are
    # logged and the remote db ends up with no shard ranges.
    daemon, repl_calls, rsync_calls = self.check_replicate(
        local_broker, remote_node_index, repl_conf,
        expect_success=False)

    # When talking to an old (pre-2.18.0) container server, abort
    # replication when we're sharding or sharded. Wait for the
    # rolling upgrade that's presumably in-progress to finish instead.
    self.assertEqual(1, daemon.stats['deferred'])
    self.assertEqual(0, daemon.stats['diff'])
    self.assertEqual(0, daemon.stats['rsync'])
    self.assertEqual(['sync'],
                     [call[0] for call in repl_calls])
    self.assertFalse(rsync_calls)
    lines = daemon.logger.get_lines_for_level('warning')
    self.assertIn('unable to replicate shard ranges', lines[0])
    self.assertIn('refusing to replicate objects', lines[1])
    self.assertFalse(lines[2:])

    # sync
    local_id = local_broker.get_info()['id']
    self.assertEqual(local_id, repl_calls[0][1][2])

    # inspect the db on the node that was actually replicated to, rather
    # than assuming it is always node 1
    remote_broker = self._get_broker(
        local_broker.account, local_broker.container,
        node_index=remote_node_index)
    self.assertNotEqual(local_id, remote_broker.get_info()['id'])
    self.assertEqual([], remote_broker.get_shard_ranges())
def _check_replication_local_sharding_remote_presharding(self, repl_conf):
    # local db: three objects merged before sharding starts, then a shard
    # range and eight more objects merged while in the sharding state
    local_context = self._setup_replication_test(0)
    self._merge_object(index=slice(0, 3), **local_context)
    local_broker = local_context['broker']
    epoch = Timestamp.now()
    self._goto_sharding_state(local_broker, epoch)
    self._merge_shard_range(index=0, **local_context)
    self._merge_object(index=slice(3, 11), **local_context)

    # remote db: a single object
    remote_context = self._setup_replication_test(1)
    self._merge_object(index=11, **remote_context)

    real_get_info = \
        replicator.ContainerReplicatorRpc._get_synced_replication_info

    def get_info_without_shard_max_row(*args):
        # strip the 'shard_max_row' key from the remote's replication info
        info = real_get_info(*args)
        del info['shard_max_row']
        return info

    patch_remote_info = mock.patch(
        'swift.container.replicator.'
        'ContainerReplicatorRpc._get_synced_replication_info',
        get_info_without_shard_max_row)
    with patch_remote_info:
        self._check_only_sync(local_broker, 1, repl_conf)

        # the remote still has just its original db file and object
        remote_broker = self._get_broker('a', 'c', node_index=1)
        remote_db_files = get_db_files(remote_broker.db_file)
        self.assertEqual([remote_broker._db_file], remote_db_files)
        self.assertEqual(remote_context['objects'][11:12],
                         remote_broker.get_objects())

        expected_mismatches = ['db_state', 'object_count', 'bytes_used',
                               'status_changed_at', 'hash']
        self.assert_info_synced(
            local_broker, 1, mismatches=expected_mismatches)

        # replicating again gives the same outcome
        self._check_only_sync(local_broker, 1, repl_conf)
def test_replication_local_sharding_remote_presharding(self):
    # run the scenario with an empty replicator conf
    conf = {}
    self._check_replication_local_sharding_remote_presharding(conf)
def test_replication_local_sharding_remote_presharding_large_diff(self):
    # run the same scenario with per_diff set to 1
    conf = {'per_diff': 1}
    self._check_replication_local_sharding_remote_presharding(conf)
def _check_replication_local_sharding_remote_sharding(self, repl_conf):
    # Both local and remote dbs are in the sharding state with the same
    # epoch; verify that only shard ranges are replicated and that the
    # remote's old db is left untouched.

    # local db: five objects merged before sharding starts, then a shard
    # range and five more objects merged while in the sharding state
    local_context = self._setup_replication_test(0)
    self._merge_object(index=slice(0, 5), **local_context)
    local_broker = local_context['broker']
    epoch = Timestamp.now()
    self._goto_sharding_state(local_broker, epoch)
    self._merge_shard_range(index=0, **local_context)
    self._merge_object(index=slice(5, 10), **local_context)

    # remote db: one object merged before sharding starts, then a shard
    # range and one more object merged while in the sharding state
    remote_context = self._setup_replication_test(1)
    self._merge_object(index=12, **remote_context)
    # take snapshot of info now before transition to sharding...
    orig_remote_info = remote_context['broker'].get_info()
    remote_broker = remote_context['broker']
    self._goto_sharding_state(remote_broker, epoch)
    self._merge_shard_range(index=0, **remote_context)
    self._merge_object(index=13, **remote_context)

    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf,
        remote_broker.get_shard_ranges(include_own=True))

    # in sharding state brokers only reports object stats from old db, and
    # they are different
    self.assert_info_synced(
        local_broker, 1, mismatches=['object_count', 'bytes_used',
                                     'status_changed_at', 'hash'])

    # the remote has both its old db and its shard db (checked once; the
    # original repeated this assertion verbatim)
    remote_broker = self._get_broker('a', 'c', node_index=1)
    shard_db = make_db_file_path(remote_broker._db_file, epoch)
    self.assertEqual([remote_broker._db_file, shard_db],
                     get_db_files(remote_broker.db_file))

    # no local objects have been sync'd to remote shard db
    self.assertEqual(remote_context['objects'][13:14],
                     remote_broker.get_objects())

    # remote *old db* is unchanged
    remote_old_broker = self.backend(
        remote_broker._db_file, account=remote_broker.account,
        container=remote_broker.container, force_db_file=True)
    self.assertEqual(remote_context['objects'][12:13],
                     remote_old_broker.get_objects())
    self.assertFalse(remote_old_broker.get_shard_ranges())
    remote_old_info = remote_old_broker.get_info()
    # db_state is dropped from both snapshots before comparing them
    orig_remote_info.pop('db_state')
    remote_old_info.pop('db_state')
    self.assertEqual(orig_remote_info, remote_old_info)

    # replicating again gives the same outcome
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf,
        local_broker.get_shard_ranges(include_own=True))
def test_replication_local_sharding_remote_sharding(self):
    # run the scenario with an empty replicator conf
    conf = {}
    self._check_replication_local_sharding_remote_sharding(conf)
def test_replication_local_sharding_remote_sharding_large_diff(self):
    # run the same scenario with per_diff set to 1
    conf = {'per_diff': 1}
    self._check_replication_local_sharding_remote_sharding(conf)
def test_replication_local_sharded_remote_missing(self):
    # local db: moved to sharding, given an ACTIVE shard range reporting
    # 99 objects plus three objects of its own, then moved on to sharded
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    epoch = Timestamp.now()
    self._goto_sharding_state(local_broker, epoch)
    shard_range = local_context['shard_ranges'][0]
    shard_range.object_count = 99
    shard_range.state = ShardRange.ACTIVE
    self._merge_shard_range(index=0, **local_context)
    self._merge_object(index=slice(0, 3), **local_context)
    self._goto_sharded_state(local_broker)
    local_objects = local_context['objects']

    daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1)
    rpc_ops = [rpc_call[0] for rpc_call in repl_calls]
    self.assertEqual(['sync', 'complete_rsync'], rpc_ops)
    self.assertEqual(1, daemon.stats['rsync'])

    # sync
    local_id = local_broker.get_info()['id']
    sync_args = repl_calls[0][1]
    self.assertEqual(local_id, sync_args[2])

    # complete_rsync
    complete_rsync_args = repl_calls[1][1]
    self.assertEqual(local_id, complete_rsync_args[0])
    local_db_name = os.path.basename(local_broker.db_file)
    self.assertEqual(local_db_name, complete_rsync_args[1])
    first_rsync = rsync_calls[0]
    self.assertEqual(local_broker.db_file, first_rsync[0])
    self.assertEqual(local_id, os.path.basename(first_rsync[1]))
    self.assertFalse(rsync_calls[1:])

    self.assert_info_synced(local_broker, 1)

    # the remote has only a shard db, with its own id, holding the local
    # objects and shard ranges
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_id = remote_broker.get_info()['id']
    self.assertNotEqual(local_id, remote_id)
    shard_db = make_db_file_path(remote_broker._db_file, epoch)
    self.assertEqual([shard_db], get_db_files(remote_broker.db_file))
    self.assertEqual(local_objects[:3], remote_broker.get_objects())
    self.assertEqual(local_broker.get_shard_ranges(),
                     remote_broker.get_shard_ranges())

    # sanity check - in sync
    self._assert_local_sharded_in_sync(local_broker, local_id)

    remote_broker = self._get_broker('a', 'c', node_index=1)
    shard_db = make_db_file_path(remote_broker._db_file, epoch)
    self.assertEqual([shard_db], get_db_files(remote_broker.db_file))
    # the remote broker object_count comes from replicated shard range...
    remote_info = remote_broker.get_info()
    self.assertEqual(99, remote_info['object_count'])
    # these are replicated misplaced objects...
    self.assertEqual(local_objects[:3], remote_broker.get_objects())
    self.assertEqual(local_broker.get_shard_ranges(),
                     remote_broker.get_shard_ranges())
def _check_replication_local_sharded_remote_unsharded(self, repl_conf):
    # local db: moved to sharding, given an ACTIVE shard range reporting
    # 99 objects plus three objects of its own, then moved on to sharded
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    epoch = Timestamp.now()
    self._goto_sharding_state(local_broker, epoch)
    shard_range = local_context['shard_ranges'][0]
    shard_range.object_count = 99
    shard_range.state = ShardRange.ACTIVE
    self._merge_shard_range(index=0, **local_context)
    self._merge_object(index=slice(0, 3), **local_context)
    self._goto_sharded_state(local_broker)

    # remote db: a single object and no state change
    remote_context = self._setup_replication_test(1)
    self._merge_object(index=4, **remote_context)

    expected_ranges = local_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges,
        expect_success=True)

    # sharded broker takes object count from shard range whereas remote
    # unsharded broker takes it from object table
    expected_mismatches = ['db_state', 'object_count', 'bytes_used',
                           'status_changed_at', 'hash']
    self.assert_info_synced(
        local_broker, 1, mismatches=expected_mismatches)

    # the remote still has just its original db file and its own object
    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_db_files = get_db_files(remote_broker.db_file)
    self.assertEqual([remote_broker._db_file], remote_db_files)
    self.assertEqual(remote_context['objects'][4:5],
                     remote_broker.get_objects())

    # replicating again gives the same outcome
    expected_ranges = local_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges,
        expect_success=True)

    remote_broker = self._get_broker('a', 'c', node_index=1)
    remote_db_files = get_db_files(remote_broker.db_file)
    self.assertEqual([remote_broker._db_file], remote_db_files)
    self.assertEqual(remote_context['objects'][4:5],
                     remote_broker.get_objects())
def test_replication_local_sharded_remote_unsharded(self):
    # run the scenario with an empty replicator conf
    conf = {}
    self._check_replication_local_sharded_remote_unsharded(conf)
def test_replication_local_sharded_remote_unsharded_large_diff(self):
    # run the same scenario with per_diff set to 1
    conf = {'per_diff': 1}
    self._check_replication_local_sharded_remote_unsharded(conf)
def _check_replication_local_sharded_remote_sharding(self, repl_conf):
    # local db: moved to sharding, given an ACTIVE shard range reporting
    # 99 objects / 999 bytes plus five objects of its own, then moved on
    # to sharded
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    epoch = Timestamp.now()
    self._goto_sharding_state(local_broker, epoch=epoch)
    local_range = local_context['shard_ranges'][0]
    local_range.object_count = 99
    local_range.bytes_used = 999
    local_range.state = ShardRange.ACTIVE
    self._merge_shard_range(index=0, **local_context)
    self._merge_object(index=slice(0, 5), **local_context)
    self._goto_sharded_state(local_broker)

    # remote db: one object merged before sharding starts (info is
    # snapshotted at that point), then a shard range and one more object
    # merged while in the sharding state
    remote_context = self._setup_replication_test(1)
    self._merge_object(index=6, **remote_context)
    remote_broker = remote_context['broker']
    remote_info_orig = remote_broker.get_info()
    self._goto_sharding_state(remote_broker, epoch=epoch)
    self._merge_shard_range(index=0, **remote_context)
    self._merge_object(index=7, **remote_context)

    # remote has newer timestamp for shard range
    expected_ranges = remote_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges,
        expect_success=True)

    # sharded broker takes object count from shard range whereas remote
    # sharding broker takes it from object table
    expected_mismatches = ['db_state', 'object_count', 'bytes_used',
                           'status_changed_at', 'hash']
    self.assert_info_synced(
        local_broker, 1, mismatches=expected_mismatches)

    # the remote has both its old db and its shard db
    remote_broker = self._get_broker('a', 'c', node_index=1)
    shard_db = make_db_file_path(remote_broker._db_file, epoch)
    remote_db_files = get_db_files(remote_broker.db_file)
    self.assertEqual([remote_broker._db_file, shard_db], remote_db_files)
    # remote fresh db objects are unchanged
    self.assertEqual(remote_context['objects'][7:8],
                     remote_broker.get_objects())
    # remote old hash.db objects are unchanged
    remote_old_broker = self.backend(
        remote_broker._db_file, account=remote_broker.account,
        container=remote_broker.container, force_db_file=True)
    self.assertEqual(remote_context['objects'][6:7],
                     remote_old_broker.get_objects())
    # apart from db_state, the old db info matches the earlier snapshot
    remote_info = remote_old_broker.get_info()
    for info in (remote_info_orig, remote_info):
        info.pop('db_state')
    self.assertEqual(remote_info_orig, remote_info)
    self.assertEqual(local_broker.get_shard_ranges(),
                     remote_broker.get_shard_ranges())

    # replicating again gives the same outcome
    expected_ranges = remote_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges,
        expect_success=True)
def test_replication_local_sharded_remote_sharding(self):
    # run the scenario with an empty replicator conf
    conf = {}
    self._check_replication_local_sharded_remote_sharding(conf)
def test_replication_local_sharded_remote_sharding_large_diff(self):
    # run the same scenario with per_diff set to 1
    conf = {'per_diff': 1}
    self._check_replication_local_sharded_remote_sharding(conf)
def _check_replication_local_sharded_remote_sharded(self, repl_conf):
    # local db: moved to sharding, given an ACTIVE shard range reporting
    # 99 objects / 999 bytes plus six objects of its own, then moved on
    # to sharded
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    epoch = Timestamp.now()
    self._goto_sharding_state(local_broker, epoch)
    local_range = local_context['shard_ranges'][0]
    local_range.object_count = 99
    local_range.bytes_used = 999
    local_range.state = ShardRange.ACTIVE
    self._merge_shard_range(index=0, **local_context)
    self._merge_object(index=slice(0, 6), **local_context)
    self._goto_sharded_state(local_broker)

    # remote db: one object merged before sharding starts, an ACTIVE shard
    # range reporting 101 objects / 1010 bytes and one more object merged
    # while sharding, then moved on to sharded
    remote_context = self._setup_replication_test(1)
    self._merge_object(index=6, **remote_context)
    remote_broker = remote_context['broker']
    self._goto_sharding_state(remote_broker, epoch)
    remote_range = remote_context['shard_ranges'][0]
    remote_range.object_count = 101
    remote_range.bytes_used = 1010
    remote_range.state = ShardRange.ACTIVE
    self._merge_shard_range(index=0, **remote_context)
    self._merge_object(index=7, **remote_context)
    self._goto_sharded_state(remote_broker)

    # remote has newer timestamp for shard range
    expected_ranges = remote_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges,
        expect_success=True)

    self.assert_info_synced(
        local_broker, 1,
        mismatches=['status_changed_at', 'hash'])

    # the remote has only its shard db, holding only its own object
    remote_broker = self._get_broker('a', 'c', node_index=1)
    shard_db = make_db_file_path(remote_broker._db_file, epoch)
    self.assertEqual([shard_db], get_db_files(remote_broker.db_file))
    self.assertEqual(remote_context['objects'][7:8],
                     remote_broker.get_objects())
    # remote shard range was newer than local so object count is not
    # updated by sync'd shard range
    first_remote_range = remote_broker.get_shard_ranges()[0]
    self.assertEqual(101, first_remote_range.object_count)

    # replicating again gives the same outcome; remote has newer timestamp
    # for shard range
    expected_ranges = remote_broker.get_shard_ranges(include_own=True)
    self._check_only_shard_ranges_replicated(
        local_broker, 1, repl_conf, expected_ranges,
        expect_success=True)
def test_replication_local_sharded_remote_sharded(self):
    # run the scenario with an empty replicator conf
    conf = {}
    self._check_replication_local_sharded_remote_sharded(conf)
def test_replication_local_sharded_remote_sharded_large_diff(self):
    # run the same scenario with per_diff set to 1
    conf = {'per_diff': 1}
    self._check_replication_local_sharded_remote_sharded(conf)
def test_replication_rsync_then_merge_aborts_before_merge_sharding(self):
    # verify that rsync_then_merge aborts if remote starts sharding during
    # the rsync
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    self._merge_object(index=slice(0, 3), **local_context)
    remote_context = self._setup_replication_test(1)
    remote_broker = remote_context['broker']
    remote_broker.logger = debug_logger()
    self._merge_object(index=5, **remote_context)

    real_rsync_then_merge = \
        replicator.ContainerReplicatorRpc.rsync_then_merge

    def sharding_rsync_then_merge(*args):
        # put the remote into the sharding state just before the real
        # rsync_then_merge handler runs
        new_range = ShardRange('.shards_a/cc', Timestamp.now())
        remote_broker.merge_shard_ranges(new_range)
        self._goto_sharding_state(remote_broker, Timestamp.now())
        return real_rsync_then_merge(*args)

    patch_rsync_then_merge = mock.patch(
        'swift.container.replicator.ContainerReplicatorRpc.'
        'rsync_then_merge',
        sharding_rsync_then_merge)
    patch_get_items_since = mock.patch(
        'swift.container.backend.ContainerBroker.'
        'get_items_since')
    with patch_rsync_then_merge:
        with patch_get_items_since as mock_get_items_since:
            daemon, repl_calls, rsync_calls = self.check_replicate(
                local_broker, 1, expect_success=False,
                repl_conf={'per_diff': 1})

    # no items were ever read for merging
    mock_get_items_since.assert_not_called()
    rpc_ops = [rpc_call[0] for rpc_call in repl_calls]
    self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'],
                     rpc_ops)
    # a single rsync, from the local db file to a path named by local id
    first_rsync = rsync_calls[0]
    self.assertEqual(local_broker.db_file, first_rsync[0])
    self.assertEqual(local_broker.get_info()['id'],
                     os.path.basename(first_rsync[1]))
    self.assertFalse(rsync_calls[1:])
def test_replication_rsync_then_merge_aborts_before_merge_sharded(self):
    # verify that rsync_then_merge aborts if remote completes sharding
    # during the rsync
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    self._merge_object(index=slice(0, 3), **local_context)
    remote_context = self._setup_replication_test(1)
    remote_broker = remote_context['broker']
    remote_broker.logger = debug_logger()
    self._merge_object(index=5, **remote_context)

    real_rsync_then_merge = \
        replicator.ContainerReplicatorRpc.rsync_then_merge

    def sharded_rsync_then_merge(*args):
        # take the remote through sharding to sharded just before the
        # real rsync_then_merge handler runs
        new_range = ShardRange('.shards_a/cc', Timestamp.now())
        remote_broker.merge_shard_ranges(new_range)
        self._goto_sharding_state(remote_broker, Timestamp.now())
        self._goto_sharded_state(remote_broker)
        return real_rsync_then_merge(*args)

    patch_rsync_then_merge = mock.patch(
        'swift.container.replicator.ContainerReplicatorRpc.'
        'rsync_then_merge',
        sharded_rsync_then_merge)
    patch_get_items_since = mock.patch(
        'swift.container.backend.ContainerBroker.'
        'get_items_since')
    with patch_rsync_then_merge:
        with patch_get_items_since as mock_get_items_since:
            daemon, repl_calls, rsync_calls = self.check_replicate(
                local_broker, 1, expect_success=False,
                repl_conf={'per_diff': 1})

    # no items were ever read for merging
    mock_get_items_since.assert_not_called()
    rpc_ops = [rpc_call[0] for rpc_call in repl_calls]
    self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'],
                     rpc_ops)
    # a single rsync, from the local db file to a path named by local id
    first_rsync = rsync_calls[0]
    self.assertEqual(local_broker.db_file, first_rsync[0])
    self.assertEqual(local_broker.get_info()['id'],
                     os.path.basename(first_rsync[1]))
    self.assertFalse(rsync_calls[1:])
def test_replication_rsync_then_merge_aborts_after_merge_sharding(self):
    # verify that rsync_then_merge aborts if remote starts sharding during
    # the merge
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    self._merge_object(index=slice(0, 3), **local_context)
    remote_context = self._setup_replication_test(1)
    remote_broker = remote_context['broker']
    remote_broker.logger = debug_logger()
    self._merge_object(index=5, **remote_context)

    real_get_items_since = backend.ContainerBroker.get_items_since
    seen_calls = []

    def sharding_get_items_since(broker, *args):
        # on the first call only, put the remote into the sharding state
        # before the items are fetched
        is_first_call = not seen_calls
        if is_first_call:
            new_range = ShardRange('.shards_a/cc', Timestamp.now())
            remote_broker.merge_shard_ranges(new_range)
            self._goto_sharding_state(remote_broker, Timestamp.now())
        seen_calls.append(args)
        return real_get_items_since(broker, *args)

    patch_get_items_since = mock.patch(
        'swift.container.backend.ContainerBroker.get_items_since',
        sharding_get_items_since)
    with patch_get_items_since:
        daemon, repl_calls, rsync_calls = self.check_replicate(
            local_broker, 1, expect_success=False,
            repl_conf={'per_diff': 1})

    rpc_ops = [rpc_call[0] for rpc_call in repl_calls]
    self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'],
                     rpc_ops)
    # a single rsync, from the local db file to a path named by local id
    first_rsync = rsync_calls[0]
    self.assertEqual(local_broker.db_file, first_rsync[0])
    self.assertEqual(local_broker.get_info()['id'],
                     os.path.basename(first_rsync[1]))
    self.assertFalse(rsync_calls[1:])
def test_replication_rsync_then_merge_aborts_after_merge_sharded(self):
    # verify that rsync_then_merge aborts if remote completes sharding
    # during the merge
    local_context = self._setup_replication_test(0)
    local_broker = local_context['broker']
    self._merge_object(index=slice(0, 3), **local_context)
    remote_context = self._setup_replication_test(1)
    remote_broker = remote_context['broker']
    remote_broker.logger = debug_logger()
    self._merge_object(index=5, **remote_context)

    real_get_items_since = backend.ContainerBroker.get_items_since
    seen_calls = []

    def sharded_get_items_since(broker, *args):
        # the items are always fetched first; on every call after the
        # first, the remote is then taken through sharding to sharded
        items = real_get_items_since(broker, *args)
        if seen_calls:
            new_range = ShardRange('.shards_a/cc', Timestamp.now())
            remote_broker.merge_shard_ranges(new_range)
            self._goto_sharding_state(remote_broker, Timestamp.now())
            self._goto_sharded_state(remote_broker)
        seen_calls.append(args)
        return items

    patch_get_items_since = mock.patch(
        'swift.container.backend.ContainerBroker.get_items_since',
        sharded_get_items_since)
    with patch_get_items_since:
        daemon, repl_calls, rsync_calls = self.check_replicate(
            local_broker, 1, expect_success=False,
            repl_conf={'per_diff': 1})

    rpc_ops = [rpc_call[0] for rpc_call in repl_calls]
    self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'],
                     rpc_ops)
    # a single rsync, from the local db file to a path named by local id
    first_rsync = rsync_calls[0]
    self.assertEqual(local_broker.db_file, first_rsync[0])
    self.assertEqual(local_broker.get_info()['id'],
                     os.path.basename(first_rsync[1]))
    self.assertFalse(rsync_calls[1:])
# Run the tests in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()