00bfc425ce
Currently we simply mock statsd calls in the FakeLogger, along with some helper methods for counting and collating the metrics that were called. This overloads the FakeLogger and doesn't simulate the real world: in real life we use a StatsdClient that is attached to the logger. We've been in the situation where unit tests pass but the statsd client stacktraces, because we never fake the StatsdClient based off the real one and let it use its internal logic.

This patch creates a new FakeStatsdClient that is based off the real one; it can then be used like the real statsd client and attached to the FakeLogger. There is quite a bit of churn in the tests to make this work, because we now have to look into the fake statsd client to check the faked calls made. The FakeStatsdClient does everything the real one does, except that it overrides the _send method and socket creation, so no actual statsd metrics are emitted.

Change-Id: I9cdf395e85ab559c2b67b0617f898ad2d6a870d4
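To make the approach described above concrete, here is a minimal sketch of the kind of fake client the commit message describes: a subclass of the real statsd client that keeps the real client's internal logic but overrides socket creation and _send so nothing hits the network, recording what would have been sent instead. The import path, the _open_socket/_send method names, and the send_calls attribute are assumptions for illustration; the actual class added by this change may differ.

# Illustrative sketch only, not the code under review.
# Assumes the real client lives in swift.common.utils and exposes
# _open_socket() and _send(); the real names may differ.
from swift.common.utils import StatsdClient


class FakeStatsdClient(StatsdClient):

    def __init__(self, *args, **kwargs):
        super(FakeStatsdClient, self).__init__(*args, **kwargs)
        # every metric the real client logic would have emitted ends up here
        self.send_calls = []

    def _open_socket(self):
        # never open a real UDP socket in a unit test
        return None

    def _send(self, m_name, m_value, m_type, sample_rate):
        # skip the network write but record what would have been sent,
        # so tests can assert against it
        self.send_calls.append((m_name, m_value, m_type, sample_rate))

    def clear(self):
        self.send_calls = []

Tests that previously counted metrics via FakeLogger helpers would then assert against whatever the attached fake client recorded (the send_calls list in this sketch), which is the source of the test churn mentioned above.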
3013 lines
132 KiB
Python
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import errno
import io
import json
import unittest
import os
import mock
from gzip import GzipFile
from shutil import rmtree
import six
import six.moves.cPickle as pickle
import time
import tempfile
from contextlib import contextmanager, closing
from collections import defaultdict
from errno import ENOENT, ENOTEMPTY, ENOTDIR

from eventlet.green import subprocess
from eventlet import Timeout, sleep

from test.debug_logger import debug_logger
from test.unit import (patch_policies, make_timestamp_iter, mocked_http_conn,
                       mock_check_drive, skip_if_no_xattrs)
from swift.common import utils
from swift.common.utils import (hash_path, mkdirs, normalize_timestamp,
                                storage_directory)
from swift.common import ring
from swift.common.recon import RECON_OBJECT_FILE
from swift.obj import diskfile, replicator as object_replicator
from swift.common.storage_policy import StoragePolicy, POLICIES
from swift.common.exceptions import PartitionLockTimeout


def _ips(*args, **kwargs):
    return ['127.0.0.0']


def mock_http_connect(status):

    class FakeConn(object):

        def __init__(self, status, *args, **kwargs):
            self.status = status
            self.reason = 'Fake'
            self.host = args[0]
            self.port = args[1]
            self.method = args[4]
            self.path = args[5]
            self.with_exc = False
            self.headers = kwargs.get('headers', {})

        def getresponse(self):
            if self.with_exc:
                raise Exception('test')
            return self

        def getheader(self, header):
            return self.headers[header]

        def read(self, amt=None):
            return pickle.dumps({})

        def close(self):
            return
    return lambda *args, **kwargs: FakeConn(status, *args, **kwargs)


process_errors = []


class MockProcess(object):
    ret_code = None
    ret_log = None
    check_args = None
    captured_log = None

    class Stream(object):

        def read(self):
            return next(MockProcess.ret_log)

    def __init__(self, *args, **kwargs):
        targs = next(MockProcess.check_args)
        for targ in targs:
            # Allow more than 2 candidate targs
            # (e.g. a case that either node is fine when nodes shuffled)
            if isinstance(targ, tuple):
                allowed = False
                for target in targ:
                    if target in args[0]:
                        allowed = True
                if not allowed:
                    process_errors.append("Invalid: %s not in %s" % (targ,
                                                                     args))
            else:
                if targ not in args[0]:
                    process_errors.append("Invalid: %s not in %s" % (targ,
                                                                     args))
        self.captured_info = {
            'rsync_args': args[0],
        }
        self.stdout = self.Stream()

    def wait(self):
        # the _mock_process context manager assures this class attribute is a
        # mutable list and takes care of resetting it
        rv = next(self.ret_code)
        if self.captured_log is not None:
            self.captured_info['ret_code'] = rv
            self.captured_log.append(self.captured_info)
        return rv


@contextmanager
def _mock_process(ret):
    captured_log = []
    MockProcess.captured_log = captured_log
    orig_process = subprocess.Popen
    MockProcess.ret_code = (i[0] for i in ret)
    MockProcess.ret_log = (i[1] if six.PY2 else i[1].encode('utf8')
                           for i in ret)
    MockProcess.check_args = (i[2] for i in ret)
    object_replicator.subprocess.Popen = MockProcess
    yield captured_log
    MockProcess.captured_log = None
    object_replicator.subprocess.Popen = orig_process


class MockHungProcess(object):
    def __init__(self, polls_needed=0, *args, **kwargs):
        class MockStdout(object):
            def read(self):
                pass
        self.stdout = MockStdout()
        self._state = 'running'
        self._calls = []
        self._polls = 0
        self._polls_needed = polls_needed

    def wait(self, timeout=None):
        self._calls.append(('wait', self._state))
        if self._state == 'running':
            # Sleep so we trip the rsync timeout
            sleep(1)
            raise BaseException('You need to mock out some timeouts')
        if not self._polls_needed:
            self._state = 'os-reaped'
            return 137
        if timeout is not None:
            raise subprocess.TimeoutExpired('some cmd', timeout)
        raise BaseException("You're waiting indefinitely on something "
                            "we've established is hung")

    def poll(self):
        self._calls.append(('poll', self._state))
        self._polls += 1
        if self._polls >= self._polls_needed:
            self._state = 'os-reaped'
            return 137
        else:
            return None

    def terminate(self):
        self._calls.append(('terminate', self._state))
        if self._state == 'running':
            self._state = 'terminating'

    def kill(self):
        self._calls.append(('kill', self._state))
        self._state = 'killed'


def _create_test_rings(path, devs=None, next_part_power=None):
    testgz = os.path.join(path, 'object.ring.gz')
    intended_replica2part2dev_id = [
        [0, 1, 2, 3, 4, 5, 6],
        [1, 2, 3, 0, 5, 6, 4],
        [2, 3, 0, 1, 6, 4, 5],
    ]
    intended_devs = devs or [
        {'id': 0, 'device': 'sda', 'zone': 0,
         'region': 1, 'ip': '127.0.0.0', 'port': 6200},
        {'id': 1, 'device': 'sda', 'zone': 1,
         'region': 2, 'ip': '127.0.0.1', 'port': 6200},
        {'id': 2, 'device': 'sda', 'zone': 2,
         'region': 3, 'ip': '127.0.0.2', 'port': 6200},
        {'id': 3, 'device': 'sda', 'zone': 4,
         'region': 2, 'ip': '127.0.0.3', 'port': 6200},
        {'id': 4, 'device': 'sda', 'zone': 5,
         'region': 1, 'ip': '127.0.0.4', 'port': 6200,
         'replication_ip': '127.0.1.4'},
        {'id': 5, 'device': 'sda', 'zone': 6,
         'region': 3, 'ip': 'fe80::202:b3ff:fe1e:8329', 'port': 6200},
        {'id': 6, 'device': 'sda', 'zone': 7, 'region': 1,
         'ip': '2001:0db8:85a3:0000:0000:8a2e:0370:7334', 'port': 6200},
    ]
    intended_part_shift = 30
    with closing(GzipFile(testgz, 'wb')) as f:
        pickle.dump(
            ring.RingData(intended_replica2part2dev_id,
                          intended_devs, intended_part_shift,
                          next_part_power),
            f)

    testgz = os.path.join(path, 'object-1.ring.gz')
    with closing(GzipFile(testgz, 'wb')) as f:
        pickle.dump(
            ring.RingData(intended_replica2part2dev_id,
                          intended_devs, intended_part_shift,
                          next_part_power),
            f)
    for policy in POLICIES:
        policy.object_ring = None  # force reload
    return


@patch_policies([StoragePolicy(0, 'zero', False),
                 StoragePolicy(1, 'one', True)])
class TestObjectReplicator(unittest.TestCase):

    def setUp(self):
        skip_if_no_xattrs()
        utils.HASH_PATH_SUFFIX = b'endcap'
        utils.HASH_PATH_PREFIX = b''
        # recon cache path
        self.recon_cache = tempfile.mkdtemp()
        rmtree(self.recon_cache, ignore_errors=1)
        os.mkdir(self.recon_cache)
        # Setup a test ring (stolen from common/test_ring.py)
        self.testdir = tempfile.mkdtemp()
        self.devices = os.path.join(self.testdir, 'node')
        rmtree(self.testdir, ignore_errors=1)
        os.mkdir(self.testdir)
        os.mkdir(self.devices)

        self.objects, self.objects_1, self.parts, self.parts_1 = \
            self._write_disk_data('sda')
        _create_test_rings(self.testdir)
        self.logger = debug_logger('test-replicator')
        self.conf = dict(
            bind_ip=_ips()[0], bind_port=6200,
            swift_dir=self.testdir, devices=self.devices, mount_check='false',
            timeout='300', stats_interval='1', sync_method='rsync',
            recon_cache_path=self.recon_cache)
        self._create_replicator()
        self.ts = make_timestamp_iter()

    def tearDown(self):
        self.assertFalse(process_errors)
        rmtree(self.testdir, ignore_errors=1)
        rmtree(self.recon_cache, ignore_errors=1)

def test_ring_ip_and_bind_ip(self):
|
|
# make clean base_conf
|
|
base_conf = dict(self.conf)
|
|
for key in ('bind_ip', 'ring_ip'):
|
|
base_conf.pop(key, None)
|
|
|
|
# default ring_ip is always 0.0.0.0
|
|
self.conf = base_conf
|
|
self._create_replicator()
|
|
self.assertEqual('0.0.0.0', self.replicator.ring_ip)
|
|
|
|
# bind_ip works fine for legacy configs
|
|
self.conf = dict(base_conf)
|
|
self.conf['bind_ip'] = '192.168.1.42'
|
|
self._create_replicator()
|
|
self.assertEqual('192.168.1.42', self.replicator.ring_ip)
|
|
|
|
# ring_ip works fine by-itself
|
|
self.conf = dict(base_conf)
|
|
self.conf['ring_ip'] = '192.168.1.43'
|
|
self._create_replicator()
|
|
self.assertEqual('192.168.1.43', self.replicator.ring_ip)
|
|
|
|
# if you have both ring_ip wins
|
|
self.conf = dict(base_conf)
|
|
self.conf['bind_ip'] = '192.168.1.44'
|
|
self.conf['ring_ip'] = '192.168.1.45'
|
|
self._create_replicator()
|
|
self.assertEqual('192.168.1.45', self.replicator.ring_ip)
|
|
|
|
def test_handoff_replication_setting_warnings(self):
|
|
conf_tests = [
|
|
# (config, expected_warning)
|
|
({}, False),
|
|
({'handoff_delete': 'auto'}, False),
|
|
({'handoffs_first': 'no'}, False),
|
|
({'handoff_delete': '2'}, True),
|
|
({'handoffs_first': 'yes'}, True),
|
|
({'handoff_delete': '1', 'handoffs_first': 'yes'}, True),
|
|
]
|
|
log_message = 'Handoff only mode is not intended for normal ' \
|
|
'operation, please disable handoffs_first and ' \
|
|
'handoff_delete before the next normal rebalance'
|
|
for config, expected_warning in conf_tests:
|
|
self.logger.clear()
|
|
object_replicator.ObjectReplicator(config, logger=self.logger)
|
|
warning_log_lines = self.logger.get_lines_for_level('warning')
|
|
if expected_warning:
|
|
expected_log_lines = [log_message]
|
|
else:
|
|
expected_log_lines = []
|
|
self.assertEqual(expected_log_lines, warning_log_lines,
|
|
'expected %s != %s for config %r' % (
|
|
expected_log_lines,
|
|
warning_log_lines,
|
|
config,
|
|
))
|
|
|
|
def _write_disk_data(self, disk_name, with_json=False):
|
|
os.mkdir(os.path.join(self.devices, disk_name))
|
|
objects = os.path.join(self.devices, disk_name,
|
|
diskfile.get_data_dir(POLICIES[0]))
|
|
objects_1 = os.path.join(self.devices, disk_name,
|
|
diskfile.get_data_dir(POLICIES[1]))
|
|
os.mkdir(objects)
|
|
os.mkdir(objects_1)
|
|
parts = {}
|
|
parts_1 = {}
|
|
for part in ['0', '1', '2', '3']:
|
|
parts[part] = os.path.join(objects, part)
|
|
os.mkdir(parts[part])
|
|
parts_1[part] = os.path.join(objects_1, part)
|
|
os.mkdir(parts_1[part])
|
|
|
|
if with_json:
|
|
for json_file in ['auditor_status_ZBF.json',
|
|
'auditor_status_ALL.json']:
|
|
for obj_dir in [objects, objects_1]:
|
|
with open(os.path.join(obj_dir, json_file), 'w'):
|
|
pass
|
|
|
|
return objects, objects_1, parts, parts_1
|
|
|
|
def _create_replicator(self):
|
|
self.replicator = object_replicator.ObjectReplicator(self.conf)
|
|
self.replicator.logger = self.logger
|
|
self.replicator._zero_stats()
|
|
self.replicator.all_devs_info = set()
|
|
self.df_mgr = diskfile.DiskFileManager(self.conf, self.logger)
|
|
|
|
def test_run_once_no_local_device_in_ring(self):
|
|
conf = dict(swift_dir=self.testdir, devices=self.devices,
|
|
bind_ip='1.1.1.1', recon_cache_path=self.recon_cache,
|
|
mount_check='false', timeout='300', stats_interval='1')
|
|
replicator = object_replicator.ObjectReplicator(conf,
|
|
logger=self.logger)
|
|
replicator.run_once()
|
|
expected = [
|
|
"Can't find itself in policy with index 0 with ips 1.1.1.1 and"
|
|
" with port 6200 in ring file, not replicating",
|
|
"Can't find itself in policy with index 1 with ips 1.1.1.1 and"
|
|
" with port 6200 in ring file, not replicating",
|
|
]
|
|
self.assertEqual(expected, self.logger.get_lines_for_level('error'))
|
|
|
|
def test_run_once(self):
|
|
conf = dict(swift_dir=self.testdir, devices=self.devices,
|
|
bind_ip=_ips()[0], recon_cache_path=self.recon_cache,
|
|
mount_check='false', timeout='300', stats_interval='1')
|
|
replicator = object_replicator.ObjectReplicator(conf,
|
|
logger=self.logger)
|
|
was_connector = object_replicator.http_connect
|
|
object_replicator.http_connect = mock_http_connect(200)
|
|
cur_part = '0'
|
|
df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o',
|
|
policy=POLICIES[0])
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects, cur_part, data_dir)
|
|
process_arg_checker = []
|
|
ring = replicator.load_object_ring(POLICIES[0])
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(int(cur_part))
|
|
if node['ip'] not in _ips()]
|
|
rsync_mods = tuple(['%s::object/sda/objects/%s' %
|
|
(node['ip'], cur_part) for node in nodes])
|
|
for node in nodes:
|
|
process_arg_checker.append(
|
|
(0, '', ['rsync', whole_path_from, rsync_mods]))
|
|
start = replicator.replication_cycle
|
|
self.assertGreaterEqual(start, 0)
|
|
self.assertLessEqual(start, 9)
|
|
with _mock_process(process_arg_checker):
|
|
replicator.run_once()
|
|
self.assertEqual((start + 1) % 10, replicator.replication_cycle)
|
|
self.assertFalse(process_errors)
|
|
self.assertFalse(self.logger.get_lines_for_level('error'))
|
|
|
|
# Returns 0 at first, and 60 on all following .next() calls
|
|
def _infinite_gen():
|
|
yield 0
|
|
while True:
|
|
yield 60
|
|
|
|
for cycle in range(1, 10):
|
|
with _mock_process(process_arg_checker):
|
|
with mock.patch('time.time', side_effect=_infinite_gen()):
|
|
replicator.run_once()
|
|
self.assertEqual((start + 1 + cycle) % 10,
|
|
replicator.replication_cycle)
|
|
|
|
recon_fname = os.path.join(self.recon_cache, RECON_OBJECT_FILE)
|
|
with open(recon_fname) as cachefile:
|
|
recon = json.loads(cachefile.read())
|
|
self.assertEqual(1, recon.get('replication_time'))
|
|
self.assertIn('replication_stats', recon)
|
|
self.assertIn('replication_last', recon)
|
|
expected = 'Object replication complete (once). (1.00 minutes)'
|
|
self.assertIn(expected, self.logger.get_lines_for_level('info'))
|
|
self.assertFalse(self.logger.get_lines_for_level('error'))
|
|
object_replicator.http_connect = was_connector
|
|
|
|
# policy 1
|
|
def test_run_once_1(self):
|
|
conf = dict(swift_dir=self.testdir, devices=self.devices,
|
|
recon_cache_path=self.recon_cache,
|
|
mount_check='false', timeout='300', stats_interval='1')
|
|
replicator = object_replicator.ObjectReplicator(conf,
|
|
logger=self.logger)
|
|
was_connector = object_replicator.http_connect
|
|
object_replicator.http_connect = mock_http_connect(200)
|
|
cur_part = '0'
|
|
df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o',
|
|
policy=POLICIES[1])
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects_1, cur_part, data_dir)
|
|
process_arg_checker = []
|
|
ring = replicator.load_object_ring(POLICIES[1])
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(int(cur_part))
|
|
if node['ip'] not in _ips()]
|
|
rsync_mods = tuple(['%s::object/sda/objects-1/%s' %
|
|
(node['ip'], cur_part) for node in nodes])
|
|
for node in nodes:
|
|
process_arg_checker.append(
|
|
(0, '', ['rsync', whole_path_from, rsync_mods]))
|
|
with _mock_process(process_arg_checker):
|
|
with mock.patch('swift.obj.replicator.whataremyips',
|
|
side_effect=_ips):
|
|
replicator.run_once()
|
|
self.assertFalse(process_errors)
|
|
self.assertFalse(self.logger.get_lines_for_level('error'))
|
|
object_replicator.http_connect = was_connector
|
|
|
|
def test_check_ring(self):
|
|
for pol in POLICIES:
|
|
obj_ring = self.replicator.load_object_ring(pol)
|
|
self.assertTrue(self.replicator.check_ring(obj_ring))
|
|
orig_check = self.replicator.next_check
|
|
self.replicator.next_check = orig_check - 30
|
|
self.assertTrue(self.replicator.check_ring(obj_ring))
|
|
self.replicator.next_check = orig_check
|
|
orig_ring_time = obj_ring._mtime
|
|
obj_ring._mtime = orig_ring_time - 30
|
|
self.assertTrue(self.replicator.check_ring(obj_ring))
|
|
self.replicator.next_check = orig_check - 30
|
|
self.assertFalse(self.replicator.check_ring(obj_ring))
|
|
|
|
def test_collect_jobs_mkdirs_error(self):
|
|
|
|
non_local = {}
|
|
|
|
def blowup_mkdirs(path):
|
|
non_local['path'] = path
|
|
raise OSError('Ow!')
|
|
|
|
with mock.patch.object(object_replicator, 'mkdirs', blowup_mkdirs):
|
|
rmtree(self.objects, ignore_errors=1)
|
|
object_replicator.mkdirs = blowup_mkdirs
|
|
self.replicator.collect_jobs()
|
|
self.assertEqual(self.logger.get_lines_for_level('error'), [
|
|
'ERROR creating %s: ' % non_local['path']])
|
|
log_args, log_kwargs = self.logger.log_dict['error'][0]
|
|
self.assertEqual(str(log_kwargs['exc_info'][1]), 'Ow!')
|
|
|
|
def test_collect_jobs(self):
|
|
jobs = self.replicator.collect_jobs()
|
|
jobs_to_delete = [j for j in jobs if j['delete']]
|
|
jobs_by_pol_part = {}
|
|
for job in jobs:
|
|
jobs_by_pol_part[str(int(job['policy'])) + job['partition']] = job
|
|
self.assertEqual(len(jobs_to_delete), 2)
|
|
self.assertEqual('1', jobs_to_delete[0]['partition'])
|
|
self.assertEqual(
|
|
[node['id'] for node in jobs_by_pol_part['00']['nodes']], [1, 2])
|
|
self.assertEqual(
|
|
[node['id'] for node in jobs_by_pol_part['01']['nodes']],
|
|
[1, 2, 3])
|
|
self.assertEqual(
|
|
[node['id'] for node in jobs_by_pol_part['02']['nodes']], [2, 3])
|
|
self.assertEqual(
|
|
[node['id'] for node in jobs_by_pol_part['03']['nodes']], [3, 1])
|
|
self.assertEqual(
|
|
[node['id'] for node in jobs_by_pol_part['10']['nodes']], [1, 2])
|
|
self.assertEqual(
|
|
[node['id'] for node in jobs_by_pol_part['11']['nodes']],
|
|
[1, 2, 3])
|
|
self.assertEqual(
|
|
[node['id'] for node in jobs_by_pol_part['12']['nodes']], [2, 3])
|
|
self.assertEqual(
|
|
[node['id'] for node in jobs_by_pol_part['13']['nodes']], [3, 1])
|
|
for part in ['00', '01', '02', '03']:
|
|
for node in jobs_by_pol_part[part]['nodes']:
|
|
self.assertEqual(node['device'], 'sda')
|
|
self.assertEqual(jobs_by_pol_part[part]['path'],
|
|
os.path.join(self.objects, part[1:]))
|
|
for part in ['10', '11', '12', '13']:
|
|
for node in jobs_by_pol_part[part]['nodes']:
|
|
self.assertEqual(node['device'], 'sda')
|
|
self.assertEqual(jobs_by_pol_part[part]['path'],
|
|
os.path.join(self.objects_1, part[1:]))
|
|
|
|
def test_collect_jobs_unmounted(self):
|
|
with mock_check_drive() as mocks:
|
|
jobs = self.replicator.collect_jobs()
|
|
self.assertEqual(jobs, [])
|
|
self.assertEqual(mocks['ismount'].mock_calls, [])
|
|
self.assertEqual(len(mocks['isdir'].mock_calls), 2)
|
|
|
|
self.replicator.mount_check = True
|
|
with mock_check_drive() as mocks:
|
|
jobs = self.replicator.collect_jobs()
|
|
self.assertEqual(jobs, [])
|
|
self.assertEqual(mocks['isdir'].mock_calls, [])
|
|
self.assertEqual(len(mocks['ismount'].mock_calls), 2)
|
|
|
|
def test_collect_jobs_failure_report_with_auditor_stats_json(self):
|
|
devs = [
|
|
{'id': 0, 'device': 'sda', 'zone': 0,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
|
{'id': 1, 'device': 'sdb', 'zone': 1,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
|
{'id': 2, 'device': 'sdc', 'zone': 2,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.1', 'replication_port': 6200},
|
|
{'id': 3, 'device': 'sdd', 'zone': 3,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.1', 'replication_port': 6200},
|
|
]
|
|
objects_sdb, objects_1_sdb, _, _ = \
|
|
self._write_disk_data('sdb', with_json=True)
|
|
objects_sdc, objects_1_sdc, _, _ = \
|
|
self._write_disk_data('sdc', with_json=True)
|
|
objects_sdd, objects_1_sdd, _, _ = \
|
|
self._write_disk_data('sdd', with_json=True)
|
|
_create_test_rings(self.testdir, devs)
|
|
|
|
self.replicator.collect_jobs(override_partitions=[1])
|
|
self.assertEqual(self.replicator.total_stats.failure, 0)
|
|
|
|
def test_collect_jobs_with_override_parts_and_unexpected_part_dir(self):
|
|
self.replicator.collect_jobs(override_partitions=[0, 2])
|
|
self.assertEqual(self.replicator.total_stats.failure, 0)
|
|
os.mkdir(os.path.join(self.objects_1, 'foo'))
|
|
jobs = self.replicator.collect_jobs(override_partitions=[0, 2])
|
|
found_jobs = set()
|
|
for j in jobs:
|
|
found_jobs.add((int(j['policy']), int(j['partition'])))
|
|
self.assertEqual(found_jobs, {
|
|
(0, 0),
|
|
(0, 2),
|
|
(1, 0),
|
|
(1, 2),
|
|
})
|
|
num_disks = len(POLICIES[1].object_ring.devs)
|
|
# N.B. it's not clear why the UUT increments failure per device
|
|
self.assertEqual(self.replicator.total_stats.failure, num_disks)
|
|
|
|
@mock.patch('swift.obj.replicator.random.shuffle', side_effect=lambda l: l)
|
|
def test_collect_jobs_multi_disk(self, mock_shuffle):
|
|
devs = [
|
|
# Two disks on same IP/port
|
|
{'id': 0, 'device': 'sda', 'zone': 0,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
|
{'id': 1, 'device': 'sdb', 'zone': 1,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
|
# Two disks on same server, different ports
|
|
{'id': 2, 'device': 'sdc', 'zone': 2,
|
|
'region': 2, 'ip': '1.1.1.2', 'port': 1112,
|
|
'replication_ip': '127.0.0.1', 'replication_port': 6200},
|
|
{'id': 3, 'device': 'sdd', 'zone': 4,
|
|
'region': 2, 'ip': '1.1.1.2', 'port': 1112,
|
|
'replication_ip': '127.0.0.1', 'replication_port': 6201},
|
|
]
|
|
objects_sdb, objects_1_sdb, _, _ = self._write_disk_data('sdb')
|
|
objects_sdc, objects_1_sdc, _, _ = self._write_disk_data('sdc')
|
|
objects_sdd, objects_1_sdd, _, _ = self._write_disk_data('sdd')
|
|
_create_test_rings(self.testdir, devs)
|
|
|
|
jobs = self.replicator.collect_jobs()
|
|
|
|
self.assertEqual([mock.call(jobs)], mock_shuffle.mock_calls)
|
|
|
|
jobs_to_delete = [j for j in jobs if j['delete']]
|
|
self.assertEqual(len(jobs_to_delete), 4)
|
|
self.assertEqual([
|
|
'1', '2', # policy 0; 1 not on sda, 2 not on sdb
|
|
'1', '2', # policy 1; 1 not on sda, 2 not on sdb
|
|
], [j['partition'] for j in jobs_to_delete])
|
|
|
|
jobs_by_pol_part_dev = {}
|
|
for job in jobs:
|
|
# There should be no jobs with a device not in just sda & sdb
|
|
self.assertTrue(job['device'] in ('sda', 'sdb'))
|
|
jobs_by_pol_part_dev[
|
|
str(int(job['policy'])) + job['partition'] + job['device']
|
|
] = job
|
|
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['00sda']['nodes']],
|
|
[1, 2])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['00sdb']['nodes']],
|
|
[0, 2])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['01sda']['nodes']],
|
|
[1, 2, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['01sdb']['nodes']],
|
|
[2, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['02sda']['nodes']],
|
|
[2, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['02sdb']['nodes']],
|
|
[2, 3, 0])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['03sda']['nodes']],
|
|
[3, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['03sdb']['nodes']],
|
|
[3, 0])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['10sda']['nodes']],
|
|
[1, 2])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['10sdb']['nodes']],
|
|
[0, 2])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['11sda']['nodes']],
|
|
[1, 2, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['11sdb']['nodes']],
|
|
[2, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['12sda']['nodes']],
|
|
[2, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['12sdb']['nodes']],
|
|
[2, 3, 0])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['13sda']['nodes']],
|
|
[3, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['13sdb']['nodes']],
|
|
[3, 0])
|
|
for part in ['00', '01', '02', '03']:
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sda']['path'],
|
|
os.path.join(self.objects, part[1:]))
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sdb']['path'],
|
|
os.path.join(objects_sdb, part[1:]))
|
|
for part in ['10', '11', '12', '13']:
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sda']['path'],
|
|
os.path.join(self.objects_1, part[1:]))
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sdb']['path'],
|
|
os.path.join(objects_1_sdb, part[1:]))
|
|
|
|
@mock.patch('swift.obj.replicator.random.shuffle', side_effect=lambda l: l)
|
|
def test_collect_jobs_multi_disk_diff_ports_normal(self, mock_shuffle):
|
|
# Normally (servers_per_port=0), replication_ip AND replication_port
|
|
# are used to determine local ring device entries. Here we show that
|
|
# with bind_ip='127.0.0.1', bind_port=6200, only "sdc" is local.
|
|
devs = [
|
|
# Two disks on same IP/port
|
|
{'id': 0, 'device': 'sda', 'zone': 0,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
|
{'id': 1, 'device': 'sdb', 'zone': 1,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
|
# Two disks on same server, different ports
|
|
{'id': 2, 'device': 'sdc', 'zone': 2,
|
|
'region': 2, 'ip': '1.1.1.2', 'port': 1112,
|
|
'replication_ip': '127.0.0.1', 'replication_port': 6200},
|
|
{'id': 3, 'device': 'sdd', 'zone': 4,
|
|
'region': 2, 'ip': '1.1.1.2', 'port': 1112,
|
|
'replication_ip': '127.0.0.1', 'replication_port': 6201},
|
|
]
|
|
objects_sdb, objects_1_sdb, _, _ = self._write_disk_data('sdb')
|
|
objects_sdc, objects_1_sdc, _, _ = self._write_disk_data('sdc')
|
|
objects_sdd, objects_1_sdd, _, _ = self._write_disk_data('sdd')
|
|
_create_test_rings(self.testdir, devs)
|
|
|
|
self.conf['bind_ip'] = '127.0.0.1'
|
|
self._create_replicator()
|
|
|
|
jobs = self.replicator.collect_jobs()
|
|
|
|
self.assertEqual([mock.call(jobs)], mock_shuffle.mock_calls)
|
|
|
|
jobs_to_delete = [j for j in jobs if j['delete']]
|
|
self.assertEqual(len(jobs_to_delete), 2)
|
|
self.assertEqual([
|
|
'3', # policy 0; 3 not on sdc
|
|
'3', # policy 1; 3 not on sdc
|
|
], [j['partition'] for j in jobs_to_delete])
|
|
|
|
jobs_by_pol_part_dev = {}
|
|
for job in jobs:
|
|
# There should be no jobs with a device not sdc
|
|
self.assertEqual(job['device'], 'sdc')
|
|
jobs_by_pol_part_dev[
|
|
str(int(job['policy'])) + job['partition'] + job['device']
|
|
] = job
|
|
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['00sdc']['nodes']],
|
|
[0, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['01sdc']['nodes']],
|
|
[1, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['02sdc']['nodes']],
|
|
[3, 0])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['03sdc']['nodes']],
|
|
[3, 0, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['10sdc']['nodes']],
|
|
[0, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['11sdc']['nodes']],
|
|
[1, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['12sdc']['nodes']],
|
|
[3, 0])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['13sdc']['nodes']],
|
|
[3, 0, 1])
|
|
for part in ['00', '01', '02', '03']:
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sdc']['path'],
|
|
os.path.join(objects_sdc, part[1:]))
|
|
for part in ['10', '11', '12', '13']:
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sdc']['path'],
|
|
os.path.join(objects_1_sdc, part[1:]))
|
|
|
|
@mock.patch('swift.obj.replicator.random.shuffle', side_effect=lambda l: l)
|
|
def test_collect_jobs_multi_disk_servers_per_port(self, mock_shuffle):
|
|
# Normally (servers_per_port=0), replication_ip AND replication_port
|
|
# are used to determine local ring device entries. Here we show that
|
|
# with servers_per_port > 0 and bind_ip='127.0.0.1', bind_port=6200,
|
|
# then both "sdc" and "sdd" are local.
|
|
devs = [
|
|
# Two disks on same IP/port
|
|
{'id': 0, 'device': 'sda', 'zone': 0,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
|
{'id': 1, 'device': 'sdb', 'zone': 1,
|
|
'region': 1, 'ip': '1.1.1.1', 'port': 1111,
|
|
'replication_ip': '127.0.0.0', 'replication_port': 6200},
|
|
# Two disks on same server, different ports
|
|
{'id': 2, 'device': 'sdc', 'zone': 2,
|
|
'region': 2, 'ip': '1.1.1.2', 'port': 1112,
|
|
'replication_ip': '127.0.0.1', 'replication_port': 6200},
|
|
{'id': 3, 'device': 'sdd', 'zone': 4,
|
|
'region': 2, 'ip': '1.1.1.2', 'port': 1112,
|
|
'replication_ip': '127.0.0.1', 'replication_port': 6201},
|
|
]
|
|
objects_sdb, objects_1_sdb, _, _ = self._write_disk_data('sdb')
|
|
objects_sdc, objects_1_sdc, _, _ = self._write_disk_data('sdc')
|
|
objects_sdd, objects_1_sdd, _, _ = self._write_disk_data('sdd')
|
|
_create_test_rings(self.testdir, devs)
|
|
|
|
self.conf['bind_ip'] = '127.0.0.1'
|
|
self.conf['servers_per_port'] = 1 # diff port ok
|
|
self._create_replicator()
|
|
|
|
jobs = self.replicator.collect_jobs()
|
|
|
|
self.assertEqual([mock.call(jobs)], mock_shuffle.mock_calls)
|
|
|
|
jobs_to_delete = [j for j in jobs if j['delete']]
|
|
self.assertEqual(len(jobs_to_delete), 4)
|
|
self.assertEqual([
|
|
'3', '0', # policy 0; 3 not on sdc, 0 not on sdd
|
|
'3', '0', # policy 1; 3 not on sdc, 0 not on sdd
|
|
], [j['partition'] for j in jobs_to_delete])
|
|
|
|
jobs_by_pol_part_dev = {}
|
|
for job in jobs:
|
|
# There should be no jobs with a device not in just sdc & sdd
|
|
self.assertTrue(job['device'] in ('sdc', 'sdd'))
|
|
jobs_by_pol_part_dev[
|
|
str(int(job['policy'])) + job['partition'] + job['device']
|
|
] = job
|
|
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['00sdc']['nodes']],
|
|
[0, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['00sdd']['nodes']],
|
|
[0, 1, 2])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['01sdc']['nodes']],
|
|
[1, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['01sdd']['nodes']],
|
|
[1, 2])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['02sdc']['nodes']],
|
|
[3, 0])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['02sdd']['nodes']],
|
|
[2, 0])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['03sdc']['nodes']],
|
|
[3, 0, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['03sdd']['nodes']],
|
|
[0, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['10sdc']['nodes']],
|
|
[0, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['10sdd']['nodes']],
|
|
[0, 1, 2])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['11sdc']['nodes']],
|
|
[1, 3])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['11sdd']['nodes']],
|
|
[1, 2])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['12sdc']['nodes']],
|
|
[3, 0])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['12sdd']['nodes']],
|
|
[2, 0])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['13sdc']['nodes']],
|
|
[3, 0, 1])
|
|
self.assertEqual([node['id']
|
|
for node in jobs_by_pol_part_dev['13sdd']['nodes']],
|
|
[0, 1])
|
|
for part in ['00', '01', '02', '03']:
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sdc']['path'],
|
|
os.path.join(objects_sdc, part[1:]))
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sdd']['path'],
|
|
os.path.join(objects_sdd, part[1:]))
|
|
for part in ['10', '11', '12', '13']:
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sdc']['path'],
|
|
os.path.join(objects_1_sdc, part[1:]))
|
|
self.assertEqual(jobs_by_pol_part_dev[part + 'sdd']['path'],
|
|
os.path.join(objects_1_sdd, part[1:]))
|
|
|
|
def test_collect_jobs_handoffs_first(self):
|
|
self.replicator.handoffs_first = True
|
|
jobs = self.replicator.collect_jobs()
|
|
self.assertTrue(jobs[0]['delete'])
|
|
self.assertEqual('1', jobs[0]['partition'])
|
|
|
|
def test_handoffs_first_mode_will_process_all_jobs_after_handoffs(self):
|
|
# make an object in the handoff & primary partition
|
|
expected_suffix_paths = []
|
|
for policy in POLICIES:
|
|
# primary
|
|
ts = next(self.ts)
|
|
df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', policy)
|
|
with df.create() as w:
|
|
w.write(b'asdf')
|
|
w.put({'X-Timestamp': ts.internal})
|
|
w.commit(ts)
|
|
expected_suffix_paths.append(os.path.dirname(df._datadir))
|
|
# handoff
|
|
ts = next(self.ts)
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', policy)
|
|
with df.create() as w:
|
|
w.write(b'asdf')
|
|
w.put({'X-Timestamp': ts.internal})
|
|
w.commit(ts)
|
|
expected_suffix_paths.append(os.path.dirname(df._datadir))
|
|
|
|
# rsync will be called for all parts we created objects in
|
|
process_arg_checker = [
|
|
# (return_code, stdout, <each in capture rsync args>)
|
|
(0, '', []),
|
|
(0, '', []),
|
|
(0, '', []), # handoff job "first" policy
|
|
(0, '', []),
|
|
(0, '', []),
|
|
(0, '', []), # handoff job "second" policy
|
|
(0, '', []),
|
|
(0, '', []), # update job "first" policy
|
|
(0, '', []),
|
|
(0, '', []), # update job "second" policy
|
|
]
|
|
# each handoff partition node gets one replicate request for after
|
|
# rsync (2 * 3), each primary partition with objects gets two
|
|
# replicate requests (pre-flight and post sync) to each of each
|
|
# partners (2 * 2 * 2), the 2 remaining empty parts (2 & 3) get a
|
|
# pre-flight replicate request per node for each storage policy
|
|
# (2 * 2 * 2) - so 6 + 8 + 8 == 22
|
|
replicate_responses = [200] * 22
|
|
stub_body = pickle.dumps({})
|
|
with _mock_process(process_arg_checker) as rsync_log, \
|
|
mock.patch('swift.obj.replicator.whataremyips',
|
|
side_effect=_ips), \
|
|
mocked_http_conn(*replicate_responses,
|
|
body=stub_body) as conn_log:
|
|
self.replicator.handoffs_first = True
|
|
self.replicator.replicate()
|
|
# all jobs processed!
|
|
self.assertEqual(self.replicator.job_count,
|
|
self.replicator.total_stats.attempted)
|
|
self.assertFalse(self.replicator.handoffs_remaining)
|
|
|
|
# sanity, all the handoffs suffixes we filled in were rsync'd
|
|
found_rsync_suffix_paths = set()
|
|
for subprocess_info in rsync_log:
|
|
local_path, remote_path = subprocess_info['rsync_args'][-2:]
|
|
found_rsync_suffix_paths.add(local_path)
|
|
self.assertEqual(set(expected_suffix_paths), found_rsync_suffix_paths)
|
|
# sanity, all nodes got replicated
|
|
found_replicate_calls = defaultdict(int)
|
|
for req in conn_log.requests:
|
|
self.assertEqual(req['method'], 'REPLICATE')
|
|
found_replicate_key = (
|
|
int(req['headers']['X-Backend-Storage-Policy-Index']),
|
|
req['path'])
|
|
found_replicate_calls[found_replicate_key] += 1
|
|
expected_replicate_calls = {
|
|
(0, '/sda/1/a83'): 3,
|
|
(1, '/sda/1/a83'): 3,
|
|
(0, '/sda/0'): 2,
|
|
(0, '/sda/0/a83'): 2,
|
|
(1, '/sda/0'): 2,
|
|
(1, '/sda/0/a83'): 2,
|
|
(0, '/sda/2'): 2,
|
|
(1, '/sda/2'): 2,
|
|
(0, '/sda/3'): 2,
|
|
(1, '/sda/3'): 2,
|
|
}
|
|
self.assertEqual(dict(found_replicate_calls),
|
|
expected_replicate_calls)
|
|
|
|
def test_handoffs_first_mode_will_abort_if_handoffs_remaining(self):
|
|
# make an object in the handoff partition
|
|
handoff_suffix_paths = []
|
|
for policy in POLICIES:
|
|
ts = next(self.ts)
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o', policy)
|
|
with df.create() as w:
|
|
w.write(b'asdf')
|
|
w.put({'X-Timestamp': ts.internal})
|
|
w.commit(ts)
|
|
handoff_suffix_paths.append(os.path.dirname(df._datadir))
|
|
process_arg_checker = [
|
|
# (return_code, stdout, <each in capture rsync args>)
|
|
(0, '', []),
|
|
(1, '', []),
|
|
(0, '', []),
|
|
(0, '', []),
|
|
(0, '', []),
|
|
(0, '', []),
|
|
]
|
|
stub_body = pickle.dumps({})
|
|
with _mock_process(process_arg_checker) as rsync_log, \
|
|
mock.patch('swift.obj.replicator.whataremyips',
|
|
side_effect=_ips), \
|
|
mocked_http_conn(*[200] * 5, body=stub_body) as conn_log:
|
|
self.replicator.handoffs_first = True
|
|
self.replicator.replicate()
|
|
# stopped after handoffs!
|
|
self.assertEqual(1, self.replicator.handoffs_remaining)
|
|
self.assertEqual(8, self.replicator.job_count)
|
|
self.assertEqual(self.replicator.total_stats.failure, 1)
|
|
# in addition to the two update_deleted jobs as many as "concurrency"
|
|
# jobs may have been spawned into the pool before the failed
|
|
# update_deleted job incremented handoffs_remaining and caused the
|
|
# handoffs_first check to abort the current pass
|
|
self.assertLessEqual(self.replicator.total_stats.attempted,
|
|
2 + self.replicator.concurrency)
|
|
|
|
# sanity, all the handoffs suffixes we filled in were rsync'd
|
|
found_rsync_suffix_paths = set()
|
|
expected_replicate_requests = set()
|
|
for subprocess_info in rsync_log:
|
|
local_path, remote_path = subprocess_info['rsync_args'][-2:]
|
|
found_rsync_suffix_paths.add(local_path)
|
|
if subprocess_info['ret_code'] == 0:
|
|
node_ip = remote_path.split(':', 1)[0]
|
|
expected_replicate_requests.add(node_ip)
|
|
self.assertEqual(set(handoff_suffix_paths), found_rsync_suffix_paths)
|
|
# sanity, all successful rsync nodes got REPLICATE requests
|
|
found_replicate_requests = set()
|
|
self.assertEqual(5, len(conn_log.requests))
|
|
for req in conn_log.requests:
|
|
self.assertEqual(req['method'], 'REPLICATE')
|
|
found_replicate_requests.add(req['ip'])
|
|
self.assertEqual(expected_replicate_requests,
|
|
found_replicate_requests)
|
|
|
|
# and at least one partition got removed!
|
|
remaining_policies = []
|
|
for path in handoff_suffix_paths:
|
|
if os.path.exists(path):
|
|
policy = diskfile.extract_policy(path)
|
|
remaining_policies.append(policy)
|
|
self.assertEqual(len(remaining_policies), 1)
|
|
remaining_policy = remaining_policies[0]
|
|
|
|
# try again but with handoff_delete allowing for a single failure
|
|
with _mock_process(process_arg_checker) as rsync_log, \
|
|
mock.patch('swift.obj.replicator.whataremyips',
|
|
side_effect=_ips), \
|
|
mocked_http_conn(*[200] * 14, body=stub_body) as conn_log:
|
|
self.replicator.handoff_delete = 2
|
|
self.replicator._zero_stats()
|
|
self.replicator.replicate()
|
|
# all jobs processed!
|
|
self.assertEqual(self.replicator.job_count,
|
|
self.replicator.total_stats.attempted)
|
|
self.assertFalse(self.replicator.handoffs_remaining)
|
|
# sanity, all parts got replicated
|
|
found_replicate_calls = defaultdict(int)
|
|
for req in conn_log.requests:
|
|
self.assertEqual(req['method'], 'REPLICATE')
|
|
found_replicate_key = (
|
|
int(req['headers']['X-Backend-Storage-Policy-Index']),
|
|
req['path'])
|
|
found_replicate_calls[found_replicate_key] += 1
|
|
expected_replicate_calls = {
|
|
(int(remaining_policy), '/sda/1/a83'): 2,
|
|
(0, '/sda/0'): 2,
|
|
(1, '/sda/0'): 2,
|
|
(0, '/sda/2'): 2,
|
|
(1, '/sda/2'): 2,
|
|
(0, '/sda/3'): 2,
|
|
(1, '/sda/3'): 2,
|
|
}
|
|
self.assertEqual(dict(found_replicate_calls),
|
|
expected_replicate_calls)
|
|
|
|
# and now all handoff partitions have been rebalanced away!
|
|
removed_paths = set()
|
|
for path in handoff_suffix_paths:
|
|
if not os.path.exists(path):
|
|
removed_paths.add(path)
|
|
self.assertEqual(removed_paths, set(handoff_suffix_paths))
|
|
|
|
def test_replicator_skips_bogus_partition_dirs(self):
|
|
# A directory in the wrong place shouldn't crash the replicator
|
|
rmtree(self.objects)
|
|
rmtree(self.objects_1)
|
|
os.mkdir(self.objects)
|
|
os.mkdir(self.objects_1)
|
|
|
|
os.mkdir(os.path.join(self.objects, "burrito"))
|
|
jobs = self.replicator.collect_jobs()
|
|
self.assertEqual(len(jobs), 0)
|
|
|
|
def test_replicator_skips_rsync_temp_files(self):
|
|
# the empty pre-setup dirs aren't that useful to us
|
|
device_path = os.path.join(self.devices, 'sda')
|
|
rmtree(device_path, ignore_errors=1)
|
|
os.mkdir(device_path)
|
|
# create a real data file to trigger rsync
|
|
df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
ts = next(self.ts)
|
|
with df.create() as w:
|
|
w.write(b'asdf')
|
|
w.put({'X-Timestamp': ts.internal})
|
|
w.commit(ts)
|
|
# pre-flight and post sync request for both other primaries
|
|
expected_replicate_requests = 4
|
|
process_arg_checker = [
|
|
# (return_code, stdout, <each in capture rsync args>)
|
|
(0, '', []),
|
|
(0, '', []),
|
|
]
|
|
stub_body = pickle.dumps({})
|
|
with _mock_process(process_arg_checker) as rsync_log, \
|
|
mock.patch('swift.obj.replicator.whataremyips',
|
|
side_effect=_ips), \
|
|
mocked_http_conn(*[200] * expected_replicate_requests,
|
|
body=stub_body) as conn_log:
|
|
self.replicator.replicate()
|
|
self.assertEqual(['REPLICATE'] * expected_replicate_requests,
|
|
[r['method'] for r in conn_log.requests])
|
|
# expect one rsync to each other primary node
|
|
self.assertEqual(2, len(rsync_log))
|
|
expected = '--exclude=.*.[0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]' \
|
|
'[0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]'
|
|
for subprocess_info in rsync_log:
|
|
rsync_args = subprocess_info['rsync_args']
|
|
for arg in rsync_args:
|
|
if arg.startswith('--exclude'):
|
|
self.assertEqual(arg, expected)
|
|
break
|
|
else:
|
|
self.fail('Did not find --exclude argument in %r' %
|
|
rsync_args)
|
|
|
|
def test_replicator_removes_zbf(self):
|
|
# After running xfs_repair, a partition directory could become a
|
|
# zero-byte file. If this happens, the replicator should clean it
|
|
# up, log something, and move on to the next partition.
|
|
|
|
# Surprise! Partition dir 1 is actually a zero-byte file.
|
|
pol_0_part_1_path = os.path.join(self.objects, '1')
|
|
rmtree(pol_0_part_1_path)
|
|
with open(pol_0_part_1_path, 'w'):
|
|
pass
|
|
self.assertTrue(os.path.isfile(pol_0_part_1_path)) # sanity check
|
|
|
|
# Policy 1's partition dir 1 is also a zero-byte file.
|
|
pol_1_part_1_path = os.path.join(self.objects_1, '1')
|
|
rmtree(pol_1_part_1_path)
|
|
with open(pol_1_part_1_path, 'w'):
|
|
pass
|
|
self.assertTrue(os.path.isfile(pol_1_part_1_path)) # sanity check
|
|
|
|
# Don't delete things in collect_jobs(); all the stat() calls would
|
|
# make replicator startup really slow.
|
|
self.replicator.collect_jobs()
|
|
self.assertTrue(os.path.exists(pol_0_part_1_path))
|
|
self.assertTrue(os.path.exists(pol_1_part_1_path))
|
|
|
|
# After a replication pass, the files should be gone
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
self.replicator.run_once()
|
|
|
|
self.assertFalse(os.path.exists(pol_0_part_1_path))
|
|
self.assertFalse(os.path.exists(pol_1_part_1_path))
|
|
self.assertEqual(
|
|
sorted(self.logger.get_lines_for_level('warning')), [
|
|
('Removing partition directory which was a file: %s'
|
|
% pol_1_part_1_path),
|
|
('Removing partition directory which was a file: %s'
|
|
% pol_0_part_1_path),
|
|
])
|
|
|
|
def test_delete_partition(self):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
ring = self.replicator.load_object_ring(POLICIES[0])
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(1)
|
|
if node['ip'] not in _ips()]
|
|
process_arg_checker = []
|
|
for node in nodes:
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
|
process_arg_checker.append(
|
|
(0, '', ['rsync', whole_path_from, rsync_mod]))
|
|
with _mock_process(process_arg_checker):
|
|
self.replicator.replicate()
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
def test_delete_partition_default_sync_method(self):
|
|
self.replicator.conf.pop('sync_method')
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
ring = self.replicator.load_object_ring(POLICIES[0])
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(1)
|
|
if node['ip'] not in _ips()]
|
|
process_arg_checker = []
|
|
for node in nodes:
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
|
process_arg_checker.append(
|
|
(0, '', ['rsync', whole_path_from, rsync_mod]))
|
|
with _mock_process(process_arg_checker):
|
|
self.replicator.replicate()
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
def test_delete_partition_ssync_single_region(self):
|
|
devs = [
|
|
{'id': 0, 'device': 'sda', 'zone': 0,
|
|
'region': 1, 'ip': '127.0.0.0', 'port': 6200},
|
|
{'id': 1, 'device': 'sda', 'zone': 1,
|
|
'region': 1, 'ip': '127.0.0.1', 'port': 6200},
|
|
{'id': 2, 'device': 'sda', 'zone': 2,
|
|
'region': 1, 'ip': '127.0.0.2', 'port': 6200},
|
|
{'id': 3, 'device': 'sda', 'zone': 4,
|
|
'region': 1, 'ip': '127.0.0.3', 'port': 6200},
|
|
{'id': 4, 'device': 'sda', 'zone': 5,
|
|
'region': 1, 'ip': '127.0.0.4', 'port': 6200},
|
|
{'id': 5, 'device': 'sda', 'zone': 6,
|
|
'region': 1, 'ip': 'fe80::202:b3ff:fe1e:8329', 'port': 6200},
|
|
{'id': 6, 'device': 'sda', 'zone': 7, 'region': 1,
|
|
'ip': '2001:0db8:85a3:0000:0000:8a2e:0370:7334', 'port': 6200},
|
|
]
|
|
_create_test_rings(self.testdir, devs=devs)
|
|
self.conf['sync_method'] = 'ssync'
|
|
self.replicator = object_replicator.ObjectReplicator(self.conf)
|
|
self.replicator.logger = debug_logger()
|
|
self.replicator._zero_stats()
|
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
ts = normalize_timestamp(time.time())
|
|
f = open(os.path.join(df._datadir, ts + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
whole_path_from = storage_directory(self.objects, 1, ohash)
|
|
suffix_dir_path = os.path.dirname(whole_path_from)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
def _fake_ssync(node, job, suffixes, **kwargs):
|
|
return True, {ohash: ts}
|
|
|
|
self.replicator.sync_method = _fake_ssync
|
|
self.replicator.replicate()
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
self.assertFalse(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
def test_delete_partition_1(self):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES[1])
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects_1, '1', data_dir)
|
|
part_path = os.path.join(self.objects_1, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
ring = self.replicator.load_object_ring(POLICIES[1])
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(1)
|
|
if node['ip'] not in _ips()]
|
|
process_arg_checker = []
|
|
for node in nodes:
|
|
rsync_mod = '%s::object/sda/objects-1/%s' % (node['ip'], 1)
|
|
process_arg_checker.append(
|
|
(0, '', ['rsync', whole_path_from, rsync_mod]))
|
|
with _mock_process(process_arg_checker):
|
|
self.replicator.replicate()
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
def test_delete_partition_with_failures(self):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
ring = self.replicator.load_object_ring(POLICIES[0])
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(1)
|
|
if node['ip'] not in _ips()]
|
|
process_arg_checker = []
|
|
for i, node in enumerate(nodes):
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
|
if i == 0:
|
|
# force one of the rsync calls to fail
|
|
ret_code = 1
|
|
else:
|
|
ret_code = 0
|
|
process_arg_checker.append(
|
|
(ret_code, '', ['rsync', whole_path_from, rsync_mod]))
|
|
with _mock_process(process_arg_checker):
|
|
self.replicator.replicate()
|
|
# The path should still exist
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
def test_delete_partition_with_handoff_delete(self):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
self.replicator.handoff_delete = 2
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
ring = self.replicator.load_object_ring(POLICIES[0])
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(1)
|
|
if node['ip'] not in _ips()]
|
|
process_arg_checker = []
|
|
for i, node in enumerate(nodes):
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
|
if i == 0:
|
|
# force one of the rsync calls to fail
|
|
ret_code = 1
|
|
else:
|
|
ret_code = 0
|
|
process_arg_checker.append(
|
|
(ret_code, '', ['rsync', whole_path_from, rsync_mod]))
|
|
with _mock_process(process_arg_checker):
|
|
self.replicator.replicate()
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
def test_delete_partition_with_handoff_delete_failures(self):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
self.replicator.handoff_delete = 2
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
ring = self.replicator.load_object_ring(POLICIES[0])
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(1)
|
|
if node['ip'] not in _ips()]
|
|
process_arg_checker = []
|
|
for i, node in enumerate(nodes):
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
|
if i in (0, 1):
|
|
# force two of the rsync calls to fail
|
|
ret_code = 1
|
|
else:
|
|
ret_code = 0
|
|
process_arg_checker.append(
|
|
(ret_code, '', ['rsync', whole_path_from, rsync_mod]))
|
|
with _mock_process(process_arg_checker):
|
|
self.replicator.replicate()
|
|
# The file should still exist
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
def test_delete_partition_with_handoff_delete_fail_in_other_region(self):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects, '1', data_dir)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
ring = self.replicator.load_object_ring(POLICIES[0])
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(1)
|
|
if node['ip'] not in _ips()]
|
|
process_arg_checker = []
|
|
for node in nodes:
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'], 1)
|
|
if node['region'] != 1:
|
|
# the rsync calls for other region to fail
|
|
ret_code = 1
|
|
else:
|
|
ret_code = 0
|
|
process_arg_checker.append(
|
|
(ret_code, '', ['rsync', whole_path_from, rsync_mod]))
|
|
with _mock_process(process_arg_checker):
|
|
self.replicator.replicate()
|
|
# The file should still exist
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
def test_delete_partition_override_params(self):
|
|
df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.replicator.replicate(override_devices=['sdb'])
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.replicator.replicate(override_partitions=[9])
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.replicator.replicate(override_devices=['sda'],
|
|
override_partitions=[1])
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
def _make_OSError(self, err):
|
|
return OSError(err, os.strerror(err))
|
|
|
|
def test_delete_partition_override_params_os_not_empty_error(self):
|
|
part_path = os.path.join(self.objects, '1')
|
|
with mock.patch('swift.obj.replicator.shutil.rmtree') as mockrmtree:
|
|
mockrmtree.side_effect = self._make_OSError(errno.ENOTEMPTY)
|
|
self.replicator.replicate(override_devices=['sda'],
|
|
override_partitions=[1],
|
|
override_policies=[0])
|
|
error_lines = self.replicator.logger.get_lines_for_level('error')
|
|
self.assertFalse(error_lines)
|
|
self.assertTrue(os.path.exists(part_path))
|
|
self.assertEqual([mock.call(part_path)], mockrmtree.call_args_list)
|
|
|
|
def test_delete_partition_ignores_os_no_entity_error(self):
|
|
part_path = os.path.join(self.objects, '1')
|
|
with mock.patch('swift.obj.replicator.shutil.rmtree') as mockrmtree:
|
|
mockrmtree.side_effect = self._make_OSError(errno.ENOENT)
|
|
self.replicator.replicate(override_devices=['sda'],
|
|
override_partitions=[1],
|
|
override_policies=[0])
|
|
error_lines = self.replicator.logger.get_lines_for_level('error')
|
|
self.assertFalse(error_lines)
|
|
self.assertTrue(os.path.exists(part_path))
|
|
self.assertEqual([mock.call(part_path)], mockrmtree.call_args_list)
|
|
|
|
def test_delete_partition_ignores_os_no_data_error(self):
|
|
part_path = os.path.join(self.objects, '1')
|
|
with mock.patch('swift.obj.replicator.shutil.rmtree') as mockrmtree:
|
|
mockrmtree.side_effect = self._make_OSError(errno.ENODATA)
|
|
self.replicator.replicate(override_devices=['sda'],
|
|
override_partitions=[1],
|
|
override_policies=[0])
|
|
error_lines = self.replicator.logger.get_lines_for_level('error')
|
|
self.assertFalse(error_lines)
|
|
self.assertTrue(os.path.exists(part_path))
|
|
self.assertEqual([mock.call(part_path)], mockrmtree.call_args_list)
|
|
|
|
def test_delete_policy_override_params(self):
|
|
df0 = self.df_mgr.get_diskfile('sda', '99', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
df1 = self.df_mgr.get_diskfile('sda', '99', 'a', 'c', 'o',
|
|
policy=POLICIES[1])
|
|
mkdirs(df0._datadir)
|
|
mkdirs(df1._datadir)
|
|
|
|
pol0_part_path = os.path.join(self.objects, '99')
|
|
pol1_part_path = os.path.join(self.objects_1, '99')
|
|
|
|
# sanity checks
|
|
self.assertTrue(os.access(pol0_part_path, os.F_OK))
|
|
self.assertTrue(os.access(pol1_part_path, os.F_OK))
|
|
|
|
# a bogus policy index doesn't bother the replicator any more than a
|
|
# bogus device or partition does
|
|
self.replicator.run_once(policies='1,2,5')
|
|
|
|
self.assertFalse(os.access(pol1_part_path, os.F_OK))
|
|
self.assertTrue(os.access(pol0_part_path, os.F_OK))
|
|
|
|
# since we weren't operating on everything, but only a subset of
|
|
# storage policies, we didn't dump any recon stats.
|
|
self.assertFalse(os.path.exists(
|
|
os.path.join(self.recon_cache, RECON_OBJECT_FILE)))
|
|
|
|
def test_delete_partition_ssync(self):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
ts = normalize_timestamp(time.time())
|
|
f = open(os.path.join(df._datadir, ts + '.data'),
|
|
'wb')
|
|
f.write(b'0')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
whole_path_from = storage_directory(self.objects, 1, ohash)
|
|
suffix_dir_path = os.path.dirname(whole_path_from)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.call_nums = 0
|
|
self.conf['sync_method'] = 'ssync'
|
|
|
|
def _fake_ssync(node, job, suffixes, **kwargs):
|
|
success = True
|
|
ret_val = {ohash: ts}
|
|
if self.call_nums == 2:
|
|
                    # ssync should return (True, []) only when the second
                    # candidate node has not yet received the replica.
|
|
success = False
|
|
ret_val = {}
|
|
self.call_nums += 1
|
|
return success, ret_val
|
|
|
|
self.replicator.sync_method = _fake_ssync
|
|
self.replicator.replicate()
|
|
# The file should still exist
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.replicator.replicate()
|
|
# The file should be deleted at the second replicate call
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
self.assertFalse(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.replicator.replicate()
|
|
# The partition should be deleted at the third replicate call
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
self.assertFalse(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
del self.call_nums
|
|
|
|
def test_delete_partition_ssync_with_sync_failure(self):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
ts = normalize_timestamp(time.time())
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir, ts + '.data'), 'wb')
|
|
f.write(b'0')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
whole_path_from = storage_directory(self.objects, 1, ohash)
|
|
suffix_dir_path = os.path.dirname(whole_path_from)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.call_nums = 0
|
|
self.conf['sync_method'] = 'ssync'
|
|
|
|
def _fake_ssync(node, job, suffixes, **kwags):
|
|
success = False
|
|
ret_val = {}
|
|
if self.call_nums == 2:
|
|
                    # ssync should return (True, []) only when the second
                    # candidate node has not yet received the replica.
|
|
success = True
|
|
ret_val = {ohash: ts}
|
|
self.call_nums += 1
|
|
return success, ret_val
|
|
|
|
self.replicator.sync_method = _fake_ssync
|
|
self.replicator.replicate()
|
|
# The file should still exist
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.replicator.replicate()
|
|
# The file should still exist
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.replicator.replicate()
|
|
# The file should still exist
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
del self.call_nums
|
|
|
|
def test_delete_objs_ssync_only_when_in_sync(self):
|
|
self.replicator.logger = debug_logger('test-replicator')
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
ts = normalize_timestamp(time.time())
|
|
f = open(os.path.join(df._datadir, ts + '.data'), 'wb')
|
|
f.write(b'0')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
whole_path_from = storage_directory(self.objects, 1, ohash)
|
|
suffix_dir_path = os.path.dirname(whole_path_from)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.call_nums = 0
|
|
self.conf['sync_method'] = 'ssync'
|
|
|
|
in_sync_objs = {}
|
|
|
|
def _fake_ssync(node, job, suffixes, remote_check_objs=None):
|
|
self.call_nums += 1
|
|
if remote_check_objs is None:
|
|
# sync job
|
|
ret_val = {ohash: ts}
|
|
else:
|
|
ret_val = in_sync_objs
|
|
return True, ret_val
|
|
|
|
self.replicator.sync_method = _fake_ssync
|
|
self.replicator.replicate()
|
|
self.assertEqual(3, self.call_nums)
|
|
# The file should still exist
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
del self.call_nums
|
|
|
|
def test_delete_partition_ssync_with_cleanup_failure(self):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
self.replicator.logger = mock_logger = \
|
|
debug_logger('test-replicator')
|
|
df = self.df_mgr.get_diskfile('sda', '1', 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
ts = normalize_timestamp(time.time())
|
|
f = open(os.path.join(df._datadir, ts + '.data'), 'wb')
|
|
f.write(b'0')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
whole_path_from = storage_directory(self.objects, 1, ohash)
|
|
suffix_dir_path = os.path.dirname(whole_path_from)
|
|
part_path = os.path.join(self.objects, '1')
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
self.call_nums = 0
|
|
self.conf['sync_method'] = 'ssync'
|
|
|
|
def _fake_ssync(node, job, suffixes, **kwargs):
|
|
success = True
|
|
ret_val = {ohash: ts}
|
|
if self.call_nums == 2:
|
|
                    # ssync should return (True, []) only when the second
                    # candidate node has not yet received the replica.
|
|
success = False
|
|
ret_val = {}
|
|
self.call_nums += 1
|
|
return success, ret_val
|
|
|
|
rmdir_func = os.rmdir
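            # keep a reference to the real os.rmdir; raise_exception_rmdir
            # below builds a stand-in that raises the given OSError only for
            # the suffix dir and delegates to the real rmdir otherwise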
|
|
|
|
def raise_exception_rmdir(exception_class, error_no):
|
|
instance = exception_class()
|
|
instance.errno = error_no
|
|
instance.strerror = os.strerror(error_no)
|
|
|
|
def func(directory, dir_fd=None):
|
|
if directory == suffix_dir_path:
|
|
raise instance
|
|
else:
|
|
rmdir_func(directory)
|
|
|
|
return func
|
|
|
|
self.replicator.sync_method = _fake_ssync
|
|
self.replicator.replicate()
|
|
# The file should still exist
|
|
self.assertTrue(os.access(whole_path_from, os.F_OK))
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
# Fail with ENOENT
|
|
with mock.patch('os.rmdir',
|
|
raise_exception_rmdir(OSError, ENOENT)):
|
|
self.replicator.replicate()
|
|
self.assertFalse(mock_logger.get_lines_for_level('error'))
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
# Fail with ENOTEMPTY
|
|
with mock.patch('os.rmdir',
|
|
raise_exception_rmdir(OSError, ENOTEMPTY)):
|
|
self.replicator.replicate()
|
|
self.assertFalse(mock_logger.get_lines_for_level('error'))
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
# Fail with ENOTDIR
|
|
with mock.patch('os.rmdir',
|
|
raise_exception_rmdir(OSError, ENOTDIR)):
|
|
self.replicator.replicate()
|
|
self.assertEqual(mock_logger.get_lines_for_level('error'), [
|
|
'Unexpected error trying to cleanup suffix dir %r: ' %
|
|
os.path.dirname(df._datadir),
|
|
])
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
self.assertTrue(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
|
|
# Finally we can cleanup everything
|
|
self.replicator.replicate()
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
self.assertFalse(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertTrue(os.access(part_path, os.F_OK))
|
|
self.replicator.replicate()
|
|
self.assertFalse(os.access(whole_path_from, os.F_OK))
|
|
self.assertFalse(os.access(suffix_dir_path, os.F_OK))
|
|
self.assertFalse(os.access(part_path, os.F_OK))
|
|
|
|
def test_run_once_recover_from_failure(self):
|
|
conf = dict(swift_dir=self.testdir, devices=self.devices,
|
|
bind_ip=_ips()[0],
|
|
mount_check='false', timeout='300', stats_interval='1')
|
|
replicator = object_replicator.ObjectReplicator(conf)
|
|
was_connector = object_replicator.http_connect
|
|
try:
|
|
object_replicator.http_connect = mock_http_connect(200)
|
|
            # Write some files into '1' and run replicate - they should be
            # moved to the other partitions and then the node should get
            # deleted.
|
|
cur_part = '1'
|
|
df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o',
|
|
policy=POLICIES.legacy)
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
ohash = hash_path('a', 'c', 'o')
|
|
data_dir = ohash[-3:]
|
|
whole_path_from = os.path.join(self.objects, cur_part, data_dir)
|
|
ring = replicator.load_object_ring(POLICIES[0])
|
|
process_arg_checker = []
|
|
nodes = [node for node in
|
|
ring.get_part_nodes(int(cur_part))
|
|
if node['ip'] not in _ips()]
|
|
for node in nodes:
|
|
rsync_mod = '%s::object/sda/objects/%s' % (node['ip'],
|
|
cur_part)
|
|
process_arg_checker.append(
|
|
(0, '', ['rsync', whole_path_from, rsync_mod]))
|
|
self.assertTrue(os.access(os.path.join(self.objects,
|
|
'1', data_dir, ohash),
|
|
os.F_OK))
|
|
with _mock_process(process_arg_checker):
|
|
replicator.run_once()
|
|
self.assertFalse(process_errors)
|
|
for i, result in [('0', True), ('1', False),
|
|
('2', True), ('3', True)]:
|
|
self.assertEqual(os.access(
|
|
os.path.join(self.objects,
|
|
i, diskfile.HASH_FILE),
|
|
os.F_OK), result)
|
|
finally:
|
|
object_replicator.http_connect = was_connector
|
|
|
|
def test_run_once_recover_from_timeout(self):
|
|
# verify that replicator will pass over all policies' partitions even
|
|
# if a timeout occurs while replicating one partition to one node.
|
|
timeouts = [Timeout()]
|
|
|
|
def fake_get_hashes(df_mgr, device, partition, policy, **kwargs):
|
|
self.get_hash_count += 1
|
|
dev_path = df_mgr.get_dev_path(device)
|
|
part_path = os.path.join(dev_path, diskfile.get_data_dir(policy),
|
|
str(partition))
|
|
            # Simulate a REPLICATE timeout by raising Timeout for the second
            # call to get_hashes (with recalculate suffixes) for a specific
            # partition
|
|
if (timeouts and '/objects/' in part_path and
|
|
part_path.endswith('0') and 'recalculate' in kwargs):
|
|
raise timeouts.pop(0)
|
|
return 1, {'abc': 'def'}
|
|
|
|
# map partition_path -> [nodes]
|
|
sync_paths = collections.defaultdict(list)
|
|
|
|
def fake_sync(node, job, suffixes, *args, **kwargs):
|
|
sync_paths[job['path']].append(node)
|
|
return True, {}
|
|
|
|
conf = dict(swift_dir=self.testdir, devices=self.devices,
|
|
bind_ip=_ips()[0], # local dev has id=0
|
|
mount_check='false', timeout='300', stats_interval='1')
|
|
with mock.patch('swift.obj.diskfile.DiskFileManager._get_hashes',
|
|
fake_get_hashes):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
with mock.patch('swift.obj.replicator.dump_recon_cache'):
|
|
replicator = object_replicator.ObjectReplicator(
|
|
conf, logger=self.logger)
|
|
|
|
self.get_hash_count = 0
|
|
with mock.patch.object(replicator, 'sync', fake_sync):
|
|
replicator.run_once()
|
|
|
|
log_lines = replicator.logger.logger.get_lines_for_level('error')
|
|
self.assertIn("Error syncing with node:", log_lines[0])
|
|
self.assertFalse(log_lines[1:])
|
|
        # setup creates 4 partitions; partition 1 does not map to local dev id
        # 0 so will be handled by update_deleted(); partitions 0, 2, 3 are
        # handled by update() for each of two policies, so expect 6 paths to
        # be sync'd
|
|
self.assertEqual(6, len(sync_paths))
|
|
# partition 3 has 2 nodes in remote region, only first node is sync'd.
|
|
# partition 0 in policy 0 has fake_get_hashes timeout before first
|
|
# sync, so only second node is sync'd.
|
|
# other partitions are sync'd to 2 nodes in same region.
|
|
expected_node_count = { # map path_end -> expected sync node count
|
|
'/objects/0': 1,
|
|
'/objects/1': 2,
|
|
'/objects/2': 2,
|
|
'/objects/3': 1,
|
|
'/objects-1/0': 2,
|
|
'/objects-1/1': 2,
|
|
'/objects-1/2': 2,
|
|
'/objects-1/3': 1
|
|
}
|
|
for path, nodes in sync_paths.items():
|
|
path_end = path[path.index('/objects'):]
|
|
self.assertEqual(expected_node_count[path_end], len(nodes),
|
|
'Expected %s but got %s for path %s' %
|
|
(expected_node_count[path_end], len(nodes), path))
|
|
        # partitions 0 and 2 attempt 3 calls each per policy to get_hashes = 12
        # partition 3 attempts 2 calls per policy to get_hashes = 4
        # partition 1 doesn't get_hashes because of update_deleted
|
|
self.assertEqual(16, self.get_hash_count)
|
|
|
|
        # get_hashes attempted 16 times but succeeded only 15 due to Timeout
|
|
suffix_hashes = sum(
|
|
call[0][1] for call in
|
|
replicator.logger.logger.statsd_client.calls['update_stats']
|
|
if call[0][0] == 'suffix.hashes')
|
|
self.assertEqual(15, suffix_hashes)
|
|
|
|
def test_run(self):
|
|
with _mock_process([(0, '')] * 100):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
self.replicator.replicate()
|
|
|
|
def test_run_withlog(self):
|
|
with _mock_process([(0, "stuff in log")] * 100):
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)):
|
|
self.replicator.replicate()
|
|
|
|
def test_sync_just_calls_sync_method(self):
|
|
self.replicator.sync_method = mock.MagicMock()
|
|
self.replicator.sync('node', 'job', 'suffixes')
|
|
self.replicator.sync_method.assert_called_once_with(
|
|
'node', 'job', 'suffixes')
|
|
|
|
@mock.patch('swift.obj.replicator.tpool.execute')
|
|
@mock.patch('swift.obj.replicator.http_connect', autospec=True)
|
|
@mock.patch('swift.obj.replicator._do_listdir')
|
|
def test_update(self, mock_do_listdir, mock_http, mock_tpool_execute):
|
|
|
|
def set_default(self):
|
|
self.replicator.suffix_count = 0
|
|
self.replicator.suffix_sync = 0
|
|
self.replicator.suffix_hash = 0
|
|
self.replicator.last_replication_count = 0
|
|
self.replicator._zero_stats()
|
|
self.replicator.partition_times = []
|
|
|
|
self.headers = {'Content-Length': '0',
|
|
'user-agent': 'object-replicator %s' % os.getpid()}
|
|
mock_tpool_execute.return_value = (0, {})
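        # the mocked tpool.execute stands in for the _get_hashes call and
        # returns a (hashed_count, hashes_dict) tuple; (0, {}) means no
        # suffixes hashed yet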
|
|
|
|
all_jobs = self.replicator.collect_jobs()
|
|
jobs = [job for job in all_jobs if not job['delete']]
|
|
|
|
mock_http.return_value = answer = mock.MagicMock()
|
|
answer.getresponse.return_value = resp = mock.MagicMock()
|
|
# Check incorrect http_connect with status 507 and
|
|
# count of attempts and call args
|
|
resp.status = 507
|
|
expected_listdir_calls = [
|
|
mock.call(int(job['partition']),
|
|
self.replicator.replication_cycle)
|
|
for job in jobs]
|
|
do_listdir_results = [False, False, True, False, True, False]
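        # one _do_listdir decision per job; the same values are expected to
        # be passed through to the per-job _get_hashes calls below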
|
|
mock_do_listdir.side_effect = do_listdir_results
|
|
expected_tpool_calls = [
|
|
mock.call(self.replicator._df_router[job['policy']]._get_hashes,
|
|
job['device'], job['partition'], job['policy'],
|
|
do_listdir=do_listdir)
|
|
for job, do_listdir in zip(jobs, do_listdir_results)
|
|
]
|
|
for job in jobs:
|
|
set_default(self)
|
|
ring = job['policy'].object_ring
|
|
self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy'])
|
|
self.replicator.update(job)
|
|
error_lines = self.logger.get_lines_for_level('error')
|
|
expected = []
|
|
error = '%s responded as unmounted'
|
|
# ... first the primaries
|
|
for node in job['nodes']:
|
|
node_str = utils.node_to_string(node, replication=True)
|
|
expected.append(error % node_str)
|
|
# ... then it will get handoffs
|
|
for node in job['policy'].object_ring.get_more_nodes(
|
|
int(job['partition'])):
|
|
node_str = utils.node_to_string(node, replication=True)
|
|
expected.append(error % node_str)
|
|
# ... and finally we get an error about running out of nodes
|
|
expected.append('Ran out of handoffs while replicating '
|
|
'partition %s of policy %d' %
|
|
(job['partition'], job['policy']))
|
|
self.assertEqual(expected, error_lines)
|
|
self.assertEqual(len(self.replicator.partition_times), 1)
|
|
self.assertEqual(mock_http.call_count, len(ring._devs) - 1)
|
|
reqs = []
|
|
for node in job['nodes']:
|
|
reqs.append(mock.call(node['ip'], node['port'], node['device'],
|
|
job['partition'], 'REPLICATE', '',
|
|
headers=self.headers))
|
|
if job['partition'] == '0':
|
|
self.assertEqual(self.replicator.suffix_hash, 0)
|
|
mock_http.assert_has_calls(reqs, any_order=True)
|
|
mock_http.reset_mock()
|
|
self.logger.clear()
|
|
mock_do_listdir.assert_has_calls(expected_listdir_calls)
|
|
mock_tpool_execute.assert_has_calls(expected_tpool_calls)
|
|
mock_do_listdir.side_effect = None
|
|
mock_do_listdir.return_value = False
|
|
# Check incorrect http_connect with status 400 != HTTP_OK
|
|
resp.status = 400
|
|
error = 'Invalid response %(resp)s from %(node)s'
|
|
for job in jobs:
|
|
set_default(self)
|
|
self.replicator.update(job)
|
|
# ... only the primaries
|
|
expected = [
|
|
error % {
|
|
"resp": 400,
|
|
"node": utils.node_to_string(node, replication=True)}
|
|
for node in job['nodes']]
|
|
self.assertEqual(expected,
|
|
self.logger.get_lines_for_level('error'))
|
|
self.assertEqual(len(self.replicator.partition_times), 1)
|
|
self.logger.clear()
|
|
|
|
# Check successful http_connection and exception with
|
|
# incorrect pickle.loads(resp.read())
|
|
resp.status = 200
|
|
resp.read.return_value = b'garbage'
|
|
expect = 'Error syncing with node: %s: '
|
|
for job in jobs:
|
|
set_default(self)
|
|
self.replicator.update(job)
|
|
# ... only the primaries
|
|
expected = [expect % utils.node_to_string(node, replication=True)
|
|
for node in job['nodes']]
|
|
error_lines = self.logger.get_lines_for_level('error')
|
|
self.assertEqual(expected, error_lines)
|
|
self.assertEqual(len(self.replicator.partition_times), 1)
|
|
self.logger.clear()
|
|
|
|
# Check successful http_connection and correct
|
|
# pickle.loads(resp.read()) for non local node
|
|
resp.status = 200
|
|
local_job = None
|
|
resp.read.return_value = pickle.dumps({})
|
|
for job in jobs:
|
|
set_default(self)
|
|
# limit local job to policy 0 for simplicity
|
|
if job['partition'] == '0' and int(job['policy']) == 0:
|
|
local_job = job.copy()
|
|
continue
|
|
self.replicator.update(job)
|
|
self.assertEqual([], self.logger.get_lines_for_level('error'))
|
|
self.assertEqual(len(self.replicator.partition_times), 1)
|
|
self.assertEqual(self.replicator.suffix_hash, 0)
|
|
self.assertEqual(self.replicator.suffix_sync, 0)
|
|
self.assertEqual(self.replicator.suffix_count, 0)
|
|
self.logger.clear()
|
|
|
|
# Check successful http_connect and sync for local node
|
|
mock_tpool_execute.return_value = (1, {'a83': 'ba47fd314242ec8c'
|
|
'7efb91f5d57336e4'})
|
|
resp.read.return_value = pickle.dumps({'a83': 'c130a2c17ed45102a'
|
|
'ada0f4eee69494ff'})
|
|
set_default(self)
|
|
self.replicator.sync = fake_func = \
|
|
mock.MagicMock(return_value=(True, []))
|
|
self.replicator.update(local_job)
|
|
reqs = []
|
|
for node in local_job['nodes']:
|
|
reqs.append(mock.call(node, local_job, ['a83']))
|
|
fake_func.assert_has_calls(reqs, any_order=True)
|
|
self.assertEqual(fake_func.call_count, 2)
|
|
stats = self.replicator.total_stats
|
|
self.assertEqual(stats.attempted, 1)
|
|
self.assertEqual(stats.suffix_sync, 2)
|
|
self.assertEqual(stats.suffix_hash, 1)
|
|
self.assertEqual(stats.suffix_count, 1)
|
|
self.assertEqual(stats.hashmatch, 0)
|
|
|
|
# Efficient Replication Case
|
|
set_default(self)
|
|
self.replicator.sync = fake_func = \
|
|
mock.MagicMock(return_value=(True, []))
|
|
all_jobs = self.replicator.collect_jobs()
|
|
job = None
|
|
for tmp in all_jobs:
|
|
if tmp['partition'] == '3':
|
|
job = tmp
|
|
break
|
|
# The candidate nodes to replicate (i.e. dev1 and dev3)
|
|
# belong to another region
|
|
self.replicator.update(job)
|
|
self.assertEqual(fake_func.call_count, 1)
|
|
stats = self.replicator.total_stats
|
|
self.assertEqual(stats.attempted, 1)
|
|
self.assertEqual(stats.suffix_sync, 1)
|
|
self.assertEqual(stats.suffix_hash, 1)
|
|
self.assertEqual(stats.suffix_count, 1)
|
|
self.assertEqual(stats.hashmatch, 0)
|
|
|
|
mock_http.reset_mock()
|
|
self.logger.clear()
|
|
|
|
# test for replication params on policy 0 only
|
|
repl_job = local_job.copy()
|
|
for node in repl_job['nodes']:
|
|
node['replication_ip'] = '127.0.0.11'
|
|
node['replication_port'] = '6011'
|
|
set_default(self)
|
|
# with only one set of headers make sure we specify index 0 here
|
|
# as otherwise it may be different from earlier tests
|
|
self.headers['X-Backend-Storage-Policy-Index'] = 0
|
|
self.replicator.update(repl_job)
|
|
reqs = []
|
|
for node in repl_job['nodes']:
|
|
reqs.append(mock.call(node['replication_ip'],
|
|
node['replication_port'], node['device'],
|
|
repl_job['partition'], 'REPLICATE',
|
|
'', headers=self.headers))
|
|
mock_http.assert_has_calls(reqs, any_order=True)
|
|
|
|
@mock.patch('swift.obj.replicator.tpool.execute')
|
|
@mock.patch('swift.obj.replicator.http_connect', autospec=True)
|
|
@mock.patch('swift.obj.replicator._do_listdir')
|
|
def test_update_local_hash_changes_during_replication(
|
|
self, mock_do_listdir, mock_http, mock_tpool_execute):
|
|
mock_http.return_value = answer = mock.MagicMock()
|
|
answer.getresponse.return_value = resp = mock.MagicMock()
|
|
resp.status = 200
|
|
resp.read.return_value = pickle.dumps({
|
|
'a83': 'c130a2c17ed45102aada0f4eee69494ff'})
|
|
|
|
self.replicator.sync = fake_sync = \
|
|
mock.MagicMock(return_value=(True, []))
|
|
local_job = [
|
|
job for job in self.replicator.collect_jobs()
|
|
if not job['delete']
|
|
and job['partition'] == '0' and int(job['policy']) == 0
|
|
][0]
|
|
|
|
mock_tpool_execute.side_effect = [
|
|
(1, {'a83': 'ba47fd314242ec8c7efb91f5d57336e4'}),
|
|
(1, {'a83': 'c130a2c17ed45102aada0f4eee69494ff'}),
|
|
]
|
|
self.replicator.update(local_job)
|
|
self.assertEqual(fake_sync.call_count, 0)
|
|
self.assertEqual(mock_http.call_count, 2)
|
|
stats = self.replicator.total_stats
|
|
self.assertEqual(stats.attempted, 1)
|
|
self.assertEqual(stats.suffix_sync, 0)
|
|
self.assertEqual(stats.suffix_hash, 1)
|
|
self.assertEqual(stats.suffix_count, 1)
|
|
self.assertEqual(stats.hashmatch, 2)
|
|
|
|
def test_rsync_compress_different_region(self):
|
|
self.assertEqual(self.replicator.sync_method, self.replicator.rsync)
|
|
jobs = self.replicator.collect_jobs()
|
|
_m_rsync = mock.Mock(return_value=0)
|
|
_m_os_path_exists = mock.Mock(return_value=True)
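        # patch _rsync and os.path.exists so sync() can run without touching
        # the filesystem, while still letting us inspect the rsync arguments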
|
|
expected_reqs = []
|
|
with mock.patch.object(self.replicator, '_rsync', _m_rsync), \
|
|
mock.patch('os.path.exists', _m_os_path_exists), \
|
|
mocked_http_conn(
|
|
*[200] * 2 * sum(len(job['nodes']) for job in jobs),
|
|
body=pickle.dumps('{}')) as request_log:
|
|
for job in jobs:
|
|
self.assertTrue('region' in job)
|
|
for node in job['nodes']:
|
|
for rsync_compress in (True, False):
|
|
expected_reqs.append((
|
|
'REPLICATE', node['ip'],
|
|
'/%s/%s/fake_suffix' % (
|
|
node['device'], job['partition']),
|
|
))
|
|
self.replicator.rsync_compress = rsync_compress
|
|
ret = self.replicator.sync(node, job,
|
|
['fake_suffix'])
|
|
self.assertTrue(ret)
|
|
if node['region'] != job['region']:
|
|
if rsync_compress:
|
|
# --compress arg should be passed to rsync
|
|
# binary only when rsync_compress option is
|
|
# enabled AND destination node is in a
|
|
# different region
|
|
self.assertTrue('--compress' in
|
|
_m_rsync.call_args[0][0])
|
|
else:
|
|
self.assertFalse('--compress' in
|
|
_m_rsync.call_args[0][0])
|
|
else:
|
|
self.assertFalse('--compress' in
|
|
_m_rsync.call_args[0][0])
|
|
self.assertEqual(
|
|
_m_os_path_exists.call_args_list[-1][0][0],
|
|
os.path.join(job['path'], 'fake_suffix'))
|
|
self.assertEqual(
|
|
_m_os_path_exists.call_args_list[-2][0][0],
|
|
os.path.join(job['path']))
|
|
self.assertEqual(expected_reqs, [
|
|
(r['method'], r['ip'], r['path']) for r in request_log.requests])
|
|
|
|
def test_rsync_failure_logging(self):
|
|
with mock.patch('swift.obj.replicator.subprocess.Popen') as mock_popen:
|
|
mock_popen.return_value.stdout = io.BytesIO(b'\n'.join([
|
|
b'',
|
|
b'cd+++++++++ suf',
|
|
b'cd+++++++++ suf/hash1',
|
|
b'<f+++++++++ suf/hash1/1637956993.28907.data',
|
|
b'',
|
|
b'cd+++++++++ suf/hash2',
|
|
b'<f+++++++++ suf/hash2/1615174984.55017.data',
|
|
b'',
|
|
b'cd+++++++++ suf/hash3',
|
|
b'<f+++++++++ suf/hash3/1616276756.37760.data',
|
|
b'<f+++++++++ suf/hash3/1637954870.98055.meta',
|
|
b'',
|
|
b'Oh no, some error!',
|
|
]))
|
|
mock_popen.return_value.wait.return_value = 5
|
|
self.assertEqual(5, self.replicator._rsync([
|
|
'rsync', '--recursive', '--whole-file', '--human-readable',
|
|
'--xattrs', '--itemize-changes', '--ignore-existing',
|
|
'--timeout=30', '--contimeout=30', '--bwlimit=100M',
|
|
'--exclude=rsync-tempfile-pattern',
|
|
'/srv/node/d1/objects/part/suf',
|
|
'192.168.50.30::object/d8/objects/241']))
|
|
error_lines = self.logger.get_lines_for_level('error')
|
|
self.assertEqual(error_lines[:5], [
|
|
'<f+++++++++ suf/hash1/1637956993.28907.data',
|
|
'<f+++++++++ suf/hash2/1615174984.55017.data',
|
|
'<f+++++++++ suf/hash3/1616276756.37760.data',
|
|
'<f+++++++++ suf/hash3/1637954870.98055.meta',
|
|
'Oh no, some error!',
|
|
])
|
|
expected_start = "Bad rsync return code: 5 <- ['rsync', '--recursive'"
|
|
self.assertEqual(error_lines[5][:len(expected_start)], expected_start,
|
|
'Expected %r to start with %r' % (error_lines[5],
|
|
expected_start))
|
|
self.assertFalse(error_lines[6:])
|
|
self.assertFalse(self.logger.get_lines_for_level('info'))
|
|
self.assertFalse(self.logger.get_lines_for_level('debug'))
|
|
|
|
def test_rsync_failure_logging_no_transfer(self):
|
|
with mock.patch('swift.obj.replicator.subprocess.Popen') as mock_popen:
|
|
mock_popen.return_value.stdout = io.BytesIO(b'\n'.join([
|
|
b'',
|
|
b'cd+++++++++ suf',
|
|
b'cd+++++++++ suf/hash1',
|
|
b'<f+++++++++ suf/hash1/1637956993.28907.data',
|
|
b'',
|
|
b'cd+++++++++ suf/hash2',
|
|
b'<f+++++++++ suf/hash2/1615174984.55017.data',
|
|
b'',
|
|
b'cd+++++++++ suf/hash3',
|
|
b'<f+++++++++ suf/hash3/1616276756.37760.data',
|
|
b'<f+++++++++ suf/hash3/1637954870.98055.meta',
|
|
b'',
|
|
b'Oh no, some error!',
|
|
]))
|
|
mock_popen.return_value.wait.return_value = 5
|
|
self.replicator.log_rsync_transfers = False
|
|
self.assertEqual(5, self.replicator._rsync([
|
|
'rsync', '--recursive', '--whole-file', '--human-readable',
|
|
'--xattrs', '--itemize-changes', '--ignore-existing',
|
|
'--timeout=30', '--contimeout=30', '--bwlimit=100M',
|
|
'--exclude=rsync-tempfile-pattern',
|
|
'/srv/node/d1/objects/part/suf',
|
|
'192.168.50.30::object/d8/objects/241']))
|
|
error_lines = self.logger.get_lines_for_level('error')
|
|
self.assertEqual(error_lines[0], 'Oh no, some error!')
|
|
expected_start = "Bad rsync return code: 5 <- ['rsync', '--recursive'"
|
|
self.assertEqual(error_lines[1][:len(expected_start)], expected_start,
|
|
'Expected %r to start with %r' % (error_lines[1],
|
|
expected_start))
|
|
self.assertFalse(error_lines[2:])
|
|
self.assertFalse(self.logger.get_lines_for_level('info'))
|
|
self.assertFalse(self.logger.get_lines_for_level('debug'))
|
|
|
|
def test_rsync_success_logging(self):
|
|
with mock.patch(
|
|
'swift.obj.replicator.subprocess.Popen') as mock_popen, \
|
|
mock.patch('time.time', side_effect=[123.4, 123.5]):
|
|
mock_popen.return_value.stdout = io.BytesIO(b'\n'.join([
|
|
b'',
|
|
b'cd+++++++++ suf',
|
|
b'cd+++++++++ suf/hash1',
|
|
b'<f+++++++++ suf/hash1/1637956993.28907.data',
|
|
b'',
|
|
b'cd+++++++++ suf/hash2',
|
|
b'<f+++++++++ suf/hash2/1615174984.55017.data',
|
|
b'',
|
|
b'cd+++++++++ suf/hash3',
|
|
b'<f+++++++++ suf/hash3/1616276756.37760.data',
|
|
b'<f+++++++++ suf/hash3/1637954870.98055.meta',
|
|
b'',
|
|
b'Yay! It worked!',
|
|
]))
|
|
mock_popen.return_value.wait.return_value = 0
|
|
self.assertEqual(0, self.replicator._rsync([
|
|
'rsync', '--recursive', '--whole-file', '--human-readable',
|
|
'--xattrs', '--itemize-changes', '--ignore-existing',
|
|
'--timeout=30', '--contimeout=30', '--bwlimit=100M',
|
|
'--exclude=rsync-tempfile-pattern',
|
|
'/srv/node/d1/objects/part/suf',
|
|
'192.168.50.30::object/d8/objects/241']))
|
|
self.assertFalse(self.logger.get_lines_for_level('error'))
|
|
debug_lines = self.logger.get_lines_for_level('debug')
|
|
self.assertEqual(debug_lines, [
|
|
'<f+++++++++ suf/hash1/1637956993.28907.data',
|
|
'<f+++++++++ suf/hash2/1615174984.55017.data',
|
|
'<f+++++++++ suf/hash3/1616276756.37760.data',
|
|
'<f+++++++++ suf/hash3/1637954870.98055.meta',
|
|
'Yay! It worked!',
|
|
])
|
|
info_lines = self.logger.get_lines_for_level('info')
|
|
self.assertEqual(info_lines, [
|
|
'Successful rsync of /srv/node/d1/objects/part/... to '
|
|
'192.168.50.30::object/d8/objects/241 (0.100)'])
|
|
|
|
def test_rsync_success_logging_no_transfer(self):
|
|
with mock.patch(
|
|
'swift.obj.replicator.subprocess.Popen') as mock_popen, \
|
|
mock.patch('time.time', side_effect=[123.4, 123.5]):
|
|
mock_popen.return_value.stdout = io.BytesIO(b'\n'.join([
|
|
b'',
|
|
b'cd+++++++++ sf1',
|
|
b'cd+++++++++ sf1/hash1',
|
|
b'<f+++++++++ sf1/hash1/1637956993.28907.data',
|
|
b'',
|
|
b'cd+++++++++ sf1/hash2',
|
|
b'<f+++++++++ sf1/hash2/1615174984.55017.data',
|
|
b'',
|
|
b'cd+++++++++ sf2/hash3',
|
|
b'<f+++++++++ sf2/hash3/1616276756.37760.data',
|
|
b'<f+++++++++ sf2/hash3/1637954870.98055.meta',
|
|
b'',
|
|
b'Yay! It worked!',
|
|
]))
|
|
mock_popen.return_value.wait.return_value = 0
|
|
self.replicator.log_rsync_transfers = False
|
|
self.assertEqual(0, self.replicator._rsync([
|
|
'rsync', '--recursive', '--whole-file', '--human-readable',
|
|
'--xattrs', '--itemize-changes', '--ignore-existing',
|
|
'--timeout=30', '--contimeout=30', '--bwlimit=100M',
|
|
'--exclude=rsync-tempfile-pattern',
|
|
'/srv/node/d1/objects/part/sf1',
|
|
'/srv/node/d1/objects/part/sf2',
|
|
'192.168.50.30::object/d8/objects/241']))
|
|
self.assertFalse(self.logger.get_lines_for_level('error'))
|
|
debug_lines = self.logger.get_lines_for_level('debug')
|
|
self.assertEqual(debug_lines, ['Yay! It worked!'])
|
|
info_lines = self.logger.get_lines_for_level('info')
|
|
self.assertEqual(info_lines, [
|
|
'Successful rsync of /srv/node/d1/objects/part/... to '
|
|
'192.168.50.30::object/d8/objects/241 (0.100)'])
|
|
|
|
def test_do_listdir(self):
|
|
        # Test if do_listdir is enabled for every 10th partition to rehash.
        # First number is the number of partitions in the job, list entries
        # are the expected number of partitions rehashed in each run.
|
|
test_data = {
|
|
9: [1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
|
|
29: [3, 2, 3, 3, 3, 3, 3, 3, 3, 3],
|
|
111: [12, 11, 11, 11, 11, 11, 11, 11, 11, 11]}
|
|
|
|
for partitions, expected in test_data.items():
|
|
seen = []
|
|
for phase in range(10):
|
|
invalidated = 0
|
|
for partition in range(partitions):
|
|
if object_replicator._do_listdir(partition, phase):
|
|
seen.append(partition)
|
|
invalidated += 1
|
|
# Every 10th partition is seen after each phase
|
|
self.assertEqual(expected[phase], invalidated)
|
|
|
|
# After 10 cycles every partition is seen exactly once
|
|
self.assertEqual(sorted(range(partitions)), sorted(seen))
|
|
|
|
def test_update_deleted_partition_lock_timeout(self):
|
|
self.replicator.handoffs_remaining = 0
|
|
jobs = self.replicator.collect_jobs()
|
|
delete_jobs = [j for j in jobs if j['delete']]
|
|
delete_jobs.sort(key=lambda j: j['policy'])
|
|
job = delete_jobs[0]
|
|
df_mgr = self.replicator._df_router[job['policy']]
|
|
with mock.patch.object(df_mgr, 'partition_lock',
|
|
side_effect=PartitionLockTimeout):
|
|
self.replicator.update_deleted(job)
|
|
logs = self.logger.get_lines_for_level('info')
|
|
self.assertEqual(['Unable to lock handoff partition 1 for '
|
|
'replication on device sda policy 0'], logs)
|
|
|
|
def test_replicate_skipped_partpower_increase(self):
|
|
_create_test_rings(self.testdir, next_part_power=4)
|
|
self.replicator.get_local_devices() # refresh rings
|
|
self.replicator.replicate()
|
|
self.assertEqual(0, self.replicator.job_count)
|
|
self.assertEqual(0, self.replicator.total_stats.attempted)
|
|
warnings = self.logger.get_lines_for_level('warning')
|
|
self.assertIn(
|
|
"next_part_power set in policy 'one'. Skipping", warnings)
|
|
|
|
def test_replicate_rsync_timeout(self):
|
|
cur_part = '0'
|
|
df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o',
|
|
policy=POLICIES[0])
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
|
|
mock_procs = []
|
|
|
|
def new_mock(*a, **kw):
|
|
proc = MockHungProcess()
|
|
mock_procs.append(proc)
|
|
return proc
|
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)), \
|
|
mock.patch.object(self.replicator, 'rsync_timeout', 0.01), \
|
|
mock.patch('eventlet.green.subprocess.Popen', new_mock):
|
|
self.replicator.rsync_error_log_line_length = 40
|
|
self.replicator.run_once()
|
|
for proc in mock_procs:
|
|
self.assertEqual(proc._calls, [
|
|
('wait', 'running'),
|
|
('kill', 'running'),
|
|
('wait', 'killed'),
|
|
])
|
|
self.assertEqual(len(mock_procs), 2)
|
|
error_lines = self.replicator.logger.get_lines_for_level('error')
|
|
# verify logs are truncated to rsync_error_log_line_length
|
|
self.assertEqual(["Killing long-running rsync after 0s: ['r"] * 2,
|
|
error_lines)
|
|
|
|
def test_replicate_rsync_timeout_wedged(self):
|
|
cur_part = '0'
|
|
df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o',
|
|
policy=POLICIES[0])
|
|
mkdirs(df._datadir)
|
|
f = open(os.path.join(df._datadir,
|
|
normalize_timestamp(time.time()) + '.data'),
|
|
'wb')
|
|
f.write(b'1234567890')
|
|
f.close()
|
|
|
|
mock_procs = []
|
|
|
|
def new_mock(*a, **kw):
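            # every rsync Popen becomes a hung process that needs two poll()
            # calls after being killed, simulating a wedged rsync
            # (see the expected _calls asserted below)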
|
|
proc = MockHungProcess(polls_needed=2)
|
|
mock_procs.append(proc)
|
|
return proc
|
|
|
|
with mock.patch('swift.obj.replicator.http_connect',
|
|
mock_http_connect(200)), \
|
|
mock.patch.object(self.replicator, 'rsync_timeout', 0.01), \
|
|
mock.patch('eventlet.green.subprocess.Popen', new_mock):
|
|
self.replicator.run_once()
|
|
for proc in mock_procs:
|
|
self.assertEqual(proc._calls, [
|
|
('wait', 'running'),
|
|
('kill', 'running'),
|
|
('wait', 'killed'),
|
|
('poll', 'killed'),
|
|
('poll', 'killed'),
|
|
])
|
|
self.assertEqual(len(mock_procs), 2)
|
|
|
|
def test_limit_rsync_log(self):
|
|
def do_test(length_limit, log_line, expected):
|
|
self.replicator.rsync_error_log_line_length = length_limit
|
|
result = self.replicator._limit_rsync_log(log_line)
|
|
self.assertEqual(result, expected)
|
|
|
|
tests = [{'length_limit': 20,
|
|
'log_line': 'a' * 20,
|
|
'expected': 'a' * 20},
|
|
{'length_limit': 20,
|
|
'log_line': 'a' * 19,
|
|
'expected': 'a' * 19},
|
|
{'length_limit': 20,
|
|
'log_line': 'a' * 21,
|
|
'expected': 'a' * 20},
|
|
{'length_limit': None,
|
|
'log_line': 'a' * 50,
|
|
'expected': 'a' * 50},
|
|
{'length_limit': 0,
|
|
'log_line': 'a' * 50,
|
|
'expected': 'a' * 50}]
|
|
|
|
for params in tests:
|
|
do_test(**params)


@patch_policies([StoragePolicy(0, 'zero', False),
|
|
StoragePolicy(1, 'one', True)])
|
|
class TestMultiProcessReplicator(unittest.TestCase):
|
|
def setUp(self):
|
|
# recon cache path
|
|
self.recon_cache = tempfile.mkdtemp()
|
|
rmtree(self.recon_cache, ignore_errors=1)
|
|
os.mkdir(self.recon_cache)
|
|
self.recon_file = os.path.join(self.recon_cache, RECON_OBJECT_FILE)
|
|
|
|
bind_port = 6200
|
|
|
|
# Set up some rings
|
|
self.testdir = tempfile.mkdtemp()
|
|
_create_test_rings(self.testdir, devs=[
|
|
{'id': 0, 'device': 'sda', 'zone': 0,
|
|
'region': 1, 'ip': '127.0.0.1', 'port': bind_port},
|
|
{'id': 1, 'device': 'sdb', 'zone': 0,
|
|
'region': 1, 'ip': '127.0.0.1', 'port': bind_port},
|
|
{'id': 2, 'device': 'sdc', 'zone': 0,
|
|
'region': 1, 'ip': '127.0.0.1', 'port': bind_port},
|
|
{'id': 3, 'device': 'sdd', 'zone': 0,
|
|
'region': 1, 'ip': '127.0.0.1', 'port': bind_port},
|
|
{'id': 4, 'device': 'sde', 'zone': 0,
|
|
'region': 1, 'ip': '127.0.0.1', 'port': bind_port},
|
|
{'id': 100, 'device': 'notme0', 'zone': 0,
|
|
'region': 1, 'ip': '127.99.99.99', 'port': bind_port}])
|
|
|
|
self.logger = debug_logger('test-replicator')
|
|
self.conf = dict(
|
|
bind_ip='127.0.0.1', bind_port=bind_port,
|
|
swift_dir=self.testdir,
|
|
mount_check='false', recon_cache_path=self.recon_cache,
|
|
timeout='300', stats_interval='1', sync_method='rsync')
|
|
|
|
self.replicator = object_replicator.ObjectReplicator(
|
|
self.conf, logger=self.logger)
|
|
|
|
def tearDown(self):
|
|
self.assertFalse(process_errors)
|
|
rmtree(self.testdir, ignore_errors=1)
|
|
rmtree(self.recon_cache, ignore_errors=1)
|
|
|
|
def fake_replicate(self, override_devices, **kw):
|
|
# Faked-out replicate() method. Just updates the stats, but doesn't
|
|
# do any work.
|
|
for device in override_devices:
|
|
stats = self.replicator.stats_for_dev[device]
|
|
if device == 'sda':
|
|
stats.attempted = 1
|
|
stats.success = 10
|
|
stats.failure = 100
|
|
stats.hashmatch = 1000
|
|
stats.rsync = 10000
|
|
stats.remove = 100000
|
|
stats.suffix_count = 1000000
|
|
stats.suffix_hash = 10000000
|
|
stats.suffix_sync = 100000000
|
|
stats.failure_nodes = {
|
|
'10.1.1.1': {'d11': 1}}
|
|
elif device == 'sdb':
|
|
stats.attempted = 2
|
|
stats.success = 20
|
|
stats.failure = 200
|
|
stats.hashmatch = 2000
|
|
stats.rsync = 20000
|
|
stats.remove = 200000
|
|
stats.suffix_count = 2000000
|
|
stats.suffix_hash = 20000000
|
|
stats.suffix_sync = 200000000
|
|
stats.failure_nodes = {
|
|
'10.2.2.2': {'d22': 2}}
|
|
elif device == 'sdc':
|
|
stats.attempted = 3
|
|
stats.success = 30
|
|
stats.failure = 300
|
|
stats.hashmatch = 3000
|
|
stats.rsync = 30000
|
|
stats.remove = 300000
|
|
stats.suffix_count = 3000000
|
|
stats.suffix_hash = 30000000
|
|
stats.suffix_sync = 300000000
|
|
stats.failure_nodes = {
|
|
'10.3.3.3': {'d33': 3}}
|
|
elif device == 'sdd':
|
|
stats.attempted = 4
|
|
stats.success = 40
|
|
stats.failure = 400
|
|
stats.hashmatch = 4000
|
|
stats.rsync = 40000
|
|
stats.remove = 400000
|
|
stats.suffix_count = 4000000
|
|
stats.suffix_hash = 40000000
|
|
stats.suffix_sync = 400000000
|
|
stats.failure_nodes = {
|
|
'10.4.4.4': {'d44': 4}}
|
|
elif device == 'sde':
|
|
stats.attempted = 5
|
|
stats.success = 50
|
|
stats.failure = 500
|
|
stats.hashmatch = 5000
|
|
stats.rsync = 50000
|
|
stats.remove = 500000
|
|
stats.suffix_count = 5000000
|
|
stats.suffix_hash = 50000000
|
|
stats.suffix_sync = 500000000
|
|
stats.failure_nodes = {
|
|
'10.5.5.5': {'d55': 5}}
|
|
else:
|
|
raise Exception("mock can't handle %r" % device)
|
|
|
|
def test_no_multiprocessing(self):
|
|
self.replicator.replicator_workers = 0
|
|
self.assertEqual(self.replicator.get_worker_args(), [])
|
|
|
|
def test_device_distribution(self):
|
|
self.replicator.replicator_workers = 2
|
|
self.assertEqual(self.replicator.get_worker_args(), [{
|
|
'override_devices': ['sda', 'sdc', 'sde'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 0,
|
|
}, {
|
|
'override_devices': ['sdb', 'sdd'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 1,
|
|
}])
|
|
|
|
def test_override_policies(self):
|
|
self.replicator.replicator_workers = 2
|
|
args = self.replicator.get_worker_args(policies="3,5,7", once=True)
|
|
self.assertEqual(args, [{
|
|
'override_devices': ['sda', 'sdc', 'sde'],
|
|
'override_partitions': [],
|
|
'override_policies': [3, 5, 7],
|
|
'have_overrides': True,
|
|
'multiprocess_worker_index': 0,
|
|
}, {
|
|
'override_devices': ['sdb', 'sdd'],
|
|
'override_partitions': [],
|
|
'override_policies': [3, 5, 7],
|
|
'have_overrides': True,
|
|
'multiprocess_worker_index': 1,
|
|
}])
|
|
|
|
# override policies don't apply in run-forever mode
|
|
args = self.replicator.get_worker_args(policies="3,5,7", once=False)
|
|
self.assertEqual(args, [{
|
|
'override_devices': ['sda', 'sdc', 'sde'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 0,
|
|
}, {
|
|
'override_devices': ['sdb', 'sdd'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 1,
|
|
}])
|
|
|
|
def test_more_workers_than_disks(self):
|
|
self.replicator.replicator_workers = 999
|
|
self.assertEqual(self.replicator.get_worker_args(), [{
|
|
'override_devices': ['sda'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 0,
|
|
}, {
|
|
'override_devices': ['sdb'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 1,
|
|
}, {
|
|
'override_devices': ['sdc'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 2,
|
|
}, {
|
|
'override_devices': ['sdd'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 3,
|
|
}, {
|
|
'override_devices': ['sde'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 4,
|
|
}])
|
|
|
|
# Remember how many workers we actually have so that the log-line
|
|
# prefixes are reasonable. Otherwise, we'd have five workers, each
|
|
# logging lines starting with things like "[worker X/999 pid=P]"
|
|
# despite there being only five.
|
|
self.assertEqual(self.replicator.replicator_workers, 5)
|
|
|
|
def test_command_line_overrides(self):
|
|
self.replicator.replicator_workers = 2
|
|
|
|
args = self.replicator.get_worker_args(
|
|
devices="sda,sdc,sdd", partitions="12,34,56", once=True)
|
|
self.assertEqual(args, [{
|
|
'override_devices': ['sda', 'sdd'],
|
|
'override_partitions': [12, 34, 56],
|
|
'override_policies': [],
|
|
'have_overrides': True,
|
|
'multiprocess_worker_index': 0,
|
|
}, {
|
|
'override_devices': ['sdc'],
|
|
'override_partitions': [12, 34, 56],
|
|
'override_policies': [],
|
|
'have_overrides': True,
|
|
'multiprocess_worker_index': 1,
|
|
}])
|
|
|
|
args = self.replicator.get_worker_args(
|
|
devices="sda,sdc,sdd", once=True)
|
|
self.assertEqual(args, [{
|
|
'override_devices': ['sda', 'sdd'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': True,
|
|
'multiprocess_worker_index': 0,
|
|
}, {
|
|
'override_devices': ['sdc'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': True,
|
|
'multiprocess_worker_index': 1,
|
|
}])
|
|
|
|
# no overrides apply in run-forever mode
|
|
args = self.replicator.get_worker_args(
|
|
devices="sda,sdc,sdd", partitions="12,34,56", once=False)
|
|
self.assertEqual(args, [{
|
|
'override_devices': ['sda', 'sdc', 'sde'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 0,
|
|
}, {
|
|
'override_devices': ['sdb', 'sdd'],
|
|
'override_partitions': [],
|
|
'override_policies': [],
|
|
'have_overrides': False,
|
|
'multiprocess_worker_index': 1,
|
|
}])
|
|
|
|
def test_worker_logging(self):
|
|
self.replicator.replicator_workers = 3
|
|
|
|
def log_some_stuff(*a, **kw):
|
|
self.replicator.logger.debug("debug message")
|
|
self.replicator.logger.info("info message")
|
|
self.replicator.logger.warning("warning message")
|
|
self.replicator.logger.error("error message")
|
|
|
|
with mock.patch.object(self.replicator, 'replicate', log_some_stuff), \
|
|
mock.patch("os.getpid", lambda: 8804):
|
|
self.replicator.get_worker_args()
|
|
self.replicator.run_once(multiprocess_worker_index=0,
|
|
override_devices=['sda', 'sdb'])
|
|
|
|
prefix = "[worker 1/3 pid=8804] "
|
|
for level, lines in self.logger.logger.all_log_lines().items():
|
|
for line in lines:
|
|
self.assertTrue(
|
|
line.startswith(prefix),
|
|
"%r doesn't start with %r (level %s)" % (
|
|
line, prefix, level))
|
|
|
|
def test_recon_run_once(self):
|
|
self.replicator.replicator_workers = 3
|
|
|
|
the_time = [1521680000]
|
|
|
|
def mock_time():
|
|
rv = the_time[0]
|
|
the_time[0] += 120
|
|
return rv
|
|
|
|
# Simulate a couple child processes
|
|
with mock.patch.object(self.replicator, 'replicate',
|
|
self.fake_replicate), \
|
|
mock.patch('time.time', mock_time):
|
|
self.replicator.get_worker_args()
|
|
self.replicator.run_once(multiprocess_worker_index=0,
|
|
override_devices=['sda', 'sdb'])
|
|
self.replicator.run_once(multiprocess_worker_index=1,
|
|
override_devices=['sdc'])
|
|
self.replicator.run_once(multiprocess_worker_index=2,
|
|
override_devices=['sdd', 'sde'])
|
|
|
|
with open(self.recon_file) as fh:
|
|
recon_data = json.load(fh)
|
|
self.assertIn('object_replication_per_disk', recon_data)
|
|
self.assertIn('sda', recon_data['object_replication_per_disk'])
|
|
self.assertIn('sdb', recon_data['object_replication_per_disk'])
|
|
self.assertIn('sdc', recon_data['object_replication_per_disk'])
|
|
self.assertIn('sdd', recon_data['object_replication_per_disk'])
|
|
self.assertIn('sde', recon_data['object_replication_per_disk'])
|
|
sda = recon_data['object_replication_per_disk']['sda']
|
|
|
|
# Spot-check a couple of fields
|
|
self.assertEqual(sda['replication_stats']['attempted'], 1)
|
|
self.assertEqual(sda['replication_stats']['success'], 10)
|
|
self.assertEqual(sda['object_replication_time'], 2) # minutes
|
|
self.assertEqual(sda['object_replication_last'], 1521680120)
|
|
|
|
# Aggregate the workers' recon updates
|
|
self.replicator.post_multiprocess_run()
|
|
with open(self.recon_file) as fh:
|
|
recon_data = json.load(fh)
|
|
self.assertEqual(recon_data['replication_stats']['attempted'], 15)
|
|
self.assertEqual(recon_data['replication_stats']['failure'], 1500)
|
|
self.assertEqual(recon_data['replication_stats']['hashmatch'], 15000)
|
|
self.assertEqual(recon_data['replication_stats']['remove'], 1500000)
|
|
self.assertEqual(recon_data['replication_stats']['rsync'], 150000)
|
|
self.assertEqual(recon_data['replication_stats']['success'], 150)
|
|
self.assertEqual(recon_data['replication_stats']['suffix_count'],
|
|
15000000)
|
|
self.assertEqual(recon_data['replication_stats']['suffix_hash'],
|
|
150000000)
|
|
self.assertEqual(recon_data['replication_stats']['suffix_sync'],
|
|
1500000000)
|
|
self.assertEqual(recon_data['replication_stats']['failure_nodes'], {
|
|
'10.1.1.1': {'d11': 1},
|
|
'10.2.2.2': {'d22': 2},
|
|
'10.3.3.3': {'d33': 3},
|
|
'10.4.4.4': {'d44': 4},
|
|
'10.5.5.5': {'d55': 5},
|
|
})
|
|
self.assertEqual(recon_data['object_replication_time'], 2) # minutes
|
|
self.assertEqual(recon_data['object_replication_last'], 1521680120)
|
|
|
|
def test_recon_skipped_with_overrides(self):
|
|
self.replicator.replicator_workers = 3
|
|
|
|
the_time = [1521680000]
|
|
|
|
def mock_time():
|
|
rv = the_time[0]
|
|
the_time[0] += 120
|
|
return rv
|
|
|
|
with mock.patch.object(self.replicator, 'replicate',
|
|
self.fake_replicate), \
|
|
mock.patch('time.time', mock_time):
|
|
self.replicator.get_worker_args()
|
|
self.replicator.run_once(multiprocess_worker_index=0,
|
|
have_overrides=True,
|
|
override_devices=['sda', 'sdb'])
|
|
self.assertFalse(os.path.exists(self.recon_file))
|
|
|
|
# have_overrides=False makes us get recon stats
|
|
with mock.patch.object(self.replicator, 'replicate',
|
|
self.fake_replicate), \
|
|
mock.patch('time.time', mock_time):
|
|
self.replicator.get_worker_args()
|
|
self.replicator.run_once(multiprocess_worker_index=0,
|
|
have_overrides=False,
|
|
override_devices=['sda', 'sdb'])
|
|
with open(self.recon_file) as fh:
|
|
recon_data = json.load(fh)
|
|
self.assertIn('sda', recon_data['object_replication_per_disk'])
|
|
|
|
def test_recon_run_forever(self):
|
|
the_time = [1521521521.52152]
|
|
|
|
def mock_time():
|
|
rv = the_time[0]
|
|
the_time[0] += 120
|
|
return rv
|
|
|
|
self.replicator.replicator_workers = 2
|
|
self.replicator._next_rcache_update = the_time[0]
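        # aggregated recon stats only become due once the mocked clock
        # advances past this point (the test bumps it by stats_interval)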
|
|
|
|
# One worker has finished a pass, the other hasn't.
|
|
with mock.patch.object(self.replicator, 'replicate',
|
|
self.fake_replicate), \
|
|
mock.patch('time.time', mock_time):
|
|
self.replicator.get_worker_args()
|
|
# Yes, this says run_once, but this is only to populate
|
|
# object.recon with some stats. The real test is for the
|
|
# aggregation.
|
|
self.replicator.run_once(multiprocess_worker_index=0,
|
|
override_devices=['sda', 'sdb', 'sdc'])
|
|
|
|
# This will not produce aggregate stats since not every device has
|
|
# finished a pass.
|
|
the_time[0] += self.replicator.stats_interval
|
|
with mock.patch('time.time', mock_time):
|
|
rv = self.replicator.is_healthy()
|
|
self.assertTrue(rv)
|
|
with open(self.recon_file) as fh:
|
|
recon_data = json.load(fh)
|
|
self.assertNotIn('replication_stats', recon_data)
|
|
|
|
# Now all the local devices have completed a replication pass, so we
|
|
# will produce aggregate stats.
|
|
with mock.patch.object(self.replicator, 'replicate',
|
|
self.fake_replicate), \
|
|
mock.patch('time.time', mock_time):
|
|
self.replicator.get_worker_args()
|
|
self.replicator.run_once(multiprocess_worker_index=1,
|
|
override_devices=['sdd', 'sde'])
|
|
the_time[0] += self.replicator.stats_interval
|
|
with mock.patch('time.time', mock_time):
|
|
rv = self.replicator.is_healthy()
|
|
self.assertTrue(rv)
|
|
with open(self.recon_file) as fh:
|
|
recon_data = json.load(fh)
|
|
self.assertIn('replication_stats', recon_data)
|
|
|
|
# no need to exhaustively check every sum
|
|
self.assertEqual(recon_data['replication_stats']['attempted'], 15)
|
|
self.assertEqual(recon_data['replication_stats']['success'], 150)
|
|
|
|
self.assertEqual(
|
|
recon_data['replication_last'],
|
|
min(pd['replication_last']
|
|
for pd in recon_data['object_replication_per_disk'].values()))


class TestReplicatorStats(unittest.TestCase):
|
|
def test_to_recon(self):
|
|
st = object_replicator.Stats(
|
|
attempted=1, failure=2, hashmatch=3, remove=4,
|
|
rsync=5, success=7,
|
|
suffix_count=8, suffix_hash=9, suffix_sync=10,
|
|
failure_nodes={'10.1.2.3': {'sda': 100, 'sdb': 200}})
|
|
# This is what appears in the recon dump
|
|
self.assertEqual(st.to_recon(), {
|
|
'attempted': 1,
|
|
'failure': 2,
|
|
'hashmatch': 3,
|
|
'remove': 4,
|
|
'rsync': 5,
|
|
'success': 7,
|
|
'suffix_count': 8,
|
|
'suffix_hash': 9,
|
|
'suffix_sync': 10,
|
|
'failure_nodes': {'10.1.2.3': {'sda': 100, 'sdb': 200}},
|
|
})
|
|
|
|
def test_recon_roundtrip(self):
|
|
before = object_replicator.Stats(
|
|
attempted=1, failure=2, hashmatch=3, remove=4,
|
|
rsync=5, success=7,
|
|
suffix_count=8, suffix_hash=9, suffix_sync=10,
|
|
failure_nodes={'10.1.2.3': {'sda': 100, 'sdb': 200}})
|
|
after = object_replicator.Stats.from_recon(before.to_recon())
|
|
self.assertEqual(after.attempted, before.attempted)
|
|
self.assertEqual(after.failure, before.failure)
|
|
self.assertEqual(after.hashmatch, before.hashmatch)
|
|
self.assertEqual(after.remove, before.remove)
|
|
self.assertEqual(after.rsync, before.rsync)
|
|
self.assertEqual(after.success, before.success)
|
|
self.assertEqual(after.suffix_count, before.suffix_count)
|
|
self.assertEqual(after.suffix_hash, before.suffix_hash)
|
|
self.assertEqual(after.suffix_sync, before.suffix_sync)
|
|
self.assertEqual(after.failure_nodes, before.failure_nodes)
|
|
|
|
def test_from_recon_skips_extra_fields(self):
|
|
# If another attribute ever sneaks its way in, we should ignore it.
|
|
# This will make aborted upgrades a little less painful for
|
|
# operators.
|
|
recon_dict = {'attempted': 1, 'failure': 2, 'hashmatch': 3,
|
|
'spices': 5, 'treasures': 8}
|
|
stats = object_replicator.Stats.from_recon(recon_dict)
|
|
self.assertEqual(stats.attempted, 1)
|
|
self.assertEqual(stats.failure, 2)
|
|
self.assertEqual(stats.hashmatch, 3)
|
|
# We don't gain attributes just because they're in object.recon.
|
|
self.assertFalse(hasattr(stats, 'spices'))
|
|
self.assertFalse(hasattr(stats, 'treasures'))
|
|
|
|
def test_add_failure_stats(self):
|
|
st = object_replicator.Stats()
|
|
st.add_failure_stats([('10.1.1.1', 'd10'), ('10.1.1.1', 'd11')])
|
|
st.add_failure_stats([('10.1.1.1', 'd10')])
|
|
st.add_failure_stats([('10.1.1.1', 'd12'), ('10.2.2.2', 'd20'),
|
|
('10.2.2.2', 'd21'), ('10.2.2.2', 'd21'),
|
|
('10.2.2.2', 'd21')])
|
|
self.assertEqual(st.failure, 8)
|
|
|
|
as_dict = st.to_recon()
|
|
self.assertEqual(as_dict['failure_nodes'], {
|
|
'10.1.1.1': {
|
|
'd10': 2,
|
|
'd11': 1,
|
|
'd12': 1,
|
|
},
|
|
'10.2.2.2': {
|
|
'd20': 1,
|
|
'd21': 3,
|
|
},
|
|
})
|
|
|
|
def test_add(self):
|
|
st1 = object_replicator.Stats(
|
|
attempted=1, failure=2, hashmatch=3, remove=4, rsync=5,
|
|
success=6, suffix_count=7, suffix_hash=8, suffix_sync=9,
|
|
failure_nodes={
|
|
'10.1.1.1': {'sda': 10, 'sdb': 20},
|
|
'10.1.1.2': {'sda': 10, 'sdb': 20}})
|
|
st2 = object_replicator.Stats(
|
|
attempted=2, failure=4, hashmatch=6, remove=8, rsync=10,
|
|
success=12, suffix_count=14, suffix_hash=16, suffix_sync=18,
|
|
failure_nodes={
|
|
'10.1.1.2': {'sda': 10, 'sdb': 20},
|
|
'10.1.1.3': {'sda': 10, 'sdb': 20}})
|
|
total = st1 + st2
|
|
self.assertEqual(total.attempted, 3)
|
|
self.assertEqual(total.failure, 6)
|
|
self.assertEqual(total.hashmatch, 9)
|
|
self.assertEqual(total.remove, 12)
|
|
self.assertEqual(total.rsync, 15)
|
|
self.assertEqual(total.success, 18)
|
|
self.assertEqual(total.suffix_count, 21)
|
|
self.assertEqual(total.suffix_hash, 24)
|
|
self.assertEqual(total.suffix_sync, 27)
|
|
self.assertEqual(total.failure_nodes, {
|
|
'10.1.1.1': {'sda': 10, 'sdb': 20},
|
|
'10.1.1.2': {'sda': 20, 'sdb': 40},
|
|
'10.1.1.3': {'sda': 10, 'sdb': 20},
|
|
})


if __name__ == '__main__':
    unittest.main()