From 16de32f1686a60fa5b3aa1802dcc42783846b05b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Lis=C3=A1k?= Date: Fri, 6 Nov 2015 10:49:09 +0100 Subject: [PATCH] Log error if a local device not identified in replicator Example: * Different port in config and in ring file. * Running daemon on server not in ring file. In both cases replication daemon is running but nothing is replicated. Error log helps to distinguish a local device can't be identified. Closes-Bug: 1508228 Change-Id: I99351b7d9946f250b7750df91c13d09352a145ce --- swift/common/db_replicator.py | 6 +++++ swift/obj/replicator.py | 6 +++++ test/unit/common/test_db_replicator.py | 31 +++++++++++++++++++++++--- test/unit/obj/test_replicator.py | 31 +++++++++++++++++++++++--- 4 files changed, 68 insertions(+), 6 deletions(-) diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index 589faa4e69..616e742ca6 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -620,10 +620,12 @@ class Replicator(Daemon): self.logger.error(_('ERROR Failed to get my own IPs?')) return self._local_device_ids = set() + found_local = False for node in self.ring.devs: if node and is_local_device(ips, self.port, node['replication_ip'], node['replication_port']): + found_local = True if self.mount_check and not ismount( os.path.join(self.root, node['device'])): self._add_failure_stats( @@ -640,6 +642,10 @@ class Replicator(Daemon): if os.path.isdir(datadir): self._local_device_ids.add(node['id']) dirs.append((datadir, node['id'])) + if not found_local: + self.logger.error("Can't find itself %s with port %s in ring " + "file, not replicating", + ", ".join(ips), self.port) self.logger.info(_('Beginning replication run')) for part, object_file, node_id in roundrobin_datadirs(dirs): self.cpool.spawn_n( diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index 9689edc858..aa9686133d 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -566,6 +566,7 @@ class ObjectReplicator(Daemon): [(dev['replication_ip'], dev['device']) for dev in policy.object_ring.devs if dev]) data_dir = get_data_dir(policy) + found_local = False for local_dev in [dev for dev in policy.object_ring.devs if (dev and is_local_device(ips, @@ -574,6 +575,7 @@ class ObjectReplicator(Daemon): dev['replication_port']) and (override_devices is None or dev['device'] in override_devices))]: + found_local = True dev_path = join(self.devices_dir, local_dev['device']) obj_path = join(dev_path, data_dir) tmp_path = join(dev_path, get_tmp_dir(policy)) @@ -626,6 +628,10 @@ class ObjectReplicator(Daemon): for failure_dev in policy.object_ring.devs if failure_dev]) continue + if not found_local: + self.logger.error("Can't find itself %s with port %s in ring " + "file, not replicating", + ", ".join(ips), self.port) return jobs def collect_jobs(self, override_devices=None, override_partitions=None, diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py index 422c0c56b8..5c5912e19b 100644 --- a/test/unit/common/test_db_replicator.py +++ b/test/unit/common/test_db_replicator.py @@ -266,10 +266,15 @@ class TestDBReplicator(unittest.TestCase): db_replicator.ring = FakeRing() self.delete_db_calls = [] self._patchers = [] + # recon cache path + self.recon_cache = mkdtemp() + rmtree(self.recon_cache, ignore_errors=1) + os.mkdir(self.recon_cache) def tearDown(self): for patcher in self._patchers: patcher.stop() + rmtree(self.recon_cache, ignore_errors=1) def _patch(self, patching_fn, *args, **kwargs): patcher = patching_fn(*args, **kwargs) @@ -463,9 +468,29 @@ class TestDBReplicator(unittest.TestCase): {'id': 'a', 'point': -1, 'max_row': 10, 'hash': 'd'}, FakeBroker(), -1)), False) - def test_run_once(self): - replicator = TestReplicator({}) - replicator.run_once() + def test_run_once_no_local_device_in_ring(self): + logger = unit.debug_logger('test-replicator') + replicator = TestReplicator({'recon_cache_path': self.recon_cache}, + logger=logger) + with patch('swift.common.db_replicator.whataremyips', + return_value=['127.0.0.1']): + replicator.run_once() + expected = [ + "Can't find itself 127.0.0.1 with port 1000 " + "in ring file, not replicating", + ] + self.assertEqual(expected, logger.get_lines_for_level('error')) + + def test_run_once_with_local_device_in_ring(self): + logger = unit.debug_logger('test-replicator') + base = 'swift.common.db_replicator.' + with patch(base + 'whataremyips', return_value=['1.1.1.1']), \ + patch(base + 'ring', FakeRingWithNodes()): + replicator = TestReplicator({'bind_port': 6000, + 'recon_cache_path': self.recon_cache}, + logger=logger) + replicator.run_once() + self.assertFalse(logger.get_lines_for_level('error')) def test_run_once_no_ips(self): replicator = TestReplicator({}, logger=unit.FakeLogger()) diff --git a/test/unit/obj/test_replicator.py b/test/unit/obj/test_replicator.py index fa08bb2684..1821c04506 100644 --- a/test/unit/obj/test_replicator.py +++ b/test/unit/obj/test_replicator.py @@ -179,6 +179,10 @@ class TestObjectReplicator(unittest.TestCase): def setUp(self): utils.HASH_PATH_SUFFIX = 'endcap' utils.HASH_PATH_PREFIX = '' + # recon cache path + self.recon_cache = tempfile.mkdtemp() + rmtree(self.recon_cache, ignore_errors=1) + os.mkdir(self.recon_cache) # Setup a test ring (stolen from common/test_ring.py) self.testdir = tempfile.mkdtemp() self.devices = os.path.join(self.testdir, 'node') @@ -200,6 +204,7 @@ class TestObjectReplicator(unittest.TestCase): def tearDown(self): self.assertFalse(process_errors) rmtree(self.testdir, ignore_errors=1) + rmtree(self.recon_cache, ignore_errors=1) def test_handoff_replication_setting_warnings(self): conf_tests = [ @@ -254,11 +259,27 @@ class TestObjectReplicator(unittest.TestCase): self.replicator.all_devs_info = set() self.df_mgr = diskfile.DiskFileManager(self.conf, self.logger) + def test_run_once_no_local_device_in_ring(self): + conf = dict(swift_dir=self.testdir, devices=self.devices, + bind_ip='1.1.1.1', recon_cache_path=self.recon_cache, + mount_check='false', timeout='300', stats_interval='1') + replicator = object_replicator.ObjectReplicator(conf, + logger=self.logger) + replicator.run_once() + expected = [ + "Can't find itself 1.1.1.1 with port 6000 " + "in ring file, not replicating", + "Can't find itself 1.1.1.1 with port 6000 " + "in ring file, not replicating", + ] + self.assertEqual(expected, self.logger.get_lines_for_level('error')) + def test_run_once(self): conf = dict(swift_dir=self.testdir, devices=self.devices, - bind_ip=_ips()[0], + bind_ip=_ips()[0], recon_cache_path=self.recon_cache, mount_check='false', timeout='300', stats_interval='1') - replicator = object_replicator.ObjectReplicator(conf) + replicator = object_replicator.ObjectReplicator(conf, + logger=self.logger) was_connector = object_replicator.http_connect object_replicator.http_connect = mock_http_connect(200) cur_part = '0' @@ -286,13 +307,16 @@ class TestObjectReplicator(unittest.TestCase): with _mock_process(process_arg_checker): replicator.run_once() self.assertFalse(process_errors) + self.assertFalse(self.logger.get_lines_for_level('error')) object_replicator.http_connect = was_connector # policy 1 def test_run_once_1(self): conf = dict(swift_dir=self.testdir, devices=self.devices, + recon_cache_path=self.recon_cache, mount_check='false', timeout='300', stats_interval='1') - replicator = object_replicator.ObjectReplicator(conf) + replicator = object_replicator.ObjectReplicator(conf, + logger=self.logger) was_connector = object_replicator.http_connect object_replicator.http_connect = mock_http_connect(200) cur_part = '0' @@ -322,6 +346,7 @@ class TestObjectReplicator(unittest.TestCase): side_effect=_ips): replicator.run_once() self.assertFalse(process_errors) + self.assertFalse(self.logger.get_lines_for_level('error')) object_replicator.http_connect = was_connector def test_check_ring(self):