From 9729bc83eb6845e65f8244b4104b2c1678f8fe38 Mon Sep 17 00:00:00 2001 From: Christian Schwede Date: Mon, 11 Apr 2016 11:56:07 +0200 Subject: [PATCH] Don't delete misplaced dbs if not replicated If one uses only a single replica and a database file is placed on a wrong partition, it will be removed instead of replicated to the correct partition. There are two reasons for this: 1. The list of nodes is empty when there is only a single replica 2. all(responses) is True even if there is no response at all, and the latter is always True if there is no node to replicate to. This patch fixes this by adding a special case if used with only one replica to the node selection loop and ensures that the list of responses is not empty. Also adds a test that fails on current master and passes with this change. Closes-Bug: 1568591 Change-Id: I028ea8c1928e8c9a401db31fb266ff82606f8371 --- swift/common/db_replicator.py | 13 +++++++----- test/unit/common/test_db_replicator.py | 29 +++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index d4abf25efe..7115a8afe4 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -525,10 +525,13 @@ class Replicator(Daemon): if shouldbehere: shouldbehere = bool([n for n in nodes if n['id'] == node_id]) # See Footnote [1] for an explanation of the repl_nodes assignment. - i = 0 - while i < len(nodes) and nodes[i]['id'] != node_id: - i += 1 - repl_nodes = nodes[i + 1:] + nodes[:i] + if len(nodes) > 1: + i = 0 + while i < len(nodes) and nodes[i]['id'] != node_id: + i += 1 + repl_nodes = nodes[i + 1:] + nodes[:i] + else: # Special case if using only a single replica + repl_nodes = nodes more_nodes = self.ring.get_more_nodes(int(partition)) if not local_dev: # Check further if local device is a handoff node @@ -563,7 +566,7 @@ class Replicator(Daemon): except (Exception, Timeout): self.logger.exception('UNHANDLED EXCEPTION: in post replicate ' 'hook for %s', broker.db_file) - if not shouldbehere and all(responses): + if not shouldbehere and responses and all(responses): # If the db shouldn't be on this node and has been successfully # synced to all of its peers, it can be removed. if not self.delete_db(broker): diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py index be17d42bf8..29d66df99d 100644 --- a/test/unit/common/test_db_replicator.py +++ b/test/unit/common/test_db_replicator.py @@ -73,7 +73,7 @@ class FakeRingWithSingleNode(object): class Ring(object): devs = [dict( id=1, weight=10.0, zone=1, ip='1.1.1.1', port=6200, device='sdb', - meta='', replication_ip='1.1.1.1', replication_port=6200 + meta='', replication_ip='1.1.1.1', replication_port=6200, region=1 )] def __init__(self, path, reload_time=15, ring_name=None): @@ -633,6 +633,9 @@ class TestDBReplicator(unittest.TestCase): def test_replicate_object_delete_because_not_shouldbehere(self): replicator = TestReplicator({}) + replicator.ring = FakeRingWithNodes().Ring('path') + replicator.brokerclass = FakeAccountBroker + replicator._repl_to_node = lambda *args: True replicator.delete_db = self.stub_delete_db replicator._replicate_object('0', '/path/to/file', 'node_id') self.assertEqual(['/path/to/file'], self.delete_db_calls) @@ -669,6 +672,30 @@ class TestDBReplicator(unittest.TestCase): [(('Found /path/to/file for /a%20c%20t/c%20o%20n when it should ' 'be on partition 0; will replicate out and remove.',), {})]) + def test_replicate_container_out_of_place_no_node(self): + replicator = TestReplicator({}, logger=unit.FakeLogger()) + replicator.ring = FakeRingWithSingleNode().Ring('path') + replicator._repl_to_node = lambda *args: True + + replicator.delete_db = self.stub_delete_db + # Correct node_id, wrong part + part = replicator.ring.get_part( + TEST_ACCOUNT_NAME, TEST_CONTAINER_NAME) + 1 + node_id = replicator.ring.get_part_nodes(part)[0]['id'] + replicator._replicate_object(str(part), '/path/to/file', node_id) + self.assertEqual(['/path/to/file'], self.delete_db_calls) + + self.delete_db_calls = [] + + # No nodes this time + replicator.ring.get_part_nodes = lambda *args: [] + replicator.delete_db = self.stub_delete_db + # Correct node_id, wrong part + part = replicator.ring.get_part( + TEST_ACCOUNT_NAME, TEST_CONTAINER_NAME) + 1 + replicator._replicate_object(str(part), '/path/to/file', node_id) + self.assertEqual([], self.delete_db_calls) + def test_replicate_object_different_region(self): db_replicator.ring = FakeRingWithNodes() replicator = TestReplicator({})