From 29544a9e175b1ec9e0dbfd5288edc00a1402d5ca Mon Sep 17 00:00:00 2001 From: Samuel Merritt Date: Wed, 27 Apr 2016 16:59:00 -0500 Subject: [PATCH] Use smaller quorum size in proxy for even numbers of replicas Requiring 2/2 backends for PUT requests means that the cluster can't tolerate a single failure. Likewise, if you have 4 replicas in 2 regions, requiring 3/4 on a POST request means you cannot POST with your inter-region link down or congested. This changes the (replication) quorum size in the proxy to be at least half the nodes instead of a majority of the nodes. Daemons that were looking for a majority remain unchanged. The container reconciler, replicator, and updater still require majorities so their functioning is unchanged. Odd numbers of replicas are unaffected by this commit. Change-Id: I3b07ff0222aba6293ad7d60afe1747acafbe6ce4 --- swift/common/utils.py | 6 +++- swift/container/reconciler.py | 10 +++---- swift/container/replicator.py | 6 ++-- swift/container/updater.py | 4 +-- test/unit/common/test_storage_policy.py | 4 +-- test/unit/common/test_utils.py | 14 +++++++-- test/unit/proxy/controllers/test_account.py | 30 +++++++++---------- test/unit/proxy/controllers/test_base.py | 9 ++++-- test/unit/proxy/controllers/test_container.py | 30 +++++++++---------- 9 files changed, 65 insertions(+), 48 deletions(-) diff --git a/swift/common/utils.py b/swift/common/utils.py index fb4baa4396..ce6f9d56da 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -2937,6 +2937,10 @@ def public(func): return func +def majority_size(n): + return (n // 2) + 1 + + def quorum_size(n): """ quorum size as it applies to services that use 'replication' for data @@ -2946,7 +2950,7 @@ def quorum_size(n): Number of successful backend requests needed for the proxy to consider the client request successful. """ - return (n // 2) + 1 + return (n + 1) // 2 def rsync_ip(ip): diff --git a/swift/container/reconciler.py b/swift/container/reconciler.py index efacfd2248..f167bed79d 100644 --- a/swift/container/reconciler.py +++ b/swift/container/reconciler.py @@ -25,7 +25,7 @@ from swift.common.direct_client import ( direct_head_container, direct_delete_container_object, direct_put_container_object, ClientException) from swift.common.internal_client import InternalClient, UnexpectedResponse -from swift.common.utils import get_logger, split_path, quorum_size, \ +from swift.common.utils import get_logger, split_path, majority_size, \ FileLikeIter, Timestamp, last_modified_date_to_timestamp, \ LRUCache, decode_timestamps @@ -194,7 +194,7 @@ def add_to_reconciler_queue(container_ring, account, container, obj, server :returns: .misplaced_object container name, False on failure. "Success" - means a quorum of containers got the update. + means a majority of containers got the update. """ container_name = get_reconciler_container_name(obj_timestamp) object_name = get_reconciler_obj_name(obj_policy_index, account, @@ -232,7 +232,7 @@ def add_to_reconciler_queue(container_ring, account, container, obj, response_timeout=response_timeout) successes = sum(pile) - if successes >= quorum_size(len(nodes)): + if successes >= majority_size(len(nodes)): return container_name else: return False @@ -289,7 +289,7 @@ def direct_get_container_policy_index(container_ring, account_name, :param container_ring: ring in which to look up the container locations :param account_name: name of the container's account :param container_name: name of the container - :returns: storage policy index, or None if it couldn't get a quorum + :returns: storage policy index, or None if it couldn't get a majority """ def _eat_client_exception(*args): try: @@ -307,7 +307,7 @@ def direct_get_container_policy_index(container_ring, account_name, container_name) headers = [x for x in pile if x is not None] - if len(headers) < quorum_size(len(nodes)): + if len(headers) < majority_size(len(nodes)): return return best_policy_index(headers) diff --git a/swift/container/replicator.py b/swift/container/replicator.py index b428086bdd..f7af1efe76 100644 --- a/swift/container/replicator.py +++ b/swift/container/replicator.py @@ -31,7 +31,7 @@ from swift.common.exceptions import DeviceUnavailable from swift.common.http import is_success from swift.common.db import DatabaseAlreadyExists from swift.common.utils import (Timestamp, hash_path, - storage_directory, quorum_size) + storage_directory, majority_size) class ContainerReplicator(db_replicator.Replicator): @@ -202,9 +202,9 @@ class ContainerReplicator(db_replicator.Replicator): broker.update_reconciler_sync(info['max_row']) return max_sync = self.dump_to_reconciler(broker, point) - success = responses.count(True) >= quorum_size(len(responses)) + success = responses.count(True) >= majority_size(len(responses)) if max_sync > point and success: - # to be safe, only slide up the sync point with a quorum on + # to be safe, only slide up the sync point with a majority on # replication broker.update_reconciler_sync(max_sync) diff --git a/swift/container/updater.py b/swift/container/updater.py index f2cd8f3328..fb6d741e60 100644 --- a/swift/container/updater.py +++ b/swift/container/updater.py @@ -31,7 +31,7 @@ from swift.common.bufferedhttp import http_connect from swift.common.exceptions import ConnectionTimeout from swift.common.ring import Ring from swift.common.utils import get_logger, config_true_value, ismount, \ - dump_recon_cache, quorum_size, Timestamp + dump_recon_cache, majority_size, Timestamp from swift.common.daemon import Daemon from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR @@ -238,7 +238,7 @@ class ContainerUpdater(Daemon): for event in events: if is_success(event.wait()): successes += 1 - if successes >= quorum_size(len(events)): + if successes >= majority_size(len(events)): self.logger.increment('successes') self.successes += 1 self.logger.debug( diff --git a/test/unit/common/test_storage_policy.py b/test/unit/common/test_storage_policy.py index 12a743f9ba..3fd721b732 100755 --- a/test/unit/common/test_storage_policy.py +++ b/test/unit/common/test_storage_policy.py @@ -1119,9 +1119,9 @@ class TestStoragePolicies(unittest.TestCase): def test_quorum_size_replication(self): expected_sizes = {1: 1, - 2: 2, + 2: 1, 3: 2, - 4: 3, + 4: 2, 5: 3} for n, expected in expected_sizes.items(): policy = StoragePolicy(0, 'zero', diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 626043de3d..d237970941 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -2504,13 +2504,23 @@ cluster_dfw1 = http://dfw1.host/v1/ self.assertFalse(utils.streq_const_time('a', 'aaaaa')) self.assertFalse(utils.streq_const_time('ABC123', 'abc123')) - def test_replication_quorum_size(self): + def test_quorum_size(self): + expected_sizes = {1: 1, + 2: 1, + 3: 2, + 4: 2, + 5: 3} + got_sizes = dict([(n, utils.quorum_size(n)) + for n in expected_sizes]) + self.assertEqual(expected_sizes, got_sizes) + + def test_majority_size(self): expected_sizes = {1: 1, 2: 2, 3: 2, 4: 3, 5: 3} - got_sizes = dict([(n, utils.quorum_size(n)) + got_sizes = dict([(n, utils.majority_size(n)) for n in expected_sizes]) self.assertEqual(expected_sizes, got_sizes) diff --git a/test/unit/proxy/controllers/test_account.py b/test/unit/proxy/controllers/test_account.py index c7ef854d1b..d3dd9cf504 100644 --- a/test/unit/proxy/controllers/test_account.py +++ b/test/unit/proxy/controllers/test_account.py @@ -320,16 +320,16 @@ class TestAccountController4Replicas(TestAccountController): ((201, 201, 201, 201), 201), ((201, 201, 201, 404), 201), ((201, 201, 201, 503), 201), - ((201, 201, 404, 404), 503), - ((201, 201, 404, 503), 503), - ((201, 201, 503, 503), 503), + ((201, 201, 404, 404), 201), + ((201, 201, 404, 503), 201), + ((201, 201, 503, 503), 201), ((201, 404, 404, 404), 404), - ((201, 404, 404, 503), 503), + ((201, 404, 404, 503), 404), ((201, 404, 503, 503), 503), ((201, 503, 503, 503), 503), ((404, 404, 404, 404), 404), ((404, 404, 404, 503), 404), - ((404, 404, 503, 503), 503), + ((404, 404, 503, 503), 404), ((404, 503, 503, 503), 503), ((503, 503, 503, 503), 503) ] @@ -340,16 +340,16 @@ class TestAccountController4Replicas(TestAccountController): ((204, 204, 204, 204), 204), ((204, 204, 204, 404), 204), ((204, 204, 204, 503), 204), - ((204, 204, 404, 404), 503), - ((204, 204, 404, 503), 503), - ((204, 204, 503, 503), 503), + ((204, 204, 404, 404), 204), + ((204, 204, 404, 503), 204), + ((204, 204, 503, 503), 204), ((204, 404, 404, 404), 404), - ((204, 404, 404, 503), 503), + ((204, 404, 404, 503), 404), ((204, 404, 503, 503), 503), ((204, 503, 503, 503), 503), ((404, 404, 404, 404), 404), ((404, 404, 404, 503), 404), - ((404, 404, 503, 503), 503), + ((404, 404, 503, 503), 404), ((404, 503, 503, 503), 503), ((503, 503, 503, 503), 503) ] @@ -360,16 +360,16 @@ class TestAccountController4Replicas(TestAccountController): ((204, 204, 204, 204), 204), ((204, 204, 204, 404), 204), ((204, 204, 204, 503), 204), - ((204, 204, 404, 404), 503), - ((204, 204, 404, 503), 503), - ((204, 204, 503, 503), 503), + ((204, 204, 404, 404), 204), + ((204, 204, 404, 503), 204), + ((204, 204, 503, 503), 204), ((204, 404, 404, 404), 404), - ((204, 404, 404, 503), 503), + ((204, 404, 404, 503), 404), ((204, 404, 503, 503), 503), ((204, 503, 503, 503), 503), ((404, 404, 404, 404), 404), ((404, 404, 404, 503), 404), - ((404, 404, 503, 503), 503), + ((404, 404, 503, 503), 404), ((404, 503, 503, 503), 503), ((503, 503, 503, 503), 503) ] diff --git a/test/unit/proxy/controllers/test_base.py b/test/unit/proxy/controllers/test_base.py index 330250e2c9..0715701815 100644 --- a/test/unit/proxy/controllers/test_base.py +++ b/test/unit/proxy/controllers/test_base.py @@ -662,12 +662,15 @@ class TestFuncs(unittest.TestCase): base = Controller(self.app) # just throw a bunch of test cases at it self.assertEqual(base.have_quorum([201, 404], 3), False) - self.assertEqual(base.have_quorum([201, 201], 4), False) - self.assertEqual(base.have_quorum([201, 201, 404, 404], 4), False) - self.assertEqual(base.have_quorum([201, 503, 503, 201], 4), False) + self.assertEqual(base.have_quorum([201, 201], 4), True) + self.assertEqual(base.have_quorum([201], 4), False) + self.assertEqual(base.have_quorum([201, 201, 404, 404], 4), True) + self.assertEqual(base.have_quorum([201, 302, 418, 503], 4), False) + self.assertEqual(base.have_quorum([201, 503, 503, 201], 4), True) self.assertEqual(base.have_quorum([201, 201], 3), True) self.assertEqual(base.have_quorum([404, 404], 3), True) self.assertEqual(base.have_quorum([201, 201], 2), True) + self.assertEqual(base.have_quorum([201, 404], 2), True) self.assertEqual(base.have_quorum([404, 404], 2), True) self.assertEqual(base.have_quorum([201, 404, 201, 201], 4), True) diff --git a/test/unit/proxy/controllers/test_container.py b/test/unit/proxy/controllers/test_container.py index 44e5fb5142..a95e058452 100644 --- a/test/unit/proxy/controllers/test_container.py +++ b/test/unit/proxy/controllers/test_container.py @@ -278,16 +278,16 @@ class TestContainerController4Replicas(TestContainerController): ((201, 201, 201, 201), 201), ((201, 201, 201, 404), 201), ((201, 201, 201, 503), 201), - ((201, 201, 404, 404), 503), - ((201, 201, 404, 503), 503), - ((201, 201, 503, 503), 503), + ((201, 201, 404, 404), 201), + ((201, 201, 404, 503), 201), + ((201, 201, 503, 503), 201), ((201, 404, 404, 404), 404), - ((201, 404, 404, 503), 503), + ((201, 404, 404, 503), 404), ((201, 404, 503, 503), 503), ((201, 503, 503, 503), 503), ((404, 404, 404, 404), 404), ((404, 404, 404, 503), 404), - ((404, 404, 503, 503), 503), + ((404, 404, 503, 503), 404), ((404, 503, 503, 503), 503), ((503, 503, 503, 503), 503) ] @@ -298,16 +298,16 @@ class TestContainerController4Replicas(TestContainerController): ((204, 204, 204, 204), 204), ((204, 204, 204, 404), 204), ((204, 204, 204, 503), 204), - ((204, 204, 404, 404), 503), - ((204, 204, 404, 503), 503), - ((204, 204, 503, 503), 503), + ((204, 204, 404, 404), 204), + ((204, 204, 404, 503), 204), + ((204, 204, 503, 503), 204), ((204, 404, 404, 404), 404), - ((204, 404, 404, 503), 503), + ((204, 404, 404, 503), 404), ((204, 404, 503, 503), 503), ((204, 503, 503, 503), 503), ((404, 404, 404, 404), 404), ((404, 404, 404, 503), 404), - ((404, 404, 503, 503), 503), + ((404, 404, 503, 503), 404), ((404, 503, 503, 503), 503), ((503, 503, 503, 503), 503) ] @@ -318,16 +318,16 @@ class TestContainerController4Replicas(TestContainerController): ((204, 204, 204, 204), 204), ((204, 204, 204, 404), 204), ((204, 204, 204, 503), 204), - ((204, 204, 404, 404), 503), - ((204, 204, 404, 503), 503), - ((204, 204, 503, 503), 503), + ((204, 204, 404, 404), 204), + ((204, 204, 404, 503), 204), + ((204, 204, 503, 503), 204), ((204, 404, 404, 404), 404), - ((204, 404, 404, 503), 503), + ((204, 404, 404, 503), 404), ((204, 404, 503, 503), 503), ((204, 503, 503, 503), 503), ((404, 404, 404, 404), 404), ((404, 404, 404, 503), 404), - ((404, 404, 503, 503), 503), + ((404, 404, 503, 503), 404), ((404, 503, 503, 503), 503), ((503, 503, 503, 503), 503) ]