Container drive error results double space usage on rest drives

When drive with container or account database is unmounted
replicator pushes database to handoff location. But this
handoff location finds replica with unmounted drive and
pushes database to the *next* handoff until all handoffs has
a replica - all container/account servers has replicas of
all unmounted drives.

This patch solves:
- Consumption of iterator on handoff location that results in
  replication to the next and next handoff.
- StopIteration exception stopped not finished loop over
  available handoffs if no more nodes exists for db replication
  candidency.

Regression was introduced in 2.4.0 with rsync compression.

Co-Author: Kota Tsuyuzaki <tsuyuzaki.kota@lab.ntt.co.jp>

Change-Id: I344f9daaa038c6946be11e1cf8c4ef104a09e68b
Closes-Bug: 1675500
This commit is contained in:
Pavel Kvasnička 2017-03-22 09:59:53 +01:00
parent 6bd24bf972
commit bcd0eb70af
2 changed files with 52 additions and 2 deletions

View File

@ -543,7 +543,7 @@ class Replicator(Daemon):
more_nodes = self.ring.get_more_nodes(int(partition)) more_nodes = self.ring.get_more_nodes(int(partition))
if not local_dev: if not local_dev:
# Check further if local device is a handoff node # Check further if local device is a handoff node
for node in more_nodes: for node in self.ring.get_more_nodes(int(partition)):
if node['id'] == node_id: if node['id'] == node_id:
local_dev = node local_dev = node
break break
@ -559,7 +559,13 @@ class Replicator(Daemon):
success = self._repl_to_node(node, broker, partition, info, success = self._repl_to_node(node, broker, partition, info,
different_region) different_region)
except DriveNotMounted: except DriveNotMounted:
repl_nodes.append(next(more_nodes)) try:
repl_nodes.append(next(more_nodes))
except StopIteration:
self.logger.error(
_('ERROR There are not enough handoff nodes to reach '
'replica count for partition %s'),
partition)
self.logger.error(_('ERROR Remote drive not mounted %s'), node) self.logger.error(_('ERROR Remote drive not mounted %s'), node)
except (Exception, Timeout): except (Exception, Timeout):
self.logger.exception(_('ERROR syncing %(file)s with node' self.logger.exception(_('ERROR syncing %(file)s with node'

View File

@ -643,6 +643,50 @@ class TestDBReplicator(unittest.TestCase):
replicator._replicate_object('0', '/path/to/file', 'node_id') replicator._replicate_object('0', '/path/to/file', 'node_id')
self.assertEqual(['/path/to/file'], self.delete_db_calls) self.assertEqual(['/path/to/file'], self.delete_db_calls)
def test_replicate_object_with_exception(self):
replicator = TestReplicator({})
replicator.ring = FakeRingWithNodes().Ring('path')
replicator.brokerclass = FakeAccountBroker
replicator.delete_db = self.stub_delete_db
replicator._repl_to_node = mock.Mock(side_effect=Exception())
replicator._replicate_object('0', '/path/to/file',
replicator.ring.devs[0]['id'])
self.assertEqual(2, replicator._repl_to_node.call_count)
# with one DriveNotMounted exception called on +1 more replica
replicator._repl_to_node = mock.Mock(side_effect=[DriveNotMounted()])
replicator._replicate_object('0', '/path/to/file',
replicator.ring.devs[0]['id'])
self.assertEqual(3, replicator._repl_to_node.call_count)
# called on +1 more replica and self when *first* handoff
replicator._repl_to_node = mock.Mock(side_effect=[DriveNotMounted()])
replicator._replicate_object('0', '/path/to/file',
replicator.ring.devs[3]['id'])
self.assertEqual(4, replicator._repl_to_node.call_count)
# even if it's the last handoff it works to keep 3 replicas
# 2 primaries + 1 handoff
replicator._repl_to_node = mock.Mock(side_effect=[DriveNotMounted()])
replicator._replicate_object('0', '/path/to/file',
replicator.ring.devs[-1]['id'])
self.assertEqual(4, replicator._repl_to_node.call_count)
# with two DriveNotMounted exceptions called on +2 more replica keeping
# durability
replicator._repl_to_node = mock.Mock(
side_effect=[DriveNotMounted()] * 2)
replicator._replicate_object('0', '/path/to/file',
replicator.ring.devs[0]['id'])
self.assertEqual(4, replicator._repl_to_node.call_count)
def test_replicate_object_with_exception_run_out_of_nodes(self):
replicator = TestReplicator({})
replicator.ring = FakeRingWithNodes().Ring('path')
replicator.brokerclass = FakeAccountBroker
replicator.delete_db = self.stub_delete_db
# all other devices are not mounted
replicator._repl_to_node = mock.Mock(side_effect=DriveNotMounted())
replicator._replicate_object('0', '/path/to/file',
replicator.ring.devs[0]['id'])
self.assertEqual(5, replicator._repl_to_node.call_count)
def test_replicate_account_out_of_place(self): def test_replicate_account_out_of_place(self):
replicator = TestReplicator({}, logger=unit.FakeLogger()) replicator = TestReplicator({}, logger=unit.FakeLogger())
replicator.ring = FakeRingWithNodes().Ring('path') replicator.ring = FakeRingWithNodes().Ring('path')