Fix account reaper for > 3 replicas
There's a pre-existing IndexError in the pop from the container node list in reaper's reap_object method for object rings with a replica count greater than the container replica count. Which is more likely on EC storage policies. When making the backend direct delete requests to the nodes once the container node's list is exhausted the generic exception handler logs the error and breaks out of any other backend object requests - but the reaper marches forward and eventually the tombstones are replicated. This change just cycles the container headers across all the nodes - which seems reasonable enough - but could certainly garner bikeshedding. Change-Id: I5897d00b0a8c1e05884945dd93d9ce891b207001
This commit is contained in:
parent
61a9d35fd5
commit
b2189ef47a
@ -19,6 +19,7 @@ from swift import gettext_ as _
|
||||
from logging import DEBUG
|
||||
from math import sqrt
|
||||
from time import time
|
||||
import itertools
|
||||
|
||||
from eventlet import GreenPool, sleep, Timeout
|
||||
|
||||
@ -432,7 +433,7 @@ class AccountReaper(Daemon):
|
||||
* See also: :func:`swift.common.ring.Ring.get_nodes` for a description
|
||||
of the container node dicts.
|
||||
"""
|
||||
container_nodes = list(container_nodes)
|
||||
cnodes = itertools.cycle(container_nodes)
|
||||
try:
|
||||
ring = self.get_object_ring(policy_index)
|
||||
except PolicyError:
|
||||
@ -443,7 +444,7 @@ class AccountReaper(Daemon):
|
||||
successes = 0
|
||||
failures = 0
|
||||
for node in nodes:
|
||||
cnode = container_nodes.pop()
|
||||
cnode = next(cnodes)
|
||||
try:
|
||||
direct_delete_object(
|
||||
node, part, account, container, obj,
|
||||
|
@ -141,7 +141,7 @@ cont_nodes = [{'device': 'sda1',
|
||||
@unit.patch_policies([StoragePolicy(0, 'zero', False,
|
||||
object_ring=unit.FakeRing()),
|
||||
StoragePolicy(1, 'one', True,
|
||||
object_ring=unit.FakeRing())])
|
||||
object_ring=unit.FakeRing(replicas=4))])
|
||||
class TestReaper(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
@ -215,7 +215,7 @@ class TestReaper(unittest.TestCase):
|
||||
r.stats_objects_possibly_remaining = 0
|
||||
r.myips = myips
|
||||
if fakelogger:
|
||||
r.logger = FakeLogger()
|
||||
r.logger = unit.debug_logger('test-reaper')
|
||||
return r
|
||||
|
||||
def fake_reap_account(self, *args, **kwargs):
|
||||
@ -287,7 +287,7 @@ class TestReaper(unittest.TestCase):
|
||||
policy.idx)
|
||||
for i, call_args in enumerate(
|
||||
fake_direct_delete.call_args_list):
|
||||
cnode = cont_nodes[i]
|
||||
cnode = cont_nodes[i % len(cont_nodes)]
|
||||
host = '%(ip)s:%(port)s' % cnode
|
||||
device = cnode['device']
|
||||
headers = {
|
||||
@ -302,7 +302,8 @@ class TestReaper(unittest.TestCase):
|
||||
headers=headers, conn_timeout=0.5,
|
||||
response_timeout=10)
|
||||
self.assertEqual(call_args, expected)
|
||||
self.assertEqual(r.stats_objects_deleted, 3)
|
||||
self.assertEqual(r.stats_objects_deleted,
|
||||
policy.object_ring.replicas)
|
||||
|
||||
def test_reap_object_fail(self):
|
||||
r = self.init_reaper({}, fakelogger=True)
|
||||
@ -313,7 +314,26 @@ class TestReaper(unittest.TestCase):
|
||||
self.fake_direct_delete_object):
|
||||
r.reap_object('a', 'c', 'partition', cont_nodes, 'o',
|
||||
policy.idx)
|
||||
self.assertEqual(r.stats_objects_deleted, 1)
|
||||
# IMHO, the stat handling in the node loop of reap object is
|
||||
# over indented, but no one has complained, so I'm not inclined
|
||||
# to move it. However it's worth noting we're currently keeping
|
||||
# stats on deletes per *replica* - which is rather obvious from
|
||||
# these tests, but this results is surprising because of some
|
||||
# funny logic to *skip* increments on successful deletes of
|
||||
# replicas until we have more successful responses than
|
||||
# failures. This means that while the first replica doesn't
|
||||
# increment deleted because of the failure, the second one
|
||||
# *does* get successfully deleted, but *also does not* increment
|
||||
# the counter (!?).
|
||||
#
|
||||
# In the three replica case this leaves only the last deleted
|
||||
# object incrementing the counter - in the four replica case
|
||||
# this leaves the last two.
|
||||
#
|
||||
# Basically this test will always result in:
|
||||
# deleted == num_replicas - 2
|
||||
self.assertEqual(r.stats_objects_deleted,
|
||||
policy.object_ring.replicas - 2)
|
||||
self.assertEqual(r.stats_objects_remaining, 1)
|
||||
self.assertEqual(r.stats_objects_possibly_remaining, 1)
|
||||
|
||||
@ -348,7 +368,7 @@ class TestReaper(unittest.TestCase):
|
||||
mocks['direct_get_container'].side_effect = fake_get_container
|
||||
r.reap_container('a', 'partition', acc_nodes, 'c')
|
||||
mock_calls = mocks['direct_delete_object'].call_args_list
|
||||
self.assertEqual(3, len(mock_calls))
|
||||
self.assertEqual(policy.object_ring.replicas, len(mock_calls))
|
||||
for call_args in mock_calls:
|
||||
_args, kwargs = call_args
|
||||
self.assertEqual(kwargs['headers']
|
||||
@ -356,7 +376,7 @@ class TestReaper(unittest.TestCase):
|
||||
policy.idx)
|
||||
|
||||
self.assertEquals(mocks['direct_delete_container'].call_count, 3)
|
||||
self.assertEqual(r.stats_objects_deleted, 3)
|
||||
self.assertEqual(r.stats_objects_deleted, policy.object_ring.replicas)
|
||||
|
||||
def test_reap_container_get_object_fail(self):
|
||||
r = self.init_reaper({}, fakelogger=True)
|
||||
@ -374,7 +394,7 @@ class TestReaper(unittest.TestCase):
|
||||
self.fake_reap_object)]
|
||||
with nested(*ctx):
|
||||
r.reap_container('a', 'partition', acc_nodes, 'c')
|
||||
self.assertEqual(r.logger.inc['return_codes.4'], 1)
|
||||
self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 1)
|
||||
self.assertEqual(r.stats_containers_deleted, 1)
|
||||
|
||||
def test_reap_container_partial_fail(self):
|
||||
@ -393,7 +413,7 @@ class TestReaper(unittest.TestCase):
|
||||
self.fake_reap_object)]
|
||||
with nested(*ctx):
|
||||
r.reap_container('a', 'partition', acc_nodes, 'c')
|
||||
self.assertEqual(r.logger.inc['return_codes.4'], 2)
|
||||
self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 2)
|
||||
self.assertEqual(r.stats_containers_possibly_remaining, 1)
|
||||
|
||||
def test_reap_container_full_fail(self):
|
||||
@ -412,7 +432,7 @@ class TestReaper(unittest.TestCase):
|
||||
self.fake_reap_object)]
|
||||
with nested(*ctx):
|
||||
r.reap_container('a', 'partition', acc_nodes, 'c')
|
||||
self.assertEqual(r.logger.inc['return_codes.4'], 3)
|
||||
self.assertEqual(r.logger.get_increment_counts()['return_codes.4'], 3)
|
||||
self.assertEqual(r.stats_containers_remaining, 1)
|
||||
|
||||
@patch('swift.account.reaper.Ring',
|
||||
@ -437,8 +457,8 @@ class TestReaper(unittest.TestCase):
|
||||
|
||||
mocks['direct_get_container'].side_effect = fake_get_container
|
||||
r.reap_container('a', 'partition', acc_nodes, 'c')
|
||||
self.assertEqual(r.logger.msg,
|
||||
'ERROR: invalid storage policy index: 2')
|
||||
self.assertEqual(r.logger.get_lines_for_level('error'), [
|
||||
'ERROR: invalid storage policy index: 2'])
|
||||
|
||||
def fake_reap_container(self, *args, **kwargs):
|
||||
self.called_amount += 1
|
||||
@ -463,13 +483,16 @@ class TestReaper(unittest.TestCase):
|
||||
nodes = r.get_account_ring().get_part_nodes()
|
||||
self.assertTrue(r.reap_account(broker, 'partition', nodes))
|
||||
self.assertEqual(self.called_amount, 4)
|
||||
self.assertEqual(r.logger.msg.find('Completed pass'), 0)
|
||||
self.assertTrue(r.logger.msg.find('1 containers deleted'))
|
||||
self.assertTrue(r.logger.msg.find('1 objects deleted'))
|
||||
self.assertTrue(r.logger.msg.find('1 containers remaining'))
|
||||
self.assertTrue(r.logger.msg.find('1 objects remaining'))
|
||||
self.assertTrue(r.logger.msg.find('1 containers possibly remaining'))
|
||||
self.assertTrue(r.logger.msg.find('1 objects possibly remaining'))
|
||||
info_lines = r.logger.get_lines_for_level('info')
|
||||
self.assertEqual(len(info_lines), 2)
|
||||
start_line, stat_line = info_lines
|
||||
self.assertEqual(start_line, 'Beginning pass on account a')
|
||||
self.assertTrue(stat_line.find('1 containers deleted'))
|
||||
self.assertTrue(stat_line.find('1 objects deleted'))
|
||||
self.assertTrue(stat_line.find('1 containers remaining'))
|
||||
self.assertTrue(stat_line.find('1 objects remaining'))
|
||||
self.assertTrue(stat_line.find('1 containers possibly remaining'))
|
||||
self.assertTrue(stat_line.find('1 objects possibly remaining'))
|
||||
|
||||
def test_reap_account_no_container(self):
|
||||
broker = FakeAccountBroker(tuple())
|
||||
@ -483,7 +506,8 @@ class TestReaper(unittest.TestCase):
|
||||
with nested(*ctx):
|
||||
nodes = r.get_account_ring().get_part_nodes()
|
||||
self.assertTrue(r.reap_account(broker, 'partition', nodes))
|
||||
self.assertEqual(r.logger.msg.find('Completed pass'), 0)
|
||||
self.assertTrue(r.logger.get_lines_for_level(
|
||||
'info')[-1].startswith('Completed pass'))
|
||||
self.assertEqual(self.called_amount, 0)
|
||||
|
||||
def test_reap_device(self):
|
||||
|
Loading…
Reference in New Issue
Block a user