Limit number of revert tombstone SSYNC requests

Revert jobs for tombstone-only parts currently try to talk to all
primary nodes - this patch randomizes the selection within part_nodes
instead. The corresponding probe test is updated to reflect this change.

The primary improvement of this patch is that the reconstructor on a
handoff node can now delete its local tombstones after it succeeds in
syncing to fewer than all of the primary nodes. (Before this patch,
every primary node had to receive the REVERT requests before the
handoff could clean up.)

The number of primary nodes the reconstructor should contact is open
to further discussion, but with this patch it is (replicas - k + 1),
which is enough to prevent stale reads (see the sketch below).
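
A rough sketch of why (replicas - k + 1) primaries are enough, assuming
a 10+4 EC policy with no fragment duplication (the numbers are made up
for illustration only):

    # illustration only, not Swift code
    replicas = 14                        # ec_ndata + ec_nparity
    ec_ndata = 10                        # fragments needed to serve a read
    nsample = replicas - ec_ndata + 1    # 5 primaries receive the REVERT

    # any read contacts at least ec_ndata primaries, so it must overlap
    # the nsample primaries that already hold the tombstone
    assert nsample + ec_ndata > replicas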

*BONUS*

- Fix a mis-configured test setting for the reconstructor ring in the
  unit test (it was using fewer replicas than ec_k + ec_m)

Co-Authored-By: Kota Tsuyuzaki <tsuyuzaki.kota@lab.ntt.co.jp>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>

Change-Id: I05ce8fe75f1c4a7971cc8995b003df818b69b3c1
Closes-Bug: #1668857
Author: Mahati Chamarthy
Date:   2017-03-01 23:18:09 +05:30
parent 1f36b5dd16
commit 188c07e12a
3 changed files with 37 additions and 59 deletions

swift/obj/reconstructor.py

@@ -854,14 +854,18 @@ class ObjectReconstructor(Daemon):
# push partitions off this node, but none of the suffixes
# have any data fragments to hint at which node would be a
# good candidate to receive the tombstones.
+            #
+            # we'll check a sample of other primaries before we delete our
+            # local tombstones, the exact number doesn't matter as long as
+            # it's enough to ensure the tombstones are not lost and less
+            # than *all the replicas*
+            nsample = (policy.ec_n_unique_fragments *
+                       policy.ec_duplication_factor) - policy.ec_ndata + 1
jobs.append(build_job(
job_type=REVERT,
frag_index=None,
suffixes=non_data_fragment_suffixes,
-                # this is super safe
-                sync_to=part_nodes,
-                # something like this would be probably be better
-                # sync_to=random.sample(part_nodes, 3),
+                sync_to=random.sample(part_nodes, nsample)
))
# return a list of jobs for this part
return jobs
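
As a concrete illustration of the nsample value computed above, here is
a small sketch with made-up policy numbers (ec_n_unique_fragments,
ec_duplication_factor and ec_ndata mirror the EC policy attributes used
in the hunk; the values and the stand-in node list are assumptions):

    import random

    # assumed example: a 10+4 EC policy with duplication factor 2
    ec_n_unique_fragments = 14
    ec_duplication_factor = 2
    ec_ndata = 10

    nsample = (ec_n_unique_fragments * ec_duplication_factor) - ec_ndata + 1
    part_nodes = [{'index': i} for i in range(28)]    # stand-in primaries
    print(nsample)                                    # 19
    print(len(random.sample(part_nodes, nsample)))    # 19 REVERT targets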

test/probe/test_reconstructor_revert.py

@@ -173,7 +173,7 @@ class TestReconstructorRevert(ECProbeTest):
client.put_object(self.url, self.token, self.container_name,
self.object_name, contents=contents)
-        # now lets shut down a couple primaries
+        # now lets shut down a couple of primaries
failed_nodes = random.sample(onodes, 2)
for node in failed_nodes:
self.kill_drive(self.device_dir('object', node))
@@ -197,61 +197,26 @@
self.fail('Node data on %r was not fully destroyed!' %
(node,))
-        # repair the first primary
-        self.revive_drive(self.device_dir('object', failed_nodes[0]))
-        # run the reconstructor on the handoffs nodes, if there are no data
-        # frags to hint at the node index - each hnode syncs to all primaries
-        for hnode in hnodes:
-            self.reconstructor.once(number=self.config_number(hnode))
-        # because not all primaries are online, the tombstones remain
-        for hnode in hnodes:
-            try:
-                self.direct_get(hnode, opart)
-            except direct_client.DirectClientException as err:
-                self.assertEqual(err.http_status, 404)
-                self.assertEqual(err.http_headers['X-Backend-Timestamp'],
                                 delete_timestamp)
-            else:
-                self.fail('Found obj data on %r' % hnode)
-        # ... but it's on the first failed (now repaired) primary
-        try:
-            self.direct_get(failed_nodes[0], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
                             delete_timestamp)
-        else:
-            self.fail('Found obj data on %r' % failed_nodes[0])
-        # repair the second primary
-        self.revive_drive(self.device_dir('object', failed_nodes[1]))
-        # run the reconstructor on the *first* handoff node
+        # run the reconstructor on the handoff node multiple times until
+        # tombstone is pushed out - each handoff node syncs to a few
+        # primaries each time
+        iterations = 0
+        while iterations < 52:
self.reconstructor.once(number=self.config_number(hnodes[0]))
-        # make sure it's tombstone was pushed out
+            iterations += 1
+            # see if the tombstone is reverted
try:
self.direct_get(hnodes[0], opart)
except direct_client.DirectClientException as err:
self.assertEqual(err.http_status, 404)
-            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
+                if 'X-Backend-Timestamp' not in err.http_headers:
+                    # this means the tombstone is *gone* so it's reverted
+                    break
else:
-            self.fail('Found obj data on %r' % hnodes[0])
+            self.fail('Still found tombstone on %r after %s iterations' % (
+                hnodes[0], iterations))
-        # ... and now it's on the second failed primary too!
-        try:
-            self.direct_get(failed_nodes[1], opart)
-        except direct_client.DirectClientException as err:
-            self.assertEqual(err.http_status, 404)
-            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
                             delete_timestamp)
-        else:
-            self.fail('Found obj data on %r' % failed_nodes[1])
-        # ... but still on the second handoff node
+        # tombstone is still on the *second* handoff
try:
self.direct_get(hnodes[1], opart)
except direct_client.DirectClientException as err:
@@ -261,10 +226,14 @@ class TestReconstructorRevert(ECProbeTest):
else:
self.fail('Found obj data on %r' % hnodes[1])
-        # ... until it's next sync
+        # repair the primaries
+        self.revive_drive(self.device_dir('object', failed_nodes[0]))
+        self.revive_drive(self.device_dir('object', failed_nodes[1]))
+        # run reconstructor on second handoff
self.reconstructor.once(number=self.config_number(hnodes[1]))
-        # ... then it's tombstone is pushed off too!
+        # verify tombstone is reverted on the first pass
try:
self.direct_get(hnodes[1], opart)
except direct_client.DirectClientException as err:

test/unit/obj/test_reconstructor.py

@@ -1160,7 +1160,7 @@ class TestObjectReconstructor(unittest.TestCase):
self.policy.object_ring.max_more_nodes = \
self.policy.object_ring.replicas
self.ts_iter = make_timestamp_iter()
-        self.fabricated_ring = FabricatedRing()
+        self.fabricated_ring = FabricatedRing(replicas=14, devices=28)
def _configure_reconstructor(self, **kwargs):
self.conf.update(kwargs)
@@ -1924,7 +1924,12 @@ class TestObjectReconstructor(unittest.TestCase):
'local_dev': self.local_dev,
'device': self.local_dev['device'],
}
-        self.assertEqual(ring.replica_count, len(job['sync_to']))
+        self.assertEqual(ring.replica_count, len(part_nodes))
+        expected_samples = (
+            (self.policy.ec_n_unique_fragments *
+             self.policy.ec_duplication_factor) -
+            self.policy.ec_ndata + 1)
+        self.assertEqual(len(job['sync_to']), expected_samples)
for k, v in expected.items():
msg = 'expected %s != %s for %s' % (
v, job[k], k)