Merge "Fix purge for tombstone only REVERT job"

Jenkins 2015-09-19 18:42:30 +00:00 committed by Gerrit Code Review
commit 227e1f8235
5 changed files with 92 additions and 35 deletions


@@ -2166,9 +2166,14 @@ class ECDiskFile(BaseDiskFile):
         :param timestamp: the object timestamp, an instance of
             :class:`~swift.common.utils.Timestamp`
-        :param frag_index: a fragment archive index, must be a whole number.
+        :param frag_index: fragment archive index, must be
+            a whole number or None.
         """
-        for ext in ('.data', '.ts'):
+        exts = ['.ts']
+        # when frag_index is None it's not possible to build a data file name
+        if frag_index is not None:
+            exts.append('.data')
+        for ext in exts:
             purge_file = self.manager.make_on_disk_filename(
                 timestamp, ext=ext, frag_index=frag_index)
             remove_file(os.path.join(self._datadir, purge_file))
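Note: the hunk above makes purge() skip the .data name entirely when no fragment index is given, so a tombstone-only revert no longer fails trying to build a fragment file name. A small standalone sketch of which files end up targeted (illustrative only; files_to_purge and the simplified filename format are not Swift code):

import os


def files_to_purge(datadir, timestamp, frag_index):
    # mirrors the exts logic in the hunk above: tombstone-only when
    # frag_index is None, tombstone plus fragment archive otherwise
    exts = ['.ts']
    if frag_index is not None:
        exts.append('.data')
    names = []
    for ext in exts:
        # simplified stand-in for make_on_disk_filename(); the real
        # on-disk naming is more involved
        frag = '' if frag_index is None else '#%d' % frag_index
        names.append(os.path.join(datadir, timestamp + frag + ext))
    return names


print(files_to_purge('/srv/node/d1/objects/0/abc', '1442686950.00000', None))
print(files_to_purge('/srv/node/d1/objects/0/abc', '1442686950.00000', 3))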


@@ -541,6 +541,9 @@ class ObjectReconstructor(Daemon):
                     frag_index=frag_index)
                 df.purge(Timestamp(timestamp), frag_index)
             except DiskFileError:
+                self.logger.exception(
+                    'Unable to purge DiskFile (%r %r %r)',
+                    object_hash, timestamp, frag_index)
                 continue
 
     def process_job(self, job):
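Note: this hunk only adds logging; the except/continue flow is unchanged. A minimal standalone sketch of the same "log the traceback and keep going" pattern with the stdlib logger (DemoDiskFileError and purge_all are hypothetical stand-ins, not Swift code):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('reconstructor-demo')


class DemoDiskFileError(Exception):
    pass


def purge_all(objects, frag_index):
    for object_hash, timestamp in objects.items():
        try:
            raise DemoDiskFileError('simulated purge failure')
        except DemoDiskFileError:
            # logger.exception() records at ERROR level and appends the
            # current traceback; the %r arguments are formatted lazily,
            # only when the record is actually emitted
            logger.exception('Unable to purge DiskFile (%r %r %r)',
                             object_hash, timestamp, frag_index)
            continue


purge_all({'d41d8cd98f00b204e9800998ecf8427e': '1442686950.00000'}, None)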


@@ -15,6 +15,7 @@
 # limitations under the License.
 
 from hashlib import md5
+import itertools
 import unittest
 import uuid
 import random
@@ -94,7 +95,7 @@ class TestReconstructorRevert(ECProbeTest):
                            self.object_name, headers=headers_post)
         del headers_post['X-Auth-Token']  # WTF, where did this come from?
 
-        # these primaries can't servce the data any more, we expect 507
+        # these primaries can't serve the data any more, we expect 507
         # here and not 404 because we're using mount_check to kill nodes
         for onode in (onodes[0], onodes[1]):
             try:
@@ -102,7 +103,7 @@ class TestReconstructorRevert(ECProbeTest):
             except direct_client.DirectClientException as err:
                 self.assertEqual(err.http_status, 507)
             else:
-                self.fail('Node data on %r was not fully destoryed!' %
+                self.fail('Node data on %r was not fully destroyed!' %
                           (onode,))
 
         # now take out another primary
@@ -115,7 +116,7 @@ class TestReconstructorRevert(ECProbeTest):
         except direct_client.DirectClientException as err:
             self.assertEqual(err.http_status, 507)
         else:
-            self.fail('Node data on %r was not fully destoryed!' %
+            self.fail('Node data on %r was not fully destroyed!' %
                       (onode,))
 
         # make sure we can still GET the object and its correct
@@ -152,10 +153,10 @@ class TestReconstructorRevert(ECProbeTest):
             except direct_client.DirectClientException as err:
                 self.assertEqual(err.http_status, 404)
             else:
-                self.fail('Node data on %r was not fully destoryed!' %
+                self.fail('Node data on %r was not fully destroyed!' %
                           (hnode,))
 
-    def test_delete_propogate(self):
+    def test_delete_propagate(self):
         # create EC container
         headers = {'X-Storage-Policy': self.policy.name}
         client.put_container(self.url, self.token, self.container_name,
@@ -164,56 +165,95 @@ class TestReconstructorRevert(ECProbeTest):
         # get our node lists
         opart, onodes = self.object_ring.get_nodes(
             self.account, self.container_name, self.object_name)
-        hnodes = self.object_ring.get_more_nodes(opart)
-        p_dev2 = self.device_dir('object', onodes[1])
+        hnodes = list(itertools.islice(
+            self.object_ring.get_more_nodes(opart), 2))
 
         # PUT object
         contents = Body()
         client.put_object(self.url, self.token, self.container_name,
                           self.object_name, contents=contents)
 
-        # now lets shut one down
-        self.kill_drive(p_dev2)
+        # now lets shut down a couple primaries
+        failed_nodes = random.sample(onodes, 2)
+        for node in failed_nodes:
+            self.kill_drive(self.device_dir('object', node))
 
-        # delete on the ones that are left
+        # Write tombstones over the nodes that are still online
         client.delete_object(self.url, self.token,
                              self.container_name,
                              self.object_name)
 
-        # spot check a node
+        # spot check the primary nodes that are still online
+        delete_timestamp = None
+        for node in onodes:
+            if node in failed_nodes:
+                continue
+            try:
+                self.direct_get(node, opart)
+            except direct_client.DirectClientException as err:
+                self.assertEqual(err.http_status, 404)
+                delete_timestamp = err.http_headers['X-Backend-Timestamp']
+            else:
+                self.fail('Node data on %r was not fully destroyed!' %
+                          (node,))
+
+        # repair the first primary
+        self.revive_drive(self.device_dir('object', failed_nodes[0]))
+
+        # run the reconstructor on the *second* handoff node
+        self.reconstructor.once(number=self.config_number(hnodes[1]))
+
+        # make sure it's tombstone was pushed out
         try:
-            self.direct_get(onodes[0], opart)
+            self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:
             self.assertEqual(err.http_status, 404)
+            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
         else:
-            self.fail('Node data on %r was not fully destoryed!' %
-                      (onodes[0],))
+            self.fail('Found obj data on %r' % hnodes[1])
 
-        # enable the first node again
-        self.revive_drive(p_dev2)
-
-        # propagate the delete...
-        # fire up reconstructor on handoff nodes only
-        for hnode in hnodes:
-            hnode_id = (hnode['port'] - 6000) / 10
-            self.reconstructor.once(number=hnode_id)
-
-        # check the first node to make sure its gone
+        # ... and it's on the first failed (now repaired) primary
         try:
-            self.direct_get(onodes[1], opart)
+            self.direct_get(failed_nodes[0], opart)
         except direct_client.DirectClientException as err:
             self.assertEqual(err.http_status, 404)
+            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
+                             delete_timestamp)
         else:
-            self.fail('Node data on %r was not fully destoryed!' %
-                      (onodes[0]))
+            self.fail('Found obj data on %r' % failed_nodes[0])
 
-        # make sure proxy get can't find it
+        # repair the second primary
+        self.revive_drive(self.device_dir('object', failed_nodes[1]))
+
+        # run the reconstructor on the *first* handoff node
+        self.reconstructor.once(number=self.config_number(hnodes[0]))
+
+        # make sure it's tombstone was pushed out
+        try:
+            self.direct_get(hnodes[0], opart)
+        except direct_client.DirectClientException as err:
+            self.assertEqual(err.http_status, 404)
+            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
+        else:
+            self.fail('Found obj data on %r' % hnodes[0])
+
+        # ... and now it's on the second failed primary too!
+        try:
+            self.direct_get(failed_nodes[1], opart)
+        except direct_client.DirectClientException as err:
+            self.assertEqual(err.http_status, 404)
+            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
+                             delete_timestamp)
+        else:
+            self.fail('Found obj data on %r' % failed_nodes[1])
+
+        # sanity make sure proxy get can't find it
         try:
             self.proxy_get()
         except Exception as err:
             self.assertEqual(err.http_status, 404)
         else:
-            self.fail('Node data on %r was not fully destoryed!' %
+            self.fail('Node data on %r was not fully destroyed!' %
                       (onodes[0]))
 
     def test_reconstruct_from_reverted_fragment_archive(self):
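Note: the rewritten probe test replaces the old inline `(hnode['port'] - 6000) / 10` arithmetic with the `self.config_number(...)` helper from the probe test base class. A hedged sketch of that port-to-config-number mapping for the usual SAIO-style layout (the real helper may resolve it differently):

def config_number(node, base_port=6000, port_step=10):
    # SAIO-style probe runs put object server N on port 6000 + 10 * N
    # (6010, 6020, ...), which is exactly what the removed inline
    # arithmetic assumed as well
    return (node['port'] - base_port) // port_step


assert config_number({'port': 6010}) == 1
assert config_number({'port': 6040}) == 4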


@@ -3665,6 +3665,18 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase):
         df.purge(ts, 3)
         self.assertEqual(sorted(os.listdir(df._datadir)), [])
 
+    def test_purge_without_frag(self):
+        ts = self.ts()
+        df = self._simple_get_diskfile()
+        df.delete(ts)
+
+        # sanity
+        self.assertEqual(sorted(os.listdir(df._datadir)), [
+            ts.internal + '.ts',
+        ])
+        df.purge(ts, None)
+        self.assertEqual(sorted(os.listdir(df._datadir)), [])
+
     def test_purge_old_tombstone(self):
         old_ts = self.ts()
         ts = self.ts()
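Note: the new unit test expects the tombstone to be named from the timestamp's internal form plus '.ts', and to be gone after purge(ts, None). A quick sketch of that filename (assumes a Swift checkout is importable; the printed value is illustrative):

from swift.common.utils import Timestamp

ts = Timestamp(1442686950.12345)
print(ts.internal)          # e.g. '1442686950.12345'
print(ts.internal + '.ts')  # the tombstone file name the test expects to see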


@@ -2416,11 +2416,8 @@ class TestObjectReconstructor(unittest.TestCase):
         self.assertFalse(os.access(df._datadir, os.F_OK))
 
     def test_process_job_revert_cleanup_tombstone(self):
-        replicas = self.policy.object_ring.replicas
-        frag_index = random.randint(0, replicas - 1)
         sync_to = [random.choice([n for n in self.policy.object_ring.devs
                                   if n != self.local_dev])]
-        sync_to[0]['index'] = frag_index
         partition = 0
 
         part_path = os.path.join(self.devices, self.local_dev['device'],
@@ -2438,7 +2435,7 @@ class TestObjectReconstructor(unittest.TestCase):
         job = {
             'job_type': object_reconstructor.REVERT,
-            'frag_index': frag_index,
+            'frag_index': None,
             'suffixes': [suffix],
             'sync_to': sync_to,
             'partition': partition,