From 369447ec47dc8df3e38046a305cf1aa0a6499ce9 Mon Sep 17 00:00:00 2001
From: Clay Gerrard
Date: Thu, 27 Aug 2015 15:09:29 -0700
Subject: [PATCH] Fix purge for tombstone only REVERT job

When we revert a partition we normally push it off to the specific
primary node for the index of the data files in the partition.
However, when a partition is devoid of any data files (only
tombstones) we build a REVERT job with a frag_index of None.

This change updates the ECDiskFile's purge method to be robust to
purging tombstones when the frag_index is None.

Add a probe test to validate that tombstone-only revert jobs clean
themselves up once they can confirm they are in sync with
part-replica count nodes - even if one of the primaries is down (in
which case they sync tombstones with other handoffs to fill in for
the primaries).

Change-Id: Ib9a42f412fb90d51959efce886c0f8952aba8d85
---
 swift/obj/diskfile.py                   |  9 ++-
 swift/obj/reconstructor.py              |  3 +
 test/probe/test_reconstructor_revert.py | 98 +++++++++++++++++--------
 test/unit/obj/test_diskfile.py          | 12 +++
 test/unit/obj/test_reconstructor.py     |  5 +-
 5 files changed, 92 insertions(+), 35 deletions(-)

diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py
index d3937cf9a5..33151f86de 100644
--- a/swift/obj/diskfile.py
+++ b/swift/obj/diskfile.py
@@ -2166,9 +2166,14 @@ class ECDiskFile(BaseDiskFile):
 
         :param timestamp: the object timestamp, an instance of
                           :class:`~swift.common.utils.Timestamp`
-        :param frag_index: a fragment archive index, must be a whole number.
+        :param frag_index: fragment archive index, must be
+                           a whole number or None.
         """
-        for ext in ('.data', '.ts'):
+        exts = ['.ts']
+        # when frag_index is None it's not possible to build a data file name
+        if frag_index is not None:
+            exts.append('.data')
+        for ext in exts:
             purge_file = self.manager.make_on_disk_filename(
                 timestamp, ext=ext, frag_index=frag_index)
             remove_file(os.path.join(self._datadir, purge_file))
diff --git a/swift/obj/reconstructor.py b/swift/obj/reconstructor.py
index cfe6239c2b..3ab60a4c4d 100644
--- a/swift/obj/reconstructor.py
+++ b/swift/obj/reconstructor.py
@@ -542,6 +542,9 @@ class ObjectReconstructor(Daemon):
                     frag_index=frag_index)
                 df.purge(Timestamp(timestamp), frag_index)
             except DiskFileError:
+                self.logger.exception(
+                    'Unable to purge DiskFile (%r %r %r)',
+                    object_hash, timestamp, frag_index)
                 continue
 
     def process_job(self, job):
diff --git a/test/probe/test_reconstructor_revert.py b/test/probe/test_reconstructor_revert.py
index df4dc8beac..095843624c 100755
--- a/test/probe/test_reconstructor_revert.py
+++ b/test/probe/test_reconstructor_revert.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 from hashlib import md5
+import itertools
 import unittest
 import uuid
 import random
@@ -94,7 +95,7 @@ class TestReconstructorRevert(ECProbeTest):
                            self.object_name, headers=headers_post)
         del headers_post['X-Auth-Token']  # WTF, where did this come from?
 
-        # these primaries can't servce the data any more, we expect 507
+        # these primaries can't serve the data any more, we expect 507
         # here and not 404 because we're using mount_check to kill nodes
         for onode in (onodes[0], onodes[1]):
             try:
@@ -102,7 +103,7 @@ class TestReconstructorRevert(ECProbeTest):
             except direct_client.DirectClientException as err:
                 self.assertEqual(err.http_status, 507)
             else:
-                self.fail('Node data on %r was not fully destoryed!' %
+                self.fail('Node data on %r was not fully destroyed!' %
                           (onode,))
 
         # now take out another primary
@@ -115,7 +116,7 @@ class TestReconstructorRevert(ECProbeTest):
             except direct_client.DirectClientException as err:
                 self.assertEqual(err.http_status, 507)
             else:
-                self.fail('Node data on %r was not fully destoryed!' %
+                self.fail('Node data on %r was not fully destroyed!' %
                           (onode,))
 
         # make sure we can still GET the object and its correct
@@ -152,10 +153,10 @@ class TestReconstructorRevert(ECProbeTest):
             except direct_client.DirectClientException as err:
                 self.assertEqual(err.http_status, 404)
             else:
-                self.fail('Node data on %r was not fully destoryed!' %
+                self.fail('Node data on %r was not fully destroyed!' %
                           (hnode,))
 
-    def test_delete_propogate(self):
+    def test_delete_propagate(self):
         # create EC container
         headers = {'X-Storage-Policy': self.policy.name}
         client.put_container(self.url, self.token, self.container_name,
@@ -164,56 +165,95 @@ class TestReconstructorRevert(ECProbeTest):
         # get our node lists
         opart, onodes = self.object_ring.get_nodes(
             self.account, self.container_name, self.object_name)
-        hnodes = self.object_ring.get_more_nodes(opart)
-        p_dev2 = self.device_dir('object', onodes[1])
+        hnodes = list(itertools.islice(
+            self.object_ring.get_more_nodes(opart), 2))
 
         # PUT object
         contents = Body()
         client.put_object(self.url, self.token, self.container_name,
                           self.object_name, contents=contents)
 
-        # now lets shut one down
-        self.kill_drive(p_dev2)
+        # now lets shut down a couple primaries
+        failed_nodes = random.sample(onodes, 2)
+        for node in failed_nodes:
+            self.kill_drive(self.device_dir('object', node))
 
-        # delete on the ones that are left
+        # write tombstones over the nodes that are still online
         client.delete_object(self.url, self.token, self.container_name,
                              self.object_name)
 
-        # spot check a node
+        # spot check the primary nodes that are still online
+        delete_timestamp = None
+        for node in onodes:
+            if node in failed_nodes:
+                continue
+            try:
+                self.direct_get(node, opart)
+            except direct_client.DirectClientException as err:
+                self.assertEqual(err.http_status, 404)
+                delete_timestamp = err.http_headers['X-Backend-Timestamp']
+            else:
+                self.fail('Node data on %r was not fully destroyed!' %
+                          (node,))
+
+        # repair the first primary
+        self.revive_drive(self.device_dir('object', failed_nodes[0]))
+
+        # run the reconstructor on the *second* handoff node
+        self.reconstructor.once(number=self.config_number(hnodes[1]))
+
+        # make sure its tombstone was pushed out
         try:
-            self.direct_get(onodes[0], opart)
+            self.direct_get(hnodes[1], opart)
         except direct_client.DirectClientException as err:
             self.assertEqual(err.http_status, 404)
+            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
         else:
-            self.fail('Node data on %r was not fully destoryed!' %
-                      (onodes[0],))
+            self.fail('Found obj data on %r' % hnodes[1])
 
-        # enable the first node again
-        self.revive_drive(p_dev2)
-
-        # propagate the delete...
-        # fire up reconstructor on handoff nodes only
-        for hnode in hnodes:
-            hnode_id = (hnode['port'] - 6000) / 10
-            self.reconstructor.once(number=hnode_id)
-
-        # check the first node to make sure its gone
+        # ... and it's on the first failed (now repaired) primary
         try:
-            self.direct_get(onodes[1], opart)
+            self.direct_get(failed_nodes[0], opart)
         except direct_client.DirectClientException as err:
             self.assertEqual(err.http_status, 404)
+            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
+                             delete_timestamp)
         else:
-            self.fail('Node data on %r was not fully destoryed!' %
-                      (onodes[0]))
+            self.fail('Found obj data on %r' % failed_nodes[0])
 
-        # make sure proxy get can't find it
+        # repair the second primary
+        self.revive_drive(self.device_dir('object', failed_nodes[1]))
+
+        # run the reconstructor on the *first* handoff node
+        self.reconstructor.once(number=self.config_number(hnodes[0]))
+
+        # make sure its tombstone was pushed out
+        try:
+            self.direct_get(hnodes[0], opart)
+        except direct_client.DirectClientException as err:
+            self.assertEqual(err.http_status, 404)
+            self.assertNotIn('X-Backend-Timestamp', err.http_headers)
+        else:
+            self.fail('Found obj data on %r' % hnodes[0])
+
+        # ... and now it's on the second failed primary too!
+        try:
+            self.direct_get(failed_nodes[1], opart)
+        except direct_client.DirectClientException as err:
+            self.assertEqual(err.http_status, 404)
+            self.assertEqual(err.http_headers['X-Backend-Timestamp'],
+                             delete_timestamp)
+        else:
+            self.fail('Found obj data on %r' % failed_nodes[1])
+
+        # sanity: make sure proxy get can't find it
         try:
             self.proxy_get()
         except Exception as err:
             self.assertEqual(err.http_status, 404)
         else:
-            self.fail('Node data on %r was not fully destoryed!' %
+            self.fail('Node data on %r was not fully destroyed!' %
                       (onodes[0]))
 
     def test_reconstruct_from_reverted_fragment_archive(self):
diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py
index 47ef9b102d..6767beeeb0 100644
--- a/test/unit/obj/test_diskfile.py
+++ b/test/unit/obj/test_diskfile.py
@@ -3665,6 +3665,18 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase):
         df.purge(ts, 3)
         self.assertEqual(sorted(os.listdir(df._datadir)), [])
 
+    def test_purge_without_frag(self):
+        ts = self.ts()
+        df = self._simple_get_diskfile()
+        df.delete(ts)
+
+        # sanity
+        self.assertEqual(sorted(os.listdir(df._datadir)), [
+            ts.internal + '.ts',
+        ])
+        df.purge(ts, None)
+        self.assertEqual(sorted(os.listdir(df._datadir)), [])
+
     def test_purge_old_tombstone(self):
         old_ts = self.ts()
         ts = self.ts()
diff --git a/test/unit/obj/test_reconstructor.py b/test/unit/obj/test_reconstructor.py
index 7aa5ebc60d..5773917103 100755
--- a/test/unit/obj/test_reconstructor.py
+++ b/test/unit/obj/test_reconstructor.py
@@ -2415,11 +2415,8 @@ class TestObjectReconstructor(unittest.TestCase):
         self.assertFalse(os.access(df._datadir, os.F_OK))
 
     def test_process_job_revert_cleanup_tombstone(self):
-        replicas = self.policy.object_ring.replicas
-        frag_index = random.randint(0, replicas - 1)
         sync_to = [random.choice([n for n in self.policy.object_ring.devs
                                   if n != self.local_dev])]
-        sync_to[0]['index'] = frag_index
 
         partition = 0
         part_path = os.path.join(self.devices, self.local_dev['device'],
@@ -2437,7 +2434,7 @@ class TestObjectReconstructor(unittest.TestCase):
 
         job = {
             'job_type': object_reconstructor.REVERT,
-            'frag_index': frag_index,
+            'frag_index': None,
             'suffixes': [suffix],
             'sync_to': sync_to,
             'partition': partition,
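
The heart of the fix is the filename-construction guard in the diskfile.py hunk
above: an EC .data filename embeds a fragment archive index, so when a
tombstone-only revert job hands purge a frag_index of None there is no .data
name to build and only the .ts tombstone should be removed. Below is a minimal
standalone sketch of that idea; make_ts_name, make_data_name and
purge_tombstone are simplified stand-ins for illustration (not Swift's actual
DiskFile API), and the on-disk naming is only assumed to follow the
timestamp / timestamp#frag pattern.

    import os

    def make_ts_name(timestamp):
        # tombstone names never need a fragment index
        return '%s.ts' % timestamp

    def make_data_name(timestamp, frag_index):
        # EC datafile names embed the fragment index, so no name can be
        # built when frag_index is None
        if frag_index is None:
            raise ValueError('a .data name requires a fragment index')
        return '%s#%d.data' % (timestamp, frag_index)

    def purge_tombstone(datadir, timestamp, frag_index):
        # mirrors the patched purge logic: always try to remove the
        # tombstone, and only look for a datafile when a fragment index
        # is actually known
        names = [make_ts_name(timestamp)]
        if frag_index is not None:
            names.append(make_data_name(timestamp, frag_index))
        for name in names:
            try:
                os.unlink(os.path.join(datadir, name))
            except OSError:
                pass  # already gone

A tombstone-only revert job would call this with frag_index=None, e.g.
purge_tombstone(datadir, '1440712345.67890', None), removing just the
tombstone - the case exercised by the new test_purge_without_frag unit test
and the reworked probe test above.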