From ff5ea003b3a1b37d8417aa17d3521237768dfe62 Mon Sep 17 00:00:00 2001
From: Tim Burke <tim.burke@gmail.com>
Date: Tue, 20 Aug 2019 22:20:44 -0700
Subject: [PATCH] ec: log durability of frags that fail to reconstruct

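Whether a frag is durable or non-durable greatly affects how much we
care about being able to reconstruct it. Include that durability in
both "Unable to get enough responses" error logs, and also include the
timestamp in the per-ETag log line.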

Change-Id: Ie6f46267d4bb567ecc0cc195d1fd7ce55c8cb325
---
 swift/obj/reconstructor.py          | 30 ++++++++++++++++++-----------
 test/unit/obj/test_reconstructor.py | 22 ++++++++++++---------
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/swift/obj/reconstructor.py b/swift/obj/reconstructor.py
index b8f18de4dc..02f3bb7bf8 100644
--- a/swift/obj/reconstructor.py
+++ b/swift/obj/reconstructor.py
@@ -401,6 +401,7 @@ class ObjectReconstructor(Daemon):
                        path, headers, full_get_path)
 
         buckets = defaultdict(dict)
+        durable_buckets = {}
         etag_buckets = {}
         error_resp_count = 0
         for resp in pile:
@@ -444,6 +445,10 @@ class ObjectReconstructor(Daemon):
                 continue
             timestamp = Timestamp(timestamp)
 
+            durable = resp.headers.get('X-Backend-Durable-Timestamp')
+            if durable:
+                durable_buckets[Timestamp(durable)] = True
+
             etag = resp.headers.get('X-Object-Sysmeta-Ec-Etag')
             if not etag:
                 self.logger.warning('Invalid resp from %s, frag index %s '
@@ -469,26 +474,29 @@ class ObjectReconstructor(Daemon):
                         % (fi_to_rebuild, list(buckets[timestamp])))
                     break
         else:
+            path = _full_path(node, job['partition'],
+                              datafile_metadata['name'],
+                              job['policy'])
+
             for timestamp, resp in sorted(buckets.items()):
                 etag = etag_buckets[timestamp]
+                durable = durable_buckets.get(timestamp)
                 self.logger.error(
-                    'Unable to get enough responses (%s/%s) '
-                    'to reconstruct %s frag#%s with ETag %s' % (
+                    'Unable to get enough responses (%s/%s) to reconstruct '
+                    '%s %s frag#%s with ETag %s and timestamp %s' % (
                         len(resp), job['policy'].ec_ndata,
-                        _full_path(node, job['partition'],
-                                   datafile_metadata['name'],
-                                   job['policy']),
-                        fi_to_rebuild, etag))
+                        'durable' if durable else 'non-durable',
+                        path, fi_to_rebuild, etag, timestamp.internal))
 
             if error_resp_count:
+                durable = durable_buckets.get(Timestamp(
+                    datafile_metadata['X-Timestamp']))
                 self.logger.error(
                     'Unable to get enough responses (%s error responses) '
-                    'to reconstruct %s frag#%s' % (
+                    'to reconstruct %s %s frag#%s' % (
                         error_resp_count,
-                        _full_path(node, job['partition'],
-                                   datafile_metadata['name'],
-                                   job['policy']),
-                        fi_to_rebuild))
+                        'durable' if durable else 'non-durable',
+                        path, fi_to_rebuild))
 
             raise DiskFileError('Unable to reconstruct EC archive')
 
diff --git a/test/unit/obj/test_reconstructor.py b/test/unit/obj/test_reconstructor.py
index efce60ad6a..075246751d 100644
--- a/test/unit/obj/test_reconstructor.py
+++ b/test/unit/obj/test_reconstructor.py
@@ -5031,16 +5031,18 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
             archive_bodies = encode_frag_archive_bodies(self.policy, body)
             # pop the index to the destination node
             archive_bodies.pop(1)
-            ec_archive_dict[
-                (md5(body).hexdigest(), next(ts).internal)] = archive_bodies
+            key = (md5(body).hexdigest(), next(ts).internal, bool(i % 2))
+            ec_archive_dict[key] = archive_bodies
 
         responses = list()
         # fill out response list by 3 different etag bodies
-        for etag, ts in itertools.cycle(ec_archive_dict):
-            body = ec_archive_dict[(etag, ts)].pop(0)
+        for etag, ts, durable in itertools.cycle(ec_archive_dict):
+            body = ec_archive_dict[(etag, ts, durable)].pop(0)
             headers = get_header_frag_index(self, body)
             headers.update({'X-Object-Sysmeta-Ec-Etag': etag,
                             'X-Backend-Timestamp': ts})
+            if durable:
+                headers['X-Backend-Durable-Timestamp'] = ts
             responses.append((200, body, headers))
             if len(responses) >= (self.policy.object_ring.replicas - 1):
                 break
@@ -5063,7 +5065,7 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
         # 1 error log per etag to report not enough responses
         self.assertEqual(3, len(error_lines))
         for error_line in error_lines:
-            for expected_etag, ts in ec_archive_dict:
+            for expected_etag, ts, durable in ec_archive_dict:
                 if expected_etag in error_line:
                     break
             else:
@@ -5072,13 +5074,15 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
                     (list(ec_archive_dict), error_line))
             # remove the found etag which should not be found in the
             # following error lines
-            del ec_archive_dict[(expected_etag, ts)]
+            del ec_archive_dict[(expected_etag, ts, durable)]
 
             expected = 'Unable to get enough responses (%s/10) to ' \
-                       'reconstruct 10.0.0.1:1001/sdb/0%s policy#0 ' \
-                       'frag#1 with ETag' % \
+                       'reconstruct %s 10.0.0.1:1001/sdb/0%s policy#0 ' \
+                       'frag#1 with ETag %s and timestamp %s' % \
                        (etag_count[expected_etag],
-                        self.obj_path.decode('utf8'))
+                        'durable' if durable else 'non-durable',
+                        self.obj_path.decode('utf8'),
+                        expected_etag, ts)
             self.assertIn(
                 expected, error_line,
                 "Unexpected error line found: Expected: %s Got: %s"