ec: log durability of frags that fail to reconstruct

Whether the frag is durable or non-durable greatly affects how much I care whether I can reconstruct it.

Change-Id: Ie6f46267d4bb567ecc0cc195d1fd7ce55c8cb325
2019-08-20 22:20:44 -07:00 · 2019-08-20 22:20:44 -07:00 · ff5ea003b3
commit ff5ea003b3
parent f70520239c
2 changed files with 32 additions and 20 deletions
--- a/swift/obj/reconstructor.py
+++ b/swift/obj/reconstructor.py
@ -401,6 +401,7 @@ class ObjectReconstructor(Daemon):
                       path, headers, full_get_path)
        buckets = defaultdict(dict)
        durable_buckets = {}
        etag_buckets = {}
        error_resp_count = 0
        for resp in pile:
@ -444,6 +445,10 @@ class ObjectReconstructor(Daemon):
                continue
            timestamp = Timestamp(timestamp)
            durable = resp.headers.get('X-Backend-Durable-Timestamp')
            if durable:
                durable_buckets[Timestamp(durable)] = True
            etag = resp.headers.get('X-Object-Sysmeta-Ec-Etag')
            if not etag:
                self.logger.warning('Invalid resp from %s, frag index %s '
@ -469,26 +474,29 @@ class ObjectReconstructor(Daemon):
                        % (fi_to_rebuild, list(buckets[timestamp])))
                    break
        else:
            path = _full_path(node, job['partition'],
                              datafile_metadata['name'],
                              job['policy'])
            for timestamp, resp in sorted(buckets.items()):
                etag = etag_buckets[timestamp]
                durable = durable_buckets.get(timestamp)
                self.logger.error(
-                    'Unable to get enough responses (%s/%s) '
+                    'Unable to get enough responses (%s/%s) to reconstruct '
-                    'to reconstruct %s frag#%s with ETag %s' % (
+                    '%s %s frag#%s with ETag %s and timestamp %s' % (
                        len(resp), job['policy'].ec_ndata,
-                        _full_path(node, job['partition'],
+                        'durable' if durable else 'non-durable',
-                                   datafile_metadata['name'],
+                        path, fi_to_rebuild, etag, timestamp.internal))
                                   job['policy']),
                        fi_to_rebuild, etag))
            if error_resp_count:
                durable = durable_buckets.get(Timestamp(
                    datafile_metadata['X-Timestamp']))
                self.logger.error(
                    'Unable to get enough responses (%s error responses) '
-                    'to reconstruct %s frag#%s' % (
+                    'to reconstruct %s %s frag#%s' % (
                        error_resp_count,
-                        _full_path(node, job['partition'],
+                        'durable' if durable else 'non-durable',
-                                   datafile_metadata['name'],
+                        path, fi_to_rebuild))
                                   job['policy']),
                        fi_to_rebuild))
            raise DiskFileError('Unable to reconstruct EC archive')
--- a/test/unit/obj/test_reconstructor.py
+++ b/test/unit/obj/test_reconstructor.py
@ -5031,16 +5031,18 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
            archive_bodies = encode_frag_archive_bodies(self.policy, body)
            # pop the index to the destination node
            archive_bodies.pop(1)
-            ec_archive_dict[
+            key = (md5(body).hexdigest(), next(ts).internal, bool(i % 2))
-                (md5(body).hexdigest(), next(ts).internal)] = archive_bodies
+            ec_archive_dict[key] = archive_bodies
        responses = list()
        # fill out response list by 3 different etag bodies
-        for etag, ts in itertools.cycle(ec_archive_dict):
+        for etag, ts, durable in itertools.cycle(ec_archive_dict):
-            body = ec_archive_dict[(etag, ts)].pop(0)
+            body = ec_archive_dict[(etag, ts, durable)].pop(0)
            headers = get_header_frag_index(self, body)
            headers.update({'X-Object-Sysmeta-Ec-Etag': etag,
                            'X-Backend-Timestamp': ts})
            if durable:
                headers['X-Backend-Durable-Timestamp'] = ts
            responses.append((200, body, headers))
            if len(responses) >= (self.policy.object_ring.replicas - 1):
                break
@ -5063,7 +5065,7 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
        # 1 error log per etag to report not enough responses
        self.assertEqual(3, len(error_lines))
        for error_line in error_lines:
-            for expected_etag, ts in ec_archive_dict:
+            for expected_etag, ts, durable in ec_archive_dict:
                if expected_etag in error_line:
                    break
            else:
@ -5072,13 +5074,15 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
                    (list(ec_archive_dict), error_line))
            # remove the found etag which should not be found in the
            # following error lines
-            del ec_archive_dict[(expected_etag, ts)]
+            del ec_archive_dict[(expected_etag, ts, durable)]
            expected = 'Unable to get enough responses (%s/10) to ' \
-                       'reconstruct 10.0.0.1:1001/sdb/0%s policy#0 ' \
+                       'reconstruct %s 10.0.0.1:1001/sdb/0%s policy#0 ' \
-                       'frag#1 with ETag' % \
+                       'frag#1 with ETag %s and timestamp %s' % \
                       (etag_count[expected_etag],
-                        self.obj_path.decode('utf8'))
+                        'durable' if durable else 'non-durable',
                        self.obj_path.decode('utf8'),
                        expected_etag, ts)
            self.assertIn(
                expected, error_line,
                "Unexpected error line found: Expected: %s Got: %s"