ec: log durability of frags that fail to reconstruct

Whether the frag is durable or non-durable greatly affects how much I
care whether I can reconstruct it.

Change-Id: Ie6f46267d4bb567ecc0cc195d1fd7ce55c8cb325
This commit is contained in:
Tim Burke 2019-08-20 22:20:44 -07:00
parent f70520239c
commit ff5ea003b3
2 changed files with 32 additions and 20 deletions

View File

@@ -401,6 +401,7 @@ class ObjectReconstructor(Daemon):
path, headers, full_get_path)
buckets = defaultdict(dict)
durable_buckets = {}
etag_buckets = {}
error_resp_count = 0
for resp in pile:
@@ -444,6 +445,10 @@ class ObjectReconstructor(Daemon):
continue
timestamp = Timestamp(timestamp)
durable = resp.headers.get('X-Backend-Durable-Timestamp')
if durable:
durable_buckets[Timestamp(durable)] = True
etag = resp.headers.get('X-Object-Sysmeta-Ec-Etag')
if not etag:
self.logger.warning('Invalid resp from %s, frag index %s '
@@ -469,26 +474,29 @@ class ObjectReconstructor(Daemon):
% (fi_to_rebuild, list(buckets[timestamp])))
break
else:
path = _full_path(node, job['partition'],
datafile_metadata['name'],
job['policy'])
for timestamp, resp in sorted(buckets.items()):
etag = etag_buckets[timestamp]
durable = durable_buckets.get(timestamp)
self.logger.error(
'Unable to get enough responses (%s/%s) '
'to reconstruct %s frag#%s with ETag %s' % (
'Unable to get enough responses (%s/%s) to reconstruct '
'%s %s frag#%s with ETag %s and timestamp %s' % (
len(resp), job['policy'].ec_ndata,
_full_path(node, job['partition'],
datafile_metadata['name'],
job['policy']),
fi_to_rebuild, etag))
'durable' if durable else 'non-durable',
path, fi_to_rebuild, etag, timestamp.internal))
if error_resp_count:
durable = durable_buckets.get(Timestamp(
datafile_metadata['X-Timestamp']))
self.logger.error(
'Unable to get enough responses (%s error responses) '
'to reconstruct %s frag#%s' % (
'to reconstruct %s %s frag#%s' % (
error_resp_count,
_full_path(node, job['partition'],
datafile_metadata['name'],
job['policy']),
fi_to_rebuild))
'durable' if durable else 'non-durable',
path, fi_to_rebuild))
raise DiskFileError('Unable to reconstruct EC archive')

View File

@@ -5031,16 +5031,18 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
archive_bodies = encode_frag_archive_bodies(self.policy, body)
# pop the index to the destination node
archive_bodies.pop(1)
ec_archive_dict[
(md5(body).hexdigest(), next(ts).internal)] = archive_bodies
key = (md5(body).hexdigest(), next(ts).internal, bool(i % 2))
ec_archive_dict[key] = archive_bodies
responses = list()
# fill out response list by 3 different etag bodies
for etag, ts in itertools.cycle(ec_archive_dict):
body = ec_archive_dict[(etag, ts)].pop(0)
for etag, ts, durable in itertools.cycle(ec_archive_dict):
body = ec_archive_dict[(etag, ts, durable)].pop(0)
headers = get_header_frag_index(self, body)
headers.update({'X-Object-Sysmeta-Ec-Etag': etag,
'X-Backend-Timestamp': ts})
if durable:
headers['X-Backend-Durable-Timestamp'] = ts
responses.append((200, body, headers))
if len(responses) >= (self.policy.object_ring.replicas - 1):
break
@@ -5063,7 +5065,7 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
# 1 error log per etag to report not enough responses
self.assertEqual(3, len(error_lines))
for error_line in error_lines:
for expected_etag, ts in ec_archive_dict:
for expected_etag, ts, durable in ec_archive_dict:
if expected_etag in error_line:
break
else:
@@ -5072,13 +5074,15 @@ class TestReconstructFragmentArchive(BaseTestObjectReconstructor):
(list(ec_archive_dict), error_line))
# remove the found etag which should not be found in the
# following error lines
del ec_archive_dict[(expected_etag, ts)]
del ec_archive_dict[(expected_etag, ts, durable)]
expected = 'Unable to get enough responses (%s/10) to ' \
'reconstruct 10.0.0.1:1001/sdb/0%s policy#0 ' \
'frag#1 with ETag' % \
'reconstruct %s 10.0.0.1:1001/sdb/0%s policy#0 ' \
'frag#1 with ETag %s and timestamp %s' % \
(etag_count[expected_etag],
self.obj_path.decode('utf8'))
'durable' if durable else 'non-durable',
self.obj_path.decode('utf8'),
expected_etag, ts)
self.assertIn(
expected, error_line,
"Unexpected error line found: Expected: %s Got: %s"