Add warn level logging if we hit un-recoverable EC rebuild
Currently there is an 'all(fragments)' check in the ECAppIter to know when the iter is done. It assumes that all the fragments will be emtpy when they are done. But if this is not the case we should log. We have no idea if how often this can happen but currently we don't log it so have no idea. But it could be a cause of some EC short read 500 that clusters get from time to time. Here is an example of the message: May 26 23:42:57 saio proxy-server[7942]: Un-recoverable fragment rebuild. Only received 3/4 fragments for '/a/c/o' (txn: txb43f239d78834bdabcaa6-0060aedd01) (client_ip: 127.0.0.1) Change-Id: If68e72d613685081a3149aedbfd07a69b8669d7f
This commit is contained in:
parent
b3def185c6
commit
09ead69096
@ -1438,7 +1438,13 @@ class ECAppIter(object):
|
||||
# with an un-reconstructible list of fragments - so we'll
|
||||
# break out of the iter so WSGI can tear down the broken
|
||||
# connection.
|
||||
if not all(fragments):
|
||||
frags_with_data = sum([1 for f in fragments if f])
|
||||
if frags_with_data < len(fragments):
|
||||
if frags_with_data > 0:
|
||||
self.logger.warning(
|
||||
'Un-recoverable fragment rebuild. Only received '
|
||||
'%d/%d fragments for %r', frags_with_data,
|
||||
len(fragments), quote(self.path))
|
||||
break
|
||||
try:
|
||||
segment = self.policy.pyeclib_driver.decode(fragments)
|
||||
|
@ -4920,9 +4920,9 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
|
||||
# re-raise the ChunkReadTimeout and still want a txn-id
|
||||
with set_http_connect(*status_codes, body_iter=body_iter,
|
||||
headers=headers), \
|
||||
mock.patch(
|
||||
'swift.proxy.controllers.obj.ECFragGetter.fast_forward',
|
||||
side_effect=ValueError()):
|
||||
mock.patch(
|
||||
'swift.proxy.controllers.obj.ECFragGetter.fast_forward',
|
||||
side_effect=ValueError()):
|
||||
resp = req.get_response(self.app)
|
||||
self.assertEqual(resp.status_int, 200)
|
||||
self.assertNotEqual(md5(resp.body).hexdigest(), etag)
|
||||
@ -4930,7 +4930,13 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
|
||||
self.assertEqual(2, len(error_lines))
|
||||
self.assertIn('Unable to fast forward', error_lines[0])
|
||||
self.assertIn('Timeout fetching', error_lines[1])
|
||||
for line in self.logger.logger.records['ERROR']:
|
||||
warning_lines = self.logger.get_lines_for_level('warning')
|
||||
self.assertEqual(1, len(warning_lines))
|
||||
self.assertIn(
|
||||
'Un-recoverable fragment rebuild. Only received 9/10 fragments',
|
||||
warning_lines[0])
|
||||
for line in self.logger.logger.records['ERROR'] + \
|
||||
self.logger.logger.records['WARNING']:
|
||||
self.assertIn(req.headers['x-trans-id'], line)
|
||||
|
||||
def test_GET_read_timeout_resume_mixed_etag(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user