proxy: don't send multi-part terminator when no parts sent

If the proxy timed out while reading a replicated policy multi-part
response body, it would transform the ChunkReadTimeout to a
StopIteration. This masks the fact that the backend read has
terminated unexpectedly. The document_iters_to_multipart_byteranges
would complete iterating over parts and send a multipart terminator
line, even though no parts may have been sent.

This patch removes the conversion of ChunkReadTimeout to StopIteration.
The ChunkReadTimeout that is now raised prevents the
document_iters_to_multipart_byteranges 'for' loop completing and
therefore stops the multi-part terminator line being sent. It is
raised from the GetOrHeadHandler similar to other scenarios that raise
ChunkReadTimeouts while the resp body is being read.

A ChunkReadTimeout exception handler is removed in the
_iter_parts_from_response method. This handler was previously never
reached (because StopIteration rather than ChunkReadTimeout was raised
from _get_next_response_part), but if it were reached (i.e. with this
change) then it would repeat logging of the error and repeat
incrementing the node's error counter.

This change in the GetOrHeadHandler mimics a similar change in the
ECFragGetter [1].

[1] Related-Change: I0654815543be3df059eb2875d9b3669dbd97f5b4
Co-Authored-By: Tim Burke <tim.burke@gmail.com>
Change-Id: I6dd53e239f5e7eefcf1c74229a19b1df1c989b4a
This commit is contained in:
Alistair Coles 2023-10-06 14:53:29 +01:00
parent a16e1f55a7
commit dc3eda7e89
4 changed files with 134 additions and 42 deletions

View File

@ -1368,7 +1368,7 @@ class GetOrHeadHandler(GetterBase):
except ChunkReadTimeout:
if not self._replace_source(
'Trying to read object during GET (retrying)'):
raise StopIteration()
raise
def _iter_bytes_from_response_part(self, part_file, nbytes):
# yield chunks of bytes from a single response part; if an error
@ -1444,10 +1444,6 @@ class GetOrHeadHandler(GetterBase):
if part_iter:
part_iter.close()
except ChunkReadTimeout:
self.app.exception_occurred(self.source.node, 'Object',
'Trying to read during GET')
raise
except ChunkWriteTimeout:
self.logger.info(
'Client did not read from proxy within %ss',

View File

@ -1497,3 +1497,35 @@ class FakeSource(object):
def getheaders(self):
    """Return the response headers as a list of (name, value) pairs.

    The content-length header is always placed first, followed by every
    entry in ``self.headers``.
    """
    result = [('content-length', self.getheader('content-length'))]
    result.extend(self.headers.items())
    return result
def get_node_error_stats(proxy_app, ring_node):
    """Return the proxy error-limiter stats entry for ``ring_node``.

    Reaches into the proxy app's internals; an empty dict is returned
    when no stats have been recorded for the node's key.
    """
    limiter = proxy_app.error_limiter
    stats = limiter.stats.get(limiter.node_key(ring_node))
    return stats if stats else {}
def node_error_count(proxy_app, ring_node):
    """Return the error count recorded against ``ring_node``.

    Reaches into the proxy's internals; 0 is returned when no errors
    have been recorded for the node.
    """
    stats = get_node_error_stats(proxy_app, ring_node)
    return stats.get('errors', 0)
def node_error_counts(proxy_app, ring_nodes):
    """Return the error counts for a list of nodes, largest first.

    Reaches into the proxy's internals; nodes with no recorded errors
    contribute 0.
    """
    counts = [get_node_error_stats(proxy_app, node).get('errors', 0)
              for node in ring_nodes]
    counts.sort(reverse=True)
    return counts
def node_last_error(proxy_app, ring_node):
    """Return the last error recorded against ``ring_node``.

    Reaches into the proxy's internals; None is returned when no error
    has been recorded for the node.
    """
    stats = get_node_error_stats(proxy_app, ring_node)
    return stats.get('last_error')
def set_node_errors(proxy_app, ring_node, value, last_error):
    """Force ``ring_node``'s error-limiter stats to the given values.

    Overwrites any existing stats entry for the node's key with
    ``value`` errors and ``last_error`` as the last-error record.
    """
    limiter = proxy_app.error_limiter
    limiter.stats[limiter.node_key(ring_node)] = {
        'errors': value,
        'last_error': last_error,
    }

View File

@ -56,8 +56,8 @@ from test.unit import (
FakeRing, fake_http_connect, patch_policies, SlowBody, FakeStatus,
DEFAULT_TEST_EC_TYPE, encode_frag_archive_bodies, make_ec_object_stub,
fake_ec_node_response, StubResponse, mocked_http_conn,
quiet_eventlet_exceptions, FakeSource, make_timestamp_iter, FakeMemcache)
from test.unit.proxy.test_server import node_error_count
quiet_eventlet_exceptions, FakeSource, make_timestamp_iter, FakeMemcache,
node_error_count, node_error_counts)
def unchunk_body(chunked_body):
@ -1773,13 +1773,15 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
req = swob.Request.blank('/v1/a/c/o', headers={
'Range': 'bytes=0-49,100-104'})
with capture_http_requests(get_response) as log:
with capture_http_requests(get_response) as captured_requests:
resp = req.get_response(self.app)
self.assertEqual(resp.status_int, 206)
actual_body = resp.body
self.assertEqual(resp.status_int, 206)
self.assertEqual(2, len(log))
self.assertEqual(2, len(captured_requests))
self.assertEqual([1] + [0] * (self.replicas() - 1),
node_error_counts(self.app, self.obj_ring.devs))
# note: client response uses boundary from first backend response
self.assertEqual(resp_body1, actual_body)
error_lines = self.app.logger.get_lines_for_level('error')
@ -1815,7 +1817,6 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
def test_GET_with_multirange_slow_body_unable_to_resume(self):
self.app.recoverable_node_timeout = 0.01
self.app.object_chunk_size = 10
obj_data = b'testing' * 100
etag = md5(obj_data, usedforsecurity=False).hexdigest()
boundary = b'81eb9c110b32ced5fe'
@ -1851,15 +1852,18 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
req = swob.Request.blank('/v1/a/c/o', headers={
'Range': 'bytes=0-49,100-104'})
response_chunks = []
with capture_http_requests(get_response) as log:
resp = req.get_response(self.app)
self.assertEqual(resp.status_int, 206)
actual_body = resp.body
with self.assertRaises(ChunkReadTimeout):
# note: the error is raised while the resp_iter is read...
for chunk in resp.app_iter:
response_chunks.append(chunk)
self.assertEqual(response_chunks, [])
self.assertEqual(resp.status_int, 206)
self.assertEqual([1, 1, 1],
node_error_counts(self.app, self.obj_ring.devs))
self.assertEqual(6, len(log))
resp_boundary = resp.headers['content-type'].rsplit('=', 1)[1].encode()
self.assertEqual(b'--%s--' % resp_boundary, actual_body)
error_lines = self.app.logger.get_lines_for_level('error')
self.assertEqual(3, len(error_lines))
for line in error_lines:
@ -1895,6 +1899,8 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
_ = resp.body
self.assertEqual(resp.status_int, 200)
self.assertEqual(etag, resp.headers.get('ETag'))
self.assertEqual([1] * self.replicas(),
node_error_counts(self.app, self.obj_ring.devs))
error_lines = self.app.logger.get_lines_for_level('error')
self.assertEqual(3, len(error_lines))
@ -2024,6 +2030,8 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
log.requests[2]['headers']['Range'])
self.assertNotIn('X-Backend-Ignore-Range-If-Metadata-Present',
log.requests[2]['headers'])
self.assertEqual([1, 1] + [0] * (self.replicas() - 2),
node_error_counts(self.app, self.obj_ring.devs))
def test_GET_transfer_encoding_chunked(self):
req = swift.common.swob.Request.blank('/v1/a/c/o')
@ -2088,6 +2096,8 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
with set_http_connect(*codes):
resp = req.get_response(self.app)
self.assertEqual(resp.status_int, 503)
self.assertEqual([1] * self.replicas(),
node_error_counts(self.app, self.obj_ring.devs))
def test_HEAD_error_limit_supression_count(self):
def do_test(primary_codes, expected, clear_stats=True):
@ -4873,7 +4883,8 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
'Range': 'bytes=1000-2000,14000-15000'})
with capture_http_requests(get_response) as log:
resp = req.get_response(self.app)
_ = resp.body
# note: the error is raised before the resp_iter is read
self.assertIn(b'Internal Error', resp.body)
self.assertEqual(resp.status_int, 500)
self.assertEqual(len(log), self.policy.ec_n_unique_fragments * 2)
log_lines = self.app.logger.get_lines_for_level('error')
@ -4886,6 +4897,84 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
self.assertIn('Unhandled exception in request: ChunkReadTimeout',
log_lines[2])
def test_GET_with_multirange_unable_to_resume_body_started(self):
"""A multirange EC GET that cannot resume after the client body has
started must raise ChunkReadTimeout from the response iterator
rather than silently sending a multipart terminator.

The first backend response slows down after serving some bytes, so
the proxy has already emitted part of the first multipart document
when the read times out; with no spare good responses available the
error propagates to the caller of resp.app_iter.
"""
self.app.object_chunk_size = 256
self.app.recoverable_node_timeout = 0.01
test_body = b'test' * self.policy.ec_segment_size
ec_stub = make_ec_object_stub(test_body, self.policy, None)
frag_archives = ec_stub['frags']
self.assertEqual(len(frag_archives[0]), 1960)
boundary = b'81eb9c110b32ced5fe'
# Build a two-part multipart/byteranges body from one frag archive.
def make_mime_body(frag_archive):
return b'\r\n'.join([
b'--' + boundary,
b'Content-Type: application/octet-stream',
b'Content-Range: bytes 0-489/1960',
b'',
frag_archive[0:490],
b'--' + boundary,
b'Content-Type: application/octet-stream',
b'Content-Range: bytes 1470-1959/1960',
b'',
frag_archive[1470:],
b'--' + boundary + b'--',
])
obj_resp_bodies = [make_mime_body(fa) for fa
# no extra good responses
in ec_stub['frags'][:self.policy.ec_ndata]]
headers = {
'Content-Type': b'multipart/byteranges;boundary=' + boundary,
'Content-Length': len(obj_resp_bodies[0]),
'X-Object-Sysmeta-Ec-Content-Length': len(ec_stub['body']),
'X-Object-Sysmeta-Ec-Etag': ec_stub['etag'],
'X-Timestamp': Timestamp(self.ts()).normal,
}
responses = [
StubResponse(206, body, headers, i,
# make the first one slow
slowdown=0.1 if i == 0 else None)
for i, body in enumerate(obj_resp_bodies)
]
# the first response serves some bytes before slowing down
responses[0].slowdown_after = 1000
def get_response(req):
return responses.pop(0) if responses else StubResponse(404)
req = swob.Request.blank('/v1/a/c/o', headers={
'Range': 'bytes=1000-2000,14000-15000'})
response_chunks = []
with capture_http_requests(get_response) as log:
resp = req.get_response(self.app)
with self.assertRaises(ChunkReadTimeout):
# note: the error is raised while the resp_iter is read
for chunk in resp.app_iter:
response_chunks.append(chunk)
boundary = resp.headers['Content-Type'].split('=', 1)[1]
# Only the first document's headers and partial body were sent;
# crucially there is no closing multipart terminator line.
self.assertEqual(response_chunks, [
b'\r\n'.join([
b'--' + boundary.encode('ascii'),
b'Content-Type: application/octet-stream',
b'Content-Range: bytes 1000-2000/16384',
b'',
b'',
]),
test_body[0:1001],
b'\r\n',
])
self.assertEqual(resp.status_int, 206)
self.assertEqual(len(log), self.policy.ec_n_unique_fragments * 2)
log_lines = self.app.logger.get_lines_for_level('error')
self.assertEqual(2, len(log_lines), log_lines)
self.assertIn('Trying to read next part of EC multi-part GET',
log_lines[0])
self.assertIn('Trying to read during GET: ChunkReadTimeout',
log_lines[1])
def test_GET_with_multirange_short_resume_body(self):
self.app.object_chunk_size = 256
self.app.recoverable_node_timeout = 0.01

View File

@ -58,7 +58,7 @@ from test.unit import (
connect_tcp, readuntil2crlfs, fake_http_connect, FakeRing,
FakeMemcache, patch_policies, write_fake_ring, mocked_http_conn,
DEFAULT_TEST_EC_TYPE, make_timestamp_iter, skip_if_no_xattrs,
FakeHTTPResponse)
FakeHTTPResponse, node_error_count, node_last_error, set_node_errors)
from test.unit.helpers import setup_servers, teardown_servers
from swift.proxy import server as proxy_server
from swift.proxy.controllers.obj import ReplicatedObjectController
@ -154,31 +154,6 @@ def parse_headers_string(headers_str):
return headers_dict
# Look up the error-limiter stats recorded for this node; an empty
# dict is returned when no stats exist for the node's key.
def get_node_error_stats(proxy_app, ring_node):
node_key = proxy_app.error_limiter.node_key(ring_node)
return proxy_app.error_limiter.stats.get(node_key) or {}
def node_error_count(proxy_app, ring_node):
# Reach into the proxy's internals to get the error count for a
# particular node (0 when no errors have been recorded)
return get_node_error_stats(proxy_app, ring_node).get('errors', 0)
def node_last_error(proxy_app, ring_node):
# Reach into the proxy's internals to get the last error for a
# particular node (None when no error has been recorded)
return get_node_error_stats(proxy_app, ring_node).get('last_error')
def set_node_errors(proxy_app, ring_node, value, last_error):
# Set the node's error count to value and record last_error,
# overwriting any existing stats for the node's key
node_key = proxy_app.error_limiter.node_key(ring_node)
stats = {'errors': value,
'last_error': last_error}
proxy_app.error_limiter.stats[node_key] = stats
@contextmanager
def save_globals():
orig_http_connect = getattr(swift.proxy.controllers.base, 'http_connect',