From dc3eda7e899c8df7d70f5e881ac39c6243b79d91 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Fri, 6 Oct 2023 14:53:29 +0100 Subject: [PATCH] proxy: don't send multi-part terminator when no parts sent If the proxy timed out while reading a replicated policy multi-part response body, it would transform the ChunkReadTimeout to a StopIteration. This masks the fact that the backend read has terminated unexpectedly. The document_iters_to_multipart_byteranges would complete iterating over parts and send a multipart terminator line, even though no parts may have been sent. This patch removes the conversion of ChunkReadTmeout to StopIteration. The ChunkReadTimeout that is now raised prevents the document_iters_to_multipart_byteranges 'for' loop completing and therefore stops the multi-part terminator line being sent. It is raised from the GetOrHeadHandler similar to other scenarios that raise ChunkReadTimeouts while the resp body is being read. A ChunkReadTimeout exception handler is removed in the _iter_parts_from_response method. This handler was previously never reached (because StopIteration rather than ChunkReadTimeout was raised from _get_next_response_part), but if it were reached (i.e. with this change) then it would repeat logging of the error and repeat incrementing the node's error counter. This change in the GetOrHeadHandler mimics a similar change in the ECFragGetter [1]. [1] Related-Chage: I0654815543be3df059eb2875d9b3669dbd97f5b4 Co-Authored-By: Tim Burke Change-Id: I6dd53e239f5e7eefcf1c74229a19b1df1c989b4a --- swift/proxy/controllers/base.py | 6 +- test/unit/__init__.py | 32 +++++++ test/unit/proxy/controllers/test_obj.py | 111 +++++++++++++++++++++--- test/unit/proxy/test_server.py | 27 +----- 4 files changed, 134 insertions(+), 42 deletions(-) diff --git a/swift/proxy/controllers/base.py b/swift/proxy/controllers/base.py index f19f160089..c202370ed2 100644 --- a/swift/proxy/controllers/base.py +++ b/swift/proxy/controllers/base.py @@ -1368,7 +1368,7 @@ class GetOrHeadHandler(GetterBase): except ChunkReadTimeout: if not self._replace_source( 'Trying to read object during GET (retrying)'): - raise StopIteration() + raise def _iter_bytes_from_response_part(self, part_file, nbytes): # yield chunks of bytes from a single response part; if an error @@ -1444,10 +1444,6 @@ class GetOrHeadHandler(GetterBase): if part_iter: part_iter.close() - except ChunkReadTimeout: - self.app.exception_occurred(self.source.node, 'Object', - 'Trying to read during GET') - raise except ChunkWriteTimeout: self.logger.info( 'Client did not read from proxy within %ss', diff --git a/test/unit/__init__.py b/test/unit/__init__.py index b51f10259a..6f958cb0d9 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -1497,3 +1497,35 @@ class FakeSource(object): def getheaders(self): return [('content-length', self.getheader('content-length'))] + \ [(k, v) for k, v in self.headers.items()] + + +def get_node_error_stats(proxy_app, ring_node): + node_key = proxy_app.error_limiter.node_key(ring_node) + return proxy_app.error_limiter.stats.get(node_key) or {} + + +def node_error_count(proxy_app, ring_node): + # Reach into the proxy's internals to get the error count for a + # particular node + return get_node_error_stats(proxy_app, ring_node).get('errors', 0) + + +def node_error_counts(proxy_app, ring_nodes): + # Reach into the proxy's internals to get the error counts for a + # list of nodes + return sorted([get_node_error_stats(proxy_app, node).get('errors', 0) + for node in ring_nodes], reverse=True) + + +def node_last_error(proxy_app, ring_node): + # Reach into the proxy's internals to get the last error for a + # particular node + return get_node_error_stats(proxy_app, ring_node).get('last_error') + + +def set_node_errors(proxy_app, ring_node, value, last_error): + # Set the node's error count to value + node_key = proxy_app.error_limiter.node_key(ring_node) + stats = {'errors': value, + 'last_error': last_error} + proxy_app.error_limiter.stats[node_key] = stats diff --git a/test/unit/proxy/controllers/test_obj.py b/test/unit/proxy/controllers/test_obj.py index 474fb160d9..5661332ab4 100644 --- a/test/unit/proxy/controllers/test_obj.py +++ b/test/unit/proxy/controllers/test_obj.py @@ -56,8 +56,8 @@ from test.unit import ( FakeRing, fake_http_connect, patch_policies, SlowBody, FakeStatus, DEFAULT_TEST_EC_TYPE, encode_frag_archive_bodies, make_ec_object_stub, fake_ec_node_response, StubResponse, mocked_http_conn, - quiet_eventlet_exceptions, FakeSource, make_timestamp_iter, FakeMemcache) -from test.unit.proxy.test_server import node_error_count + quiet_eventlet_exceptions, FakeSource, make_timestamp_iter, FakeMemcache, + node_error_count, node_error_counts) def unchunk_body(chunked_body): @@ -1773,13 +1773,15 @@ class TestReplicatedObjController(CommonObjectControllerMixin, req = swob.Request.blank('/v1/a/c/o', headers={ 'Range': 'bytes=0-49,100-104'}) - with capture_http_requests(get_response) as log: + with capture_http_requests(get_response) as captured_requests: resp = req.get_response(self.app) self.assertEqual(resp.status_int, 206) actual_body = resp.body self.assertEqual(resp.status_int, 206) - self.assertEqual(2, len(log)) + self.assertEqual(2, len(captured_requests)) + self.assertEqual([1] + [0] * (self.replicas() - 1), + node_error_counts(self.app, self.obj_ring.devs)) # note: client response uses boundary from first backend response self.assertEqual(resp_body1, actual_body) error_lines = self.app.logger.get_lines_for_level('error') @@ -1815,7 +1817,6 @@ class TestReplicatedObjController(CommonObjectControllerMixin, def test_GET_with_multirange_slow_body_unable_to_resume(self): self.app.recoverable_node_timeout = 0.01 - self.app.object_chunk_size = 10 obj_data = b'testing' * 100 etag = md5(obj_data, usedforsecurity=False).hexdigest() boundary = b'81eb9c110b32ced5fe' @@ -1851,15 +1852,18 @@ class TestReplicatedObjController(CommonObjectControllerMixin, req = swob.Request.blank('/v1/a/c/o', headers={ 'Range': 'bytes=0-49,100-104'}) + response_chunks = [] with capture_http_requests(get_response) as log: resp = req.get_response(self.app) - self.assertEqual(resp.status_int, 206) - actual_body = resp.body - + with self.assertRaises(ChunkReadTimeout): + # note: the error is raised while the resp_iter is read... + for chunk in resp.app_iter: + response_chunks.append(chunk) + self.assertEqual(response_chunks, []) self.assertEqual(resp.status_int, 206) + self.assertEqual([1, 1, 1], + node_error_counts(self.app, self.obj_ring.devs)) self.assertEqual(6, len(log)) - resp_boundary = resp.headers['content-type'].rsplit('=', 1)[1].encode() - self.assertEqual(b'--%s--' % resp_boundary, actual_body) error_lines = self.app.logger.get_lines_for_level('error') self.assertEqual(3, len(error_lines)) for line in error_lines: @@ -1895,6 +1899,8 @@ class TestReplicatedObjController(CommonObjectControllerMixin, _ = resp.body self.assertEqual(resp.status_int, 200) self.assertEqual(etag, resp.headers.get('ETag')) + self.assertEqual([1] * self.replicas(), + node_error_counts(self.app, self.obj_ring.devs)) error_lines = self.app.logger.get_lines_for_level('error') self.assertEqual(3, len(error_lines)) @@ -2024,6 +2030,8 @@ class TestReplicatedObjController(CommonObjectControllerMixin, log.requests[2]['headers']['Range']) self.assertNotIn('X-Backend-Ignore-Range-If-Metadata-Present', log.requests[2]['headers']) + self.assertEqual([1, 1] + [0] * (self.replicas() - 2), + node_error_counts(self.app, self.obj_ring.devs)) def test_GET_transfer_encoding_chunked(self): req = swift.common.swob.Request.blank('/v1/a/c/o') @@ -2088,6 +2096,8 @@ class TestReplicatedObjController(CommonObjectControllerMixin, with set_http_connect(*codes): resp = req.get_response(self.app) self.assertEqual(resp.status_int, 503) + self.assertEqual([1] * self.replicas(), + node_error_counts(self.app, self.obj_ring.devs)) def test_HEAD_error_limit_supression_count(self): def do_test(primary_codes, expected, clear_stats=True): @@ -4873,7 +4883,8 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase): 'Range': 'bytes=1000-2000,14000-15000'}) with capture_http_requests(get_response) as log: resp = req.get_response(self.app) - _ = resp.body + # note: the error is raised before the resp_iter is read + self.assertIn(b'Internal Error', resp.body) self.assertEqual(resp.status_int, 500) self.assertEqual(len(log), self.policy.ec_n_unique_fragments * 2) log_lines = self.app.logger.get_lines_for_level('error') @@ -4886,6 +4897,84 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase): self.assertIn('Unhandled exception in request: ChunkReadTimeout', log_lines[2]) + def test_GET_with_multirange_unable_to_resume_body_started(self): + self.app.object_chunk_size = 256 + self.app.recoverable_node_timeout = 0.01 + test_body = b'test' * self.policy.ec_segment_size + ec_stub = make_ec_object_stub(test_body, self.policy, None) + frag_archives = ec_stub['frags'] + self.assertEqual(len(frag_archives[0]), 1960) + boundary = b'81eb9c110b32ced5fe' + + def make_mime_body(frag_archive): + return b'\r\n'.join([ + b'--' + boundary, + b'Content-Type: application/octet-stream', + b'Content-Range: bytes 0-489/1960', + b'', + frag_archive[0:490], + b'--' + boundary, + b'Content-Type: application/octet-stream', + b'Content-Range: bytes 1470-1959/1960', + b'', + frag_archive[1470:], + b'--' + boundary + b'--', + ]) + + obj_resp_bodies = [make_mime_body(fa) for fa + # no extra good responses + in ec_stub['frags'][:self.policy.ec_ndata]] + + headers = { + 'Content-Type': b'multipart/byteranges;boundary=' + boundary, + 'Content-Length': len(obj_resp_bodies[0]), + 'X-Object-Sysmeta-Ec-Content-Length': len(ec_stub['body']), + 'X-Object-Sysmeta-Ec-Etag': ec_stub['etag'], + 'X-Timestamp': Timestamp(self.ts()).normal, + } + + responses = [ + StubResponse(206, body, headers, i, + # make the first one slow + slowdown=0.1 if i == 0 else None) + for i, body in enumerate(obj_resp_bodies) + ] + # the first response serves some bytes before slowing down + responses[0].slowdown_after = 1000 + + def get_response(req): + return responses.pop(0) if responses else StubResponse(404) + + req = swob.Request.blank('/v1/a/c/o', headers={ + 'Range': 'bytes=1000-2000,14000-15000'}) + response_chunks = [] + with capture_http_requests(get_response) as log: + resp = req.get_response(self.app) + with self.assertRaises(ChunkReadTimeout): + # note: the error is raised while the resp_iter is read + for chunk in resp.app_iter: + response_chunks.append(chunk) + boundary = resp.headers['Content-Type'].split('=', 1)[1] + self.assertEqual(response_chunks, [ + b'\r\n'.join([ + b'--' + boundary.encode('ascii'), + b'Content-Type: application/octet-stream', + b'Content-Range: bytes 1000-2000/16384', + b'', + b'', + ]), + test_body[0:1001], + b'\r\n', + ]) + self.assertEqual(resp.status_int, 206) + self.assertEqual(len(log), self.policy.ec_n_unique_fragments * 2) + log_lines = self.app.logger.get_lines_for_level('error') + self.assertEqual(2, len(log_lines), log_lines) + self.assertIn('Trying to read next part of EC multi-part GET', + log_lines[0]) + self.assertIn('Trying to read during GET: ChunkReadTimeout', + log_lines[1]) + def test_GET_with_multirange_short_resume_body(self): self.app.object_chunk_size = 256 self.app.recoverable_node_timeout = 0.01 diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 4602f146c2..d3fcb961fb 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -58,7 +58,7 @@ from test.unit import ( connect_tcp, readuntil2crlfs, fake_http_connect, FakeRing, FakeMemcache, patch_policies, write_fake_ring, mocked_http_conn, DEFAULT_TEST_EC_TYPE, make_timestamp_iter, skip_if_no_xattrs, - FakeHTTPResponse) + FakeHTTPResponse, node_error_count, node_last_error, set_node_errors) from test.unit.helpers import setup_servers, teardown_servers from swift.proxy import server as proxy_server from swift.proxy.controllers.obj import ReplicatedObjectController @@ -154,31 +154,6 @@ def parse_headers_string(headers_str): return headers_dict -def get_node_error_stats(proxy_app, ring_node): - node_key = proxy_app.error_limiter.node_key(ring_node) - return proxy_app.error_limiter.stats.get(node_key) or {} - - -def node_error_count(proxy_app, ring_node): - # Reach into the proxy's internals to get the error count for a - # particular node - return get_node_error_stats(proxy_app, ring_node).get('errors', 0) - - -def node_last_error(proxy_app, ring_node): - # Reach into the proxy's internals to get the last error for a - # particular node - return get_node_error_stats(proxy_app, ring_node).get('last_error') - - -def set_node_errors(proxy_app, ring_node, value, last_error): - # Set the node's error count to value - node_key = proxy_app.error_limiter.node_key(ring_node) - stats = {'errors': value, - 'last_error': last_error} - proxy_app.error_limiter.stats[node_key] = stats - - @contextmanager def save_globals(): orig_http_connect = getattr(swift.proxy.controllers.base, 'http_connect',