proxy: don't send multi-part terminator when no parts sent
If the proxy timed out while reading a replicated policy multi-part response body, it would transform the ChunkReadTimeout to a StopIteration. This masks the fact that the backend read has terminated unexpectedly. The document_iters_to_multipart_byteranges would complete iterating over parts and send a multipart terminator line, even though no parts may have been sent. This patch removes the conversion of ChunkReadTimeout to StopIteration. The ChunkReadTimeout that is now raised prevents the document_iters_to_multipart_byteranges 'for' loop completing and therefore stops the multi-part terminator line being sent. It is raised from the GetOrHeadHandler similar to other scenarios that raise ChunkReadTimeouts while the resp body is being read. A ChunkReadTimeout exception handler is removed in the _iter_parts_from_response method. This handler was previously never reached (because StopIteration rather than ChunkReadTimeout was raised from _get_next_response_part), but if it were reached (i.e. with this change) then it would repeat logging of the error and repeat incrementing the node's error counter. This change in the GetOrHeadHandler mimics a similar change in the ECFragGetter [1]. [1] Related-Change: I0654815543be3df059eb2875d9b3669dbd97f5b4 Co-Authored-By: Tim Burke <tim.burke@gmail.com> Change-Id: I6dd53e239f5e7eefcf1c74229a19b1df1c989b4a
This commit is contained in:
parent
a16e1f55a7
commit
dc3eda7e89
@ -1368,7 +1368,7 @@ class GetOrHeadHandler(GetterBase):
|
||||
except ChunkReadTimeout:
|
||||
if not self._replace_source(
|
||||
'Trying to read object during GET (retrying)'):
|
||||
raise StopIteration()
|
||||
raise
|
||||
|
||||
def _iter_bytes_from_response_part(self, part_file, nbytes):
|
||||
# yield chunks of bytes from a single response part; if an error
|
||||
@ -1444,10 +1444,6 @@ class GetOrHeadHandler(GetterBase):
|
||||
if part_iter:
|
||||
part_iter.close()
|
||||
|
||||
except ChunkReadTimeout:
|
||||
self.app.exception_occurred(self.source.node, 'Object',
|
||||
'Trying to read during GET')
|
||||
raise
|
||||
except ChunkWriteTimeout:
|
||||
self.logger.info(
|
||||
'Client did not read from proxy within %ss',
|
||||
|
@ -1497,3 +1497,35 @@ class FakeSource(object):
|
||||
def getheaders(self):
|
||||
return [('content-length', self.getheader('content-length'))] + \
|
||||
[(k, v) for k, v in self.headers.items()]
|
||||
|
||||
|
||||
def get_node_error_stats(proxy_app, ring_node):
|
||||
node_key = proxy_app.error_limiter.node_key(ring_node)
|
||||
return proxy_app.error_limiter.stats.get(node_key) or {}
|
||||
|
||||
|
||||
def node_error_count(proxy_app, ring_node):
|
||||
# Reach into the proxy's internals to get the error count for a
|
||||
# particular node
|
||||
return get_node_error_stats(proxy_app, ring_node).get('errors', 0)
|
||||
|
||||
|
||||
def node_error_counts(proxy_app, ring_nodes):
|
||||
# Reach into the proxy's internals to get the error counts for a
|
||||
# list of nodes
|
||||
return sorted([get_node_error_stats(proxy_app, node).get('errors', 0)
|
||||
for node in ring_nodes], reverse=True)
|
||||
|
||||
|
||||
def node_last_error(proxy_app, ring_node):
|
||||
# Reach into the proxy's internals to get the last error for a
|
||||
# particular node
|
||||
return get_node_error_stats(proxy_app, ring_node).get('last_error')
|
||||
|
||||
|
||||
def set_node_errors(proxy_app, ring_node, value, last_error):
|
||||
# Set the node's error count to value
|
||||
node_key = proxy_app.error_limiter.node_key(ring_node)
|
||||
stats = {'errors': value,
|
||||
'last_error': last_error}
|
||||
proxy_app.error_limiter.stats[node_key] = stats
|
||||
|
@ -56,8 +56,8 @@ from test.unit import (
|
||||
FakeRing, fake_http_connect, patch_policies, SlowBody, FakeStatus,
|
||||
DEFAULT_TEST_EC_TYPE, encode_frag_archive_bodies, make_ec_object_stub,
|
||||
fake_ec_node_response, StubResponse, mocked_http_conn,
|
||||
quiet_eventlet_exceptions, FakeSource, make_timestamp_iter, FakeMemcache)
|
||||
from test.unit.proxy.test_server import node_error_count
|
||||
quiet_eventlet_exceptions, FakeSource, make_timestamp_iter, FakeMemcache,
|
||||
node_error_count, node_error_counts)
|
||||
|
||||
|
||||
def unchunk_body(chunked_body):
|
||||
@ -1773,13 +1773,15 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
|
||||
|
||||
req = swob.Request.blank('/v1/a/c/o', headers={
|
||||
'Range': 'bytes=0-49,100-104'})
|
||||
with capture_http_requests(get_response) as log:
|
||||
with capture_http_requests(get_response) as captured_requests:
|
||||
resp = req.get_response(self.app)
|
||||
self.assertEqual(resp.status_int, 206)
|
||||
actual_body = resp.body
|
||||
|
||||
self.assertEqual(resp.status_int, 206)
|
||||
self.assertEqual(2, len(log))
|
||||
self.assertEqual(2, len(captured_requests))
|
||||
self.assertEqual([1] + [0] * (self.replicas() - 1),
|
||||
node_error_counts(self.app, self.obj_ring.devs))
|
||||
# note: client response uses boundary from first backend response
|
||||
self.assertEqual(resp_body1, actual_body)
|
||||
error_lines = self.app.logger.get_lines_for_level('error')
|
||||
@ -1815,7 +1817,6 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
|
||||
|
||||
def test_GET_with_multirange_slow_body_unable_to_resume(self):
|
||||
self.app.recoverable_node_timeout = 0.01
|
||||
self.app.object_chunk_size = 10
|
||||
obj_data = b'testing' * 100
|
||||
etag = md5(obj_data, usedforsecurity=False).hexdigest()
|
||||
boundary = b'81eb9c110b32ced5fe'
|
||||
@ -1851,15 +1852,18 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
|
||||
|
||||
req = swob.Request.blank('/v1/a/c/o', headers={
|
||||
'Range': 'bytes=0-49,100-104'})
|
||||
response_chunks = []
|
||||
with capture_http_requests(get_response) as log:
|
||||
resp = req.get_response(self.app)
|
||||
with self.assertRaises(ChunkReadTimeout):
|
||||
# note: the error is raised while the resp_iter is read...
|
||||
for chunk in resp.app_iter:
|
||||
response_chunks.append(chunk)
|
||||
self.assertEqual(response_chunks, [])
|
||||
self.assertEqual(resp.status_int, 206)
|
||||
actual_body = resp.body
|
||||
|
||||
self.assertEqual(resp.status_int, 206)
|
||||
self.assertEqual([1, 1, 1],
|
||||
node_error_counts(self.app, self.obj_ring.devs))
|
||||
self.assertEqual(6, len(log))
|
||||
resp_boundary = resp.headers['content-type'].rsplit('=', 1)[1].encode()
|
||||
self.assertEqual(b'--%s--' % resp_boundary, actual_body)
|
||||
error_lines = self.app.logger.get_lines_for_level('error')
|
||||
self.assertEqual(3, len(error_lines))
|
||||
for line in error_lines:
|
||||
@ -1895,6 +1899,8 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
|
||||
_ = resp.body
|
||||
self.assertEqual(resp.status_int, 200)
|
||||
self.assertEqual(etag, resp.headers.get('ETag'))
|
||||
self.assertEqual([1] * self.replicas(),
|
||||
node_error_counts(self.app, self.obj_ring.devs))
|
||||
|
||||
error_lines = self.app.logger.get_lines_for_level('error')
|
||||
self.assertEqual(3, len(error_lines))
|
||||
@ -2024,6 +2030,8 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
|
||||
log.requests[2]['headers']['Range'])
|
||||
self.assertNotIn('X-Backend-Ignore-Range-If-Metadata-Present',
|
||||
log.requests[2]['headers'])
|
||||
self.assertEqual([1, 1] + [0] * (self.replicas() - 2),
|
||||
node_error_counts(self.app, self.obj_ring.devs))
|
||||
|
||||
def test_GET_transfer_encoding_chunked(self):
|
||||
req = swift.common.swob.Request.blank('/v1/a/c/o')
|
||||
@ -2088,6 +2096,8 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
|
||||
with set_http_connect(*codes):
|
||||
resp = req.get_response(self.app)
|
||||
self.assertEqual(resp.status_int, 503)
|
||||
self.assertEqual([1] * self.replicas(),
|
||||
node_error_counts(self.app, self.obj_ring.devs))
|
||||
|
||||
def test_HEAD_error_limit_supression_count(self):
|
||||
def do_test(primary_codes, expected, clear_stats=True):
|
||||
@ -4873,7 +4883,8 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
|
||||
'Range': 'bytes=1000-2000,14000-15000'})
|
||||
with capture_http_requests(get_response) as log:
|
||||
resp = req.get_response(self.app)
|
||||
_ = resp.body
|
||||
# note: the error is raised before the resp_iter is read
|
||||
self.assertIn(b'Internal Error', resp.body)
|
||||
self.assertEqual(resp.status_int, 500)
|
||||
self.assertEqual(len(log), self.policy.ec_n_unique_fragments * 2)
|
||||
log_lines = self.app.logger.get_lines_for_level('error')
|
||||
@ -4886,6 +4897,84 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
|
||||
self.assertIn('Unhandled exception in request: ChunkReadTimeout',
|
||||
log_lines[2])
|
||||
|
||||
def test_GET_with_multirange_unable_to_resume_body_started(self):
|
||||
self.app.object_chunk_size = 256
|
||||
self.app.recoverable_node_timeout = 0.01
|
||||
test_body = b'test' * self.policy.ec_segment_size
|
||||
ec_stub = make_ec_object_stub(test_body, self.policy, None)
|
||||
frag_archives = ec_stub['frags']
|
||||
self.assertEqual(len(frag_archives[0]), 1960)
|
||||
boundary = b'81eb9c110b32ced5fe'
|
||||
|
||||
def make_mime_body(frag_archive):
|
||||
return b'\r\n'.join([
|
||||
b'--' + boundary,
|
||||
b'Content-Type: application/octet-stream',
|
||||
b'Content-Range: bytes 0-489/1960',
|
||||
b'',
|
||||
frag_archive[0:490],
|
||||
b'--' + boundary,
|
||||
b'Content-Type: application/octet-stream',
|
||||
b'Content-Range: bytes 1470-1959/1960',
|
||||
b'',
|
||||
frag_archive[1470:],
|
||||
b'--' + boundary + b'--',
|
||||
])
|
||||
|
||||
obj_resp_bodies = [make_mime_body(fa) for fa
|
||||
# no extra good responses
|
||||
in ec_stub['frags'][:self.policy.ec_ndata]]
|
||||
|
||||
headers = {
|
||||
'Content-Type': b'multipart/byteranges;boundary=' + boundary,
|
||||
'Content-Length': len(obj_resp_bodies[0]),
|
||||
'X-Object-Sysmeta-Ec-Content-Length': len(ec_stub['body']),
|
||||
'X-Object-Sysmeta-Ec-Etag': ec_stub['etag'],
|
||||
'X-Timestamp': Timestamp(self.ts()).normal,
|
||||
}
|
||||
|
||||
responses = [
|
||||
StubResponse(206, body, headers, i,
|
||||
# make the first one slow
|
||||
slowdown=0.1 if i == 0 else None)
|
||||
for i, body in enumerate(obj_resp_bodies)
|
||||
]
|
||||
# the first response serves some bytes before slowing down
|
||||
responses[0].slowdown_after = 1000
|
||||
|
||||
def get_response(req):
|
||||
return responses.pop(0) if responses else StubResponse(404)
|
||||
|
||||
req = swob.Request.blank('/v1/a/c/o', headers={
|
||||
'Range': 'bytes=1000-2000,14000-15000'})
|
||||
response_chunks = []
|
||||
with capture_http_requests(get_response) as log:
|
||||
resp = req.get_response(self.app)
|
||||
with self.assertRaises(ChunkReadTimeout):
|
||||
# note: the error is raised while the resp_iter is read
|
||||
for chunk in resp.app_iter:
|
||||
response_chunks.append(chunk)
|
||||
boundary = resp.headers['Content-Type'].split('=', 1)[1]
|
||||
self.assertEqual(response_chunks, [
|
||||
b'\r\n'.join([
|
||||
b'--' + boundary.encode('ascii'),
|
||||
b'Content-Type: application/octet-stream',
|
||||
b'Content-Range: bytes 1000-2000/16384',
|
||||
b'',
|
||||
b'',
|
||||
]),
|
||||
test_body[0:1001],
|
||||
b'\r\n',
|
||||
])
|
||||
self.assertEqual(resp.status_int, 206)
|
||||
self.assertEqual(len(log), self.policy.ec_n_unique_fragments * 2)
|
||||
log_lines = self.app.logger.get_lines_for_level('error')
|
||||
self.assertEqual(2, len(log_lines), log_lines)
|
||||
self.assertIn('Trying to read next part of EC multi-part GET',
|
||||
log_lines[0])
|
||||
self.assertIn('Trying to read during GET: ChunkReadTimeout',
|
||||
log_lines[1])
|
||||
|
||||
def test_GET_with_multirange_short_resume_body(self):
|
||||
self.app.object_chunk_size = 256
|
||||
self.app.recoverable_node_timeout = 0.01
|
||||
|
@ -58,7 +58,7 @@ from test.unit import (
|
||||
connect_tcp, readuntil2crlfs, fake_http_connect, FakeRing,
|
||||
FakeMemcache, patch_policies, write_fake_ring, mocked_http_conn,
|
||||
DEFAULT_TEST_EC_TYPE, make_timestamp_iter, skip_if_no_xattrs,
|
||||
FakeHTTPResponse)
|
||||
FakeHTTPResponse, node_error_count, node_last_error, set_node_errors)
|
||||
from test.unit.helpers import setup_servers, teardown_servers
|
||||
from swift.proxy import server as proxy_server
|
||||
from swift.proxy.controllers.obj import ReplicatedObjectController
|
||||
@ -154,31 +154,6 @@ def parse_headers_string(headers_str):
|
||||
return headers_dict
|
||||
|
||||
|
||||
def get_node_error_stats(proxy_app, ring_node):
|
||||
node_key = proxy_app.error_limiter.node_key(ring_node)
|
||||
return proxy_app.error_limiter.stats.get(node_key) or {}
|
||||
|
||||
|
||||
def node_error_count(proxy_app, ring_node):
|
||||
# Reach into the proxy's internals to get the error count for a
|
||||
# particular node
|
||||
return get_node_error_stats(proxy_app, ring_node).get('errors', 0)
|
||||
|
||||
|
||||
def node_last_error(proxy_app, ring_node):
|
||||
# Reach into the proxy's internals to get the last error for a
|
||||
# particular node
|
||||
return get_node_error_stats(proxy_app, ring_node).get('last_error')
|
||||
|
||||
|
||||
def set_node_errors(proxy_app, ring_node, value, last_error):
|
||||
# Set the node's error count to value
|
||||
node_key = proxy_app.error_limiter.node_key(ring_node)
|
||||
stats = {'errors': value,
|
||||
'last_error': last_error}
|
||||
proxy_app.error_limiter.stats[node_key] = stats
|
||||
|
||||
|
||||
@contextmanager
|
||||
def save_globals():
|
||||
orig_http_connect = getattr(swift.proxy.controllers.base, 'http_connect',
|
||||
|
Loading…
Reference in New Issue
Block a user