Improve unit tests for proxy GET ChunkReadTimeouts

Unit test changes only:

- Add tests for some resuming replicated GET scenarios.

- Add a test to cover a resuming GET's fast_forward "failing" when the
  range read is already complete.

- Add a test to verify that account and container getters use node_timeout
  while object getters use recoverable_node_timeout (see the sketch after
  this list).

- Refactor the proxy.test_server.py node read timeout tests to split the
  different scenarios into separate test methods.
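
As a quick illustration of the node_timeout behaviour that the new
test_init_node_timeout pins down (a sketch only, not the actual
GetOrHeadHandler code; pick_node_timeout is a hypothetical helper):

    # sketch: object getters read app.recoverable_node_timeout, while
    # account/container getters read app.node_timeout
    def pick_node_timeout(app, server_type):
        if server_type == 'Object':
            return app.recoverable_node_timeout
        return app.node_timeout

    # with conf = {'node_timeout': 2, 'recoverable_node_timeout': 3}:
    #   pick_node_timeout(app, 'Object')     -> 3
    #   pick_node_timeout(app, 'Account')    -> 2
    #   pick_node_timeout(app, 'Container')  -> 2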

Drive-by: remove some ring device manipulation setup that's not needed.

Change-Id: I38c7fa648492c9bd2173ecf92f89e423bee4abf3
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Alistair Coles 2023-11-02 17:32:17 +00:00
parent b1836f9368
commit a16e1f55a7
3 changed files with 251 additions and 95 deletions


@@ -1672,6 +1672,26 @@ class TestGetterSource(unittest.TestCase):
 @patch_policies([StoragePolicy(0, 'zero', True, object_ring=FakeRing())])
 class TestGetOrHeadHandler(BaseTest):
+    def test_init_node_timeout(self):
+        conf = {'node_timeout': 2, 'recoverable_node_timeout': 3}
+        app = proxy_server.Application(conf,
+                                       logger=self.logger,
+                                       account_ring=self.account_ring,
+                                       container_ring=self.container_ring)
+        req = Request.blank('/v1/a/c/o')
+        node_iter = Namespace(num_primary_nodes=3)
+        # app.recoverable_node_timeout
+        getter = GetOrHeadHandler(
+            app, req, 'Object', node_iter, None, None, {})
+        self.assertEqual(3, getter.node_timeout)
+        # app.node_timeout
+        getter = GetOrHeadHandler(
+            app, req, 'Account', node_iter, None, None, {})
+        self.assertEqual(2, getter.node_timeout)
+        getter = GetOrHeadHandler(
+            app, req, 'Container', node_iter, None, None, {})
+        self.assertEqual(2, getter.node_timeout)
+
     def test_disconnected_logging(self):
         req = Request.blank('/v1/a/c/o')
         headers = {'content-type': 'text/plain'}


@@ -40,7 +40,7 @@ else:
 import swift
 from swift.common import utils, swob, exceptions
 from swift.common.exceptions import ChunkWriteTimeout, ShortReadError, \
-    ChunkReadTimeout
+    ChunkReadTimeout, RangeAlreadyComplete
 from swift.common.utils import Timestamp, list_from_csv, md5, FileLikeIter, \
     ShardRange, Namespace, NamespaceBoundList
 from swift.proxy import server as proxy_server
@@ -1865,6 +1865,73 @@ class TestReplicatedObjController(CommonObjectControllerMixin,
         for line in error_lines:
             self.assertIn('Trying to read object during GET ', line)
 
+    def test_GET_unable_to_resume(self):
+        self.app.recoverable_node_timeout = 0.01
+        self.app.client_timeout = 0.1
+        self.app.object_chunk_size = 10
+        resp_body = b'length 8'
+        etag = md5(resp_body, usedforsecurity=False).hexdigest()
+        headers = {
+            'Etag': etag,
+            'Content-Type': b'plain/text',
+            'Content-Length': len(resp_body),
+            'X-Timestamp': Timestamp(self.ts()).normal,
+        }
+        # make all responses slow...
+        responses = [
+            StubResponse(200, resp_body, headers, slowdown=0.1),
+            StubResponse(200, resp_body, headers, slowdown=0.1),
+            StubResponse(200, resp_body, headers, slowdown=0.1),
+        ]
+
+        def get_response(req):
+            return responses.pop(0) if responses else StubResponse(404)
+
+        req = swob.Request.blank('/v1/a/c/o')
+        with capture_http_requests(get_response):
+            resp = req.get_response(self.app)
+            with self.assertRaises(ChunkReadTimeout):
+                _ = resp.body
+        self.assertEqual(resp.status_int, 200)
+        self.assertEqual(etag, resp.headers.get('ETag'))
+        error_lines = self.app.logger.get_lines_for_level('error')
+        self.assertEqual(3, len(error_lines))
+        for line in error_lines[:3]:
+            self.assertIn('Trying to read object during GET', line)
+
+    def test_GET_newest_will_not_resume(self):
+        self.app.recoverable_node_timeout = 0.01
+        self.app.client_timeout = 0.1
+        self.app.object_chunk_size = 10
+        resp_body = b'length 8'
+        etag = md5(resp_body, usedforsecurity=False).hexdigest()
+        headers = {
+            'Etag': etag,
+            'Content-Type': b'plain/text',
+            'Content-Length': len(resp_body),
+            'X-Timestamp': Timestamp(self.ts()).normal,
+        }
+        # make all responses slow...
+        responses = [
+            StubResponse(200, resp_body, headers, slowdown=0.1),
+        ]
+
+        def get_response(req):
+            return responses.pop(0) if responses else StubResponse(404)
+
+        req = swob.Request.blank('/v1/a/c/o', headers={'X-Newest': 'true'})
+        with capture_http_requests(get_response):
+            resp = req.get_response(self.app)
+            with self.assertRaises(ChunkReadTimeout):
+                _ = resp.body
+        self.assertEqual(resp.status_int, 200)
+        self.assertEqual(etag, resp.headers.get('ETag'))
+        error_lines = self.app.logger.get_lines_for_level('error')
+        self.assertEqual(0, len(error_lines))
+
     def test_GET_resuming_ignores_416(self):
         # verify that a resuming getter will not try to use the content of a
         # 416 response (because it's etag will mismatch that from the first
@@ -5511,7 +5578,7 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
         for line in self.logger.logger.records['ERROR']:
             self.assertIn(req.headers['x-trans-id'], line)
 
-    def test_GET_read_timeout_fails(self):
+    def _do_test_GET_read_timeout_fast_forward_fails(self, error):
         segment_size = self.policy.ec_segment_size
         test_data = (b'test' * segment_size)[:-333]
         etag = md5(test_data).hexdigest()
@@ -5538,10 +5605,18 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
                               headers=headers), \
                 mock.patch(
                     'swift.proxy.controllers.obj.ECFragGetter.fast_forward',
-                    side_effect=ValueError()):
+                    side_effect=error):
             resp = req.get_response(self.app)
         self.assertEqual(resp.status_int, 200)
         self.assertNotEqual(md5(resp.body).hexdigest(), etag)
+        for line in self.logger.logger.records['ERROR'] + \
+                self.logger.logger.records['WARNING']:
+            self.assertIn(req.headers['x-trans-id'], line)
+
+    def test_GET_read_timeout_fast_forward_fails(self):
+        self._do_test_GET_read_timeout_fast_forward_fails(ValueError())
         error_lines = self.logger.get_lines_for_level('error')
         self.assertEqual(2, len(error_lines))
         self.assertIn('Unable to fast forward', error_lines[0])
@@ -5551,9 +5626,21 @@ class TestECObjController(ECObjectControllerMixin, unittest.TestCase):
         self.assertIn(
             'Un-recoverable fragment rebuild. Only received 9/10 fragments',
             warning_lines[0])
-        for line in self.logger.logger.records['ERROR'] + \
-                self.logger.logger.records['WARNING']:
-            self.assertIn(req.headers['x-trans-id'], line)
+
+    def test_GET_read_timeout_fast_forward_range_complete(self):
+        self._do_test_GET_read_timeout_fast_forward_fails(
+            RangeAlreadyComplete())
+        error_lines = self.logger.get_lines_for_level('error')
+        self.assertEqual(0, len(error_lines))
+        # the test is a little bogus - presumably if the range was complete
+        # then the fragment would be ok to rebuild. But the test pretends range
+        # was complete without actually feeding the bytes to the getter...
+        warning_lines = self.logger.get_lines_for_level('warning')
+        self.assertEqual(1, len(warning_lines))
+        self.assertIn(
+            'Un-recoverable fragment rebuild. Only received 9/10 fragments',
+            warning_lines[0])
 
     def test_GET_one_short_fragment_archive(self):
         # verify that a warning is logged when one fragment archive returns
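
To give context for test_GET_read_timeout_fast_forward_range_complete above:
when a getter hits a ChunkReadTimeout it tries to resume from another source,
fast-forwarding past the bytes already handed to the client, and the test
asserts that a RangeAlreadyComplete from fast_forward() ends the GET quietly
rather than being logged as an error. The snippet below is only a schematic
sketch of that flow; bytes_served and get_next_source are illustrative names,
not the proxy's real internals, and it assumes the swift package is importable.

    # schematic sketch only - not the actual Swift proxy code
    from swift.common.exceptions import ChunkReadTimeout, RangeAlreadyComplete

    def iter_body_with_resume(getter, chunk_iter):
        """Yield body chunks, resuming from another source on timeout."""
        while True:
            try:
                for chunk in chunk_iter:
                    getter.bytes_served += len(chunk)  # hypothetical counter
                    yield chunk
                return
            except ChunkReadTimeout:
                try:
                    # skip past the bytes the client has already received
                    getter.fast_forward(getter.bytes_served)
                except RangeAlreadyComplete:
                    # the requested range is fully served: stop, no error
                    return
                # hypothetical helper returning chunks from the next source
                chunk_iter = getter.get_next_source()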


@@ -5425,99 +5425,148 @@ class TestReplicatedObjectController(
             resp = req.get_response(self.app)
             self.assertEqual(resp.status_int, 201)
 
-    def test_node_read_timeout(self):
+    def _do_test_node_read_timeout(self, slow=None, etags=None):
+        # this helper gets as far as making the first backend request,
+        # returning 200, but the response body isn't read - callers read the
+        # body and trigger any resuming GETs.
+        object_ring = self.app.get_object_ring(None)
+        # there are only 3 devices so no handoff requests expected
+        self.assertEqual(3, len(object_ring.devs))
+        self.app.recoverable_node_timeout = 0.2
+        self.logger.clear()
+        req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'})
+        self.app.update_request(req)
+        request_log = []
+
+        # we don't use mocked_http_conn because we return before the code_iter
+        # is empty and would get a "left over status" AssertionError
+        def capture_req(ipaddr, port, device, partition, method, path,
+                        headers=None, query_string=None):
+            request_log.append((method, path))
+
+        # account HEAD, container HEAD, obj GET x 3
+        set_http_connect(200, 200, 200, 200, 200, body=b'lalala',
+                         slow=slow, etags=etags, give_connect=capture_req)
+        resp = req.get_response(self.app)
+        self.assertEqual(200, resp.status_int)
+        # at this point we've only made the request to the first object
+        self.assertEqual([
+            ('HEAD', '/a'),
+            ('HEAD', '/a/c'),
+            ('GET', '/a/c/o'),
+        ], request_log)
+        return resp, request_log
+
+    def test_node_read_timeout_retry_three_time_out(self):
         with save_globals():
-            self.app.account_ring.get_nodes('account')
-            for dev in self.app.account_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            self.app.container_ring.get_nodes('account')
-            for dev in self.app.container_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            object_ring = self.app.get_object_ring(None)
-            object_ring.get_nodes('account')
-            for dev in object_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'})
-            self.app.update_request(req)
-            set_http_connect(200, 200, 200, slow=0.1)
-            req.sent_size = 0
-            resp = req.get_response(self.app)
-            got_exc = False
-            try:
-                resp.body
-            except ChunkReadTimeout:
-                got_exc = True
-            self.assertFalse(got_exc)
-            self.app.recoverable_node_timeout = 0.1
-            set_http_connect(200, 200, 200, slow=1.0)
-            resp = req.get_response(self.app)
+            # all obj nodes time out
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0.0, 0.0, 1.0, 1.0, 1.0])
             with self.assertRaises(ChunkReadTimeout):
                 resp.body
+            # two nodes left to attempt resume, both timeout reading body
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            self.assertEqual(3, len(error_lines))
+            for line in error_lines[:3]:
+                self.assertIn('Trying to read object during GET', line)
 
-    def test_node_read_timeout_retry(self):
+    def test_node_read_timeout_retry_only_first_time_out(self):
         with save_globals():
-            self.app.account_ring.get_nodes('account')
-            for dev in self.app.account_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            self.app.container_ring.get_nodes('account')
-            for dev in self.app.container_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            object_ring = self.app.get_object_ring(None)
-            object_ring.get_nodes('account')
-            for dev in object_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            req = Request.blank('/v1/a/c/o', environ={
-                'REQUEST_METHOD': 'GET', 'swift.cache': FakeMemcache()})
-            self.app.update_request(req)
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0])
+            self.assertEqual([], req_log[3:])  # sanity
+            # we get the body
+            self.assertEqual(resp.body, b'lalala')
+            # only one retry
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            self.assertEqual(1, len(error_lines))
+            self.assertIn('Trying to read object during GET', error_lines[0])
-            self.app.recoverable_node_timeout = 0.1
-            set_http_connect(200, 200, 200, slow=[1.0, 1.0, 1.0])
-            resp = req.get_response(self.app)
-            with self.assertRaises(ChunkReadTimeout):
-                resp.body
-            set_http_connect(200, 200, 200, body=b'lalala',
-                             slow=[1.0, 1.0])
-            resp = req.get_response(self.app)
-            self.assertEqual(resp.body, b'lalala')
-            set_http_connect(200, 200, 200, body=b'lalala',
-                             slow=[1.0, 1.0], etags=['a', 'a', 'a'])
-            resp = req.get_response(self.app)
-            self.assertEqual(resp.body, b'lalala')
-            set_http_connect(200, 200, 200, body=b'lalala',
-                             slow=[1.0, 1.0], etags=['a', 'b', 'a'])
-            resp = req.get_response(self.app)
-            self.assertEqual(resp.body, b'lalala')
-            set_http_connect(200, 200, 200, body=b'lalala',
-                             slow=[1.0, 1.0], etags=['a', 'b', 'b'])
-            resp = req.get_response(self.app)
+
+    def test_node_read_timeout_retry_two_time_out(self):
+        with save_globals():
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0, 1.0])
+            self.assertEqual([], req_log[3:])  # sanity
+            # first 2 obj nodes time out
+            self.assertEqual(resp.body, b'lalala')
+            # ... so two retries
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            self.assertEqual(2, len(error_lines))
+            for line in error_lines[:2]:
+                self.assertIn('Trying to read object during GET', line)
+
+    def test_node_read_timeout_retry_first_two_time_out_etags_match(self):
+        with save_globals():
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0, 1.0],
+                etags=['account', 'container', 'a', 'a', 'a'])
+            self.assertEqual([], req_log[3:])  # sanity
+            # we get the body
+            self.assertEqual(resp.body, b'lalala')
+            # this is the same as above test, but explicit etag match
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            self.assertEqual(2, len(error_lines))
+            for line in error_lines[:2]:
+                self.assertIn('Trying to read object during GET', line)
+
+    def test_node_read_timeout_retry_one_time_out_one_etag_mismatch(self):
+        with save_globals():
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0],
+                etags=['account', 'container', 'a', 'b', 'a'])
+            self.assertEqual([], req_log[3:])  # sanity
+            self.assertEqual(resp.body, b'lalala')
+            # N.B. even if you break the proxy to return second response, this
+            # header was sent with the original 200 ok
+            self.assertEqual(resp.etag, 'a')
+            # but we can know we got the right response because we had to retry
+            # twice because of etag
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            # only one timeout error for the first source because the second
+            # source with wrong etag is not read
+            self.assertEqual(1, len(error_lines))
+            self.assertIn('Trying to read object during GET', error_lines[0])
+
+    def test_node_read_timeout_retry_one_time_out_two_etag_mismatch(self):
+        with save_globals():
+            resp, req_log = self._do_test_node_read_timeout(
+                slow=[0, 0, 1.0],
+                etags=['account', 'container', 'a', 'b', 'b'])
             with self.assertRaises(ChunkReadTimeout):
                 resp.body
+            self.assertEqual([
+                ('GET', '/a/c/o'),
+                ('GET', '/a/c/o'),
+            ], req_log[3:])
+            error_lines = self.logger.get_lines_for_level('error')
+            # only one timeout error for the first source because the second
+            # and third sources with wrong etag are not read
+            self.assertEqual(1, len(error_lines))
+            self.assertIn('Trying to read object during GET', error_lines[0])
 
     def test_node_write_timeout(self):
         with save_globals():
-            self.app.account_ring.get_nodes('account')
-            for dev in self.app.account_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            self.app.container_ring.get_nodes('account')
-            for dev in self.app.container_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
-            object_ring = self.app.get_object_ring(None)
-            object_ring.get_nodes('account')
-            for dev in object_ring.devs:
-                dev['ip'] = '127.0.0.1'
-                dev['port'] = 1
             req = Request.blank('/v1/a/c/o',
                                 environ={'REQUEST_METHOD': 'PUT'},
                                 headers={'Content-Length': '4',
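
A note on the refactored tests above: the slow list passed to
_do_test_node_read_timeout lines up position-for-position with the backend
connections that set_http_connect fakes (per the helper's own comment:
account HEAD, container HEAD, then the object GETs). For illustration only:

    # how slow=[0, 0, 1.0, 1.0] reads in the tests above
    slow = [
        0,    # account HEAD - responds promptly
        0,    # container HEAD - responds promptly
        1.0,  # first object GET source - times out while the body is read
        1.0,  # second object GET source - times out, forcing another resume
    ]         # the unlisted third object GET is not slowed and serves the body
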
@@ -11131,18 +11180,18 @@ class TestContainerController(unittest.TestCase):
             self.assertEqual(timestamp, timestamps[0])
             self.assertTrue(re.match(r'[0-9]{10}\.[0-9]{5}', timestamp))
 
-    def test_node_read_timeout_retry_to_container(self):
+    def test_node_read_timeout_no_retry_to_container(self):
         with save_globals():
             req = Request.blank('/v1/a/c', environ={'REQUEST_METHOD': 'GET'})
             self.app.node_timeout = 0.1
-            set_http_connect(200, 200, 200, body='abcdef', slow=[1.0, 1.0])
+            # account HEAD, container GET
+            set_http_connect(200, 200, body='abcdef', slow=[0.0, 1.0])
             resp = req.get_response(self.app)
-            got_exc = False
-            try:
+            self.assertEqual(200, resp.status_int)
+            with self.assertRaises(ChunkReadTimeout):
                 resp.body
-            except ChunkReadTimeout:
-                got_exc = True
-            self.assertTrue(got_exc)
+            error_lines = self.app.logger.get_lines_for_level('error')
+            self.assertEqual(0, len(error_lines))
 
 
 @patch_policies([StoragePolicy(0, 'zero', True, object_ring=FakeRing())])