slo: Better handle non-manifest responses when refetching manifest

Previously, we never checked whether the response we get when refetching
is even successful, much less whether it's still coming from an SLO.

Now, if the refetched data is newer, act on it. If it's older, 503.

Closes-Bug: #1837270
Change-Id: I106b94c77da220c762869aa800c31b87c3dffeeb
This commit is contained in:
Tim Burke 2019-07-19 21:24:47 -07:00
parent 2926fc96a3
commit ef5a37c2bf
2 changed files with 196 additions and 5 deletions

View File

@ -330,12 +330,14 @@ from swift.common.middleware.listing_formats import \
from swift.common.swob import Request, HTTPBadRequest, HTTPServerError, \
HTTPMethodNotAllowed, HTTPRequestEntityTooLarge, HTTPLengthRequired, \
HTTPOk, HTTPPreconditionFailed, HTTPException, HTTPNotFound, \
HTTPUnauthorized, HTTPConflict, HTTPUnprocessableEntity, Response, Range, \
HTTPUnauthorized, HTTPConflict, HTTPUnprocessableEntity, \
HTTPServiceUnavailable, Response, Range, \
RESPONSE_REASONS, str_to_wsgi, wsgi_to_str, wsgi_quote
from swift.common.utils import get_logger, config_true_value, \
get_valid_utf8_str, override_bytes_from_content_type, split_path, \
register_swift_info, RateLimitedIterator, quote, close_if_possible, \
closing_if_possible, LRUCache, StreamingPile, strict_b64decode
closing_if_possible, LRUCache, StreamingPile, strict_b64decode, \
Timestamp
from swift.common.request_helpers import SegmentedIterable, \
get_sys_meta_prefix, update_etag_is_at_header, resolve_etag_is_at_header
from swift.common.constraints import check_utf8, MAX_BUFFERED_SLO_SEGMENTS
@ -734,7 +736,7 @@ class SloGetContext(WSGIContext):
content_range = value
break
# e.g. Content-Range: bytes 0-14289/14290
match = re.match('bytes (\d+)-(\d+)/(\d+)$', content_range)
match = re.match(r'bytes (\d+)-(\d+)/(\d+)$', content_range)
if not match:
# Malformed or missing, so we don't know what we got.
return True
@ -767,7 +769,7 @@ class SloGetContext(WSGIContext):
resp_iter = self._app_call(req.environ)
# make sure this response is for a static large object manifest
slo_marker = slo_etag = slo_size = None
slo_marker = slo_etag = slo_size = slo_timestamp = None
for header, value in self._response_headers:
header = header.lower()
if header == SYSMETA_SLO_ETAG:
@ -777,8 +779,10 @@ class SloGetContext(WSGIContext):
elif (header == 'x-static-large-object' and
config_true_value(value)):
slo_marker = value
elif header == 'x-backend-timestamp':
slo_timestamp = value
if slo_marker and slo_etag and slo_size:
if slo_marker and slo_etag and slo_size and slo_timestamp:
break
if not slo_marker:
@ -836,6 +840,35 @@ class SloGetContext(WSGIContext):
headers={'x-auth-token': req.headers.get('x-auth-token')},
agent='%(orig)s SLO MultipartGET', swift_source='SLO')
resp_iter = self._app_call(get_req.environ)
slo_marker = config_true_value(self._response_header_value(
'x-static-large-object'))
if not slo_marker: # will also catch non-2xx responses
got_timestamp = self._response_header_value(
'x-backend-timestamp') or '0'
if Timestamp(got_timestamp) >= Timestamp(slo_timestamp):
# We've got a newer response available, so serve that.
# Note that if there's data, it's going to be a 200 now,
# not a 206, and we're not going to drop bytes in the
# proxy on the client's behalf. Fortunately, the RFC is
# pretty forgiving for a server; there's no guarantee that
# a Range header will be respected.
resp = Response(
status=self._response_status,
headers=self._response_headers,
app_iter=resp_iter,
request=req,
conditional_etag=resolve_etag_is_at_header(
req, self._response_headers),
conditional_response=is_success(
int(self._response_status[:3])))
return resp(req.environ, start_response)
else:
# We saw newer data that indicated it's an SLO, but
# couldn't fetch the whole thing; 503 seems reasonable?
close_if_possible(resp_iter)
raise HTTPServiceUnavailable(request=req)
# NB: we might have gotten an out-of-date manifest -- that's OK;
# we'll just try to serve the old data
# Any Content-Range from a manifest is almost certainly wrong for the
# full large object.

View File

@ -2402,6 +2402,164 @@ class TestSloGetManifest(SloTestCase):
('GET',
'/v1/AUTH_test/gettest/big_seg?multipart-manifest=get')])
def test_range_get_beyond_manifest_refetch_fails(self):
big = 'e' * 1024 * 1024
big_etag = md5hex(big)
big_manifest = json.dumps(
[{'name': '/gettest/big_seg', 'hash': big_etag,
'bytes': 1024 * 1024, 'content_type': 'application/foo'}])
self.app.register_responses(
'GET', '/v1/AUTH_test/gettest/big_manifest',
[(swob.HTTPOk, {'Content-Type': 'application/octet-stream',
'X-Static-Large-Object': 'true',
'X-Backend-Timestamp': '1234',
'Etag': md5hex(big_manifest)},
big_manifest),
(swob.HTTPNotFound, {}, None)])
req = Request.blank(
'/v1/AUTH_test/gettest/big_manifest',
environ={'REQUEST_METHOD': 'GET'},
headers={'Range': 'bytes=100000-199999'})
status, headers, body = self.call_slo(req)
headers = HeaderKeyDict(headers)
self.assertEqual(status, '503 Service Unavailable')
self.assertNotIn('X-Static-Large-Object', headers)
self.assertEqual(self.app.calls, [
# has Range header, gets 416
('GET', '/v1/AUTH_test/gettest/big_manifest'),
# retry the first one
('GET', '/v1/AUTH_test/gettest/big_manifest'),
])
def test_range_get_beyond_manifest_refetch_finds_old(self):
big = 'e' * 1024 * 1024
big_etag = md5hex(big)
big_manifest = json.dumps(
[{'name': '/gettest/big_seg', 'hash': big_etag,
'bytes': 1024 * 1024, 'content_type': 'application/foo'}])
self.app.register_responses(
'GET', '/v1/AUTH_test/gettest/big_manifest',
[(swob.HTTPOk, {'Content-Type': 'application/octet-stream',
'X-Static-Large-Object': 'true',
'X-Backend-Timestamp': '1234',
'Etag': md5hex(big_manifest)},
big_manifest),
(swob.HTTPOk, {'X-Backend-Timestamp': '1233'}, [b'small body'])])
req = Request.blank(
'/v1/AUTH_test/gettest/big_manifest',
environ={'REQUEST_METHOD': 'GET'},
headers={'Range': 'bytes=100000-199999'})
status, headers, body = self.call_slo(req)
headers = HeaderKeyDict(headers)
self.assertEqual(status, '503 Service Unavailable')
self.assertNotIn('X-Static-Large-Object', headers)
self.assertEqual(self.app.calls, [
# has Range header, gets 416
('GET', '/v1/AUTH_test/gettest/big_manifest'),
# retry the first one
('GET', '/v1/AUTH_test/gettest/big_manifest'),
])
def test_range_get_beyond_manifest_refetch_small_non_slo(self):
big = 'e' * 1024 * 1024
big_etag = md5hex(big)
big_manifest = json.dumps(
[{'name': '/gettest/big_seg', 'hash': big_etag,
'bytes': 1024 * 1024, 'content_type': 'application/foo'}])
self.app.register_responses(
'GET', '/v1/AUTH_test/gettest/big_manifest',
[(swob.HTTPOk, {'Content-Type': 'application/octet-stream',
'X-Static-Large-Object': 'true',
'X-Backend-Timestamp': '1234',
'Etag': md5hex(big_manifest)},
big_manifest),
(swob.HTTPOk, {'X-Backend-Timestamp': '1235'}, [b'small body'])])
req = Request.blank(
'/v1/AUTH_test/gettest/big_manifest',
environ={'REQUEST_METHOD': 'GET'},
headers={'Range': 'bytes=100000-199999'})
status, headers, body = self.call_slo(req)
headers = HeaderKeyDict(headers)
self.assertEqual(status, '416 Requested Range Not Satisfiable')
self.assertNotIn('X-Static-Large-Object', headers)
self.assertEqual(self.app.calls, [
# has Range header, gets 416
('GET', '/v1/AUTH_test/gettest/big_manifest'),
# retry the first one
('GET', '/v1/AUTH_test/gettest/big_manifest'),
])
def test_range_get_beyond_manifest_refetch_big_non_slo(self):
big = 'e' * 1024 * 1024
big_etag = md5hex(big)
big_manifest = json.dumps(
[{'name': '/gettest/big_seg', 'hash': big_etag,
'bytes': 1024 * 1024, 'content_type': 'application/foo'}])
self.app.register_responses(
'GET', '/v1/AUTH_test/gettest/big_manifest',
[(swob.HTTPOk, {'Content-Type': 'application/octet-stream',
'X-Static-Large-Object': 'true',
'X-Backend-Timestamp': '1234',
'Etag': md5hex(big_manifest)},
big_manifest),
(swob.HTTPOk, {'X-Backend-Timestamp': '1235'},
[b'x' * 1024 * 1024])])
req = Request.blank(
'/v1/AUTH_test/gettest/big_manifest',
environ={'REQUEST_METHOD': 'GET'},
headers={'Range': 'bytes=100000-199999'})
status, headers, body = self.call_slo(req)
headers = HeaderKeyDict(headers)
self.assertEqual(status, '200 OK') # NOT 416 or 206!
self.assertNotIn('X-Static-Large-Object', headers)
self.assertEqual(len(body), 1024 * 1024)
self.assertEqual(body, b'x' * 1024 * 1024)
self.assertEqual(self.app.calls, [
# has Range header, gets 416
('GET', '/v1/AUTH_test/gettest/big_manifest'),
# retry the first one
('GET', '/v1/AUTH_test/gettest/big_manifest'),
])
def test_range_get_beyond_manifest_refetch_tombstone(self):
big = 'e' * 1024 * 1024
big_etag = md5hex(big)
big_manifest = json.dumps(
[{'name': '/gettest/big_seg', 'hash': big_etag,
'bytes': 1024 * 1024, 'content_type': 'application/foo'}])
self.app.register_responses(
'GET', '/v1/AUTH_test/gettest/big_manifest',
[(swob.HTTPOk, {'Content-Type': 'application/octet-stream',
'X-Static-Large-Object': 'true',
'X-Backend-Timestamp': '1234',
'Etag': md5hex(big_manifest)},
big_manifest),
(swob.HTTPNotFound, {'X-Backend-Timestamp': '1345'}, None)])
req = Request.blank(
'/v1/AUTH_test/gettest/big_manifest',
environ={'REQUEST_METHOD': 'GET'},
headers={'Range': 'bytes=100000-199999'})
status, headers, body = self.call_slo(req)
headers = HeaderKeyDict(headers)
self.assertEqual(status, '404 Not Found')
self.assertNotIn('X-Static-Large-Object', headers)
self.assertEqual(self.app.calls, [
# has Range header, gets 416
('GET', '/v1/AUTH_test/gettest/big_manifest'),
# retry the first one
('GET', '/v1/AUTH_test/gettest/big_manifest'),
])
def test_range_get_bogus_content_range(self):
# Just a little paranoia; Swift currently sends back valid
# Content-Range headers, but if somehow someone sneaks an invalid one