From 25463b253f8d5dd021042a4b2fa7c4f1d359ae80 Mon Sep 17 00:00:00 2001
From: Tim Burke
Date: Mon, 11 Apr 2022 11:40:33 -0700
Subject: [PATCH] sharding: Skip shards that can't include any new subdir entries

Change-Id: I08cc2c0bfe803e3cec1e6ada10af4d725359e5e8
---
 swift/proxy/controllers/container.py          |  21 +++-
 test/unit/proxy/controllers/test_container.py | 101 ++++++++++++++++--
 2 files changed, 109 insertions(+), 13 deletions(-)

diff --git a/swift/proxy/controllers/container.py b/swift/proxy/controllers/container.py
index 4ac00d0106..2cae042c3e 100644
--- a/swift/proxy/controllers/container.py
+++ b/swift/proxy/controllers/container.py
@@ -31,7 +31,7 @@ from swift.proxy.controllers.base import Controller, delay_denial, \
     get_cache_key, headers_from_container_info, update_headers
 from swift.common.storage_policy import POLICIES
 from swift.common.swob import HTTPBadRequest, HTTPForbidden, HTTPNotFound, \
-    HTTPServiceUnavailable, str_to_wsgi, wsgi_to_str, bytes_to_wsgi, Response
+    HTTPServiceUnavailable, str_to_wsgi, wsgi_to_str, Response
 
 
 class ContainerController(Controller):
@@ -395,10 +395,18 @@ class ContainerController(Controller):
             # is empty then the original request marker, if any, is used. This
             # allows misplaced objects below the expected shard range to be
             # included in the listing.
+            last_name = ''
+            last_name_was_subdir = False
             if objects:
-                last_name = objects[-1].get('name',
-                                            objects[-1].get('subdir', u''))
-                params['marker'] = bytes_to_wsgi(last_name.encode('utf-8'))
+                last_name_was_subdir = 'subdir' in objects[-1]
+                if last_name_was_subdir:
+                    last_name = objects[-1]['subdir']
+                else:
+                    last_name = objects[-1]['name']
+
+                if six.PY2:
+                    last_name = last_name.encode('utf8')
+                params['marker'] = str_to_wsgi(last_name)
             elif marker:
                 params['marker'] = str_to_wsgi(marker)
             else:
@@ -433,6 +441,11 @@ class ContainerController(Controller):
                     if just_past < shard_range:
                         continue
 
+            if last_name_was_subdir and str(
+                shard_range.lower if reverse else shard_range.upper
+            ).startswith(last_name):
+                continue
+
             self.logger.debug(
                 'Getting listing part %d from shard %s %s with %s',
                 i, shard_range, shard_range.name, headers)
diff --git a/test/unit/proxy/controllers/test_container.py b/test/unit/proxy/controllers/test_container.py
index 27e888e1f1..87a135b877 100644
--- a/test/unit/proxy/controllers/test_container.py
+++ b/test/unit/proxy/controllers/test_container.py
@@ -1030,9 +1030,11 @@ class TestContainerController(TestRingBase):
              % ([200, 200, 503],)], errors[-1:])
 
     def test_GET_sharded_container_with_delimiter(self):
-        shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', ''))
+        shard_bounds = (('', 'ha/ppy'), ('ha/ppy', 'ha/ptic'),
+                        ('ha/ptic', 'ham'), ('ham', 'pie'), ('pie', ''))
         shard_ranges = [
-            ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper)
+            ShardRange('.shards_a/c_%s' % upper.replace('/', ''),
+                       Timestamp.now(), lower, upper)
             for lower, upper in shard_bounds]
         sr_dicts = [dict(sr) for sr in shard_ranges]
         shard_resp_hdrs = {'X-Backend-Sharding-State': 'unsharded',
@@ -1056,7 +1058,7 @@ class TestContainerController(TestRingBase):
                     'content_type': 'text/plain',
                     'deleted': 0,
                     'last_modified': next(self.ts_iter).isoformat}
-        sr_2_obj = {'name': 'pumpkin',
+        sr_5_obj = {'name': 'pumpkin',
                     'bytes': 1,
                     'hash': 'hash',
                     'content_type': 'text/plain',
@@ -1068,28 +1070,109 @@ class TestContainerController(TestRingBase):
             (200, sr_dicts, root_shard_resp_hdrs),
             (200, [sr_0_obj, subdir], shard_resp_hdrs),
             (200, [], shard_resp_hdrs),
-            (200, [sr_2_obj], shard_resp_hdrs)
+            (200, [], shard_resp_hdrs),
+            (200, [sr_5_obj], shard_resp_hdrs)
         ]
         expected_requests = [
             ('a/c', {'X-Backend-Record-Type': 'auto'},
              dict(states='listing', delimiter='/')),  # 200
             (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
-             dict(marker='', end_marker='ham\x00', limit=str(limit),
+             dict(marker='', end_marker='ha/ppy\x00', limit=str(limit),
                   states='listing', delimiter='/')),  # 200
-            (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+            (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+             dict(marker='ha/', end_marker='ham\x00', states='listing',
+                  limit=str(limit - 2), delimiter='/')),  # 200
+            (shard_ranges[3].name, {'X-Backend-Record-Type': 'auto'},
              dict(marker='ha/', end_marker='pie\x00', states='listing',
                   limit=str(limit - 2), delimiter='/')),  # 200
-            (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
+            (shard_ranges[4].name, {'X-Backend-Record-Type': 'auto'},
              dict(marker='ha/', end_marker='', states='listing',
-                  limit=str(limit - 2), delimiter='/'))  # 200
+                  limit=str(limit - 2), delimiter='/')),  # 200
         ]
 
-        expected_objects = [sr_0_obj, subdir, sr_2_obj]
+        expected_objects = [sr_0_obj, subdir, sr_5_obj]
         resp = self._check_GET_shard_listing(
             mock_responses, expected_objects, expected_requests,
             query_string='?delimiter=/')
         self.check_response(resp, root_resp_hdrs)
 
+    def test_GET_sharded_container_with_delimiter_and_reverse(self):
+        shard_points = ('', 'ha.d', 'ha/ppy', 'ha/ptic', 'ham', 'pie', '')
+        shard_bounds = tuple(zip(shard_points, shard_points[1:]))
+        shard_ranges = [
+            ShardRange('.shards_a/c_%s' % upper.replace('/', ''),
+                       Timestamp.now(), lower, upper)
+            for lower, upper in shard_bounds]
+        sr_dicts = [dict(sr) for sr in shard_ranges]
+        shard_resp_hdrs = {'X-Backend-Sharding-State': 'unsharded',
+                           'X-Container-Object-Count': 2,
+                           'X-Container-Bytes-Used': 4,
+                           'X-Backend-Storage-Policy-Index': 0}
+
+        limit = CONTAINER_LISTING_LIMIT
+        root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+                          'X-Backend-Timestamp': '99',
+                          # pretend root object stats are not yet updated
+                          'X-Container-Object-Count': 6,
+                          'X-Container-Bytes-Used': 12,
+                          'X-Backend-Storage-Policy-Index': 0}
+        root_shard_resp_hdrs = dict(root_resp_hdrs)
+        root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+        sr_0_obj = {'name': 'apple',
+                    'bytes': 1,
+                    'hash': 'hash',
+                    'content_type': 'text/plain',
+                    'deleted': 0,
+                    'last_modified': next(self.ts_iter).isoformat}
+        sr_1_obj = {'name': 'ha.ggle',
+                    'bytes': 1,
+                    'hash': 'hash',
+                    'content_type': 'text/plain',
+                    'deleted': 0,
+                    'last_modified': next(self.ts_iter).isoformat}
+        sr_5_obj = {'name': 'pumpkin',
+                    'bytes': 1,
+                    'hash': 'hash',
+                    'content_type': 'text/plain',
+                    'deleted': 0,
+                    'last_modified': next(self.ts_iter).isoformat}
+        subdir = {'subdir': 'ha/'}
+        mock_responses = [
+            # status, body, headers
+            (200, list(reversed(sr_dicts)), root_shard_resp_hdrs),
+            (200, [sr_5_obj], shard_resp_hdrs),
+            (200, [], shard_resp_hdrs),
+            (200, [subdir], shard_resp_hdrs),
+            (200, [sr_1_obj], shard_resp_hdrs),
+            (200, [sr_0_obj], shard_resp_hdrs),
+        ]
+        expected_requests = [
+            ('a/c', {'X-Backend-Record-Type': 'auto'},
+             dict(states='listing', delimiter='/', reverse='on')),  # 200
+            (shard_ranges[5].name, {'X-Backend-Record-Type': 'auto'},
+             dict(marker='', end_marker='pie', states='listing',
+                  limit=str(limit), delimiter='/', reverse='on')),  # 200
+            (shard_ranges[4].name, {'X-Backend-Record-Type': 'auto'},
+             dict(marker='pumpkin', end_marker='ham', states='listing',
+                  limit=str(limit - 1), delimiter='/', reverse='on')),  # 200
+            (shard_ranges[3].name, {'X-Backend-Record-Type': 'auto'},
+             dict(marker='pumpkin', end_marker='ha/ptic', states='listing',
+                  limit=str(limit - 1), delimiter='/', reverse='on')),  # 200
+            (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
+             dict(marker='ha/', end_marker='ha.d', limit=str(limit - 2),
+                  states='listing', delimiter='/', reverse='on')),  # 200
+            (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
+             dict(marker='ha.ggle', end_marker='', limit=str(limit - 3),
+                  states='listing', delimiter='/', reverse='on')),  # 200
+        ]
+
+        expected_objects = [sr_5_obj, subdir, sr_1_obj, sr_0_obj]
+        resp = self._check_GET_shard_listing(
+            mock_responses, expected_objects, expected_requests,
+            query_string='?delimiter=/&reverse=on', reverse=True)
+        self.check_response(resp, root_resp_hdrs)
+
     def test_GET_sharded_container_shard_redirects_to_root(self):
         # check that if the root redirects listing to a shard, but the shard
         # returns the root shard (e.g. it was the final shard to shrink into