From ff832955fc8e0ac746a05c78c0140336707699c7 Mon Sep 17 00:00:00 2001 From: Alistair Coles Date: Fri, 18 May 2018 14:45:23 +0100 Subject: [PATCH] Improve building listings from shards When building a listing from shard containers, objects fetched from each shard range are appended to the existing listing provided their name is greater than the last entry in the current listing and less than or equal to the fetched shard range. This allows misplaced objects below the shard range to possibly be included in the listing in correct name order. Previously that behaviour only occurred if the existing listing had entries, but now it occurs even if no objects have yet been found. Change-Id: I25cab53b9aa2252c98ebcf70aafb9d39887a11f1 --- swift/proxy/controllers/container.py | 28 +++-- test/unit/proxy/controllers/test_container.py | 109 +++++++++++++++--- 2 files changed, 108 insertions(+), 29 deletions(-) diff --git a/swift/proxy/controllers/container.py b/swift/proxy/controllers/container.py index e90632a294..fbe3567515 100644 --- a/swift/proxy/controllers/container.py +++ b/swift/proxy/controllers/container.py @@ -168,30 +168,28 @@ class ContainerController(Controller): for shard_range in shard_ranges: params['limit'] = limit # Always set marker to ensure that object names less than or equal - # to those already in the listing are not fetched + # to those already in the listing are not fetched; if the listing + # is empty then the original request marker, if any, is used. This + # allows misplaced objects below the expected shard range to be + # included in the listing. if objects: last_name = objects[-1].get('name', objects[-1].get('subdir', u'')) params['marker'] = last_name.encode('utf-8') - elif reverse and marker and marker > shard_range.lower: - params['marker'] = marker - elif marker and marker <= shard_range.upper: + elif marker: params['marker'] = marker else: - params['marker'] = shard_range.upper_str if reverse \ - else shard_range.lower_str - if params['marker'] and reverse: - params['marker'] += '\x00' - - # Always set end_marker to ensure that misplaced objects beyond - # the expected shard range are not fetched + params['marker'] = '' + # Always set end_marker to ensure that misplaced objects beyond the + # expected shard range are not fetched. This prevents a misplaced + # object obscuring correctly placed objects in the next shard + # range. if end_marker and end_marker in shard_range: params['end_marker'] = end_marker + elif reverse: + params['end_marker'] = shard_range.lower_str else: - params['end_marker'] = shard_range.lower_str if reverse \ - else shard_range.upper_str - if params['end_marker'] and not reverse: - params['end_marker'] += '\x00' + params['end_marker'] = shard_range.end_marker if (shard_range.account == self.account_name and shard_range.container == self.container_name): diff --git a/test/unit/proxy/controllers/test_container.py b/test/unit/proxy/controllers/test_container.py index ae44f8b001..a73f2c3128 100644 --- a/test/unit/proxy/controllers/test_container.py +++ b/test/unit/proxy/controllers/test_container.py @@ -992,14 +992,12 @@ class TestContainerController(TestRingBase): def test_GET_sharded_container_empty_shard(self): # verify ordered listing when a shard is empty - shard_bounds = (('', 'ham'), ('ham', 'pie'), ('lemon', '')) + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', '')) shard_ranges = [ ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper) for lower, upper in shard_bounds] sr_dicts = [dict(sr) for sr in shard_ranges] sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] - # empty second shard range - sr_objs[1] = [] shard_resp_hdrs = [ {'X-Backend-Sharding-State': 'unsharded', 'X-Container-Object-Count': len(sr_objs[i]), @@ -1008,15 +1006,97 @@ class TestContainerController(TestRingBase): 'X-Container-Meta-Flavour': 'flavour%d' % i, 'X-Backend-Storage-Policy-Index': 0} for i in range(3)] + empty_shard_resp_hdrs = { + 'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': 0, + 'X-Container-Bytes-Used': 0, + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} - all_objects = [] - for objects in sr_objs: - all_objects.extend(objects) + # empty first shard range + all_objects = sr_objs[1] + sr_objs[2] size_all_objects = sum([obj['bytes'] for obj in all_objects]) - num_all_objects = len(all_objects) - limit = CONTAINER_LISTING_LIMIT root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', - 'X-Container-Object-Count': num_all_objects, + 'X-Container-Object-Count': len(all_objects), + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, [], empty_shard_resp_hdrs), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker does not advance until an object is in the listing + limit = CONTAINER_LISTING_LIMIT + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='pie\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, sr_objs[1] + sr_objs[2], expected_requests) + self.check_response(resp, root_resp_hdrs) + + # empty last shard range, reverse + all_objects = sr_objs[0] + sr_objs[1] + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': len(all_objects), + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, list(reversed(sr_dicts)), root_shard_resp_hdrs), + (200, [], empty_shard_resp_hdrs), + (200, list(reversed(sr_objs[1])), shard_resp_hdrs[1]), + (200, list(reversed(sr_objs[0])), shard_resp_hdrs[0]), + ] + limit = CONTAINER_LISTING_LIMIT + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', reverse='true')), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='pie', states='listing', + limit=str(limit), reverse='true')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham', states='listing', + limit=str(limit), reverse='true')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker=sr_objs[1][0]['name'], end_marker='', + states='listing', reverse='true', + limit=str(limit - len(sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, list(reversed(sr_objs[0] + sr_objs[1])), + expected_requests, query_string='?reverse=true', reverse=True) + self.check_response(resp, root_resp_hdrs) + + # empty second shard range + all_objects = sr_objs[0] + sr_objs[2] + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': len(all_objects), 'X-Container-Bytes-Used': size_all_objects, 'X-Container-Meta-Flavour': 'peach', 'X-Backend-Storage-Policy-Index': 0} @@ -1027,10 +1107,11 @@ class TestContainerController(TestRingBase): # status, body, headers (200, sr_dicts, root_shard_resp_hdrs), (200, sr_objs[0], shard_resp_hdrs[0]), - (200, sr_objs[1], shard_resp_hdrs[1]), + (200, [], empty_shard_resp_hdrs), (200, sr_objs[2], shard_resp_hdrs[2]) ] # NB marker always advances to last object name + limit = CONTAINER_LISTING_LIMIT expected_requests = [ # path, headers, params ('a/c', {'X-Backend-Record-Type': 'auto'}, @@ -1043,11 +1124,11 @@ class TestContainerController(TestRingBase): limit=str(limit - len(sr_objs[0])))), # 200 (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, dict(marker='h', end_marker='', states='listing', - limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + limit=str(limit - len(sr_objs[0])))) # 200 ] resp = self._check_GET_shard_listing( - mock_responses, all_objects, expected_requests) + mock_responses, sr_objs[0] + sr_objs[2], expected_requests) # root object count will overridden by actual length of listing self.check_response(resp, root_resp_hdrs) @@ -1055,7 +1136,7 @@ class TestContainerController(TestRingBase): mock_responses = [ # status, body, headers (200, sr_dicts[1:], root_shard_resp_hdrs), - (200, sr_objs[1], shard_resp_hdrs[1]), + (200, [], empty_shard_resp_hdrs), (200, sr_objs[2], shard_resp_hdrs[2]) ] # NB marker unchanged when getting from third range @@ -1081,7 +1162,7 @@ class TestContainerController(TestRingBase): mock_responses = [ # status, body, headers (200, list(reversed(sr_dicts[:2])), root_shard_resp_hdrs), - (200, list(reversed(sr_objs[1])), shard_resp_hdrs[1]), + (200, [], empty_shard_resp_hdrs), (200, list(reversed(sr_objs[0])), shard_resp_hdrs[2]) ] # NB marker unchanged when getting from first range