Merge "Proxy: restructure cached listing shard ranges"

Zuul 2023-04-28 22:44:26 +00:00 committed by Gerrit Code Review
commit e2682f4a83
8 changed files with 268 additions and 167 deletions


@ -4700,6 +4700,12 @@ class NamespaceBoundList(object):
"""
self.bounds = [] if bounds is None else bounds
def __eq__(self, other):
# test for equality of NamespaceBoundList objects only
if not isinstance(other, NamespaceBoundList):
return False
return self.bounds == other.bounds
@classmethod
def parse(cls, namespaces):
"""
@ -4755,7 +4761,12 @@ class NamespaceBoundList(object):
def get_namespace(self, item):
"""
Get a Namespace instance that contains ``item``.
Get a Namespace instance that contains ``item`` by bisecting on the
lower bounds directly. This method is used on performance-sensitive
paths, for example '_get_update_shard' in the proxy object controller. For
normal paths, convert the NamespaceBoundList to a list of Namespaces and
use `~swift.common.utils.find_namespace` or
`~swift.common.utils.filter_namespaces`.
:param item: The item for which a Namespace is to be found.
:return: the Namespace that contains ``item``.
@ -4766,6 +4777,24 @@ class NamespaceBoundList(object):
else self.bounds[pos + 1][0])
return Namespace(name, lower, upper)
def get_namespaces(self):
"""
Get the contained namespaces as a list of contiguous Namespaces ordered
by lower bound.
:return: A list of Namespace objects which are ordered by
``lower bound``.
"""
if not self.bounds:
return []
namespaces = []
num_ns = len(self.bounds)
for i in range(num_ns):
lower, name = self.bounds[i]
upper = ('' if i + 1 == num_ns else self.bounds[i + 1][0])
namespaces.append(Namespace(name, lower, upper))
return namespaces
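
For reference, a minimal sketch of how the new NamespaceBoundList methods fit together, assuming the swift.common.utils classes shown in this hunk; the shard names and bounds are illustrative:

from swift.common.utils import Namespace, NamespaceBoundList

# three contiguous namespaces covering the whole object name space
namespaces = [Namespace('.shards_a/c_ham', '', 'ham'),
              Namespace('.shards_a/c_pie', 'ham', 'pie'),
              Namespace('.shards_a/c_', 'pie', '')]

# parse() compacts the namespaces to [lower, name] pairs; each upper is
# implied by the next entry's lower (or the maximum bound for the last)
bound_list = NamespaceBoundList.parse(namespaces)

# get_namespace() bisects on the lower bounds to find the Namespace that
# contains an item: 'jam' falls in the 'ham'-'pie' namespace
print(bound_list.get_namespace('jam').name)     # .shards_a/c_pie

# get_namespaces() expands the bounds back to contiguous Namespaces
print([ns.name for ns in bound_list.get_namespaces()])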
class ShardName(object):
"""
@ -4950,11 +4979,11 @@ class ShardRange(Namespace):
'_deleted', '_state', '_count', '_bytes',
'_tombstones', '_reported')
def __init__(self, name, timestamp,
def __init__(self, name, timestamp=0,
lower=Namespace.MIN, upper=Namespace.MAX,
object_count=0, bytes_used=0, meta_timestamp=None,
deleted=False, state=None, state_timestamp=None, epoch=None,
reported=False, tombstones=-1):
reported=False, tombstones=-1, **kwargs):
super(ShardRange, self).__init__(name=name, lower=lower, upper=upper)
self.account = self.container = self._timestamp = \
self._meta_timestamp = self._state_timestamp = self._epoch = None
@ -4977,7 +5006,8 @@ class ShardRange(Namespace):
def sort_key(cls, sr):
# defines the sort order for shard ranges
# note if this ever changes to *not* sort by upper first then it breaks
# a key assumption for bisect, which is used by utils.find_shard_range
# a key assumption for bisect, which is used by utils.find_namespace
# with shard ranges.
return sr.upper, sr.state, sr.lower, sr.name
def is_child_of(self, parent):
@ -5533,7 +5563,7 @@ class ShardRangeList(UserList):
containing the filtered shard ranges.
"""
return ShardRangeList(
filter_shard_ranges(self, includes, marker, end_marker))
filter_namespaces(self, includes, marker, end_marker))
def find_lower(self, condition):
"""
@ -5554,44 +5584,45 @@ class ShardRangeList(UserList):
return self.upper
def find_shard_range(item, ranges):
def find_namespace(item, namespaces):
"""
Find a ShardRange in given list of ``shard_ranges`` whose namespace
Find a Namespace/ShardRange in the given list of ``namespaces`` whose namespace
contains ``item``.
:param item: The item for a which a ShardRange is to be found.
:param ranges: a sorted list of ShardRanges.
:return: the ShardRange whose namespace contains ``item``, or None if
no suitable range is found.
:param item: The item for which a Namespace is to be found.
:param namespaces: a sorted list of Namespaces.
:return: the Namespace/ShardRange whose namespace contains ``item``, or
None if no suitable Namespace is found.
"""
index = bisect.bisect_left(ranges, item)
if index != len(ranges) and item in ranges[index]:
return ranges[index]
index = bisect.bisect_left(namespaces, item)
if index != len(namespaces) and item in namespaces[index]:
return namespaces[index]
return None
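
A short usage sketch for the renamed helper, assuming the Namespace class from swift.common.utils; the namespaces are illustrative and must be sorted as described for ShardRange.sort_key:

from swift.common.utils import Namespace, find_namespace

namespaces = [Namespace('a/-f', '', 'f'),
              Namespace('a/f-t', 'f', 't'),
              Namespace('a/t-', 't', '')]

find_namespace('dog', namespaces)        # the ''-'f' namespace
find_namespace('pig', namespaces)        # the 'f'-'t' namespace
find_namespace('dog', namespaces[1:])    # None: no namespace contains 'dog'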
def filter_shard_ranges(shard_ranges, includes, marker, end_marker):
def filter_namespaces(namespaces, includes, marker, end_marker):
"""
Filter the given shard ranges to those whose namespace includes the
``includes`` name or any part of the namespace between ``marker`` and
Filter the given Namespaces/ShardRanges to those whose namespace includes
the ``includes`` name or any part of the namespace between ``marker`` and
``end_marker``. If none of ``includes``, ``marker`` or ``end_marker`` are
specified then all shard ranges will be returned.
specified then all Namespaces will be returned.
:param shard_ranges: A list of :class:`~swift.common.utils.ShardRange`.
:param includes: a string; if not empty then only the shard range, if any,
whose namespace includes this string will be returned, and ``marker``
and ``end_marker`` will be ignored.
:param namespaces: A list of :class:`~swift.common.utils.Namespace` or
:class:`~swift.common.utils.ShardRange`.
:param includes: a string; if not empty then only the Namespace,
if any, whose namespace includes this string will be returned, and
``marker`` and ``end_marker`` will be ignored.
:param marker: if specified then only shard ranges whose upper bound is
greater than this value will be returned.
:param end_marker: if specified then only shard ranges whose lower bound is
less than this value will be returned.
:return: A filtered list of :class:`~swift.common.utils.ShardRange`.
:return: A filtered list of :class:`~swift.common.utils.Namespace`.
"""
if includes:
shard_range = find_shard_range(includes, shard_ranges)
return [shard_range] if shard_range else []
namespace = find_namespace(includes, namespaces)
return [namespace] if namespace else []
def shard_range_filter(sr):
def namespace_filter(sr):
end = start = True
if end_marker:
end = end_marker > sr.lower
@ -5600,13 +5631,13 @@ def filter_shard_ranges(shard_ranges, includes, marker, end_marker):
return start and end
if marker or end_marker:
return list(filter(shard_range_filter, shard_ranges))
return list(filter(namespace_filter, namespaces))
if marker == Namespace.MAX or end_marker == Namespace.MIN:
# MIN and MAX are both Falsy so not handled by shard_range_filter
# MIN and MAX are both Falsy so not handled by namespace_filter
return []
return shard_ranges
return namespaces
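
And a sketch of the renamed filter helper with the same illustrative namespaces as above; ``includes`` takes precedence over ``marker``/``end_marker``:

from swift.common.utils import Namespace, filter_namespaces

namespaces = [Namespace('a/-f', '', 'f'),
              Namespace('a/f-t', 'f', 't'),
              Namespace('a/t-', 't', '')]

# 'includes' wins: only the namespace containing 'pig' is returned
filter_namespaces(namespaces, 'pig', 'a', 'z')    # [the 'f'-'t' namespace]

# otherwise keep namespaces whose bounds overlap (marker, end_marker)
filter_namespaces(namespaces, None, 'g', 'u')     # the 'f'-'t' and 't'-'' namespaces

# no constraints at all: everything is returned
filter_namespaces(namespaces, None, '', None)     # all three namespaces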
def o_tmpfile_in_path_supported(dirpath):


@ -32,7 +32,7 @@ from swift.common.utils import Timestamp, encode_timestamps, \
decode_timestamps, extract_swift_bytes, storage_directory, hash_path, \
ShardRange, renamer, MD5_OF_EMPTY_STRING, mkdirs, get_db_files, \
parse_db_filename, make_db_file_path, split_path, RESERVED_BYTE, \
filter_shard_ranges, ShardRangeList
filter_namespaces, ShardRangeList
from swift.common.db import DatabaseBroker, utf8encode, BROKER_TIMEOUT, \
zero_like, DatabaseAlreadyExists, SQLITE_ARG_LIMIT
@ -1866,8 +1866,8 @@ class ContainerBroker(DatabaseBroker):
if includes:
return shard_ranges[:1] if shard_ranges else []
shard_ranges = filter_shard_ranges(shard_ranges, includes,
marker, end_marker)
shard_ranges = filter_namespaces(
shard_ranges, includes, marker, end_marker)
if fill_gaps:
own_shard_range = self.get_own_shard_range()


@ -615,10 +615,7 @@ def get_cache_key(account, container=None, obj=None, shard=None):
raise ValueError('Shard cache key requires account and container')
if obj:
raise ValueError('Shard cache key cannot have obj')
if shard == 'updating':
cache_key = 'shard-%s-v2/%s/%s' % (shard, account, container)
else:
cache_key = 'shard-%s/%s/%s' % (shard, account, container)
cache_key = 'shard-%s-v2/%s/%s' % (shard, account, container)
elif obj:
if not (account and container):
raise ValueError('Object cache key requires account and container')
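
With this change both shard cache keys use the versioned form; a quick sketch of the resulting keys (the account and container names are illustrative):

from swift.proxy.controllers.base import get_cache_key

get_cache_key('AUTH_test', 'c')                     # 'container/AUTH_test/c'
get_cache_key('AUTH_test', 'c', shard='listing')    # 'shard-listing-v2/AUTH_test/c'
get_cache_key('AUTH_test', 'c', shard='updating')   # 'shard-updating-v2/AUTH_test/c'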


@ -21,7 +21,8 @@ from six.moves.urllib.parse import unquote
from swift.common.memcached import MemcacheConnectionError
from swift.common.utils import public, private, csv_append, Timestamp, \
config_true_value, ShardRange, cache_from_env, filter_shard_ranges
config_true_value, ShardRange, cache_from_env, filter_namespaces, \
NamespaceBoundList
from swift.common.constraints import check_metadata, CONTAINER_LISTING_LIMIT
from swift.common.http import HTTP_ACCEPTED, is_success
from swift.common.request_helpers import get_sys_meta_prefix, get_param, \
@ -109,25 +110,42 @@ class ContainerController(Controller):
req.swift_entity_path, concurrency)
return resp
def _make_shard_ranges_response_body(self, req, shard_range_dicts):
# filter shard ranges according to request constraints and return a
# serialised list of shard ranges
def _make_namespaces_response_body(self, req, ns_bound_list):
"""
Filter namespaces according to request constraints and return a
serialised list of namespaces.
:param req: the request object.
:param ns_bound_list: an instance of
:class:`~swift.common.utils.NamespaceBoundList`.
:return: a serialised list of namespaces.
"""
marker = get_param(req, 'marker', '')
end_marker = get_param(req, 'end_marker')
includes = get_param(req, 'includes')
reverse = config_true_value(get_param(req, 'reverse'))
if reverse:
marker, end_marker = end_marker, marker
shard_ranges = [
ShardRange.from_dict(shard_range)
for shard_range in shard_range_dicts]
shard_ranges = filter_shard_ranges(shard_ranges, includes, marker,
end_marker)
namespaces = ns_bound_list.get_namespaces()
namespaces = filter_namespaces(
namespaces, includes, marker, end_marker)
if reverse:
shard_ranges.reverse()
return json.dumps([dict(sr) for sr in shard_ranges]).encode('ascii')
namespaces.reverse()
return json.dumps([dict(ns) for ns in namespaces]).encode('ascii')
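
The serialised body produced here carries only namespace keys, not full shard range dicts. A minimal sketch of the data shape, with illustrative names:

import json
from swift.common.utils import NamespaceBoundList

ns_bound_list = NamespaceBoundList([['', '.shards_a/c_ham'],
                                    ['ham', '.shards_a/c_pie'],
                                    ['pie', '.shards_a/c_']])
namespaces = ns_bound_list.get_namespaces()
body = json.dumps([dict(ns) for ns in namespaces]).encode('ascii')
# each listed dict carries only the 'name', 'lower' and 'upper' keys,
# e.g. the first entry has name '.shards_a/c_ham', lower '' and upper 'ham'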
def _get_shard_ranges_from_cache(self, req, headers):
"""
Try to fetch shard namespace data from cache and, if successful, return
a response. Also return the cache state.
The response body will be a list of dicts each of which describes
a Namespace (i.e. includes the keys ``lower``, ``upper`` and ``name``).
:param req: an instance of ``swob.Request``.
:param headers: Headers to be sent with request.
:return: a tuple comprising (an instance of ``swob.Response`` or
``None`` if no namespaces were found in cache, the cache state).
"""
infocache = req.environ.setdefault('swift.infocache', {})
memcache = cache_from_env(req.environ, True)
cache_key = get_cache_key(self.account_name,
@ -135,11 +153,10 @@ class ContainerController(Controller):
shard='listing')
resp_body = None
cached_range_dicts = infocache.get(cache_key)
if cached_range_dicts:
ns_bound_list = infocache.get(cache_key)
if ns_bound_list:
cache_state = 'infocache_hit'
resp_body = self._make_shard_ranges_response_body(
req, cached_range_dicts)
resp_body = self._make_namespaces_response_body(req, ns_bound_list)
elif memcache:
skip_chance = \
self.app.container_listing_shard_ranges_skip_cache
@ -147,12 +164,20 @@ class ContainerController(Controller):
cache_state = 'skip'
else:
try:
cached_range_dicts = memcache.get(
cached_namespaces = memcache.get(
cache_key, raise_on_error=True)
if cached_range_dicts:
if cached_namespaces:
cache_state = 'hit'
resp_body = self._make_shard_ranges_response_body(
req, cached_range_dicts)
if six.PY2:
# json.loads() in memcache.get converts JSON strings
# to unicode under python 2, so cast them back to
# str here
cached_namespaces = [
[lower.encode('utf-8'), name.encode('utf-8')]
for lower, name in cached_namespaces]
ns_bound_list = NamespaceBoundList(cached_namespaces)
resp_body = self._make_namespaces_response_body(
req, ns_bound_list)
else:
cache_state = 'miss'
except MemcacheConnectionError:
@ -162,9 +187,9 @@ class ContainerController(Controller):
resp = None
else:
# shard ranges can be returned from cache
infocache[cache_key] = tuple(cached_range_dicts)
infocache[cache_key] = ns_bound_list
self.logger.debug('Found %d shards in cache for %s',
len(cached_range_dicts), req.path_qs)
len(ns_bound_list.bounds), req.path_qs)
headers.update({'x-backend-record-type': 'shard',
'x-backend-cached-results': 'true'})
# mimic GetOrHeadHandler.get_working_response...
@ -180,36 +205,62 @@ class ContainerController(Controller):
return resp, cache_state
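
What actually lands in memcache is ``ns_bound_list.bounds``, a JSON-friendly list of [lower, name] pairs, and a hit rebuilds the NamespaceBoundList from those pairs. A minimal sketch of that round trip, using json directly in place of the memcache client (the bounds are illustrative):

import json
from swift.common.utils import NamespaceBoundList

bounds = [['', '.shards_a/c_ham'],
          ['ham', '.shards_a/c_pie'],
          ['pie', '.shards_a/c_']]

# memcache stores the bounds as JSON; a later get returns plain lists again
cached = json.loads(json.dumps(bounds))
ns_bound_list = NamespaceBoundList(cached)
print(ns_bound_list.get_namespace('jam').name)    # .shards_a/c_pie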
def _store_shard_ranges_in_cache(self, req, resp):
# parse shard ranges returned from backend, store them in infocache and
# memcache, and return a list of dicts
cache_key = get_cache_key(self.account_name, self.container_name,
shard='listing')
"""
Parse shard ranges returned from backend, store them in both infocache
and memcache.
:param req: the request object.
:param resp: the response object for the shard range listing.
:return: an instance of
:class:`~swift.common.utils.NamespaceBoundList`.
"""
# Note: Any gaps in the response's shard ranges will be 'lost' as a
# result of compacting the list of shard ranges to a
# NamespaceBoundList. That is ok. When the cached NamespaceBoundList is
# transformed back to shard range Namespaces to perform a listing, the
# Namespace before each gap will have expanded to include the gap,
# which means that the backend GET to that shard will have an
# end_marker beyond that shard's upper bound, and equal to the next
# available shard's lower. At worst, some misplaced objects, in the gap
# above the shard's upper, may be included in the shard's response.
data = self._parse_listing_response(req, resp)
backend_shard_ranges = self._parse_shard_ranges(req, data, resp)
if backend_shard_ranges is None:
return None
cached_range_dicts = [dict(sr) for sr in backend_shard_ranges]
ns_bound_list = NamespaceBoundList.parse(backend_shard_ranges)
if resp.headers.get('x-backend-sharding-state') == 'sharded':
# cache in infocache even if no shard ranges returned; this
# is unexpected but use that result for this request
infocache = req.environ.setdefault('swift.infocache', {})
infocache[cache_key] = tuple(cached_range_dicts)
cache_key = get_cache_key(
self.account_name, self.container_name, shard='listing')
infocache[cache_key] = ns_bound_list
memcache = cache_from_env(req.environ, True)
if memcache and cached_range_dicts:
if memcache and ns_bound_list:
# cache in memcache only if shard ranges were returned as expected
self.logger.debug('Caching %d shards for %s',
len(cached_range_dicts), req.path_qs)
memcache.set(cache_key, cached_range_dicts,
len(ns_bound_list.bounds), req.path_qs)
memcache.set(cache_key, ns_bound_list.bounds,
time=self.app.recheck_listing_shard_ranges)
return cached_range_dicts
return ns_bound_list
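
The gap behaviour described in the note above can be seen directly: compacting to bounds keeps only lowers and names, so a gap is absorbed by the preceding namespace when the bounds are expanded again. A small sketch with made-up shard ranges:

from swift.common.utils import ShardRange, Timestamp, NamespaceBoundList

# two shard ranges with a gap between 'k' and 'p'
shard_ranges = [ShardRange('.shards_a/c_k', Timestamp.now(), 'a', 'k'),
                ShardRange('.shards_a/c_z', Timestamp.now(), 'p', 'z')]

ns_bound_list = NamespaceBoundList.parse(shard_ranges)
print(ns_bound_list.bounds)    # [['a', '.shards_a/c_k'], ['p', '.shards_a/c_z']]

# expanded back to Namespaces, the first namespace's upper becomes 'p':
# the gap 'k'-'p' has been absorbed by the preceding namespace
print(ns_bound_list.get_namespaces()[0].upper)    # p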
def _get_shard_ranges_from_backend(self, req):
# Make a backend request for shard ranges. The response is cached and
# then returned as a list of dicts.
"""
Make a backend request for shard ranges and return a response.
The response body will be a list of dicts each of which describes
a Namespace (i.e. includes the keys ``lower``, ``upper`` and ``name``).
If the response headers indicate that the response body contains a
complete list of shard ranges for a sharded container then the response
body will be transformed to a ``NamespaceBoundList`` and cached.
:param req: an instance of ``swob.Request``.
:return: an instance of ``swob.Response``.
"""
# Note: We instruct the backend server to ignore name constraints in
# request params if returning shard ranges so that the response can
# potentially be cached. Only do this if the container state is
# potentially be cached, but we only cache it if the container state is
# 'sharded'. We don't attempt to cache shard ranges for a 'sharding'
# container as they may include the container itself as a 'gap filler'
# for shard ranges that have not yet cleaved; listings from 'gap
@ -232,10 +283,10 @@ class ContainerController(Controller):
if (resp_record_type == 'shard' and
sharding_state == 'sharded' and
complete_listing):
cached_range_dicts = self._store_shard_ranges_in_cache(req, resp)
if cached_range_dicts:
resp.body = self._make_shard_ranges_response_body(
req, cached_range_dicts)
ns_bound_list = self._store_shard_ranges_in_cache(req, resp)
if ns_bound_list:
resp.body = self._make_namespaces_response_body(
req, ns_bound_list)
return resp
def _record_shard_listing_cache_metrics(
@ -334,7 +385,6 @@ class ContainerController(Controller):
params['states'] = 'listing'
req.params = params
memcache = cache_from_env(req.environ, True)
if (req.method == 'GET'
and get_param(req, 'states') == 'listing'
and record_type != 'object'):
@ -346,6 +396,7 @@ class ContainerController(Controller):
info = None
may_get_listing_shards = False
memcache = cache_from_env(req.environ, True)
sr_cache_state = None
if (may_get_listing_shards and
self.app.recheck_listing_shard_ranges > 0
@ -424,8 +475,15 @@ class ContainerController(Controller):
# 'X-Backend-Storage-Policy-Index'.
req.headers[policy_key] = resp.headers[policy_key]
shard_listing_history.append((self.account_name, self.container_name))
shard_ranges = [ShardRange.from_dict(data)
for data in json.loads(resp.body)]
# Note: when the response body has been synthesised from cached data,
# each item in the list only has 'name', 'lower' and 'upper' keys. We
# therefore cannot use ShardRange.from_dict(), and the ShardRange
# instances constructed here will only have 'name', 'lower' and 'upper'
# attributes set.
# Ideally we would construct Namespace objects here, but later we use
# the ShardRange account and container properties to access parsed
# parts of the name.
shard_ranges = [ShardRange(**data) for data in json.loads(resp.body)]
self.logger.debug('GET listing from %s shards for: %s',
len(shard_ranges), req.path_qs)
if not shard_ranges:
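
This works because of the ShardRange.__init__ change above: ``timestamp`` now defaults to 0 and unknown keys are swallowed by ``**kwargs``, so a ShardRange can be built from the minimal cached namespace dicts. A small illustrative sketch:

from swift.common.utils import ShardRange

# a dict synthesised from cached namespace data has only these three keys
data = {'name': '.shards_a/c_ham', 'lower': '', 'upper': 'ham'}
sr = ShardRange(**data)

# the name is still parsed into account and container parts, which is why
# ShardRange rather than Namespace is constructed here
print(sr.account, sr.container)    # .shards_a c_ham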


@ -48,7 +48,7 @@ from swift.common.utils import (
normalize_delete_at_timestamp, public, get_expirer_container,
document_iters_to_http_response_body, parse_content_range,
quorum_size, reiterate, close_if_possible, safe_json_loads, md5,
ShardRange, find_shard_range, cache_from_env, NamespaceBoundList)
ShardRange, find_namespace, cache_from_env, NamespaceBoundList)
from swift.common.bufferedhttp import http_connect
from swift.common.constraints import check_metadata, check_object_creation
from swift.common import constraints
@ -388,7 +388,7 @@ class BaseObjectController(Controller):
memcache.set(
cache_key, cached_namespaces.bounds,
time=self.app.recheck_updating_shard_ranges)
update_shard = find_shard_range(obj, shard_ranges or [])
update_shard = find_namespace(obj, shard_ranges or [])
record_cache_op_metrics(
self.logger, 'shard_updating', cache_state, response)
return update_shard
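
For the object write path, the cached bounds can be turned straight into the update shard without rebuilding full shard range dicts. A minimal sketch of that lookup, with illustrative bounds and object name (the real code fetches the bounds from memcache):

from swift.common.utils import NamespaceBoundList, find_namespace

bounds = [['', '.shards_a/c_ham'],
          ['ham', '.shards_a/c_pie'],
          ['pie', '.shards_a/c_']]
cached_namespaces = NamespaceBoundList(bounds)

# either bisect on the compact bounds directly...
update_shard = cached_namespaces.get_namespace('o_jam')
print(update_shard.name)    # .shards_a/c_pie

# ...or expand to Namespaces and use find_namespace(), as shown above
update_shard = find_namespace('o_jam', cached_namespaces.get_namespaces())
print(update_shard.name)    # .shards_a/c_pie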


@ -3403,7 +3403,7 @@ cluster_dfw1 = http://dfw1.host/v1/
if tempdir:
shutil.rmtree(tempdir)
def test_find_shard_range(self):
def test_find_namespace(self):
ts = utils.Timestamp.now().internal
start = utils.ShardRange('a/-a', ts, '', 'a')
atof = utils.ShardRange('a/a-f', ts, 'a', 'f')
@ -3413,29 +3413,29 @@ cluster_dfw1 = http://dfw1.host/v1/
end = utils.ShardRange('a/z-', ts, 'z', '')
ranges = [start, atof, ftol, ltor, rtoz, end]
found = utils.find_shard_range('', ranges)
found = utils.find_namespace('', ranges)
self.assertEqual(found, None)
found = utils.find_shard_range(' ', ranges)
found = utils.find_namespace(' ', ranges)
self.assertEqual(found, start)
found = utils.find_shard_range(' ', ranges[1:])
found = utils.find_namespace(' ', ranges[1:])
self.assertEqual(found, None)
found = utils.find_shard_range('b', ranges)
found = utils.find_namespace('b', ranges)
self.assertEqual(found, atof)
found = utils.find_shard_range('f', ranges)
found = utils.find_namespace('f', ranges)
self.assertEqual(found, atof)
found = utils.find_shard_range('f\x00', ranges)
found = utils.find_namespace('f\x00', ranges)
self.assertEqual(found, ftol)
found = utils.find_shard_range('x', ranges)
found = utils.find_namespace('x', ranges)
self.assertEqual(found, rtoz)
found = utils.find_shard_range('r', ranges)
found = utils.find_namespace('r', ranges)
self.assertEqual(found, ltor)
found = utils.find_shard_range('}', ranges)
found = utils.find_namespace('}', ranges)
self.assertEqual(found, end)
found = utils.find_shard_range('}', ranges[:-1])
found = utils.find_namespace('}', ranges[:-1])
self.assertEqual(found, None)
# remove l-r from list of ranges and try and find a shard range for an
# item in that range.
found = utils.find_shard_range('p', ranges[:-3] + ranges[-2:])
found = utils.find_namespace('p', ranges[:-3] + ranges[-2:])
self.assertEqual(found, None)
# add some sub-shards; a sub-shard's state is less than its parent
@ -3445,20 +3445,20 @@ cluster_dfw1 = http://dfw1.host/v1/
htok = utils.ShardRange('a/h-k', ts, 'h', 'k')
overlapping_ranges = ranges[:2] + [ftoh, htok] + ranges[2:]
found = utils.find_shard_range('g', overlapping_ranges)
found = utils.find_namespace('g', overlapping_ranges)
self.assertEqual(found, ftoh)
found = utils.find_shard_range('h', overlapping_ranges)
found = utils.find_namespace('h', overlapping_ranges)
self.assertEqual(found, ftoh)
found = utils.find_shard_range('k', overlapping_ranges)
found = utils.find_namespace('k', overlapping_ranges)
self.assertEqual(found, htok)
found = utils.find_shard_range('l', overlapping_ranges)
found = utils.find_namespace('l', overlapping_ranges)
self.assertEqual(found, ftol)
found = utils.find_shard_range('m', overlapping_ranges)
found = utils.find_namespace('m', overlapping_ranges)
self.assertEqual(found, ltor)
ktol = utils.ShardRange('a/k-l', ts, 'k', 'l')
overlapping_ranges = ranges[:2] + [ftoh, htok, ktol] + ranges[2:]
found = utils.find_shard_range('l', overlapping_ranges)
found = utils.find_namespace('l', overlapping_ranges)
self.assertEqual(found, ktol)
def test_parse_db_filename(self):
@ -7960,7 +7960,7 @@ class TestShardRange(unittest.TestCase):
with self.assertRaises(KeyError):
utils.ShardRange.from_dict(bad_dict)
# But __init__ still (generally) works!
if key not in ('name', 'timestamp'):
if key != 'name':
utils.ShardRange(**bad_dict)
else:
with self.assertRaises(TypeError):

View File

@ -499,7 +499,7 @@ class TestFuncs(BaseTest):
expected)
self.assertEqual(get_cache_key("account", "cont", shard="listing"),
'shard-listing/account/cont')
'shard-listing-v2/account/cont')
self.assertEqual(get_cache_key("account", "cont", shard="updating"),
'shard-updating-v2/account/cont')
self.assertRaises(ValueError,


@ -24,7 +24,8 @@ from six.moves import urllib
from swift.common.constraints import CONTAINER_LISTING_LIMIT
from swift.common.swob import Request, bytes_to_wsgi, str_to_wsgi, wsgi_quote
from swift.common.utils import ShardRange, Timestamp
from swift.common.utils import ShardRange, Timestamp, Namespace, \
NamespaceBoundList
from swift.proxy import server as proxy_server
from swift.proxy.controllers.base import headers_to_container_info, \
Controller, get_container_info, get_cache_key
@ -1970,6 +1971,7 @@ class TestContainerController(TestRingBase):
(200, sr_objs[2], shard_resp_hdrs[2])
]
# NB marker always advances to last object name
# NB end_markers are upper of the current available shard range
expected_requests = [
# path, headers, params
('a/c', {'X-Backend-Record-Type': 'auto'},
@ -1991,7 +1993,7 @@ class TestContainerController(TestRingBase):
self.check_response(resp, root_resp_hdrs,
exp_sharding_state='sharding')
self.assertIn('swift.cache', resp.request.environ)
self.assertNotIn('shard-listing/a/c',
self.assertNotIn('shard-listing-v2/a/c',
resp.request.environ['swift.cache'].store)
def test_GET_sharded_container_gap_in_shards_memcache(self):
@ -2035,15 +2037,17 @@ class TestContainerController(TestRingBase):
(200, sr_objs[2], shard_resp_hdrs[2])
]
# NB marker always advances to last object name
# NB compaction of shard range data to cached bounds loses the gaps, so
# end_markers are lower of the next available shard range
expected_requests = [
# path, headers, params
('a/c', {'X-Backend-Record-Type': 'auto'},
dict(states='listing')), # 200
(shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'},
dict(marker='', end_marker='ham\x00', states='listing',
dict(marker='', end_marker='onion\x00', states='listing',
limit=str(limit))), # 200
(shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'},
dict(marker='h', end_marker='pie\x00', states='listing',
dict(marker='h', end_marker='rhubarb\x00', states='listing',
limit=str(limit - len(sr_objs[0])))), # 200
(shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'},
dict(marker='p', end_marker='', states='listing',
@ -2055,11 +2059,14 @@ class TestContainerController(TestRingBase):
# root object count will be overridden by actual length of listing
self.check_response(resp, root_resp_hdrs)
self.assertIn('swift.cache', resp.request.environ)
self.assertIn('shard-listing/a/c',
self.assertIn('shard-listing-v2/a/c',
resp.request.environ['swift.cache'].store)
# NB compact bounds in cache do not reveal the gap in shard ranges
self.assertEqual(
sr_dicts,
resp.request.environ['swift.cache'].store['shard-listing/a/c'])
[['', '.shards_a/c_ham'],
['onion', '.shards_a/c_pie'],
['rhubarb', '.shards_a/c_']],
resp.request.environ['swift.cache'].store['shard-listing-v2/a/c'])
def test_GET_sharded_container_empty_shard(self):
# verify ordered listing when a shard is empty
@ -2699,10 +2706,14 @@ class TestContainerController(TestRingBase):
def _setup_shard_range_stubs(self):
self.memcache = FakeMemcache()
shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', ''))
shard_ranges = [
ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper)
for lower, upper in shard_bounds]
self.sr_dicts = [dict(sr) for sr in shard_ranges]
self.ns_dicts = [{'name': '.shards_a/c_%s' % upper,
'lower': lower,
'upper': upper}
for lower, upper in shard_bounds]
self.namespaces = [Namespace(**ns) for ns in self.ns_dicts]
self.ns_bound_list = NamespaceBoundList.parse(self.namespaces)
self.sr_dicts = [dict(ShardRange(timestamp=Timestamp.now(), **ns))
for ns in self.ns_dicts]
self._stub_shards_dump = json.dumps(self.sr_dicts).encode('ascii')
self.root_resp_hdrs = {
'Accept-Ranges': 'bytes',
@ -2737,22 +2748,24 @@ class TestContainerController(TestRingBase):
req, backend_req,
extra_hdrs={'X-Backend-Record-Type': record_type,
'X-Backend-Override-Shard-Name-Filter': 'sharded'})
self._check_response(resp, self.sr_dicts, {
self._check_response(resp, self.ns_dicts, {
'X-Backend-Recheck-Container-Existence': '60',
'X-Backend-Record-Type': 'shard',
'X-Backend-Sharding-State': sharding_state})
cache_key = 'shard-listing-v2/a/c'
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.set('shard-listing/a/c', self.sr_dicts,
mock.call.set(cache_key, self.ns_bound_list.bounds,
time=exp_recheck_listing),
mock.call.set('container/a/c', mock.ANY, time=60)],
self.memcache.calls)
self.assertEqual(sharding_state,
self.memcache.calls[2][1][1]['sharding_state'])
self.assertIn('swift.infocache', req.environ)
self.assertIn('shard-listing/a/c', req.environ['swift.infocache'])
self.assertEqual(tuple(self.sr_dicts),
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertIn(cache_key, req.environ['swift.infocache'])
self.assertEqual(self.ns_bound_list,
req.environ['swift.infocache'][cache_key])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.info.cache.miss',
@ -2760,7 +2773,7 @@ class TestContainerController(TestRingBase):
# container is sharded and proxy has that state cached, but
# no shard ranges cached; expect a cache miss and write-back
self.memcache.delete('shard-listing/a/c')
self.memcache.delete(cache_key)
self.memcache.clear_calls()
self.logger.clear()
req = self._build_request({'X-Backend-Record-Type': record_type},
@ -2774,23 +2787,23 @@ class TestContainerController(TestRingBase):
req, backend_req,
extra_hdrs={'X-Backend-Record-Type': record_type,
'X-Backend-Override-Shard-Name-Filter': 'sharded'})
self._check_response(resp, self.sr_dicts, {
self._check_response(resp, self.ns_dicts, {
'X-Backend-Recheck-Container-Existence': '60',
'X-Backend-Record-Type': 'shard',
'X-Backend-Sharding-State': sharding_state})
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.get('shard-listing/a/c', raise_on_error=True),
mock.call.set('shard-listing/a/c', self.sr_dicts,
mock.call.get(cache_key, raise_on_error=True),
mock.call.set(cache_key, self.ns_bound_list.bounds,
time=exp_recheck_listing),
# Since there was a backend request, we go ahead and cache
# container info, too
mock.call.set('container/a/c', mock.ANY, time=60)],
self.memcache.calls)
self.assertIn('swift.infocache', req.environ)
self.assertIn('shard-listing/a/c', req.environ['swift.infocache'])
self.assertEqual(tuple(self.sr_dicts),
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertIn(cache_key, req.environ['swift.infocache'])
self.assertEqual(self.ns_bound_list,
req.environ['swift.infocache'][cache_key])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.info.cache.hit',
@ -2803,18 +2816,18 @@ class TestContainerController(TestRingBase):
req = self._build_request({'X-Backend-Record-Type': record_type},
{'states': 'listing'}, {})
resp = req.get_response(self.app)
self._check_response(resp, self.sr_dicts, {
self._check_response(resp, self.ns_dicts, {
'X-Backend-Cached-Results': 'true',
'X-Backend-Record-Type': 'shard',
'X-Backend-Sharding-State': sharding_state})
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.get('shard-listing/a/c', raise_on_error=True)],
mock.call.get(cache_key, raise_on_error=True)],
self.memcache.calls)
self.assertIn('swift.infocache', req.environ)
self.assertIn('shard-listing/a/c', req.environ['swift.infocache'])
self.assertEqual(tuple(self.sr_dicts),
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertIn(cache_key, req.environ['swift.infocache'])
self.assertEqual(self.ns_bound_list,
req.environ['swift.infocache'][cache_key])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.info.cache.hit',
@ -2836,22 +2849,22 @@ class TestContainerController(TestRingBase):
req, backend_req,
extra_hdrs={'X-Backend-Record-Type': record_type,
'X-Backend-Override-Shard-Name-Filter': 'sharded'})
self._check_response(resp, self.sr_dicts, {
self._check_response(resp, self.ns_dicts, {
'X-Backend-Recheck-Container-Existence': '60',
'X-Backend-Record-Type': 'shard',
'X-Backend-Sharding-State': sharding_state})
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.set('shard-listing/a/c', self.sr_dicts,
mock.call.set(cache_key, self.ns_bound_list.bounds,
time=exp_recheck_listing),
# Since there was a backend request, we go ahead and cache
# container info, too
mock.call.set('container/a/c', mock.ANY, time=60)],
self.memcache.calls)
self.assertIn('swift.infocache', req.environ)
self.assertIn('shard-listing/a/c', req.environ['swift.infocache'])
self.assertEqual(tuple(self.sr_dicts),
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertIn(cache_key, req.environ['swift.infocache'])
self.assertEqual(self.ns_bound_list,
req.environ['swift.infocache'][cache_key])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.info.cache.hit',
@ -2864,18 +2877,18 @@ class TestContainerController(TestRingBase):
{'states': 'listing'}, {})
with mock.patch('random.random', return_value=0.11):
resp = req.get_response(self.app)
self._check_response(resp, self.sr_dicts, {
self._check_response(resp, self.ns_dicts, {
'X-Backend-Cached-Results': 'true',
'X-Backend-Record-Type': 'shard',
'X-Backend-Sharding-State': sharding_state})
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.get('shard-listing/a/c', raise_on_error=True)],
mock.call.get(cache_key, raise_on_error=True)],
self.memcache.calls)
self.assertIn('swift.infocache', req.environ)
self.assertIn('shard-listing/a/c', req.environ['swift.infocache'])
self.assertEqual(tuple(self.sr_dicts),
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertIn(cache_key, req.environ['swift.infocache'])
self.assertEqual(self.ns_bound_list,
req.environ['swift.infocache'][cache_key])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.info.cache.hit',
@ -2890,15 +2903,15 @@ class TestContainerController(TestRingBase):
infocache=req.environ['swift.infocache'])
with mock.patch('random.random', return_value=0.11):
resp = req.get_response(self.app)
self._check_response(resp, self.sr_dicts, {
self._check_response(resp, self.ns_dicts, {
'X-Backend-Cached-Results': 'true',
'X-Backend-Record-Type': 'shard',
'X-Backend-Sharding-State': sharding_state})
self.assertEqual([], self.memcache.calls)
self.assertIn('swift.infocache', req.environ)
self.assertIn('shard-listing/a/c', req.environ['swift.infocache'])
self.assertEqual(tuple(self.sr_dicts),
req.environ['swift.infocache']['shard-listing/a/c'])
self.assertIn(cache_key, req.environ['swift.infocache'])
self.assertEqual(self.ns_bound_list,
req.environ['swift.infocache'][cache_key])
self.assertEqual(
[x[0][0] for x in self.logger.logger.log_dict['increment']],
['container.shard_listing.infocache.hit'])
@ -2916,7 +2929,7 @@ class TestContainerController(TestRingBase):
num_resp=self.CONTAINER_REPLICAS)
self.assertEqual(
[mock.call.delete('container/a/c'),
mock.call.delete('shard-listing/a/c')],
mock.call.delete(cache_key)],
self.memcache.calls)
def test_get_from_shards_add_root_spi(self):
@ -3046,7 +3059,7 @@ class TestContainerController(TestRingBase):
# deleted from cache
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.get('shard-listing/a/c', raise_on_error=True),
mock.call.get('shard-listing-v2/a/c', raise_on_error=True),
mock.call.set('container/a/c', mock.ANY, time=6.0)],
self.memcache.calls)
self.assertEqual(404, self.memcache.calls[2][1][1]['status'])
@ -3079,7 +3092,7 @@ class TestContainerController(TestRingBase):
self.assertNotIn('X-Backend-Cached-Results', resp.headers)
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.get('shard-listing/a/c', raise_on_error=True),
mock.call.get('shard-listing-v2/a/c', raise_on_error=True),
mock.call.set('container/a/c', mock.ANY, time=6.0)],
self.memcache.calls)
self.assertEqual(404, self.memcache.calls[2][1][1]['status'])
@ -3098,7 +3111,7 @@ class TestContainerController(TestRingBase):
info['status'] = 200
info['sharding_state'] = 'sharded'
self.memcache.set('container/a/c', info)
self.memcache.set('shard-listing/a/c', self.sr_dicts)
self.memcache.set('shard-listing-v2/a/c', self.ns_bound_list.bounds)
self.memcache.clear_calls()
req_hdrs = {'X-Backend-Record-Type': record_type}
@ -3106,7 +3119,7 @@ class TestContainerController(TestRingBase):
resp = req.get_response(self.app)
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.get('shard-listing/a/c', raise_on_error=True)],
mock.call.get('shard-listing-v2/a/c', raise_on_error=True)],
self.memcache.calls)
self.assertEqual({'container.info.cache.hit': 1,
'container.shard_listing.cache.hit': 1},
@ -3122,26 +3135,26 @@ class TestContainerController(TestRingBase):
resp = self._do_test_GET_shard_ranges_read_from_cache(
{'states': 'listing'}, 'shard')
self._check_response(resp, self.sr_dicts, exp_hdrs)
self._check_response(resp, self.ns_dicts, exp_hdrs)
resp = self._do_test_GET_shard_ranges_read_from_cache(
{'states': 'listing', 'reverse': 'true'}, 'shard')
exp_shards = list(self.sr_dicts)
exp_shards = list(self.ns_dicts)
exp_shards.reverse()
self._check_response(resp, exp_shards, exp_hdrs)
resp = self._do_test_GET_shard_ranges_read_from_cache(
{'states': 'listing', 'marker': 'jam'}, 'shard')
self._check_response(resp, self.sr_dicts[1:], exp_hdrs)
self._check_response(resp, self.ns_dicts[1:], exp_hdrs)
resp = self._do_test_GET_shard_ranges_read_from_cache(
{'states': 'listing', 'marker': 'jam', 'end_marker': 'kale'},
'shard')
self._check_response(resp, self.sr_dicts[1:2], exp_hdrs)
self._check_response(resp, self.ns_dicts[1:2], exp_hdrs)
resp = self._do_test_GET_shard_ranges_read_from_cache(
{'states': 'listing', 'includes': 'egg'}, 'shard')
self._check_response(resp, self.sr_dicts[:1], exp_hdrs)
self._check_response(resp, self.ns_dicts[:1], exp_hdrs)
# override _get_from_shards so that the response contains the shard
# listing that we want to verify even though the record_type is 'auto'
@ -3153,22 +3166,22 @@ class TestContainerController(TestRingBase):
mock_get_from_shards):
resp = self._do_test_GET_shard_ranges_read_from_cache(
{'states': 'listing', 'reverse': 'true'}, 'auto')
exp_shards = list(self.sr_dicts)
exp_shards = list(self.ns_dicts)
exp_shards.reverse()
self._check_response(resp, exp_shards, exp_hdrs)
resp = self._do_test_GET_shard_ranges_read_from_cache(
{'states': 'listing', 'marker': 'jam'}, 'auto')
self._check_response(resp, self.sr_dicts[1:], exp_hdrs)
self._check_response(resp, self.ns_dicts[1:], exp_hdrs)
resp = self._do_test_GET_shard_ranges_read_from_cache(
{'states': 'listing', 'marker': 'jam', 'end_marker': 'kale'},
'auto')
self._check_response(resp, self.sr_dicts[1:2], exp_hdrs)
self._check_response(resp, self.ns_dicts[1:2], exp_hdrs)
resp = self._do_test_GET_shard_ranges_read_from_cache(
{'states': 'listing', 'includes': 'egg'}, 'auto')
self._check_response(resp, self.sr_dicts[:1], exp_hdrs)
self._check_response(resp, self.ns_dicts[:1], exp_hdrs)
def _do_test_GET_shard_ranges_write_to_cache(self, params, record_type):
# verify that shard range listing are written to cache when appropriate
@ -3193,7 +3206,8 @@ class TestContainerController(TestRingBase):
expected_hdrs.update(resp_hdrs)
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.set('shard-listing/a/c', self.sr_dicts, time=600),
mock.call.set(
'shard-listing-v2/a/c', self.ns_bound_list.bounds, time=600),
mock.call.set('container/a/c', mock.ANY, time=60)],
self.memcache.calls)
# shards were cached
@ -3213,26 +3227,26 @@ class TestContainerController(TestRingBase):
resp = self._do_test_GET_shard_ranges_write_to_cache(
{'states': 'listing'}, 'shard')
self._check_response(resp, self.sr_dicts, exp_hdrs)
self._check_response(resp, self.ns_dicts, exp_hdrs)
resp = self._do_test_GET_shard_ranges_write_to_cache(
{'states': 'listing', 'reverse': 'true'}, 'shard')
exp_shards = list(self.sr_dicts)
exp_shards = list(self.ns_dicts)
exp_shards.reverse()
self._check_response(resp, exp_shards, exp_hdrs)
resp = self._do_test_GET_shard_ranges_write_to_cache(
{'states': 'listing', 'marker': 'jam'}, 'shard')
self._check_response(resp, self.sr_dicts[1:], exp_hdrs)
self._check_response(resp, self.ns_dicts[1:], exp_hdrs)
resp = self._do_test_GET_shard_ranges_write_to_cache(
{'states': 'listing', 'marker': 'jam', 'end_marker': 'kale'},
'shard')
self._check_response(resp, self.sr_dicts[1:2], exp_hdrs)
self._check_response(resp, self.ns_dicts[1:2], exp_hdrs)
resp = self._do_test_GET_shard_ranges_write_to_cache(
{'states': 'listing', 'includes': 'egg'}, 'shard')
self._check_response(resp, self.sr_dicts[:1], exp_hdrs)
self._check_response(resp, self.ns_dicts[:1], exp_hdrs)
# override _get_from_shards so that the response contains the shard
# listing that we want to verify even though the record_type is 'auto'
@ -3244,22 +3258,22 @@ class TestContainerController(TestRingBase):
mock_get_from_shards):
resp = self._do_test_GET_shard_ranges_write_to_cache(
{'states': 'listing', 'reverse': 'true'}, 'auto')
exp_shards = list(self.sr_dicts)
exp_shards = list(self.ns_dicts)
exp_shards.reverse()
self._check_response(resp, exp_shards, exp_hdrs)
resp = self._do_test_GET_shard_ranges_write_to_cache(
{'states': 'listing', 'marker': 'jam'}, 'auto')
self._check_response(resp, self.sr_dicts[1:], exp_hdrs)
self._check_response(resp, self.ns_dicts[1:], exp_hdrs)
resp = self._do_test_GET_shard_ranges_write_to_cache(
{'states': 'listing', 'marker': 'jam', 'end_marker': 'kale'},
'auto')
self._check_response(resp, self.sr_dicts[1:2], exp_hdrs)
self._check_response(resp, self.ns_dicts[1:2], exp_hdrs)
resp = self._do_test_GET_shard_ranges_write_to_cache(
{'states': 'listing', 'includes': 'egg'}, 'auto')
self._check_response(resp, self.sr_dicts[:1], exp_hdrs)
self._check_response(resp, self.ns_dicts[:1], exp_hdrs)
def test_GET_shard_ranges_write_to_cache_with_x_newest(self):
# when x-newest is sent, verify that there is no cache lookup to check
@ -3285,10 +3299,11 @@ class TestContainerController(TestRingBase):
'X-Backend-Override-Shard-Name-Filter': 'sharded'})
expected_hdrs = {'X-Backend-Recheck-Container-Existence': '60'}
expected_hdrs.update(resp_hdrs)
self._check_response(resp, self.sr_dicts, expected_hdrs)
self._check_response(resp, self.ns_dicts, expected_hdrs)
self.assertEqual(
[mock.call.get('container/a/c'),
mock.call.set('shard-listing/a/c', self.sr_dicts, time=600),
mock.call.set(
'shard-listing-v2/a/c', self.ns_bound_list.bounds, time=600),
mock.call.set('container/a/c', mock.ANY, time=60)],
self.memcache.calls)
self.assertEqual('sharded',