proxy: refactor ContainerController._GET_using_cache

Refactor the ContainerController._GET_using_cache method. No behavioral
changes.

* Make the top-level method shorter.

* Make the flow easier to follow.

* Make various return points more obvious.

* Change variable names to distinguish those that are lists of ShardRange
  objects from those that are lists of dicts representing shard ranges.

Change-Id: Ibb7cd761be4a5b1ec53dd16b7c5d256ed7666a88
Alistair Coles 2022-11-25 10:03:02 +00:00
parent e748cff5c4
commit bd5d099035
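
For orientation, here is a minimal sketch of the refactored flow, condensed
from the new _GET_using_cache shown in the diff below (helper bodies elided;
see the diff for the actual implementation):

    def _GET_using_cache(self, req, info):
        # Sketch only: cache lookup and backend fetch are delegated to the
        # helpers introduced by this change.
        if config_true_value(req.headers.get('x-newest', False)):
            # X-Newest always goes to the backend servers.
            self.logger.debug(
                'Skipping shard cache lookup (x-newest) for %s', req.path_qs)
        elif (info and is_success(info['status']) and
              info.get('sharding_state') == 'sharded'):
            # container is sharded so the shard ranges may be cached
            resp = self._get_shard_ranges_from_cache(req, info)
            if resp:
                return resp
        # Not fulfilled from cache: fetch from the backend, which also caches.
        return self._get_shard_ranges_from_backend(req)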

@@ -108,8 +108,9 @@ class ContainerController(Controller):
             req.swift_entity_path, concurrency)
         return resp

-    def _filter_resp_shard_ranges(self, req, cached_ranges):
-        # filter returned shard ranges according to request constraints
+    def _make_shard_ranges_response_body(self, req, shard_range_dicts):
+        # filter shard ranges according to request constraints and return a
+        # serialised list of shard ranges
         marker = get_param(req, 'marker', '')
         end_marker = get_param(req, 'end_marker')
         includes = get_param(req, 'includes')
@@ -118,73 +119,100 @@ class ContainerController(Controller):
             marker, end_marker = end_marker, marker
         shard_ranges = [
             ShardRange.from_dict(shard_range)
-            for shard_range in cached_ranges]
+            for shard_range in shard_range_dicts]
         shard_ranges = filter_shard_ranges(shard_ranges, includes, marker,
                                            end_marker)
         if reverse:
             shard_ranges.reverse()
         return json.dumps([dict(sr) for sr in shard_ranges]).encode('ascii')

-    def _GET_using_cache(self, req, info):
-        # It may be possible to fulfil the request from cache: we only reach
-        # here if request record_type is 'shard' or 'auto', so if the container
-        # state is 'sharded' then look for cached shard ranges. However, if
-        # X-Newest is true then we always fetch from the backend servers.
-        get_newest = config_true_value(req.headers.get('x-newest', False))
-        if get_newest:
-            self.logger.debug(
-                'Skipping shard cache lookup (x-newest) for %s', req.path_qs)
-        elif (info and is_success(info['status']) and
-              info.get('sharding_state') == 'sharded'):
-            # container is sharded so we may have the shard ranges cached
-            headers = headers_from_container_info(info)
-            if headers:
-                # only use cached values if all required headers available
-                infocache = req.environ.setdefault('swift.infocache', {})
-                memcache = cache_from_env(req.environ, True)
-                cache_key = get_cache_key(self.account_name,
-                                          self.container_name,
-                                          shard='listing')
-                cached_ranges = infocache.get(cache_key)
-                if cached_ranges is None and memcache:
-                    skip_chance = \
-                        self.app.container_listing_shard_ranges_skip_cache
-                    if skip_chance and random.random() < skip_chance:
-                        self.logger.increment('shard_listing.cache.skip')
+    def _get_shard_ranges_from_cache(self, req, info):
+        headers = headers_from_container_info(info)
+        if not headers:
+            # only use cached values if all required headers available
+            return None
+
+        infocache = req.environ.setdefault('swift.infocache', {})
+        memcache = cache_from_env(req.environ, True)
+        cache_key = get_cache_key(self.account_name,
+                                  self.container_name,
+                                  shard='listing')
+
+        resp_body = None
+        cached_range_dicts = infocache.get(cache_key)
+        if cached_range_dicts:
+            resp_body = self._make_shard_ranges_response_body(
+                req, cached_range_dicts)
+        elif memcache:
+            skip_chance = \
+                self.app.container_listing_shard_ranges_skip_cache
+            if skip_chance and random.random() < skip_chance:
+                self.logger.increment('shard_listing.cache.skip')
+            else:
+                try:
+                    cached_range_dicts = memcache.get(
+                        cache_key, raise_on_error=True)
+                    if cached_range_dicts:
+                        cache_state = 'hit'
+                        resp_body = self._make_shard_ranges_response_body(
+                            req, cached_range_dicts)
                     else:
-                        try:
-                            cached_ranges = memcache.get(
-                                cache_key, raise_on_error=True)
-                            cache_state = 'hit' if cached_ranges else 'miss'
-                        except MemcacheConnectionError:
-                            cache_state = 'error'
-                        self.logger.increment(
-                            'shard_listing.cache.%s' % cache_state)
+                        cache_state = 'miss'
+                except MemcacheConnectionError:
+                    cache_state = 'error'
+                self.logger.increment(
+                    'shard_listing.cache.%s' % cache_state)

-                if cached_ranges is not None:
-                    infocache[cache_key] = tuple(cached_ranges)
-                    # shard ranges can be returned from cache
-                    self.logger.debug('Found %d shards in cache for %s',
-                                      len(cached_ranges), req.path_qs)
-                    headers.update({'x-backend-record-type': 'shard',
-                                    'x-backend-cached-results': 'true'})
-                    shard_range_body = self._filter_resp_shard_ranges(
-                        req, cached_ranges)
-                    # mimic GetOrHeadHandler.get_working_response...
-                    # note: server sets charset with content_type but proxy
-                    # GETorHEAD_base does not, so don't set it here either
-                    resp = Response(request=req, body=shard_range_body)
-                    update_headers(resp, headers)
-                    resp.last_modified = Timestamp(
-                        headers['x-put-timestamp']).ceil()
-                    resp.environ['swift_x_timestamp'] = headers.get(
-                        'x-timestamp')
-                    resp.accept_ranges = 'bytes'
-                    resp.content_type = 'application/json'
-                    return resp
-
-        # The request was not fulfilled from cache so send to the backend
-        # server, but instruct the backend server to ignore name constraints in
+        if resp_body is None:
+            resp = None
+        else:
+            # shard ranges can be returned from cache
+            infocache[cache_key] = tuple(cached_range_dicts)
+            self.logger.debug('Found %d shards in cache for %s',
+                              len(cached_range_dicts), req.path_qs)
+            headers.update({'x-backend-record-type': 'shard',
+                            'x-backend-cached-results': 'true'})
+            # mimic GetOrHeadHandler.get_working_response...
+            # note: server sets charset with content_type but proxy
+            # GETorHEAD_base does not, so don't set it here either
+            resp = Response(request=req, body=resp_body)
+            update_headers(resp, headers)
+            resp.last_modified = Timestamp(headers['x-put-timestamp']).ceil()
+            resp.environ['swift_x_timestamp'] = headers.get('x-timestamp')
+            resp.accept_ranges = 'bytes'
+            resp.content_type = 'application/json'
+        return resp
+
+    def _store_shard_ranges_in_cache(self, req, resp):
+        # parse shard ranges returned from backend, store them in infocache and
+        # memcache, and return a list of dicts
+        cache_key = get_cache_key(self.account_name, self.container_name,
+                                  shard='listing')
+        data = self._parse_listing_response(req, resp)
+        backend_shard_ranges = self._parse_shard_ranges(req, data, resp)
+        if backend_shard_ranges is None:
+            return None
+
+        cached_range_dicts = [dict(sr) for sr in backend_shard_ranges]
+        if resp.headers.get('x-backend-sharding-state') == 'sharded':
+            # cache in infocache even if no shard ranges returned; this
+            # is unexpected but use that result for this request
+            infocache = req.environ.setdefault('swift.infocache', {})
+            infocache[cache_key] = tuple(cached_range_dicts)
+            memcache = cache_from_env(req.environ, True)
+            if memcache and cached_range_dicts:
+                # cache in memcache only if shard ranges as expected
+                self.logger.debug('Caching %d shards for %s',
+                                  len(cached_range_dicts), req.path_qs)
+                memcache.set(cache_key, cached_range_dicts,
+                             time=self.app.recheck_listing_shard_ranges)
+        return cached_range_dicts
+
+    def _get_shard_ranges_from_backend(self, req):
+        # Make a backend request for shard ranges. The response is cached and
+        # then returned as a list of dicts.
+        # Note: We instruct the backend server to ignore name constraints in
         # request params if returning shard ranges so that the response can
         # potentially be cached. Only do this if the container state is
         # 'sharded'. We don't attempt to cache shard ranges for a 'sharding'
@@ -209,33 +237,30 @@ class ContainerController(Controller):
         if (resp_record_type == 'shard' and
                 sharding_state == 'sharded' and
                 complete_listing):
-            # backend returned unfiltered listing state shard ranges so parse
-            # them and replace response body with filtered listing
-            cache_key = get_cache_key(self.account_name, self.container_name,
-                                      shard='listing')
-            data = self._parse_listing_response(req, resp)
-            backend_shard_ranges = self._parse_shard_ranges(req, data, resp)
-            if backend_shard_ranges is not None:
-                cached_ranges = [dict(sr) for sr in backend_shard_ranges]
-                if resp.headers.get('x-backend-sharding-state') == 'sharded':
-                    # cache in infocache even if no shard ranges returned; this
-                    # is unexpected but use that result for this request
-                    infocache = req.environ.setdefault('swift.infocache', {})
-                    infocache[cache_key] = tuple(cached_ranges)
-                    memcache = cache_from_env(req.environ, True)
-                    if memcache and cached_ranges:
-                        # cache in memcache only if shard ranges as expected
-                        self.logger.debug('Caching %d shards for %s',
-                                          len(cached_ranges), req.path_qs)
-                        memcache.set(
-                            cache_key, cached_ranges,
-                            time=self.app.recheck_listing_shard_ranges)
-                # filter returned shard ranges according to request constraints
-                resp.body = self._filter_resp_shard_ranges(req, cached_ranges)
+            cached_range_dicts = self._store_shard_ranges_in_cache(req, resp)
+            if cached_range_dicts:
+                resp.body = self._make_shard_ranges_response_body(
+                    req, cached_range_dicts)
         return resp

+    def _GET_using_cache(self, req, info):
+        # It may be possible to fulfil the request from cache: we only reach
+        # here if request record_type is 'shard' or 'auto', so if the container
+        # state is 'sharded' then look for cached shard ranges. However, if
+        # X-Newest is true then we always fetch from the backend servers.
+        get_newest = config_true_value(req.headers.get('x-newest', False))
+        if get_newest:
+            self.logger.debug(
+                'Skipping shard cache lookup (x-newest) for %s', req.path_qs)
+        elif (info and is_success(info['status']) and
+              info.get('sharding_state') == 'sharded'):
+            # container is sharded so we may have the shard ranges cached
+            resp = self._get_shard_ranges_from_cache(req, info)
+            if resp:
+                return resp
+
+        # The request was not fulfilled from cache so send to backend server
+        return self._get_shard_ranges_from_backend(req)
+
     def GETorHEAD(self, req):
         """Handler for HTTP GET/HEAD requests."""
         ai = self.account_info(self.account_name, req)