proxy: add new metrics to account/container_info cache for skip/miss

This patch adds more granularity to the metrics for account_info and
container_info cache lookups and their related backend lookups.

Before this patch, related metrics are:
  1.account.info.cache.[hit|miss|skip]
  2.container.info.cache.[hit|miss|skip]

With this patch, they become:
  1.account/container.info.infocache.hit
    cache hits with infocache.
  2.account/container.info.cache.hit
    cache hits with memcache.
  3.account/container.info.cache.[miss|skip|disabled]
                    .<status_int>
    These are backend lookups made for the following reasons:
      miss: cache misses.
      skip: the selective skips per skip percentage config.
      disabled: memcache is disabled.
    For each kind of operation, the <status_int> suffix counts
    operations by backend response status; summing all of the status
    sub-metrics gives the total for that operation (see the sketch
    below).
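
Below is a minimal illustrative sketch (plain Python, not Swift code) of
how the per-status sub-metrics roll up into a per-operation total; the
metric names incremented here are hypothetical examples of what a proxy
might emit:

  from collections import Counter

  counters = Counter()

  def increment(name):
      # stand-in for the statsd-style logger.increment() used by the proxy
      counters[name] += 1

  # hypothetical lookups recorded while serving requests
  increment('container.info.infocache.hit')   # hit in per-request infocache
  increment('container.info.cache.hit')       # hit in memcache
  increment('container.info.cache.miss.200')  # memcache miss, backend 200
  increment('container.info.cache.miss.404')  # memcache miss, backend 404
  increment('container.info.cache.skip.200')  # selective skip, backend 200

  # the total for one kind of operation is the sum of its status sub-metrics
  total_misses = sum(count for name, count in counters.items()
                     if name.startswith('container.info.cache.miss.'))
  print(total_misses)  # 2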

UpgradeImpact
=============
Metrics dashboards will need updates to display the changed metrics
correctly; some infocache metrics are also newly added. See the message
above for all of the changes (a hypothetical old-to-new mapping is
sketched below).
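
As a purely hypothetical example of the dashboard-side rename, expressed
as a Python mapping (left-hand names are the pre-patch metrics; a
trailing '.*' means "sum every <status_int> sub-metric"):

  OLD_TO_NEW = {
      'account.info.cache.hit': 'account.info.cache.hit',          # unchanged
      'account.info.cache.miss': 'account.info.cache.miss.*',      # split by status
      'account.info.cache.skip': 'account.info.cache.skip.*',      # split by status
      'container.info.cache.hit': 'container.info.cache.hit',      # unchanged
      'container.info.cache.miss': 'container.info.cache.miss.*',  # split by status
      'container.info.cache.skip': 'container.info.cache.skip.*',  # split by status
  }
  # Newly added, with no pre-patch equivalent:
  #   account.info.infocache.hit, container.info.infocache.hit,
  #   account/container.info.cache.disabled.<status_int>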

Change-Id: I60a9f1c349b4bc78ecb850fb26ae56eb20fa39c6
Jianjian Huo  2023-06-08 11:36:45 -07:00
parent c51e81f640
commit bc300a516b
5 changed files with 197 additions and 84 deletions


@@ -416,11 +416,43 @@ def get_object_info(env, app, path=None, swift_source=None):
    return info


def get_container_info(env, app, swift_source=None):
def _record_ac_info_cache_metrics(
        app, cache_state, container=None, resp=None):
    """
    Record a single cache operation by account or container lookup into its
    corresponding metrics.

    :param app: the application object
    :param cache_state: the state of this cache operation, includes
        infocache_hit, memcache hit, miss, error, skip, force_skip
        and disabled.
    :param container: the container name
    :param resp: the response from either backend or cache hit.
    """
    try:
        proxy_app = app._pipeline_final_app
    except AttributeError:
        logger = None
    else:
        logger = proxy_app.logger
    op_type = 'container.info' if container else 'account.info'
    if logger:
        record_cache_op_metrics(logger, op_type, cache_state, resp)


def get_container_info(env, app, swift_source=None, cache_only=False):
    """
    Get the info structure for a container, based on env and app.
    This is useful to middlewares.

    :param env: the environment used by the current request
    :param app: the application object
    :param swift_source: Used to mark the request as originating out of
        middleware. Will be logged in proxy logs.
    :param cache_only: If true, indicates that caller doesn't want to HEAD the
        backend container when cache miss.
    :returns: the object info

    .. note::

        This call bypasses auth. Success does not imply that the request has
@@ -445,9 +477,11 @@ def get_container_info(env, app, swift_source=None):
    except AttributeError:
        logged_app = proxy_app = app
    # Check in environment cache and in memcache (in that order)
    info = _get_info_from_caches(proxy_app, env, account, container)
    info, cache_state = _get_info_from_caches(
        proxy_app, env, account, container)

    if not info:
    resp = None
    if not info and not cache_only:
        # Cache miss; go HEAD the container and populate the caches
        env.setdefault('swift.infocache', {})
        # Before checking the container, make sure the account exists.
@@ -462,6 +496,8 @@ def get_container_info(env, app, swift_source=None):
        if not is_autocreate_account:
            account_info = get_account_info(env, logged_app, swift_source)
            if not account_info or not is_success(account_info['status']):
                _record_ac_info_cache_metrics(
                    logged_app, cache_state, container)
                return headers_to_container_info({}, 0)

        req = _prepare_pre_auth_info_request(
@@ -483,7 +519,8 @@ def get_container_info(env, app, swift_source=None):
    if info:
        info = deepcopy(info)  # avoid mutating what's in swift.infocache
    else:
        info = headers_to_container_info({}, 503)
        status_int = 0 if cache_only else 503
        info = headers_to_container_info({}, status_int)

    # Old data format in memcache immediately after a Swift upgrade; clean
    # it up so consumers of get_container_info() aren't exposed to it.
@@ -510,6 +547,7 @@ def get_container_info(env, app, swift_source=None):
        versions_info = get_container_info(versions_req.environ, app)
        info['bytes'] = info['bytes'] + versions_info['bytes']

    _record_ac_info_cache_metrics(logged_app, cache_state, container, resp)
    return info
@@ -539,10 +577,12 @@ def get_account_info(env, app, swift_source=None):
    except AttributeError:
        pass
    # Check in environment cache and in memcache (in that order)
    info = _get_info_from_caches(app, env, account)
    info, cache_state = _get_info_from_caches(app, env, account)

    # Cache miss; go HEAD the account and populate the caches
    if not info:
    if info:
        resp = None
    else:
        env.setdefault('swift.infocache', {})
        req = _prepare_pre_auth_info_request(
            env, "/%s/%s" % (version, wsgi_account),
@@ -581,6 +621,7 @@ def get_account_info(env, app, swift_source=None):
        else:
            info[field] = int(info[field])

    _record_ac_info_cache_metrics(app, cache_state, container=None, resp=resp)
    return info
@@ -766,10 +807,13 @@ def record_cache_op_metrics(
    else:
        # the cases of cache_state is memcache miss, error, skip, force_skip
        # or disabled.
        if resp is not None:
            # Note: currently there is no case that 'resp' will be None.
        if resp:
            logger.increment(
                '%s.cache.%s.%d' % (op_type, cache_state, resp.status_int))
        else:
            # In some situation, we choose not to lookup backend after cache
            # miss.
            logger.increment('%s.cache.%s' % (op_type, cache_state))


def _get_info_from_memcache(app, env, account, container=None):
@@ -781,61 +825,58 @@ def _get_info_from_memcache(app, env, account, container=None):
    :param account: the account name
    :param container: the container name
    :returns: a dictionary of cached info on cache hit, None on miss. Also
      returns None if memcache is not in use.
    :returns: a tuple of two values, the first is a dictionary of cached info
      on cache hit, None on miss or if memcache is not in use; the second is
      cache state.
    """
    cache_key = get_cache_key(account, container)
    memcache = cache_from_env(env, True)
    if memcache:
        try:
            proxy_app = app._pipeline_final_app
        except AttributeError:
            # Only the middleware entry-points get a reference to the
            # proxy-server app; if a middleware composes itself as multiple
            # filters, we'll just have to choose a reasonable default
            skip_chance = 0.0
            logger = None
    if not memcache:
        return None, 'disabled'
    try:
        proxy_app = app._pipeline_final_app
    except AttributeError:
        # Only the middleware entry-points get a reference to the
        # proxy-server app; if a middleware composes itself as multiple
        # filters, we'll just have to choose a reasonable default
        skip_chance = 0.0
    else:
        if container:
            skip_chance = proxy_app.container_existence_skip_cache
        else:
            if container:
                skip_chance = proxy_app.container_existence_skip_cache
            skip_chance = proxy_app.account_existence_skip_cache
    cache_key = get_cache_key(account, container)
    if skip_chance and random.random() < skip_chance:
        info = None
        cache_state = 'skip'
    else:
        info = memcache.get(cache_key)
        cache_state = 'hit' if info else 'miss'
    if info and six.PY2:
        # Get back to native strings
        new_info = {}
        for key in info:
            new_key = key.encode("utf-8") if isinstance(
                key, six.text_type) else key
            if isinstance(info[key], six.text_type):
                new_info[new_key] = info[key].encode("utf-8")
            elif isinstance(info[key], dict):
                new_info[new_key] = {}
                for subkey, value in info[key].items():
                    new_subkey = subkey.encode("utf-8") if isinstance(
                        subkey, six.text_type) else subkey
                    if isinstance(value, six.text_type):
                        new_info[new_key][new_subkey] = \
                            value.encode("utf-8")
                    else:
                        new_info[new_key][new_subkey] = value
            else:
                skip_chance = proxy_app.account_existence_skip_cache
            logger = proxy_app.logger
        info_type = 'container' if container else 'account'
        if skip_chance and random.random() < skip_chance:
            info = None
            if logger:
                logger.increment('%s.info.cache.skip' % info_type)
        else:
            info = memcache.get(cache_key)
            if logger:
                logger.increment('%s.info.cache.%s' % (
                    info_type, 'hit' if info else 'miss'))
        if info and six.PY2:
            # Get back to native strings
            new_info = {}
            for key in info:
                new_key = key.encode("utf-8") if isinstance(
                    key, six.text_type) else key
                if isinstance(info[key], six.text_type):
                    new_info[new_key] = info[key].encode("utf-8")
                elif isinstance(info[key], dict):
                    new_info[new_key] = {}
                    for subkey, value in info[key].items():
                        new_subkey = subkey.encode("utf-8") if isinstance(
                            subkey, six.text_type) else subkey
                        if isinstance(value, six.text_type):
                            new_info[new_key][new_subkey] = \
                                value.encode("utf-8")
                        else:
                            new_info[new_key][new_subkey] = value
                else:
                    new_info[new_key] = info[key]
            info = new_info
        if info:
            env.setdefault('swift.infocache', {})[cache_key] = info
        return info
    return None
                new_info[new_key] = info[key]
        info = new_info
    if info:
        env.setdefault('swift.infocache', {})[cache_key] = info
    return info, cache_state


def _get_info_from_caches(app, env, account, container=None):
@@ -845,13 +886,16 @@ def _get_info_from_caches(app, env, account, container=None):
    :param app: the application object
    :param env: the environment used by the current request
    :returns: the cached info or None if not cached
    :returns: a tuple of (the cached info or None if not cached, cache state)
    """
    info = _get_info_from_infocache(env, account, container)
    if info is None:
        info = _get_info_from_memcache(app, env, account, container)
    return info
    if info:
        cache_state = 'infocache_hit'
    else:
        info, cache_state = _get_info_from_memcache(
            app, env, account, container)
    return info, cache_state


def _prepare_pre_auth_info_request(env, path, swift_source):


@@ -28,7 +28,7 @@ from swift.common.http import HTTP_ACCEPTED, is_success
from swift.common.request_helpers import get_sys_meta_prefix, get_param, \
    constrain_req_limit, validate_container_params
from swift.proxy.controllers.base import Controller, delay_denial, \
    cors_validation, set_info_cache, clear_info_cache, _get_info_from_caches, \
    cors_validation, set_info_cache, clear_info_cache, get_container_info, \
    record_cache_op_metrics, get_cache_key, headers_from_container_info, \
    update_headers
from swift.common.storage_policy import POLICIES

@@ -389,9 +389,10 @@ class ContainerController(Controller):
                and get_param(req, 'states') == 'listing'
                and record_type != 'object'):
            may_get_listing_shards = True
            info = _get_info_from_caches(self.app, req.environ,
                                         self.account_name,
                                         self.container_name)
            # Only lookup container info from cache and skip the backend HEAD,
            # since we are going to GET the backend container anyway.
            info = get_container_info(
                req.environ, self.app, swift_source=None, cache_only=True)
        else:
            info = None
            may_get_listing_shards = False


@@ -542,6 +542,50 @@ class TestFuncs(BaseTest):
                                 [(k, str, v, str)
                                  for k, v in subdict.items()])

    def test_get_container_info_only_lookup_cache(self):
        # no container info is cached in cache.
        req = Request.blank("/v1/AUTH_account/cont",
                            environ={'swift.cache': FakeCache({})})
        resp = get_container_info(
            req.environ, self.app, swift_source=None, cache_only=True)
        self.assertEqual(resp['storage_policy'], 0)
        self.assertEqual(resp['bytes'], 0)
        self.assertEqual(resp['object_count'], 0)
        self.assertEqual(resp['versions'], None)
        self.assertEqual(
            [x[0][0] for x in self.logger.logger.log_dict['increment']],
            ['container.info.cache.miss'])

        # container info is cached in cache.
        self.logger.clear()
        cache_stub = {
            'status': 404, 'bytes': 3333, 'object_count': 10,
            'versions': u"\U0001F4A9",
            'meta': {u'some-\N{SNOWMAN}': u'non-ascii meta \U0001F334'}}
        req = Request.blank("/v1/account/cont",
                            environ={'swift.cache': FakeCache(cache_stub)})
        resp = get_container_info(
            req.environ, self.app, swift_source=None, cache_only=True)
        self.assertEqual([(k, type(k)) for k in resp],
                         [(k, str) for k in resp])
        self.assertEqual(resp['storage_policy'], 0)
        self.assertEqual(resp['bytes'], 3333)
        self.assertEqual(resp['object_count'], 10)
        self.assertEqual(resp['status'], 404)
        expected = u'\U0001F4A9'
        if six.PY2:
            expected = expected.encode('utf8')
        self.assertEqual(resp['versions'], expected)
        for subdict in resp.values():
            if isinstance(subdict, dict):
                self.assertEqual([(k, type(k), v, type(v))
                                  for k, v in subdict.items()],
                                 [(k, str, v, str)
                                  for k, v in subdict.items()])
        self.assertEqual(
            [x[0][0] for x in self.logger.logger.log_dict['increment']],
            ['container.info.cache.hit'])

    def test_get_cache_key(self):
        self.assertEqual(get_cache_key("account", "cont"),
                         'container/account/cont')


@@ -2914,7 +2914,8 @@ class TestContainerController(TestRingBase):
            req.environ['swift.infocache'][cache_key])
        self.assertEqual(
            [x[0][0] for x in self.logger.logger.log_dict['increment']],
            ['container.shard_listing.infocache.hit'])
            ['container.info.infocache.hit',
             'container.shard_listing.infocache.hit'])
        # put this back the way we found it for later subtests
        self.app.container_listing_shard_ranges_skip_cache = 0.0


@@ -4330,7 +4330,11 @@ class TestReplicatedObjectController(
        self.assertEqual(resp.status_int, 202)
        stats = self.app.logger.get_increment_counts()
        self.assertEqual(
            {'object.shard_updating.cache.disabled.200': 1},
            {'account.info.cache.disabled.200': 1,
             'account.info.infocache.hit': 2,
             'container.info.cache.disabled.200': 1,
             'container.info.infocache.hit': 1,
             'object.shard_updating.cache.disabled.200': 1},
            stats)
        backend_requests = fake_conn.requests
        # verify statsd prefix is not mutated
@@ -4422,8 +4426,10 @@ class TestReplicatedObjectController(
        self.assertEqual(resp.status_int, 202)
        stats = self.app.logger.get_increment_counts()
        self.assertEqual({'account.info.cache.miss': 1,
                          'container.info.cache.miss': 1,
        self.assertEqual({'account.info.cache.miss.200': 1,
                          'account.info.infocache.hit': 2,
                          'container.info.cache.miss.200': 1,
                          'container.info.infocache.hit': 1,
                          'object.shard_updating.cache.miss.200': 1},
                         stats)
        self.assertEqual([], self.app.logger.log_dict['set_statsd_prefix'])
@@ -4533,8 +4539,10 @@ class TestReplicatedObjectController(
        self.assertEqual(resp.status_int, 202)
        stats = self.app.logger.get_increment_counts()
        self.assertEqual({'account.info.cache.miss': 1,
                          'container.info.cache.miss': 1,
        self.assertEqual({'account.info.cache.miss.200': 1,
                          'account.info.infocache.hit': 1,
                          'container.info.cache.miss.200': 1,
                          'container.info.infocache.hit': 1,
                          'object.shard_updating.cache.hit': 1}, stats)
        # verify statsd prefix is not mutated
        self.assertEqual([], self.app.logger.log_dict['set_statsd_prefix'])
@@ -4629,7 +4637,11 @@ class TestReplicatedObjectController(
        # verify request hitted infocache.
        self.assertEqual(resp.status_int, 202)
        stats = self.app.logger.get_increment_counts()
        self.assertEqual({'object.shard_updating.infocache.hit': 1}, stats)
        self.assertEqual({'account.info.cache.disabled.200': 1,
                          'account.info.infocache.hit': 1,
                          'container.info.cache.disabled.200': 1,
                          'container.info.infocache.hit': 1,
                          'object.shard_updating.infocache.hit': 1}, stats)
        # verify statsd prefix is not mutated
        self.assertEqual([], self.app.logger.log_dict['set_statsd_prefix'])
@@ -4726,8 +4738,10 @@ class TestReplicatedObjectController(
        self.assertEqual(resp.status_int, 202)
        stats = self.app.logger.get_increment_counts()
        self.assertEqual({'account.info.cache.miss': 1,
                          'container.info.cache.miss': 1,
        self.assertEqual({'account.info.cache.miss.200': 1,
                          'account.info.infocache.hit': 1,
                          'container.info.cache.miss.200': 1,
                          'container.info.infocache.hit': 1,
                          'object.shard_updating.cache.hit': 1}, stats)

        # cached shard ranges are still there
@@ -4769,12 +4783,15 @@ class TestReplicatedObjectController(
        self.assertEqual(resp.status_int, 202)
        stats = self.app.logger.get_increment_counts()
        self.assertEqual({'account.info.cache.miss': 1,
                          'account.info.cache.hit': 1,
                          'container.info.cache.miss': 1,
        self.assertEqual({'account.info.cache.miss.200': 1,
                          'account.info.infocache.hit': 1,
                          'container.info.cache.miss.200': 1,
                          'container.info.infocache.hit': 2,
                          'object.shard_updating.cache.hit': 1,
                          'container.info.cache.hit': 1,
                          'object.shard_updating.cache.skip.200': 1,
                          'object.shard_updating.cache.hit': 1}, stats)
                          'account.info.cache.hit': 1,
                          'object.shard_updating.cache.skip.200': 1},
                         stats)
        # verify statsd prefix is not mutated
        self.assertEqual([], self.app.logger.log_dict['set_statsd_prefix'])
@@ -4836,9 +4853,11 @@ class TestReplicatedObjectController(
        stats = self.app.logger.get_increment_counts()
        self.assertEqual(stats, {
            'account.info.cache.hit': 2,
            'account.info.cache.miss': 1,
            'account.info.cache.miss.200': 1,
            'account.info.infocache.hit': 1,
            'container.info.cache.hit': 2,
            'container.info.cache.miss': 1,
            'container.info.cache.miss.200': 1,
            'container.info.infocache.hit': 3,
            'object.shard_updating.cache.skip.200': 1,
            'object.shard_updating.cache.hit': 1,
            'object.shard_updating.cache.error.200': 1})
@@ -4879,7 +4898,11 @@ class TestReplicatedObjectController(
        self.assertEqual(resp.status_int, 202)
        stats = self.app.logger.get_increment_counts()
        self.assertEqual(
            {'object.shard_updating.cache.disabled.404': 1},
            {'account.info.cache.disabled.200': 1,
             'account.info.infocache.hit': 2,
             'container.info.cache.disabled.200': 1,
             'container.info.infocache.hit': 1,
             'object.shard_updating.cache.disabled.404': 1},
            stats)
        backend_requests = fake_conn.requests