Merge "Make statsd sample rate behave better."
This commit is contained in:
commit
23f33b2069
@ -416,7 +416,8 @@ configuration entries (see the sample configuration files)::
|
||||
|
||||
log_statsd_host = localhost
|
||||
log_statsd_port = 8125
|
||||
log_statsd_default_sample_rate = 1
|
||||
log_statsd_default_sample_rate = 1.0
|
||||
log_statsd_sample_rate_factor = 1.0
|
||||
log_statsd_metric_prefix = [empty-string]
|
||||
|
||||
If `log_statsd_host` is not set, this feature is disabled. The default values
|
||||
@ -431,9 +432,24 @@ probability of sending a sample for any given event or timing measurement.
|
||||
This sample rate is sent with each sample to StatsD and used to
|
||||
multiply the value. For example, with a sample rate of 0.5, StatsD will
|
||||
multiply that counter's value by 2 when flushing the metric to an upstream
|
||||
monitoring system (Graphite_, Ganglia_, etc.). To get the best data, start
|
||||
with the default `log_statsd_default_sample_rate` value of 1 and only lower
|
||||
it as needed.
|
||||
monitoring system (Graphite_, Ganglia_, etc.).
|
||||
|
||||
Some relatively high-frequency metrics have a default sample rate less than
|
||||
one. If you want to override the default sample rate for all metrics whose
|
||||
default sample rate is not specified in the Swift source, you may set
|
||||
`log_statsd_default_sample_rate` to a value less than one. This is NOT
|
||||
recommended (see next paragraph). A better way to reduce StatsD load is to
|
||||
adjust `log_statsd_sample_rate_factor` to a value less than one. The
|
||||
`log_statsd_sample_rate_factor` is multiplied to any sample rate (either the
|
||||
global default or one specified by the actual metric logging call in the Swift
|
||||
source) prior to handling. In other words, this one tunable can lower the
|
||||
frequency of all StatsD logging by a proportional amount.
|
||||
|
||||
To get the best data, start with the default `log_statsd_default_sample_rate`
|
||||
and `log_statsd_sample_rate_factor` values of 1 and only lower
|
||||
`log_statsd_sample_rate_factor` if needed. The
|
||||
`log_statsd_default_sample_rate` should not be used and remains for backward
|
||||
compatibility only.
|
||||
|
||||
The metric prefix will be prepended to every metric sent to the StatsD server
|
||||
For example, with::
|
||||
|
@ -24,7 +24,8 @@
|
||||
# You can enable StatsD logging here:
|
||||
# log_statsd_host = localhost
|
||||
# log_statsd_port = 8125
|
||||
# log_statsd_default_sample_rate = 1
|
||||
# log_statsd_default_sample_rate = 1.0
|
||||
# log_statsd_sample_rate_factor = 1.0
|
||||
# log_statsd_metric_prefix =
|
||||
# If you don't mind the extra disk space usage in overhead, you can turn this
|
||||
# on to preallocate disk space with SQLite databases to decrease fragmentation.
|
||||
|
@ -27,7 +27,8 @@
|
||||
# You can enable StatsD logging here:
|
||||
# log_statsd_host = localhost
|
||||
# log_statsd_port = 8125
|
||||
# log_statsd_default_sample_rate = 1
|
||||
# log_statsd_default_sample_rate = 1.0
|
||||
# log_statsd_sample_rate_factor = 1.0
|
||||
# log_statsd_metric_prefix =
|
||||
# If you don't mind the extra disk space usage in overhead, you can turn this
|
||||
# on to preallocate disk space with SQLite databases to decrease fragmentation.
|
||||
|
@ -16,7 +16,8 @@
|
||||
# You can enable StatsD logging here:
|
||||
# log_statsd_host = localhost
|
||||
# log_statsd_port = 8125
|
||||
# log_statsd_default_sample_rate = 1
|
||||
# log_statsd_default_sample_rate = 1.0
|
||||
# log_statsd_sample_rate_factor = 1.0
|
||||
# log_statsd_metric_prefix =
|
||||
|
||||
[object-expirer]
|
||||
|
@ -25,7 +25,8 @@
|
||||
# You can enable StatsD logging here:
|
||||
# log_statsd_host = localhost
|
||||
# log_statsd_port = 8125
|
||||
# log_statsd_default_sample_rate = 1
|
||||
# log_statsd_default_sample_rate = 1.0
|
||||
# log_statsd_sample_rate_factor = 1.0
|
||||
# log_statsd_metric_prefix =
|
||||
# eventlet_debug = false
|
||||
# You can set fallocate_reserve to the number of bytes you'd like fallocate to
|
||||
|
@ -26,7 +26,8 @@
|
||||
# You can enable StatsD logging here:
|
||||
# log_statsd_host = localhost
|
||||
# log_statsd_port = 8125
|
||||
# log_statsd_default_sample_rate = 1
|
||||
# log_statsd_default_sample_rate = 1.0
|
||||
# log_statsd_sample_rate_factor = 1.0
|
||||
# log_statsd_metric_prefix =
|
||||
# Use a comma separated list of full url (http://foo.bar:1234,https://foo.bar)
|
||||
# cors_allow_origin =
|
||||
@ -316,7 +317,8 @@ use = egg:swift#proxy_logging
|
||||
# You can use log_statsd_* from [DEFAULT] or override them here:
|
||||
# access_log_statsd_host = localhost
|
||||
# access_log_statsd_port = 8125
|
||||
# access_log_statsd_default_sample_rate = 1
|
||||
# access_log_statsd_default_sample_rate = 1.0
|
||||
# access_log_statsd_sample_rate_factor = 1.0
|
||||
# access_log_statsd_metric_prefix =
|
||||
# access_log_headers = False
|
||||
# What HTTP methods are allowed for StatsD logging (comma-sep); request methods
|
||||
|
@ -62,7 +62,7 @@ class AccountController(object):
|
||||
return AccountBroker(db_path, account=account, logger=self.logger)
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def DELETE(self, req):
|
||||
"""Handle HTTP DELETE request."""
|
||||
try:
|
||||
@ -84,7 +84,7 @@ class AccountController(object):
|
||||
return HTTPNoContent(request=req)
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def PUT(self, req):
|
||||
"""Handle HTTP PUT request."""
|
||||
try:
|
||||
@ -139,7 +139,7 @@ class AccountController(object):
|
||||
return HTTPAccepted(request=req)
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def HEAD(self, req):
|
||||
"""Handle HTTP HEAD request."""
|
||||
# TODO(refactor): The account server used to provide a 'account and
|
||||
@ -186,7 +186,7 @@ class AccountController(object):
|
||||
return HTTPNoContent(request=req, headers=headers, charset='utf-8')
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def GET(self, req):
|
||||
"""Handle HTTP GET request."""
|
||||
try:
|
||||
@ -274,7 +274,7 @@ class AccountController(object):
|
||||
return ret
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def REPLICATE(self, req):
|
||||
"""
|
||||
Handle HTTP REPLICATE request.
|
||||
@ -298,7 +298,7 @@ class AccountController(object):
|
||||
return ret
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def POST(self, req):
|
||||
"""Handle HTTP POST request."""
|
||||
try:
|
||||
|
@ -90,6 +90,7 @@ class ProxyLoggingMiddleware(object):
|
||||
for key in ('log_facility', 'log_name', 'log_level', 'log_udp_host',
|
||||
'log_udp_port', 'log_statsd_host', 'log_statsd_port',
|
||||
'log_statsd_default_sample_rate',
|
||||
'log_statsd_sample_rate_factor',
|
||||
'log_statsd_metric_prefix'):
|
||||
value = conf.get('access_' + key, conf.get(key, None))
|
||||
if value:
|
||||
|
@ -421,12 +421,13 @@ class LoggerFileObject(object):
|
||||
|
||||
class StatsdClient(object):
|
||||
def __init__(self, host, port, base_prefix='', tail_prefix='',
|
||||
default_sample_rate=1):
|
||||
default_sample_rate=1, sample_rate_factor=1):
|
||||
self._host = host
|
||||
self._port = port
|
||||
self._base_prefix = base_prefix
|
||||
self.set_prefix(tail_prefix)
|
||||
self._default_sample_rate = default_sample_rate
|
||||
self._sample_rate_factor = sample_rate_factor
|
||||
self._target = (self._host, self._port)
|
||||
self.random = random
|
||||
|
||||
@ -443,6 +444,7 @@ class StatsdClient(object):
|
||||
def _send(self, m_name, m_value, m_type, sample_rate):
|
||||
if sample_rate is None:
|
||||
sample_rate = self._default_sample_rate
|
||||
sample_rate = sample_rate * self._sample_rate_factor
|
||||
parts = ['%s%s:%s' % (self._prefix, m_name, m_value), m_type]
|
||||
if sample_rate < 1:
|
||||
if self.random() < sample_rate:
|
||||
@ -474,25 +476,30 @@ class StatsdClient(object):
|
||||
sample_rate)
|
||||
|
||||
|
||||
def timing_stats(func):
|
||||
def timing_stats(**dec_kwargs):
|
||||
"""
|
||||
Decorator that logs timing events or errors for public methods in swift's
|
||||
wsgi server controllers, based on response code.
|
||||
Returns a decorator that logs timing events or errors for public methods in
|
||||
swift's wsgi server controllers, based on response code.
|
||||
"""
|
||||
def decorating_func(func):
|
||||
method = func.func_name
|
||||
|
||||
@functools.wraps(func)
|
||||
def _timing_stats(ctrl, *args, **kwargs):
|
||||
start_time = time.time()
|
||||
resp = func(ctrl, *args, **kwargs)
|
||||
if is_success(resp.status_int) or is_redirection(resp.status_int) or \
|
||||
if is_success(resp.status_int) or \
|
||||
is_redirection(resp.status_int) or \
|
||||
resp.status_int == HTTP_NOT_FOUND:
|
||||
ctrl.logger.timing_since(method + '.timing', start_time)
|
||||
ctrl.logger.timing_since(method + '.timing',
|
||||
start_time, **dec_kwargs)
|
||||
else:
|
||||
ctrl.logger.timing_since(method + '.errors.timing', start_time)
|
||||
ctrl.logger.timing_since(method + '.errors.timing',
|
||||
start_time, **dec_kwargs)
|
||||
return resp
|
||||
|
||||
return _timing_stats
|
||||
return decorating_func
|
||||
|
||||
|
||||
# double inheritance to support property with setter
|
||||
@ -600,14 +607,11 @@ class LogAdapter(logging.LoggerAdapter, object):
|
||||
|
||||
def statsd_delegate(statsd_func_name):
|
||||
"""
|
||||
Factory which creates methods which delegate to methods on
|
||||
Factory to create methods which delegate to methods on
|
||||
self.logger.statsd_client (an instance of StatsdClient). The
|
||||
created methods conditionally delegate to a method whose name is given
|
||||
in 'statsd_func_name'. The created delegate methods are a no-op when
|
||||
StatsD logging is not configured. The created delegate methods also
|
||||
handle the defaulting of sample_rate (to either the default specified
|
||||
in the config with 'log_statsd_default_sample_rate' or the value passed
|
||||
into delegate function).
|
||||
StatsD logging is not configured.
|
||||
|
||||
:param statsd_func_name: the name of a method on StatsdClient.
|
||||
"""
|
||||
@ -665,7 +669,8 @@ def get_logger(conf, name=None, log_to_console=False, log_route=None,
|
||||
log_address = /dev/log
|
||||
log_statsd_host = (disabled)
|
||||
log_statsd_port = 8125
|
||||
log_statsd_default_sample_rate = 1
|
||||
log_statsd_default_sample_rate = 1.0
|
||||
log_statsd_sample_rate_factor = 1.0
|
||||
log_statsd_metric_prefix = (empty-string)
|
||||
|
||||
:param conf: Configuration dict to read settings from
|
||||
@ -697,7 +702,8 @@ def get_logger(conf, name=None, log_to_console=False, log_route=None,
|
||||
SysLogHandler.LOG_LOCAL0)
|
||||
udp_host = conf.get('log_udp_host')
|
||||
if udp_host:
|
||||
udp_port = conf.get('log_udp_port', logging.handlers.SYSLOG_UDP_PORT)
|
||||
udp_port = int(conf.get('log_udp_port',
|
||||
logging.handlers.SYSLOG_UDP_PORT))
|
||||
handler = SysLogHandler(address=(udp_host, udp_port),
|
||||
facility=facility)
|
||||
else:
|
||||
@ -737,8 +743,11 @@ def get_logger(conf, name=None, log_to_console=False, log_route=None,
|
||||
base_prefix = conf.get('log_statsd_metric_prefix', '')
|
||||
default_sample_rate = float(conf.get(
|
||||
'log_statsd_default_sample_rate', 1))
|
||||
sample_rate_factor = float(conf.get(
|
||||
'log_statsd_sample_rate_factor', 1))
|
||||
statsd_client = StatsdClient(statsd_host, statsd_port, base_prefix,
|
||||
name, default_sample_rate)
|
||||
name, default_sample_rate,
|
||||
sample_rate_factor)
|
||||
logger.statsd_client = statsd_client
|
||||
else:
|
||||
logger.statsd_client = None
|
||||
|
@ -163,7 +163,7 @@ class ContainerController(object):
|
||||
return None
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def DELETE(self, req):
|
||||
"""Handle HTTP DELETE request."""
|
||||
try:
|
||||
@ -205,7 +205,7 @@ class ContainerController(object):
|
||||
return HTTPNotFound()
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def PUT(self, req):
|
||||
"""Handle HTTP PUT request."""
|
||||
try:
|
||||
@ -268,7 +268,7 @@ class ContainerController(object):
|
||||
return HTTPAccepted(request=req)
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats(sample_rate=0.1)
|
||||
def HEAD(self, req):
|
||||
"""Handle HTTP HEAD request."""
|
||||
try:
|
||||
@ -306,7 +306,7 @@ class ContainerController(object):
|
||||
return HTTPNoContent(request=req, headers=headers, charset='utf-8')
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def GET(self, req):
|
||||
"""Handle HTTP GET request."""
|
||||
try:
|
||||
@ -413,7 +413,7 @@ class ContainerController(object):
|
||||
return ret
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats(sample_rate=0.01)
|
||||
def REPLICATE(self, req):
|
||||
"""
|
||||
Handle HTTP REPLICATE request (json-encoded RPC calls for replication.)
|
||||
@ -436,7 +436,7 @@ class ContainerController(object):
|
||||
return ret
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def POST(self, req):
|
||||
"""Handle HTTP POST request."""
|
||||
try:
|
||||
|
@ -563,7 +563,7 @@ class ObjectController(object):
|
||||
host, partition, contdevice, headers_out, objdevice)
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def POST(self, request):
|
||||
"""Handle HTTP POST requests for the Swift Object Server."""
|
||||
try:
|
||||
@ -612,7 +612,7 @@ class ObjectController(object):
|
||||
return HTTPAccepted(request=request)
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def PUT(self, request):
|
||||
"""Handle HTTP PUT requests for the Swift Object Server."""
|
||||
try:
|
||||
@ -710,7 +710,7 @@ class ObjectController(object):
|
||||
return resp
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def GET(self, request):
|
||||
"""Handle HTTP GET requests for the Swift Object Server."""
|
||||
try:
|
||||
@ -790,7 +790,7 @@ class ObjectController(object):
|
||||
return request.get_response(response)
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats(sample_rate=0.8)
|
||||
def HEAD(self, request):
|
||||
"""Handle HTTP HEAD requests for the Swift Object Server."""
|
||||
try:
|
||||
@ -830,7 +830,7 @@ class ObjectController(object):
|
||||
return response
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats()
|
||||
def DELETE(self, request):
|
||||
"""Handle HTTP DELETE requests for the Swift Object Server."""
|
||||
try:
|
||||
@ -878,7 +878,7 @@ class ObjectController(object):
|
||||
return resp
|
||||
|
||||
@public
|
||||
@timing_stats
|
||||
@timing_stats(sample_rate=0.1)
|
||||
def REPLICATE(self, request):
|
||||
"""
|
||||
Handle REPLICATE requests for the Swift Object Server. This is used
|
||||
|
@ -35,6 +35,7 @@ from shutil import rmtree
|
||||
from StringIO import StringIO
|
||||
from functools import partial
|
||||
from tempfile import TemporaryFile, NamedTemporaryFile
|
||||
from logging import handlers as logging_handlers
|
||||
|
||||
from eventlet import sleep
|
||||
|
||||
@ -378,6 +379,62 @@ class TestUtils(unittest.TestCase):
|
||||
self.assertEquals(sio.getvalue(),
|
||||
'test1\ntest3\ntest4\ntest6\n')
|
||||
|
||||
def test_get_logger_sysloghandler_plumbing(self):
|
||||
orig_sysloghandler = utils.SysLogHandler
|
||||
syslog_handler_args = []
|
||||
def syslog_handler_catcher(*args, **kwargs):
|
||||
syslog_handler_args.append((args, kwargs))
|
||||
return orig_sysloghandler(*args, **kwargs)
|
||||
syslog_handler_catcher.LOG_LOCAL0 = orig_sysloghandler.LOG_LOCAL0
|
||||
syslog_handler_catcher.LOG_LOCAL3 = orig_sysloghandler.LOG_LOCAL3
|
||||
|
||||
try:
|
||||
utils.SysLogHandler = syslog_handler_catcher
|
||||
logger = utils.get_logger({
|
||||
'log_facility': 'LOG_LOCAL3',
|
||||
}, 'server', log_route='server')
|
||||
self.assertEquals([
|
||||
((), {'address': '/dev/log',
|
||||
'facility': orig_sysloghandler.LOG_LOCAL3})],
|
||||
syslog_handler_args)
|
||||
|
||||
syslog_handler_args = []
|
||||
logger = utils.get_logger({
|
||||
'log_facility': 'LOG_LOCAL3',
|
||||
'log_address': '/foo/bar',
|
||||
}, 'server', log_route='server')
|
||||
self.assertEquals([
|
||||
((), {'address': '/foo/bar',
|
||||
'facility': orig_sysloghandler.LOG_LOCAL3}),
|
||||
# Second call is because /foo/bar didn't exist (and wasn't a
|
||||
# UNIX domain socket).
|
||||
((), {'facility': orig_sysloghandler.LOG_LOCAL3})],
|
||||
syslog_handler_args)
|
||||
|
||||
# Using UDP with default port
|
||||
syslog_handler_args = []
|
||||
logger = utils.get_logger({
|
||||
'log_udp_host': 'syslog.funtimes.com',
|
||||
}, 'server', log_route='server')
|
||||
self.assertEquals([
|
||||
((), {'address': ('syslog.funtimes.com',
|
||||
logging.handlers.SYSLOG_UDP_PORT),
|
||||
'facility': orig_sysloghandler.LOG_LOCAL0})],
|
||||
syslog_handler_args)
|
||||
|
||||
# Using UDP with non-default port
|
||||
syslog_handler_args = []
|
||||
logger = utils.get_logger({
|
||||
'log_udp_host': 'syslog.funtimes.com',
|
||||
'log_udp_port': '2123',
|
||||
}, 'server', log_route='server')
|
||||
self.assertEquals([
|
||||
((), {'address': ('syslog.funtimes.com', 2123),
|
||||
'facility': orig_sysloghandler.LOG_LOCAL0})],
|
||||
syslog_handler_args)
|
||||
finally:
|
||||
utils.SysLogHandler = orig_sysloghandler
|
||||
|
||||
def test_clean_logger_exception(self):
|
||||
# setup stream logging
|
||||
sio = StringIO()
|
||||
@ -1101,8 +1158,9 @@ class TestStatsdLogging(unittest.TestCase):
|
||||
def test_get_logger_statsd_client_non_defaults(self):
|
||||
logger = utils.get_logger({
|
||||
'log_statsd_host': 'another.host.com',
|
||||
'log_statsd_port': 9876,
|
||||
'log_statsd_default_sample_rate': 0.75,
|
||||
'log_statsd_port': '9876',
|
||||
'log_statsd_default_sample_rate': '0.75',
|
||||
'log_statsd_sample_rate_factor': '0.81',
|
||||
'log_statsd_metric_prefix': 'tomato.sauce',
|
||||
}, 'some-name', log_route='some-route')
|
||||
self.assertEqual(logger.logger.statsd_client._prefix,
|
||||
@ -1116,6 +1174,8 @@ class TestStatsdLogging(unittest.TestCase):
|
||||
self.assertEqual(logger.logger.statsd_client._port, 9876)
|
||||
self.assertEqual(logger.logger.statsd_client._default_sample_rate,
|
||||
0.75)
|
||||
self.assertEqual(logger.logger.statsd_client._sample_rate_factor,
|
||||
0.81)
|
||||
|
||||
def test_sample_rates(self):
|
||||
logger = utils.get_logger({'log_statsd_host': 'some.host.com'})
|
||||
@ -1138,6 +1198,42 @@ class TestStatsdLogging(unittest.TestCase):
|
||||
payload = mock_socket.sent[0][0]
|
||||
self.assertTrue(payload.endswith("|@0.5"))
|
||||
|
||||
def test_sample_rates_with_sample_rate_factor(self):
|
||||
logger = utils.get_logger({
|
||||
'log_statsd_host': 'some.host.com',
|
||||
'log_statsd_default_sample_rate': '0.82',
|
||||
'log_statsd_sample_rate_factor': '0.91',
|
||||
})
|
||||
effective_sample_rate = 0.82 * 0.91
|
||||
|
||||
mock_socket = MockUdpSocket()
|
||||
# encapsulation? what's that?
|
||||
statsd_client = logger.logger.statsd_client
|
||||
self.assertTrue(statsd_client.random is random.random)
|
||||
|
||||
statsd_client._open_socket = lambda *_: mock_socket
|
||||
statsd_client.random = lambda: effective_sample_rate + 0.001
|
||||
|
||||
logger.increment('tribbles')
|
||||
self.assertEqual(len(mock_socket.sent), 0)
|
||||
|
||||
statsd_client.random = lambda: effective_sample_rate - 0.001
|
||||
logger.increment('tribbles')
|
||||
self.assertEqual(len(mock_socket.sent), 1)
|
||||
|
||||
payload = mock_socket.sent[0][0]
|
||||
self.assertTrue(payload.endswith("|@%s" % effective_sample_rate),
|
||||
payload)
|
||||
|
||||
effective_sample_rate = 0.587 * 0.91
|
||||
statsd_client.random = lambda: effective_sample_rate - 0.001
|
||||
logger.increment('tribbles', sample_rate=0.587)
|
||||
self.assertEqual(len(mock_socket.sent), 2)
|
||||
|
||||
payload = mock_socket.sent[1][0]
|
||||
self.assertTrue(payload.endswith("|@%s" % effective_sample_rate),
|
||||
payload)
|
||||
|
||||
def test_timing_stats(self):
|
||||
class MockController(object):
|
||||
def __init__(self, status):
|
||||
@ -1150,7 +1246,7 @@ class TestStatsdLogging(unittest.TestCase):
|
||||
self.called = 'timing'
|
||||
self.args = args
|
||||
|
||||
@utils.timing_stats
|
||||
@utils.timing_stats()
|
||||
def METHOD(controller):
|
||||
return Response(status=controller.status)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user