diff --git a/doc/source/admin_guide.rst b/doc/source/admin_guide.rst index d8b1c4898d..e76a957a10 100644 --- a/doc/source/admin_guide.rst +++ b/doc/source/admin_guide.rst @@ -416,7 +416,8 @@ configuration entries (see the sample configuration files):: log_statsd_host = localhost log_statsd_port = 8125 - log_statsd_default_sample_rate = 1 + log_statsd_default_sample_rate = 1.0 + log_statsd_sample_rate_factor = 1.0 log_statsd_metric_prefix = [empty-string] If `log_statsd_host` is not set, this feature is disabled. The default values @@ -431,9 +432,24 @@ probability of sending a sample for any given event or timing measurement. This sample rate is sent with each sample to StatsD and used to multiply the value. For example, with a sample rate of 0.5, StatsD will multiply that counter's value by 2 when flushing the metric to an upstream -monitoring system (Graphite_, Ganglia_, etc.). To get the best data, start -with the default `log_statsd_default_sample_rate` value of 1 and only lower -it as needed. +monitoring system (Graphite_, Ganglia_, etc.). + +Some relatively high-frequency metrics have a default sample rate less than +one. If you want to override the default sample rate for all metrics whose +default sample rate is not specified in the Swift source, you may set +`log_statsd_default_sample_rate` to a value less than one. This is NOT +recommended (see next paragraph). A better way to reduce StatsD load is to +adjust `log_statsd_sample_rate_factor` to a value less than one. The +`log_statsd_sample_rate_factor` is multiplied to any sample rate (either the +global default or one specified by the actual metric logging call in the Swift +source) prior to handling. In other words, this one tunable can lower the +frequency of all StatsD logging by a proportional amount. + +To get the best data, start with the default `log_statsd_default_sample_rate` +and `log_statsd_sample_rate_factor` values of 1 and only lower +`log_statsd_sample_rate_factor` if needed. The +`log_statsd_default_sample_rate` should not be used and remains for backward +compatibility only. The metric prefix will be prepended to every metric sent to the StatsD server For example, with:: diff --git a/etc/account-server.conf-sample b/etc/account-server.conf-sample index fd3b5a3765..50244e5769 100644 --- a/etc/account-server.conf-sample +++ b/etc/account-server.conf-sample @@ -24,7 +24,8 @@ # You can enable StatsD logging here: # log_statsd_host = localhost # log_statsd_port = 8125 -# log_statsd_default_sample_rate = 1 +# log_statsd_default_sample_rate = 1.0 +# log_statsd_sample_rate_factor = 1.0 # log_statsd_metric_prefix = # If you don't mind the extra disk space usage in overhead, you can turn this # on to preallocate disk space with SQLite databases to decrease fragmentation. diff --git a/etc/container-server.conf-sample b/etc/container-server.conf-sample index 384aa72481..7721b69be9 100644 --- a/etc/container-server.conf-sample +++ b/etc/container-server.conf-sample @@ -27,7 +27,8 @@ # You can enable StatsD logging here: # log_statsd_host = localhost # log_statsd_port = 8125 -# log_statsd_default_sample_rate = 1 +# log_statsd_default_sample_rate = 1.0 +# log_statsd_sample_rate_factor = 1.0 # log_statsd_metric_prefix = # If you don't mind the extra disk space usage in overhead, you can turn this # on to preallocate disk space with SQLite databases to decrease fragmentation. diff --git a/etc/object-expirer.conf-sample b/etc/object-expirer.conf-sample index cef0f0f19d..f6062586a9 100644 --- a/etc/object-expirer.conf-sample +++ b/etc/object-expirer.conf-sample @@ -16,7 +16,8 @@ # You can enable StatsD logging here: # log_statsd_host = localhost # log_statsd_port = 8125 -# log_statsd_default_sample_rate = 1 +# log_statsd_default_sample_rate = 1.0 +# log_statsd_sample_rate_factor = 1.0 # log_statsd_metric_prefix = [object-expirer] diff --git a/etc/object-server.conf-sample b/etc/object-server.conf-sample index 70bbebcb3a..e4a88c0007 100644 --- a/etc/object-server.conf-sample +++ b/etc/object-server.conf-sample @@ -25,7 +25,8 @@ # You can enable StatsD logging here: # log_statsd_host = localhost # log_statsd_port = 8125 -# log_statsd_default_sample_rate = 1 +# log_statsd_default_sample_rate = 1.0 +# log_statsd_sample_rate_factor = 1.0 # log_statsd_metric_prefix = # eventlet_debug = false # You can set fallocate_reserve to the number of bytes you'd like fallocate to diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index 5b656dcc10..3f5ddcec09 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -26,7 +26,8 @@ # You can enable StatsD logging here: # log_statsd_host = localhost # log_statsd_port = 8125 -# log_statsd_default_sample_rate = 1 +# log_statsd_default_sample_rate = 1.0 +# log_statsd_sample_rate_factor = 1.0 # log_statsd_metric_prefix = # Use a comma separated list of full url (http://foo.bar:1234,https://foo.bar) # cors_allow_origin = @@ -316,7 +317,8 @@ use = egg:swift#proxy_logging # You can use log_statsd_* from [DEFAULT] or override them here: # access_log_statsd_host = localhost # access_log_statsd_port = 8125 -# access_log_statsd_default_sample_rate = 1 +# access_log_statsd_default_sample_rate = 1.0 +# access_log_statsd_sample_rate_factor = 1.0 # access_log_statsd_metric_prefix = # access_log_headers = False # What HTTP methods are allowed for StatsD logging (comma-sep); request methods diff --git a/swift/account/server.py b/swift/account/server.py index 86800d1fb2..803b90cca8 100644 --- a/swift/account/server.py +++ b/swift/account/server.py @@ -62,7 +62,7 @@ class AccountController(object): return AccountBroker(db_path, account=account, logger=self.logger) @public - @timing_stats + @timing_stats() def DELETE(self, req): """Handle HTTP DELETE request.""" try: @@ -84,7 +84,7 @@ class AccountController(object): return HTTPNoContent(request=req) @public - @timing_stats + @timing_stats() def PUT(self, req): """Handle HTTP PUT request.""" try: @@ -139,7 +139,7 @@ class AccountController(object): return HTTPAccepted(request=req) @public - @timing_stats + @timing_stats() def HEAD(self, req): """Handle HTTP HEAD request.""" # TODO(refactor): The account server used to provide a 'account and @@ -186,7 +186,7 @@ class AccountController(object): return HTTPNoContent(request=req, headers=headers, charset='utf-8') @public - @timing_stats + @timing_stats() def GET(self, req): """Handle HTTP GET request.""" try: @@ -274,7 +274,7 @@ class AccountController(object): return ret @public - @timing_stats + @timing_stats() def REPLICATE(self, req): """ Handle HTTP REPLICATE request. @@ -298,7 +298,7 @@ class AccountController(object): return ret @public - @timing_stats + @timing_stats() def POST(self, req): """Handle HTTP POST request.""" try: diff --git a/swift/common/middleware/proxy_logging.py b/swift/common/middleware/proxy_logging.py index 0407bb1dc0..2139b0ed99 100644 --- a/swift/common/middleware/proxy_logging.py +++ b/swift/common/middleware/proxy_logging.py @@ -90,6 +90,7 @@ class ProxyLoggingMiddleware(object): for key in ('log_facility', 'log_name', 'log_level', 'log_udp_host', 'log_udp_port', 'log_statsd_host', 'log_statsd_port', 'log_statsd_default_sample_rate', + 'log_statsd_sample_rate_factor', 'log_statsd_metric_prefix'): value = conf.get('access_' + key, conf.get(key, None)) if value: diff --git a/swift/common/utils.py b/swift/common/utils.py index 5837649624..cd0b06f5f8 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -421,12 +421,13 @@ class LoggerFileObject(object): class StatsdClient(object): def __init__(self, host, port, base_prefix='', tail_prefix='', - default_sample_rate=1): + default_sample_rate=1, sample_rate_factor=1): self._host = host self._port = port self._base_prefix = base_prefix self.set_prefix(tail_prefix) self._default_sample_rate = default_sample_rate + self._sample_rate_factor = sample_rate_factor self._target = (self._host, self._port) self.random = random @@ -443,6 +444,7 @@ class StatsdClient(object): def _send(self, m_name, m_value, m_type, sample_rate): if sample_rate is None: sample_rate = self._default_sample_rate + sample_rate = sample_rate * self._sample_rate_factor parts = ['%s%s:%s' % (self._prefix, m_name, m_value), m_type] if sample_rate < 1: if self.random() < sample_rate: @@ -474,25 +476,30 @@ class StatsdClient(object): sample_rate) -def timing_stats(func): +def timing_stats(**dec_kwargs): """ - Decorator that logs timing events or errors for public methods in swift's - wsgi server controllers, based on response code. + Returns a decorator that logs timing events or errors for public methods in + swift's wsgi server controllers, based on response code. """ - method = func.func_name + def decorating_func(func): + method = func.func_name - @functools.wraps(func) - def _timing_stats(ctrl, *args, **kwargs): - start_time = time.time() - resp = func(ctrl, *args, **kwargs) - if is_success(resp.status_int) or is_redirection(resp.status_int) or \ - resp.status_int == HTTP_NOT_FOUND: - ctrl.logger.timing_since(method + '.timing', start_time) - else: - ctrl.logger.timing_since(method + '.errors.timing', start_time) - return resp + @functools.wraps(func) + def _timing_stats(ctrl, *args, **kwargs): + start_time = time.time() + resp = func(ctrl, *args, **kwargs) + if is_success(resp.status_int) or \ + is_redirection(resp.status_int) or \ + resp.status_int == HTTP_NOT_FOUND: + ctrl.logger.timing_since(method + '.timing', + start_time, **dec_kwargs) + else: + ctrl.logger.timing_since(method + '.errors.timing', + start_time, **dec_kwargs) + return resp - return _timing_stats + return _timing_stats + return decorating_func # double inheritance to support property with setter @@ -600,14 +607,11 @@ class LogAdapter(logging.LoggerAdapter, object): def statsd_delegate(statsd_func_name): """ - Factory which creates methods which delegate to methods on + Factory to create methods which delegate to methods on self.logger.statsd_client (an instance of StatsdClient). The created methods conditionally delegate to a method whose name is given in 'statsd_func_name'. The created delegate methods are a no-op when - StatsD logging is not configured. The created delegate methods also - handle the defaulting of sample_rate (to either the default specified - in the config with 'log_statsd_default_sample_rate' or the value passed - into delegate function). + StatsD logging is not configured. :param statsd_func_name: the name of a method on StatsdClient. """ @@ -665,7 +669,8 @@ def get_logger(conf, name=None, log_to_console=False, log_route=None, log_address = /dev/log log_statsd_host = (disabled) log_statsd_port = 8125 - log_statsd_default_sample_rate = 1 + log_statsd_default_sample_rate = 1.0 + log_statsd_sample_rate_factor = 1.0 log_statsd_metric_prefix = (empty-string) :param conf: Configuration dict to read settings from @@ -697,7 +702,8 @@ def get_logger(conf, name=None, log_to_console=False, log_route=None, SysLogHandler.LOG_LOCAL0) udp_host = conf.get('log_udp_host') if udp_host: - udp_port = conf.get('log_udp_port', logging.handlers.SYSLOG_UDP_PORT) + udp_port = int(conf.get('log_udp_port', + logging.handlers.SYSLOG_UDP_PORT)) handler = SysLogHandler(address=(udp_host, udp_port), facility=facility) else: @@ -737,8 +743,11 @@ def get_logger(conf, name=None, log_to_console=False, log_route=None, base_prefix = conf.get('log_statsd_metric_prefix', '') default_sample_rate = float(conf.get( 'log_statsd_default_sample_rate', 1)) + sample_rate_factor = float(conf.get( + 'log_statsd_sample_rate_factor', 1)) statsd_client = StatsdClient(statsd_host, statsd_port, base_prefix, - name, default_sample_rate) + name, default_sample_rate, + sample_rate_factor) logger.statsd_client = statsd_client else: logger.statsd_client = None diff --git a/swift/container/server.py b/swift/container/server.py index 9cf1d2b392..73746f0075 100644 --- a/swift/container/server.py +++ b/swift/container/server.py @@ -163,7 +163,7 @@ class ContainerController(object): return None @public - @timing_stats + @timing_stats() def DELETE(self, req): """Handle HTTP DELETE request.""" try: @@ -205,7 +205,7 @@ class ContainerController(object): return HTTPNotFound() @public - @timing_stats + @timing_stats() def PUT(self, req): """Handle HTTP PUT request.""" try: @@ -268,7 +268,7 @@ class ContainerController(object): return HTTPAccepted(request=req) @public - @timing_stats + @timing_stats(sample_rate=0.1) def HEAD(self, req): """Handle HTTP HEAD request.""" try: @@ -306,7 +306,7 @@ class ContainerController(object): return HTTPNoContent(request=req, headers=headers, charset='utf-8') @public - @timing_stats + @timing_stats() def GET(self, req): """Handle HTTP GET request.""" try: @@ -413,7 +413,7 @@ class ContainerController(object): return ret @public - @timing_stats + @timing_stats(sample_rate=0.01) def REPLICATE(self, req): """ Handle HTTP REPLICATE request (json-encoded RPC calls for replication.) @@ -436,7 +436,7 @@ class ContainerController(object): return ret @public - @timing_stats + @timing_stats() def POST(self, req): """Handle HTTP POST request.""" try: diff --git a/swift/obj/server.py b/swift/obj/server.py index 5d6f503d49..19e7cbdfdb 100755 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -563,7 +563,7 @@ class ObjectController(object): host, partition, contdevice, headers_out, objdevice) @public - @timing_stats + @timing_stats() def POST(self, request): """Handle HTTP POST requests for the Swift Object Server.""" try: @@ -612,7 +612,7 @@ class ObjectController(object): return HTTPAccepted(request=request) @public - @timing_stats + @timing_stats() def PUT(self, request): """Handle HTTP PUT requests for the Swift Object Server.""" try: @@ -710,7 +710,7 @@ class ObjectController(object): return resp @public - @timing_stats + @timing_stats() def GET(self, request): """Handle HTTP GET requests for the Swift Object Server.""" try: @@ -790,7 +790,7 @@ class ObjectController(object): return request.get_response(response) @public - @timing_stats + @timing_stats(sample_rate=0.8) def HEAD(self, request): """Handle HTTP HEAD requests for the Swift Object Server.""" try: @@ -830,7 +830,7 @@ class ObjectController(object): return response @public - @timing_stats + @timing_stats() def DELETE(self, request): """Handle HTTP DELETE requests for the Swift Object Server.""" try: @@ -878,7 +878,7 @@ class ObjectController(object): return resp @public - @timing_stats + @timing_stats(sample_rate=0.1) def REPLICATE(self, request): """ Handle REPLICATE requests for the Swift Object Server. This is used diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 769e1ca6ed..9a268d5218 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -35,6 +35,7 @@ from shutil import rmtree from StringIO import StringIO from functools import partial from tempfile import TemporaryFile, NamedTemporaryFile +from logging import handlers as logging_handlers from eventlet import sleep @@ -378,6 +379,62 @@ class TestUtils(unittest.TestCase): self.assertEquals(sio.getvalue(), 'test1\ntest3\ntest4\ntest6\n') + def test_get_logger_sysloghandler_plumbing(self): + orig_sysloghandler = utils.SysLogHandler + syslog_handler_args = [] + def syslog_handler_catcher(*args, **kwargs): + syslog_handler_args.append((args, kwargs)) + return orig_sysloghandler(*args, **kwargs) + syslog_handler_catcher.LOG_LOCAL0 = orig_sysloghandler.LOG_LOCAL0 + syslog_handler_catcher.LOG_LOCAL3 = orig_sysloghandler.LOG_LOCAL3 + + try: + utils.SysLogHandler = syslog_handler_catcher + logger = utils.get_logger({ + 'log_facility': 'LOG_LOCAL3', + }, 'server', log_route='server') + self.assertEquals([ + ((), {'address': '/dev/log', + 'facility': orig_sysloghandler.LOG_LOCAL3})], + syslog_handler_args) + + syslog_handler_args = [] + logger = utils.get_logger({ + 'log_facility': 'LOG_LOCAL3', + 'log_address': '/foo/bar', + }, 'server', log_route='server') + self.assertEquals([ + ((), {'address': '/foo/bar', + 'facility': orig_sysloghandler.LOG_LOCAL3}), + # Second call is because /foo/bar didn't exist (and wasn't a + # UNIX domain socket). + ((), {'facility': orig_sysloghandler.LOG_LOCAL3})], + syslog_handler_args) + + # Using UDP with default port + syslog_handler_args = [] + logger = utils.get_logger({ + 'log_udp_host': 'syslog.funtimes.com', + }, 'server', log_route='server') + self.assertEquals([ + ((), {'address': ('syslog.funtimes.com', + logging.handlers.SYSLOG_UDP_PORT), + 'facility': orig_sysloghandler.LOG_LOCAL0})], + syslog_handler_args) + + # Using UDP with non-default port + syslog_handler_args = [] + logger = utils.get_logger({ + 'log_udp_host': 'syslog.funtimes.com', + 'log_udp_port': '2123', + }, 'server', log_route='server') + self.assertEquals([ + ((), {'address': ('syslog.funtimes.com', 2123), + 'facility': orig_sysloghandler.LOG_LOCAL0})], + syslog_handler_args) + finally: + utils.SysLogHandler = orig_sysloghandler + def test_clean_logger_exception(self): # setup stream logging sio = StringIO() @@ -1101,8 +1158,9 @@ class TestStatsdLogging(unittest.TestCase): def test_get_logger_statsd_client_non_defaults(self): logger = utils.get_logger({ 'log_statsd_host': 'another.host.com', - 'log_statsd_port': 9876, - 'log_statsd_default_sample_rate': 0.75, + 'log_statsd_port': '9876', + 'log_statsd_default_sample_rate': '0.75', + 'log_statsd_sample_rate_factor': '0.81', 'log_statsd_metric_prefix': 'tomato.sauce', }, 'some-name', log_route='some-route') self.assertEqual(logger.logger.statsd_client._prefix, @@ -1116,6 +1174,8 @@ class TestStatsdLogging(unittest.TestCase): self.assertEqual(logger.logger.statsd_client._port, 9876) self.assertEqual(logger.logger.statsd_client._default_sample_rate, 0.75) + self.assertEqual(logger.logger.statsd_client._sample_rate_factor, + 0.81) def test_sample_rates(self): logger = utils.get_logger({'log_statsd_host': 'some.host.com'}) @@ -1138,6 +1198,42 @@ class TestStatsdLogging(unittest.TestCase): payload = mock_socket.sent[0][0] self.assertTrue(payload.endswith("|@0.5")) + def test_sample_rates_with_sample_rate_factor(self): + logger = utils.get_logger({ + 'log_statsd_host': 'some.host.com', + 'log_statsd_default_sample_rate': '0.82', + 'log_statsd_sample_rate_factor': '0.91', + }) + effective_sample_rate = 0.82 * 0.91 + + mock_socket = MockUdpSocket() + # encapsulation? what's that? + statsd_client = logger.logger.statsd_client + self.assertTrue(statsd_client.random is random.random) + + statsd_client._open_socket = lambda *_: mock_socket + statsd_client.random = lambda: effective_sample_rate + 0.001 + + logger.increment('tribbles') + self.assertEqual(len(mock_socket.sent), 0) + + statsd_client.random = lambda: effective_sample_rate - 0.001 + logger.increment('tribbles') + self.assertEqual(len(mock_socket.sent), 1) + + payload = mock_socket.sent[0][0] + self.assertTrue(payload.endswith("|@%s" % effective_sample_rate), + payload) + + effective_sample_rate = 0.587 * 0.91 + statsd_client.random = lambda: effective_sample_rate - 0.001 + logger.increment('tribbles', sample_rate=0.587) + self.assertEqual(len(mock_socket.sent), 2) + + payload = mock_socket.sent[1][0] + self.assertTrue(payload.endswith("|@%s" % effective_sample_rate), + payload) + def test_timing_stats(self): class MockController(object): def __init__(self, status): @@ -1150,7 +1246,7 @@ class TestStatsdLogging(unittest.TestCase): self.called = 'timing' self.args = args - @utils.timing_stats + @utils.timing_stats() def METHOD(controller): return Response(status=controller.status)