diff --git a/doc/source/middleware.rst b/doc/source/middleware.rst index ef282bfa35..ee2e3dc674 100644 --- a/doc/source/middleware.rst +++ b/doc/source/middleware.rst @@ -102,6 +102,13 @@ AWS S3 Api :members: :show-inheritance: +Backend Ratelimit +================= + +.. automodule:: swift.common.middleware.backend_ratelimit + :members: + :show-inheritance: + .. _bulk: Bulk Operations (Delete and Archive Auto Extraction) diff --git a/etc/account-server.conf-sample b/etc/account-server.conf-sample index ef274cbfd3..ade806819f 100644 --- a/etc/account-server.conf-sample +++ b/etc/account-server.conf-sample @@ -80,7 +80,7 @@ bind_port = 6202 # ionice_priority = [pipeline:main] -pipeline = healthcheck recon account-server +pipeline = healthcheck recon backend_ratelimit account-server [app:account-server] use = egg:swift#account @@ -128,6 +128,18 @@ use = egg:swift#healthcheck use = egg:swift#recon # recon_cache_path = /var/cache/swift +[filter:backend_ratelimit] +use = egg:swift#backend_ratelimit +# Set the maximum rate of requests per second per device per worker. Beyond +# this rate the server will return 529 responses and emit a 'backend.ratelimit' +# statsd metric without logging. The default value of zero causes no +# rate-limiting to be applied. +# requests_per_device_per_second = 0.0 +# +# Set the number of seconds of unused rate-limiting allowance that can +# accumulate and be used to allow a subsequent burst of requests. +# requests_per_device_rate_buffer = 1.0 + [account-replicator] # You can override the default log routing for this app here (don't use set!): # log_name = account-replicator diff --git a/etc/container-server.conf-sample b/etc/container-server.conf-sample index 93887f60a5..7184c1fb26 100644 --- a/etc/container-server.conf-sample +++ b/etc/container-server.conf-sample @@ -86,7 +86,7 @@ bind_port = 6201 # ionice_priority = [pipeline:main] -pipeline = healthcheck recon container-server +pipeline = healthcheck recon backend_ratelimit container-server [app:container-server] use = egg:swift#container @@ -138,6 +138,18 @@ use = egg:swift#healthcheck use = egg:swift#recon #recon_cache_path = /var/cache/swift +[filter:backend_ratelimit] +use = egg:swift#backend_ratelimit +# Set the maximum rate of requests per second per device per worker. Beyond +# this rate the server will return 529 responses and emit a 'backend.ratelimit' +# statsd metric without logging. The default value of zero causes no +# rate-limiting to be applied. +# requests_per_device_per_second = 0.0 +# +# Set the number of seconds of unused rate-limiting allowance that can +# accumulate and be used to allow a subsequent burst of requests. +# requests_per_device_rate_buffer = 1.0 + [container-replicator] # You can override the default log routing for this app here (don't use set!): # log_name = container-replicator diff --git a/etc/object-server.conf-sample b/etc/object-server.conf-sample index ee8e581042..fd516f7807 100644 --- a/etc/object-server.conf-sample +++ b/etc/object-server.conf-sample @@ -117,7 +117,7 @@ bind_port = 6200 # ionice_priority = [pipeline:main] -pipeline = healthcheck recon object-server +pipeline = healthcheck recon backend_ratelimit object-server [app:object-server] use = egg:swift#object @@ -237,6 +237,18 @@ use = egg:swift#recon #recon_cache_path = /var/cache/swift #recon_lock_path = /var/lock +[filter:backend_ratelimit] +use = egg:swift#backend_ratelimit +# Set the maximum rate of requests per second per device per worker. Beyond +# this rate the server will return 529 responses and emit a 'backend.ratelimit' +# statsd metric without logging. The default value of zero causes no +# rate-limiting to be applied. +# requests_per_device_per_second = 0.0 +# +# Set the number of seconds of unused rate-limiting allowance that can +# accumulate and be used to allow a subsequent burst of requests. +# requests_per_device_rate_buffer = 1.0 + [object-replicator] # You can override the default log routing for this app here (don't use set!): # log_name = object-replicator diff --git a/setup.cfg b/setup.cfg index 318d502ef7..8999808877 100644 --- a/setup.cfg +++ b/setup.cfg @@ -98,6 +98,7 @@ paste.filter_factory = memcache = swift.common.middleware.memcache:filter_factory read_only = swift.common.middleware.read_only:filter_factory ratelimit = swift.common.middleware.ratelimit:filter_factory + backend_ratelimit = swift.common.middleware.backend_ratelimit:filter_factory cname_lookup = swift.common.middleware.cname_lookup:filter_factory catch_errors = swift.common.middleware.catch_errors:filter_factory domain_remap = swift.common.middleware.domain_remap:filter_factory diff --git a/swift/common/middleware/backend_ratelimit.py b/swift/common/middleware/backend_ratelimit.py new file mode 100644 index 0000000000..980e9edc4b --- /dev/null +++ b/swift/common/middleware/backend_ratelimit.py @@ -0,0 +1,86 @@ +# Copyright (c) 2022 NVIDIA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import time +from collections import defaultdict + +from swift.common.request_helpers import split_and_validate_path +from swift.common.swob import Request, HTTPTooManyBackendRequests +from swift.common.utils import get_logger, non_negative_float, \ + EventletRateLimiter + +RATE_LIMITED_METHODS = ('GET', 'HEAD', 'PUT', 'POST', 'DELETE', 'UPDATE', + 'REPLICATE') + + +class BackendRateLimitMiddleware(object): + """ + Backend rate-limiting middleware. + + Rate-limits requests to backend storage node devices. Each device is + independently rate-limited. All requests with a 'GET', 'HEAD', 'PUT', + 'POST', 'DELETE', 'UPDATE' or 'REPLICATE' method are included in a device's + rate limit. + + If a request would cause the rate-limit to be exceeded then a response with + a 529 status code is returned. + """ + def __init__(self, app, conf, logger=None): + self.app = app + self.logger = logger or get_logger(conf, log_route='backend_ratelimit') + self.requests_per_device_per_second = non_negative_float( + conf.get('requests_per_device_per_second', 0.0)) + self.requests_per_device_rate_buffer = non_negative_float( + conf.get('requests_per_device_rate_buffer', 1.0)) + + # map device -> RateLimiter + self.rate_limiters = defaultdict( + lambda: EventletRateLimiter( + max_rate=self.requests_per_device_per_second, + rate_buffer=self.requests_per_device_rate_buffer, + running_time=time.time(), + burst_after_idle=True)) + + def __call__(self, env, start_response): + """ + WSGI entry point. + + :param env: WSGI environment dictionary + :param start_response: WSGI callable + """ + req = Request(env) + handler = self.app + if req.method in RATE_LIMITED_METHODS: + try: + device, partition, _ = split_and_validate_path(req, 1, 3, True) + int(partition) # check it's a valid partition + rate_limiter = self.rate_limiters[device] + if not rate_limiter.is_allowed(): + self.logger.increment('backend.ratelimit') + handler = HTTPTooManyBackendRequests() + except Exception: # noqa + # request may not have device/partition e.g. a healthcheck req + pass + return handler(env, start_response) + + +def filter_factory(global_conf, **local_conf): + conf = global_conf.copy() + conf.update(local_conf) + + def backend_ratelimit_filter(app): + return BackendRateLimitMiddleware(app, conf) + + return backend_ratelimit_filter diff --git a/swift/common/swob.py b/swift/common/swob.py index 6eaa33a744..7a14d1cf10 100644 --- a/swift/common/swob.py +++ b/swift/common/swob.py @@ -114,6 +114,8 @@ RESPONSE_REASONS = { 'backend server.'), 507: ('Insufficient Storage', 'There was not enough space to save the ' 'resource. Drive: %(drive)s'), + 529: ('Too Many Backend Requests', 'The server is incapable of performing ' + 'the requested operation due to too many requests. Slow down.') } MAX_RANGE_OVERLAPS = 2 @@ -1619,3 +1621,4 @@ HTTPNotImplemented = status_map[501] HTTPBadGateway = status_map[502] HTTPServiceUnavailable = status_map[503] HTTPInsufficientStorage = status_map[507] +HTTPTooManyBackendRequests = status_map[529] diff --git a/swift/common/utils.py b/swift/common/utils.py index 8a409c9608..b0fef44195 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -325,9 +325,13 @@ def non_negative_float(value): :raises ValueError: if the value cannot be cast to a float or is negative. :return: a float """ - value = float(value) - if value < 0: - raise ValueError + try: + value = float(value) + if value < 0: + raise ValueError + except (TypeError, ValueError): + raise ValueError('Value must be a non-negative float number, not "%s".' + % value) return value diff --git a/test/unit/common/middleware/helpers.py b/test/unit/common/middleware/helpers.py index 3c91c9cc1a..caa8122f83 100644 --- a/test/unit/common/middleware/helpers.py +++ b/test/unit/common/middleware/helpers.py @@ -76,7 +76,7 @@ class FakeSwift(object): """ ALLOWED_METHODS = [ 'PUT', 'POST', 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'REPLICATE', - 'UPDATE'] + 'SSYNC', 'UPDATE'] def __init__(self): self._calls = [] diff --git a/test/unit/common/middleware/test_backend_ratelimit.py b/test/unit/common/middleware/test_backend_ratelimit.py new file mode 100644 index 0000000000..15be2f50a6 --- /dev/null +++ b/test/unit/common/middleware/test_backend_ratelimit.py @@ -0,0 +1,170 @@ +# Copyright (c) 2022 NVIDIA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Used by get_swift_info and register_swift_info to store information about +# the swift cluster. +import time +import unittest +from collections import defaultdict + +import mock + +from swift.common.middleware import backend_ratelimit +from swift.common.middleware.backend_ratelimit import \ + BackendRateLimitMiddleware +from swift.common.swob import Request, HTTPOk +from test.debug_logger import debug_logger +from test.unit.common.middleware.helpers import FakeSwift + + +class FakeApp(object): + def __init__(self): + self.calls = [] + + def __call__(self, env, start_response): + start_response('200 OK', {}) + return [''] + + +class TestBackendRatelimitMiddleware(unittest.TestCase): + def setUp(self): + super(TestBackendRatelimitMiddleware, self).setUp() + self.swift = FakeSwift() + + def test_init(self): + conf = {} + factory = backend_ratelimit.filter_factory(conf) + rl = factory(self.swift) + self.assertEqual(0.0, rl.requests_per_device_per_second) + self.assertEqual(1.0, rl.requests_per_device_rate_buffer) + + conf = {'requests_per_device_per_second': 1.3, + 'requests_per_device_rate_buffer': 2.4} + factory = backend_ratelimit.filter_factory(conf) + rl = factory(self.swift) + self.assertEqual(1.3, rl.requests_per_device_per_second) + self.assertEqual(2.4, rl.requests_per_device_rate_buffer) + + conf = {'requests_per_device_per_second': -1} + factory = backend_ratelimit.filter_factory(conf) + with self.assertRaises(ValueError) as cm: + factory(self.swift) + self.assertEqual( + 'Value must be a non-negative float number, not "-1.0".', + str(cm.exception)) + + conf = {'requests_per_device_rate_buffer': -1} + factory = backend_ratelimit.filter_factory(conf) + with self.assertRaises(ValueError): + factory(self.swift) + self.assertEqual( + 'Value must be a non-negative float number, not "-1.0".', + str(cm.exception)) + + def _do_test_ratelimit(self, method, req_per_sec, rate_buffer): + # send 20 requests, time increments by 0.01 between each request + start = time.time() + fake_time = [start] + + def mock_time(): + return fake_time[0] + + app = FakeSwift() + logger = debug_logger() + # apply a ratelimit + conf = {'requests_per_device_per_second': req_per_sec, + 'requests_per_device_rate_buffer': rate_buffer} + rl = BackendRateLimitMiddleware(app, conf, logger) + success = defaultdict(int) + ratelimited = 0 + + with mock.patch('swift.common.utils.time.time', mock_time): + for i in range(20): + for dev in ['sda1', 'sda2', 'sda3']: + req = Request.blank('/%s/99/a/c/o' % dev, + environ={'REQUEST_METHOD': method}) + app.register(method, req.path, HTTPOk, {}) + resp = req.get_response(rl) + if resp.status_int == 200: + success[dev] += 1 + else: + self.assertEqual(529, resp.status_int) + self.assertTrue(resp.status.startswith( + '529 Too Many Backend Requests')) + ratelimited += 1 + fake_time[0] += 0.01 + self.assertEqual( + ratelimited, + logger.get_increment_counts().get('backend.ratelimit', 0)) + return success + + def test_ratelimited(self): + def do_test_ratelimit(method): + # no rate-limiting + success_per_dev = self._do_test_ratelimit(method, 0, 0) + self.assertEqual([20] * 3, list(success_per_dev.values())) + + # rate-limited + success_per_dev = self._do_test_ratelimit(method, 1, 0) + self.assertEqual([1] * 3, list(success_per_dev.values())) + + success_per_dev = self._do_test_ratelimit(method, 10, 0) + self.assertEqual([2] * 3, list(success_per_dev.values())) + + success_per_dev = self._do_test_ratelimit(method, 101, 0) + self.assertEqual([20] * 3, list(success_per_dev.values())) + + # startup burst of 1 seconds allowance plus current allowance... + success_per_dev = self._do_test_ratelimit(method, 1, 1) + self.assertEqual([2] * 3, list(success_per_dev.values())) + success_per_dev = self._do_test_ratelimit(method, 10, 1) + self.assertEqual([12] * 3, list(success_per_dev.values())) + + do_test_ratelimit('GET') + do_test_ratelimit('HEAD') + do_test_ratelimit('PUT') + do_test_ratelimit('POST') + do_test_ratelimit('DELETE') + do_test_ratelimit('UPDATE') + do_test_ratelimit('REPLICATE') + + def test_not_ratelimited(self): + def do_test_no_ratelimit(method): + # verify no rate-limiting + success_per_dev = self._do_test_ratelimit(method, 1, 0) + self.assertEqual([20] * 3, list(success_per_dev.values())) + + do_test_no_ratelimit('OPTIONS') + do_test_no_ratelimit('SSYNC') + + def test_unhandled_request(self): + app = FakeSwift() + logger = debug_logger() + conf = {'requests_per_device_per_second': 1, + 'requests_per_device_rate_buffer': 1} + + def do_test(path): + rl = BackendRateLimitMiddleware(app, conf, logger) + req = Request.blank(path) + app.register('GET', req.path, HTTPOk, {}) + for i in range(10): + resp = req.get_response(rl) + self.assertEqual(200, resp.status_int) + self.assertEqual( + 0, logger.get_increment_counts().get('backend.ratelimit', 0)) + + do_test('/recon/version') + do_test('/healthcheck') + do_test('/v1/a/c/o') diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 5efc1f00b1..c99c39a9f7 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -3106,12 +3106,26 @@ cluster_dfw1 = http://dfw1.host/v1/ self.assertEqual(1, utils.non_negative_float(True)) self.assertEqual(0, utils.non_negative_float(False)) - with self.assertRaises(ValueError): + with self.assertRaises(ValueError) as cm: utils.non_negative_float(-1.1) - with self.assertRaises(ValueError): + self.assertEqual( + 'Value must be a non-negative float number, not "-1.1".', + str(cm.exception)) + with self.assertRaises(ValueError) as cm: utils.non_negative_float('-1.1') - with self.assertRaises(ValueError): + self.assertEqual( + 'Value must be a non-negative float number, not "-1.1".', + str(cm.exception)) + with self.assertRaises(ValueError) as cm: utils.non_negative_float('one') + self.assertEqual( + 'Value must be a non-negative float number, not "one".', + str(cm.exception)) + with self.assertRaises(ValueError) as cm: + utils.non_negative_float(None) + self.assertEqual( + 'Value must be a non-negative float number, not "None".', + str(cm.exception)) def test_non_negative_int(self): self.assertEqual(0, utils.non_negative_int('0'))