Add backend rate limiting middleware

This is a fairly blunt tool: ratelimiting is per device and
applied independently in each worker, but this at least provides
some limit to disk IO on backend servers.

GET, HEAD, PUT, POST, DELETE, UPDATE and REPLICATE methods may be
rate-limited.

Only requests with a path starting '<device>/<partition>', where
<partition> can be cast to an integer, will be rate-limited. Other
requests, including, for example, recon requests with paths such as
'recon/version', are unconditionally forwarded to the next app in the
pipeline.

OPTIONS and SSYNC methods are not rate-limited. Note that
SSYNC sub-requests are passed directly to the object server app
and will not pass through this middleware.

Change-Id: I78b59a081698a6bff0d74cbac7525e28f7b5d7c1
This commit is contained in:
Alistair Coles 2022-03-31 16:36:32 +01:00
parent 507cf18f96
commit ccaf49a00c
11 changed files with 331 additions and 10 deletions

View File

@ -102,6 +102,13 @@ AWS S3 Api
:members: :members:
:show-inheritance: :show-inheritance:
Backend Ratelimit
=================
.. automodule:: swift.common.middleware.backend_ratelimit
:members:
:show-inheritance:
.. _bulk: .. _bulk:
Bulk Operations (Delete and Archive Auto Extraction) Bulk Operations (Delete and Archive Auto Extraction)

View File

@ -80,7 +80,7 @@ bind_port = 6202
# ionice_priority = # ionice_priority =
[pipeline:main] [pipeline:main]
pipeline = healthcheck recon account-server pipeline = healthcheck recon backend_ratelimit account-server
[app:account-server] [app:account-server]
use = egg:swift#account use = egg:swift#account
@ -128,6 +128,18 @@ use = egg:swift#healthcheck
use = egg:swift#recon use = egg:swift#recon
# recon_cache_path = /var/cache/swift # recon_cache_path = /var/cache/swift
[filter:backend_ratelimit]
use = egg:swift#backend_ratelimit
# Set the maximum rate of requests per second per device per worker. Beyond
# this rate the server will return 529 responses and emit a 'backend.ratelimit'
# statsd metric without logging. The default value of zero causes no
# rate-limiting to be applied.
# requests_per_device_per_second = 0.0
#
# Set the number of seconds of unused rate-limiting allowance that can
# accumulate and be used to allow a subsequent burst of requests.
# requests_per_device_rate_buffer = 1.0
[account-replicator] [account-replicator]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
# log_name = account-replicator # log_name = account-replicator

View File

@ -86,7 +86,7 @@ bind_port = 6201
# ionice_priority = # ionice_priority =
[pipeline:main] [pipeline:main]
pipeline = healthcheck recon container-server pipeline = healthcheck recon backend_ratelimit container-server
[app:container-server] [app:container-server]
use = egg:swift#container use = egg:swift#container
@ -138,6 +138,18 @@ use = egg:swift#healthcheck
use = egg:swift#recon use = egg:swift#recon
#recon_cache_path = /var/cache/swift #recon_cache_path = /var/cache/swift
[filter:backend_ratelimit]
use = egg:swift#backend_ratelimit
# Set the maximum rate of requests per second per device per worker. Beyond
# this rate the server will return 529 responses and emit a 'backend.ratelimit'
# statsd metric without logging. The default value of zero causes no
# rate-limiting to be applied.
# requests_per_device_per_second = 0.0
#
# Set the number of seconds of unused rate-limiting allowance that can
# accumulate and be used to allow a subsequent burst of requests.
# requests_per_device_rate_buffer = 1.0
[container-replicator] [container-replicator]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
# log_name = container-replicator # log_name = container-replicator

View File

@ -111,7 +111,7 @@ bind_port = 6200
# ionice_priority = # ionice_priority =
[pipeline:main] [pipeline:main]
pipeline = healthcheck recon object-server pipeline = healthcheck recon backend_ratelimit object-server
[app:object-server] [app:object-server]
use = egg:swift#object use = egg:swift#object
@ -231,6 +231,18 @@ use = egg:swift#recon
#recon_cache_path = /var/cache/swift #recon_cache_path = /var/cache/swift
#recon_lock_path = /var/lock #recon_lock_path = /var/lock
[filter:backend_ratelimit]
use = egg:swift#backend_ratelimit
# Set the maximum rate of requests per second per device per worker. Beyond
# this rate the server will return 529 responses and emit a 'backend.ratelimit'
# statsd metric without logging. The default value of zero causes no
# rate-limiting to be applied.
# requests_per_device_per_second = 0.0
#
# Set the number of seconds of unused rate-limiting allowance that can
# accumulate and be used to allow a subsequent burst of requests.
# requests_per_device_rate_buffer = 1.0
[object-replicator] [object-replicator]
# You can override the default log routing for this app here (don't use set!): # You can override the default log routing for this app here (don't use set!):
# log_name = object-replicator # log_name = object-replicator

View File

@ -98,6 +98,7 @@ paste.filter_factory =
memcache = swift.common.middleware.memcache:filter_factory memcache = swift.common.middleware.memcache:filter_factory
read_only = swift.common.middleware.read_only:filter_factory read_only = swift.common.middleware.read_only:filter_factory
ratelimit = swift.common.middleware.ratelimit:filter_factory ratelimit = swift.common.middleware.ratelimit:filter_factory
backend_ratelimit = swift.common.middleware.backend_ratelimit:filter_factory
cname_lookup = swift.common.middleware.cname_lookup:filter_factory cname_lookup = swift.common.middleware.cname_lookup:filter_factory
catch_errors = swift.common.middleware.catch_errors:filter_factory catch_errors = swift.common.middleware.catch_errors:filter_factory
domain_remap = swift.common.middleware.domain_remap:filter_factory domain_remap = swift.common.middleware.domain_remap:filter_factory

View File

@ -0,0 +1,86 @@
# Copyright (c) 2022 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
from collections import defaultdict
from swift.common.request_helpers import split_and_validate_path
from swift.common.swob import Request, HTTPTooManyBackendRequests
from swift.common.utils import get_logger, non_negative_float, \
EventletRateLimiter
# HTTP methods whose requests are counted against a device's rate limit;
# requests using any other method are forwarded without rate-limiting.
RATE_LIMITED_METHODS = ('GET', 'HEAD', 'PUT', 'POST', 'DELETE', 'UPDATE',
'REPLICATE')
class BackendRateLimitMiddleware(object):
    """
    Backend rate-limiting middleware.

    Rate-limits requests to backend storage node devices. Each device is
    independently rate-limited. All requests with a 'GET', 'HEAD', 'PUT',
    'POST', 'DELETE', 'UPDATE' or 'REPLICATE' method are included in a device's
    rate limit.

    If a request would cause the rate-limit to be exceeded then a response with
    a 529 status code is returned.

    :param app: the next WSGI application in the pipeline.
    :param conf: a dict of config options. ``requests_per_device_per_second``
        (default 0.0) and ``requests_per_device_rate_buffer`` (default 1.0)
        are read from it; both must be non-negative floats.
    :param logger: an optional logger; if not given, one is created from
        ``conf`` with the log route ``backend_ratelimit``.
    :raises ValueError: if either option is not a non-negative float.
    """

    def __init__(self, app, conf, logger=None):
        self.app = app
        self.logger = logger or get_logger(conf, log_route='backend_ratelimit')
        self.requests_per_device_per_second = non_negative_float(
            conf.get('requests_per_device_per_second', 0.0))
        self.requests_per_device_rate_buffer = non_negative_float(
            conf.get('requests_per_device_rate_buffer', 1.0))

        # map device -> RateLimiter; a limiter is created lazily the first
        # time a request for a given device is seen
        self.rate_limiters = defaultdict(
            lambda: EventletRateLimiter(
                max_rate=self.requests_per_device_per_second,
                rate_buffer=self.requests_per_device_rate_buffer,
                running_time=time.time(),
                burst_after_idle=True))

    def __call__(self, env, start_response):
        """
        WSGI entry point.

        Requests whose method is in ``RATE_LIMITED_METHODS`` and whose path
        starts with ``<device>/<partition>`` (with an integer partition) are
        checked against the device's rate limiter; all other requests are
        passed straight to the wrapped app.

        :param env: WSGI environment dictionary
        :param start_response: WSGI callable
        :returns: the response iterable from either the wrapped app or a
            529 response.
        """
        req = Request(env)
        handler = self.app
        if req.method in RATE_LIMITED_METHODS:
            try:
                device, partition, _ = split_and_validate_path(req, 1, 3, True)
                int(partition)  # check it's a valid partition
            except Exception:  # noqa
                # request may not have device/partition e.g. a healthcheck req
                pass
            else:
                # Only path parsing is best-effort. The rate-limit decision
                # is kept outside the try block so that an unexpected failure
                # in the limiter or the stats call is not silently swallowed.
                rate_limiter = self.rate_limiters[device]
                if not rate_limiter.is_allowed():
                    self.logger.increment('backend.ratelimit')
                    handler = HTTPTooManyBackendRequests()
        return handler(env, start_response)
def filter_factory(global_conf, **local_conf):
    """
    Paste filter factory for the backend rate-limiting middleware.

    :param global_conf: dict of global config options.
    :param local_conf: filter-specific options; these take precedence over
        options of the same name in ``global_conf``.
    :returns: a callable that wraps a WSGI app in a
        BackendRateLimitMiddleware built from the merged options.
    """
    # Work on a copy so that the caller's global_conf is left untouched.
    merged_conf = global_conf.copy()
    for option, value in local_conf.items():
        merged_conf[option] = value

    def backend_ratelimit_filter(app):
        return BackendRateLimitMiddleware(app, merged_conf)

    return backend_ratelimit_filter

View File

@ -114,6 +114,8 @@ RESPONSE_REASONS = {
'backend server.'), 'backend server.'),
507: ('Insufficient Storage', 'There was not enough space to save the ' 507: ('Insufficient Storage', 'There was not enough space to save the '
'resource. Drive: %(drive)s'), 'resource. Drive: %(drive)s'),
529: ('Too Many Backend Requests', 'The server is incapable of performing '
'the requested operation due to too many requests. Slow down.')
} }
MAX_RANGE_OVERLAPS = 2 MAX_RANGE_OVERLAPS = 2
@ -1619,3 +1621,4 @@ HTTPNotImplemented = status_map[501]
HTTPBadGateway = status_map[502] HTTPBadGateway = status_map[502]
HTTPServiceUnavailable = status_map[503] HTTPServiceUnavailable = status_map[503]
HTTPInsufficientStorage = status_map[507] HTTPInsufficientStorage = status_map[507]
HTTPTooManyBackendRequests = status_map[529]

View File

@ -367,9 +367,13 @@ def non_negative_float(value):
:raises ValueError: if the value cannot be cast to a float or is negative. :raises ValueError: if the value cannot be cast to a float or is negative.
:return: a float :return: a float
""" """
value = float(value) try:
if value < 0: value = float(value)
raise ValueError if value < 0:
raise ValueError
except (TypeError, ValueError):
raise ValueError('Value must be a non-negative float number, not "%s".'
% value)
return value return value

View File

@ -76,7 +76,7 @@ class FakeSwift(object):
""" """
ALLOWED_METHODS = [ ALLOWED_METHODS = [
'PUT', 'POST', 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'REPLICATE', 'PUT', 'POST', 'DELETE', 'GET', 'HEAD', 'OPTIONS', 'REPLICATE',
'UPDATE'] 'SSYNC', 'UPDATE']
def __init__(self): def __init__(self):
self._calls = [] self._calls = []

View File

@ -0,0 +1,170 @@
# Copyright (c) 2022 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Unit tests for the backend rate-limiting middleware
# (swift.common.middleware.backend_ratelimit).
import time
import unittest
from collections import defaultdict
import mock
from swift.common.middleware import backend_ratelimit
from swift.common.middleware.backend_ratelimit import \
BackendRateLimitMiddleware
from swift.common.swob import Request, HTTPOk
from test.debug_logger import debug_logger
from test.unit.common.middleware.helpers import FakeSwift
class FakeApp(object):
    """
    Minimal WSGI app stub that answers every request with an empty 200 OK.

    Each WSGI environ the app is called with is appended to ``self.calls``
    so tests can inspect what reached the app.
    """

    def __init__(self):
        self.calls = []

    def __call__(self, env, start_response):
        self.calls.append(env)
        # WSGI requires response headers to be a list of (name, value)
        # tuples and the body to be an iterable of bytes.
        start_response('200 OK', [])
        return [b'']
class TestBackendRatelimitMiddleware(unittest.TestCase):
    """Tests for BackendRateLimitMiddleware and its filter factory."""

    def setUp(self):
        super(TestBackendRatelimitMiddleware, self).setUp()
        self.swift = FakeSwift()

    def test_init(self):
        # defaults
        conf = {}
        factory = backend_ratelimit.filter_factory(conf)
        rl = factory(self.swift)
        self.assertEqual(0.0, rl.requests_per_device_per_second)
        self.assertEqual(1.0, rl.requests_per_device_rate_buffer)

        # explicit values
        conf = {'requests_per_device_per_second': 1.3,
                'requests_per_device_rate_buffer': 2.4}
        factory = backend_ratelimit.filter_factory(conf)
        rl = factory(self.swift)
        self.assertEqual(1.3, rl.requests_per_device_per_second)
        self.assertEqual(2.4, rl.requests_per_device_rate_buffer)

        # negative values are rejected
        conf = {'requests_per_device_per_second': -1}
        factory = backend_ratelimit.filter_factory(conf)
        with self.assertRaises(ValueError) as cm:
            factory(self.swift)
        self.assertEqual(
            'Value must be a non-negative float number, not "-1.0".',
            str(cm.exception))

        conf = {'requests_per_device_rate_buffer': -1}
        factory = backend_ratelimit.filter_factory(conf)
        # capture this exception too, so the assertion below checks the error
        # raised for the rate buffer rather than the one from the block above
        with self.assertRaises(ValueError) as cm:
            factory(self.swift)
        self.assertEqual(
            'Value must be a non-negative float number, not "-1.0".',
            str(cm.exception))

    def _do_test_ratelimit(self, method, req_per_sec, rate_buffer):
        """
        Send 20 rounds of requests, one request per device per round, with
        the mocked clock advancing by 0.01s after each round.

        :param method: the HTTP method to use for every request.
        :param req_per_sec: value for requests_per_device_per_second.
        :param rate_buffer: value for requests_per_device_rate_buffer.
        :returns: a dict mapping device name to the number of requests for
            that device that received a 200 response.
        """
        start = time.time()
        fake_time = [start]

        def mock_time():
            return fake_time[0]

        app = FakeSwift()
        logger = debug_logger()
        # apply a ratelimit
        conf = {'requests_per_device_per_second': req_per_sec,
                'requests_per_device_rate_buffer': rate_buffer}
        rl = BackendRateLimitMiddleware(app, conf, logger)
        success = defaultdict(int)
        ratelimited = 0

        with mock.patch('swift.common.utils.time.time', mock_time):
            for _ in range(20):
                for dev in ['sda1', 'sda2', 'sda3']:
                    req = Request.blank('/%s/99/a/c/o' % dev,
                                        environ={'REQUEST_METHOD': method})
                    app.register(method, req.path, HTTPOk, {})
                    resp = req.get_response(rl)
                    if resp.status_int == 200:
                        success[dev] += 1
                    else:
                        self.assertEqual(529, resp.status_int)
                        self.assertTrue(resp.status.startswith(
                            '529 Too Many Backend Requests'))
                        ratelimited += 1
                fake_time[0] += 0.01
        # every rejected request must have been counted exactly once
        self.assertEqual(
            ratelimited,
            logger.get_increment_counts().get('backend.ratelimit', 0))
        return success

    def test_ratelimited(self):
        def do_test_ratelimit(method):
            # no rate-limiting
            success_per_dev = self._do_test_ratelimit(method, 0, 0)
            self.assertEqual([20] * 3, list(success_per_dev.values()))

            # rate-limited
            success_per_dev = self._do_test_ratelimit(method, 1, 0)
            self.assertEqual([1] * 3, list(success_per_dev.values()))

            success_per_dev = self._do_test_ratelimit(method, 10, 0)
            self.assertEqual([2] * 3, list(success_per_dev.values()))

            success_per_dev = self._do_test_ratelimit(method, 101, 0)
            self.assertEqual([20] * 3, list(success_per_dev.values()))

            # startup burst of 1 second's allowance plus current allowance...
            success_per_dev = self._do_test_ratelimit(method, 1, 1)
            self.assertEqual([2] * 3, list(success_per_dev.values()))
            success_per_dev = self._do_test_ratelimit(method, 10, 1)
            self.assertEqual([12] * 3, list(success_per_dev.values()))

        do_test_ratelimit('GET')
        do_test_ratelimit('HEAD')
        do_test_ratelimit('PUT')
        do_test_ratelimit('POST')
        do_test_ratelimit('DELETE')
        do_test_ratelimit('UPDATE')
        do_test_ratelimit('REPLICATE')

    def test_not_ratelimited(self):
        def do_test_no_ratelimit(method):
            # verify no rate-limiting
            success_per_dev = self._do_test_ratelimit(method, 1, 0)
            self.assertEqual([20] * 3, list(success_per_dev.values()))

        do_test_no_ratelimit('OPTIONS')
        do_test_no_ratelimit('SSYNC')

    def test_unhandled_request(self):
        app = FakeSwift()
        logger = debug_logger()
        conf = {'requests_per_device_per_second': 1,
                'requests_per_device_rate_buffer': 1}

        def do_test(path):
            # paths without a leading <device>/<partition> are never limited
            rl = BackendRateLimitMiddleware(app, conf, logger)
            req = Request.blank(path)
            app.register('GET', req.path, HTTPOk, {})
            for _ in range(10):
                resp = req.get_response(rl)
                self.assertEqual(200, resp.status_int)
            self.assertEqual(
                0, logger.get_increment_counts().get('backend.ratelimit', 0))

        do_test('/recon/version')
        do_test('/healthcheck')
        do_test('/v1/a/c/o')

View File

@ -3105,12 +3105,26 @@ cluster_dfw1 = http://dfw1.host/v1/
self.assertEqual(1, utils.non_negative_float(True)) self.assertEqual(1, utils.non_negative_float(True))
self.assertEqual(0, utils.non_negative_float(False)) self.assertEqual(0, utils.non_negative_float(False))
with self.assertRaises(ValueError): with self.assertRaises(ValueError) as cm:
utils.non_negative_float(-1.1) utils.non_negative_float(-1.1)
with self.assertRaises(ValueError): self.assertEqual(
'Value must be a non-negative float number, not "-1.1".',
str(cm.exception))
with self.assertRaises(ValueError) as cm:
utils.non_negative_float('-1.1') utils.non_negative_float('-1.1')
with self.assertRaises(ValueError): self.assertEqual(
'Value must be a non-negative float number, not "-1.1".',
str(cm.exception))
with self.assertRaises(ValueError) as cm:
utils.non_negative_float('one') utils.non_negative_float('one')
self.assertEqual(
'Value must be a non-negative float number, not "one".',
str(cm.exception))
with self.assertRaises(ValueError) as cm:
utils.non_negative_float(None)
self.assertEqual(
'Value must be a non-negative float number, not "None".',
str(cm.exception))
def test_non_negative_int(self): def test_non_negative_int(self):
self.assertEqual(0, utils.non_negative_int('0')) self.assertEqual(0, utils.non_negative_int('0'))