From 87a42ab9ca032e1a539f50ac37600f7ee8312e60 Mon Sep 17 00:00:00 2001 From: gholt Date: Fri, 25 Jan 2013 02:11:19 +0000 Subject: [PATCH] Added fallocate_reserve option Some systems behave badly when they completely run out of space. To alleviate this problem, you can set the fallocate_reserve conf value to a number of bytes to "reserve" on each disk. When the disk free space falls at or below this amount, fallocate calls will fail, even if the underlying OS fallocate call would succeed. For example, a fallocate_reserve of 5368709120 (5G) would make all fallocate calls fail, even for zero-byte files, when the disk free space falls under 5G. The default fallocate_reserve is 0, meaning "no reserve", and so the software behaves exactly as it always has unless you set this conf value to something non-zero. Also fixed ring builder's search_devs doc bugs. Related: To get rsync to do the same, see https://github.com/rackspace/cloudfiles-rsync Specifically, see this patch: https://github.com/rackspace/cloudfiles-rsync/blob/master/debian/patches/limit-fs-fullness.diff DocImpact Change-Id: I8db176ae0ca5b41c9bcfeb7cb8abb31c2e614527 --- doc/source/deployment_guide.rst | 21 +++++ etc/account-server.conf-sample | 3 + etc/container-server.conf-sample | 3 + etc/object-server.conf-sample | 3 + swift/common/daemon.py | 4 + swift/common/ring/builder.py | 16 +++- swift/common/utils.py | 32 ++++--- swift/common/wsgi.py | 5 ++ swift/obj/server.py | 10 +-- test/unit/common/test_utils.py | 141 +++++++++++++++++++++++++++++++ 10 files changed, 218 insertions(+), 20 deletions(-) diff --git a/doc/source/deployment_guide.rst b/doc/source/deployment_guide.rst index badcd22ba2..fa03243469 100644 --- a/doc/source/deployment_guide.rst +++ b/doc/source/deployment_guide.rst @@ -237,6 +237,13 @@ disable_fallocate false Disable "fast fail" fallocate checks if the log_custom_handlers None Comma-separated list of functions to call to setup custom log handlers. 
eventlet_debug false If true, turn on debug logging for eventlet +fallocate_reserve 0 You can set fallocate_reserve to the number of + bytes you'd like fallocate to reserve, whether + there is space for the given file size or not. + This is useful for systems that behave badly + when they completely run out of space; you can + make the services pretend they're out of space + early. =================== ========== ============================================= [object-server] @@ -348,6 +355,13 @@ disable_fallocate false Disable "fast fail" fallocate checks if the log_custom_handlers None Comma-separated list of functions to call to setup custom log handlers. eventlet_debug false If true, turn on debug logging for eventlet +fallocate_reserve 0 You can set fallocate_reserve to the number of + bytes you'd like fallocate to reserve, whether + there is space for the given file size or not. + This is useful for systems that behave badly + when they completely run out of space; you can + make the services pretend they're out of space + early. =================== ========== ============================================ [container-server] @@ -452,6 +466,13 @@ disable_fallocate false Disable "fast fail" fallocate checks if the log_custom_handlers None Comma-separated list of functions to call to setup custom log handlers. eventlet_debug false If true, turn on debug logging for eventlet +fallocate_reserve 0 You can set fallocate_reserve to the number of + bytes you'd like fallocate to reserve, whether + there is space for the given file size or not. + This is useful for systems that behave badly + when they completely run out of space; you can + make the services pretend they're out of space + early. 
=================== ========== ============================================= [account-server] diff --git a/etc/account-server.conf-sample b/etc/account-server.conf-sample index 5cb0d2897e..fd3b5a3765 100644 --- a/etc/account-server.conf-sample +++ b/etc/account-server.conf-sample @@ -30,6 +30,9 @@ # on to preallocate disk space with SQLite databases to decrease fragmentation. # db_preallocation = off # eventlet_debug = false +# You can set fallocate_reserve to the number of bytes you'd like fallocate to +# reserve, whether there is space for the given file size or not. +# fallocate_reserve = 0 [pipeline:main] pipeline = healthcheck recon account-server diff --git a/etc/container-server.conf-sample b/etc/container-server.conf-sample index 728fa4cd3c..384aa72481 100644 --- a/etc/container-server.conf-sample +++ b/etc/container-server.conf-sample @@ -33,6 +33,9 @@ # on to preallocate disk space with SQLite databases to decrease fragmentation. # db_preallocation = off # eventlet_debug = false +# You can set fallocate_reserve to the number of bytes you'd like fallocate to +# reserve, whether there is space for the given file size or not. +# fallocate_reserve = 0 [pipeline:main] pipeline = healthcheck recon container-server diff --git a/etc/object-server.conf-sample b/etc/object-server.conf-sample index 2e8b54f4eb..70bbebcb3a 100644 --- a/etc/object-server.conf-sample +++ b/etc/object-server.conf-sample @@ -28,6 +28,9 @@ # log_statsd_default_sample_rate = 1 # log_statsd_metric_prefix = # eventlet_debug = false +# You can set fallocate_reserve to the number of bytes you'd like fallocate to +# reserve, whether there is space for the given file size or not. 
+# fallocate_reserve = 0 [pipeline:main] pipeline = healthcheck recon object-server diff --git a/swift/common/daemon.py b/swift/common/daemon.py index 009619cc78..1955b1326e 100644 --- a/swift/common/daemon.py +++ b/swift/common/daemon.py @@ -90,6 +90,10 @@ def run_daemon(klass, conf_file, section_name='', once=False, **kwargs): # disable fallocate if desired if utils.config_true_value(conf.get('disable_fallocate', 'no')): utils.disable_fallocate() + # set utils.FALLOCATE_RESERVE if desired + reserve = int(conf.get('fallocate_reserve', 0)) + if reserve > 0: + utils.FALLOCATE_RESERVE = reserve # By default, disable eventlet printing stacktraces eventlet_debug = utils.config_true_value(conf.get('eventlet_debug', 'no')) diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py index 2f301976c1..09d6303586 100644 --- a/swift/common/ring/builder.py +++ b/swift/common/ring/builder.py @@ -799,10 +799,14 @@ class RingBuilder(object): def search_devs(self, search_value): """ -The <search-value> can be of the form: - d<device_id>z<zone>-<ip>:<port>/<device_name>_<meta> + The <search-value> can be of the form:: + + d<device_id>z<zone>-<ip>:<port>/<device_name>_<meta> + Any part is optional, but you must include at least one part. - Examples: + + Examples:: + d74 Matches the device id 74 z1 Matches devices in zone 1 z1-1.2.3.4 Matches devices in zone 1 with the ip 1.2.3.4 @@ -814,9 +818,13 @@ The <search-value> can be of the form: _"snet: 5.6.7.8" Matches devices with snet: 5.6.7.8 in the meta data [::1] Matches devices in any zone with the ip ::1 z1-[::1]:5678 Matches devices in zone 1 with ip ::1 and port 5678 - Most specific example: + + Most specific example:: + d74z1-1.2.3.4:5678/sdb1_"snet: 5.6.7.8" + Nerd explanation: + All items require their single character prefix except the ip, in which case the - is optional unless the device id or zone is also included. 
""" diff --git a/swift/common/utils.py b/swift/common/utils.py index a9f72c893d..9d95816640 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -70,6 +70,10 @@ _sys_fsync = None _sys_fallocate = None _posix_fadvise = None +# If set to non-zero, fallocate routines will fail based on free space +# available being at or below this amount, in bytes. +FALLOCATE_RESERVE = 0 + # Used by hash_path to offer a bit more security when generating hashes for # paths. It simply appends this value to all paths; guessing the hash a path # will end up with would also require knowing this suffix. @@ -156,10 +160,17 @@ class FallocateWrapper(object): logging.warn(_("Unable to locate fallocate, posix_fallocate in " "libc. Leaving as a no-op.")) - def __call__(self, fd, mode, offset, len): + def __call__(self, fd, mode, offset, length): + """ The length parameter must be a ctypes.c_uint64 """ + if FALLOCATE_RESERVE > 0: + st = os.fstatvfs(fd) + free = st.f_frsize * st.f_bavail - length.value + if free <= FALLOCATE_RESERVE: + raise OSError('FALLOCATE_RESERVE fail %s <= %s' % ( + free, FALLOCATE_RESERVE)) args = { - 'fallocate': (fd, mode, offset, len), - 'posix_fallocate': (fd, offset, len) + 'fallocate': (fd, mode, offset, length), + 'posix_fallocate': (fd, offset, length) } return self.fallocate(*args[self.func_name]) @@ -179,13 +190,14 @@ def fallocate(fd, size): global _sys_fallocate if _sys_fallocate is None: _sys_fallocate = FallocateWrapper() - if size > 0: - # 1 means "FALLOC_FL_KEEP_SIZE", which means it pre-allocates invisibly - ret = _sys_fallocate(fd, 1, 0, ctypes.c_uint64(size)) - err = ctypes.get_errno() - if ret and err not in (0, errno.ENOSYS, errno.EOPNOTSUPP, - errno.EINVAL): - raise OSError(err, 'Unable to fallocate(%s)' % size) + if size < 0: + size = 0 + # 1 means "FALLOC_FL_KEEP_SIZE", which means it pre-allocates invisibly + ret = _sys_fallocate(fd, 1, 0, ctypes.c_uint64(size)) + err = ctypes.get_errno() + if ret and err not in (0, errno.ENOSYS, 
errno.EOPNOTSUPP, + errno.EINVAL): + raise OSError(err, 'Unable to fallocate(%s)' % size) class FsyncWrapper(object): diff --git a/swift/common/wsgi.py b/swift/common/wsgi.py index 02dccfba99..29e3f1b0b6 100644 --- a/swift/common/wsgi.py +++ b/swift/common/wsgi.py @@ -30,6 +30,7 @@ from paste.deploy import loadapp, appconfig from eventlet.green import socket, ssl from urllib import unquote +from swift.common import utils from swift.common.swob import Request from swift.common.utils import capture_stdio, disable_fallocate, \ drop_privileges, get_logger, NullLogger, config_true_value, \ @@ -124,6 +125,10 @@ def run_wsgi(conf_file, app_section, *args, **kwargs): sock = get_socket(conf, default_port=kwargs.get('default_port', 8080)) # remaining tasks should not require elevated privileges drop_privileges(conf.get('user', 'swift')) + # set utils.FALLOCATE_RESERVE if desired + reserve = int(conf.get('fallocate_reserve', 0)) + if reserve > 0: + utils.FALLOCATE_RESERVE = reserve # redirect errors to logger and close stdio capture_stdio(logger) diff --git a/swift/obj/server.py b/swift/obj/server.py index d6e5865d87..5a6adff285 100755 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -644,12 +644,10 @@ class ObjectController(object): upload_size = 0 last_sync = 0 with file.mkstemp() as fd: - if 'content-length' in request.headers: - try: - fallocate(fd, int(request.headers['content-length'])) - except OSError: - return HTTPInsufficientStorage(drive=device, - request=request) + try: + fallocate(fd, int(request.headers.get('content-length', 0))) + except OSError: + return HTTPInsufficientStorage(drive=device, request=request) reader = request.environ['wsgi.input'].read for chunk in iter(lambda: reader(self.network_chunk_size), ''): upload_size += len(chunk) diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 25a9b45226..ce166e7db6 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -17,6 +17,7 @@ from 
__future__ import with_statement from test.unit import temptree +import ctypes import errno import logging import mimetools @@ -933,6 +934,146 @@ log_name = %(yarr)s''' self.assertEqual( utils.rsync_ip('::ffff:192.0.2.128'), '[::ffff:192.0.2.128]') + def test_fallocate_reserve(self): + + class StatVFS(object): + f_frsize = 1024 + f_bavail = 1 + + def fstatvfs(fd): + return StatVFS() + + orig_FALLOCATE_RESERVE = utils.FALLOCATE_RESERVE + orig_fstatvfs = utils.os.fstatvfs + try: + fallocate = utils.FallocateWrapper(noop=True) + utils.os.fstatvfs = fstatvfs + # Want 1023 reserved, have 1024 * 1 free, so succeeds + utils.FALLOCATE_RESERVE = 1023 + StatVFS.f_frsize = 1024 + StatVFS.f_bavail = 1 + self.assertEquals(fallocate(0, 1, 0, ctypes.c_uint64(0)), 0) + # Want 1023 reserved, have 512 * 2 free, so succeeds + utils.FALLOCATE_RESERVE = 1023 + StatVFS.f_frsize = 512 + StatVFS.f_bavail = 2 + self.assertEquals(fallocate(0, 1, 0, ctypes.c_uint64(0)), 0) + # Want 1024 reserved, have 1024 * 1 free, so fails + utils.FALLOCATE_RESERVE = 1024 + StatVFS.f_frsize = 1024 + StatVFS.f_bavail = 1 + exc = None + try: + fallocate(0, 1, 0, ctypes.c_uint64(0)) + except OSError, err: + exc = err + self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 1024') + # Want 1024 reserved, have 512 * 2 free, so fails + utils.FALLOCATE_RESERVE = 1024 + StatVFS.f_frsize = 512 + StatVFS.f_bavail = 2 + exc = None + try: + fallocate(0, 1, 0, ctypes.c_uint64(0)) + except OSError, err: + exc = err + self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 1024') + # Want 2048 reserved, have 1024 * 1 free, so fails + utils.FALLOCATE_RESERVE = 2048 + StatVFS.f_frsize = 1024 + StatVFS.f_bavail = 1 + exc = None + try: + fallocate(0, 1, 0, ctypes.c_uint64(0)) + except OSError, err: + exc = err + self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 2048') + # Want 2048 reserved, have 512 * 2 free, so fails + utils.FALLOCATE_RESERVE = 2048 + StatVFS.f_frsize = 512 + StatVFS.f_bavail = 2 + exc = 
None + try: + fallocate(0, 1, 0, ctypes.c_uint64(0)) + except OSError, err: + exc = err + self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 2048') + # Want 1023 reserved, have 1024 * 1 free, but file size is 1, so + # fails + utils.FALLOCATE_RESERVE = 1023 + StatVFS.f_frsize = 1024 + StatVFS.f_bavail = 1 + exc = None + try: + fallocate(0, 1, 0, ctypes.c_uint64(1)) + except OSError, err: + exc = err + self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1023 <= 1023') + # Want 1022 reserved, have 1024 * 1 free, and file size is 1, so + # succeeds + utils.FALLOCATE_RESERVE = 1022 + StatVFS.f_frsize = 1024 + StatVFS.f_bavail = 1 + self.assertEquals(fallocate(0, 1, 0, ctypes.c_uint64(1)), 0) + # Want 1023 reserved, have 1024 * 1 free, and file size is 0, so + # succeeds + utils.FALLOCATE_RESERVE = 1023 + StatVFS.f_frsize = 1024 + StatVFS.f_bavail = 1 + self.assertEquals(fallocate(0, 1, 0, ctypes.c_uint64(0)), 0) + # Want 1024 reserved, have 1024 * 1 free, and even though + # file size is 0, since we're under the reserve, fails + utils.FALLOCATE_RESERVE = 1024 + StatVFS.f_frsize = 1024 + StatVFS.f_bavail = 1 + exc = None + try: + fallocate(0, 1, 0, ctypes.c_uint64(0)) + except OSError, err: + exc = err + self.assertEquals(str(exc), 'FALLOCATE_RESERVE fail 1024 <= 1024') + finally: + utils.FALLOCATE_RESERVE = orig_FALLOCATE_RESERVE + utils.os.fstatvfs = orig_fstatvfs + + def test_fallocate_func(self): + + class FallocateWrapper(object): + + def __init__(self): + self.last_call = None + + def __call__(self, *args): + self.last_call = list(args) + self.last_call[-1] = self.last_call[-1].value + return 0 + + orig__sys_fallocate = utils._sys_fallocate + try: + utils._sys_fallocate = FallocateWrapper() + # Ensure fallocate calls _sys_fallocate even with 0 bytes + utils._sys_fallocate.last_call = None + utils.fallocate(1234, 0) + self.assertEquals(utils._sys_fallocate.last_call, + [1234, 1, 0, 0]) + # Ensure fallocate calls _sys_fallocate even with negative bytes + 
utils._sys_fallocate.last_call = None + utils.fallocate(1234, -5678) + self.assertEquals(utils._sys_fallocate.last_call, + [1234, 1, 0, 0]) + # Ensure fallocate calls _sys_fallocate properly with positive + # bytes + utils._sys_fallocate.last_call = None + utils.fallocate(1234, 1) + self.assertEquals(utils._sys_fallocate.last_call, + [1234, 1, 0, 1]) + utils._sys_fallocate.last_call = None + utils.fallocate(1234, 10 * 1024 * 1024 * 1024) + self.assertEquals(utils._sys_fallocate.last_call, + [1234, 1, 0, 10 * 1024 * 1024 * 1024]) + finally: + utils._sys_fallocate = orig__sys_fallocate + class TestStatsdLogging(unittest.TestCase): def test_get_logger_statsd_client_not_specified(self):