From c6d7ddc4bcdefbe7e30946c5c7eb26e74ad0ff0e Mon Sep 17 00:00:00 2001 From: Prashanth Pai Date: Wed, 30 Oct 2013 16:13:50 +0530 Subject: [PATCH] Improve logging and raising DiskFileNoSpace This commit only improves logging whenever ENOSPC (No space on disk) or EDQUOT (Quota limit exceeded) is returned by glusterfs Also, added methods to: - get filename from file descriptor - log with rate limit Caveat: Although raising DiskFileNoSpace results in object-server returning HTTPInsufficientStorage[507] correctly, the swift proxy-server invokes "best_response" method that returns [503] to the user. When write-behind translator is turned on in glusterfs, it may set errno to EIO instead of ENOSPC/EDQUOT. This is documented in BZ 986812 BUG: 985862, 985253, 1020724 Change-Id: Ib0c5e41c11a8cdccc2077f71c31d8a23229452bb Signed-off-by: Prashanth Pai Reviewed-on: http://review.gluster.org/6199 Reviewed-by: Luis Pabon Tested-by: Luis Pabon Reviewed-on: http://review.gluster.org/6269 --- gluster/swift/common/fs_utils.py | 102 +++++++++++++++++++++++- gluster/swift/common/utils.py | 19 ++++- gluster/swift/container/server.py | 14 ++++ gluster/swift/obj/diskfile.py | 5 +- test/unit/common/test_fs_utils.py | 127 +++++++++++++++++++++++++++++- test/unit/common/test_utils.py | 23 ++++++ 6 files changed, 279 insertions(+), 11 deletions(-) diff --git a/gluster/swift/common/fs_utils.py b/gluster/swift/common/fs_utils.py index 199984a..1ef68ed 100644 --- a/gluster/swift/common/fs_utils.py +++ b/gluster/swift/common/fs_utils.py @@ -18,12 +18,16 @@ import os import errno import stat import random +import time +from collections import defaultdict +from itertools import repeat import ctypes import os.path as _os_path from eventlet import sleep from swift.common.utils import load_libc_function from gluster.swift.common.exceptions import FileOrDirNotFoundError, \ NotDirectoryError, GlusterFileSystemOSError +from swift.common.exceptions import DiskFileNoSpace os_path = _os_path @@ -37,8 +41,14 @@ def do_write(fd, buf): try: cnt = os.write(fd, buf) except OSError as err: - raise GlusterFileSystemOSError( - err.errno, '%s, os.write("%s", ...)' % (err.strerror, fd)) + filename = get_filename_from_fd(fd) + if err.errno in (errno.ENOSPC, errno.EDQUOT): + do_log_rl("do_write(%d, msg[%d]) failed: %s : %s", + fd, len(buf), err, filename) + raise DiskFileNoSpace() + else: + raise GlusterFileSystemOSError( + err.errno, '%s, os.write("%s", ...)' % (err.strerror, fd)) return cnt @@ -100,6 +110,9 @@ def do_mkdir(path): except OSError as err: if err.errno == errno.EEXIST: logging.warn("fs_utils: os.mkdir - path %s already exists", path) + elif err.errno in (errno.ENOSPC, errno.EDQUOT): + do_log_rl("do_mkdir(%s) failed: %s", path, err) + raise DiskFileNoSpace() else: raise GlusterFileSystemOSError( err.errno, '%s, os.mkdir("%s")' % (err.strerror, path)) @@ -288,5 +301,86 @@ def mkdirs(path): except OSError as err: if err.errno == errno.EEXIST and os.path.isdir(path): return - raise GlusterFileSystemOSError( - err.errno, '%s, os.makedirs("%s")' % (err.strerror, path)) + elif err.errno in (errno.ENOSPC, errno.EDQUOT): + do_log_rl("mkdirs(%s) failed: %s", path, err) + raise DiskFileNoSpace() + else: + raise GlusterFileSystemOSError( + err.errno, '%s, os.makedirs("%s")' % (err.strerror, path)) + + +def get_filename_from_fd(fd, verify=False): + """ + Given the file descriptor, this method attempts to get the filename as it + was when opened. This may not give accurate results in following cases: + - file was renamed/moved/deleted after it was opened + - file has multiple hardlinks + + :param fd: file descriptor of file opened + :param verify: If True, performs additional checks using inode number + """ + filename = None + if isinstance(fd, int): + try: + filename = os.readlink("/proc/self/fd/" + str(fd)) + except OSError: + pass + + if not verify: + return filename + + # If verify = True, we compare st_dev and st_ino of file and fd. + # This involves additional stat and fstat calls. So this is disabled + # by default. + if filename and fd: + s_file = do_stat(filename) + s_fd = do_fstat(fd) + + if s_file and s_fd: + if (s_file.st_ino, s_file.st_dev) == (s_fd.st_ino, s_fd.st_dev): + return filename + + return None + + +def static_var(varname, value): + """Decorator function to create pseudo static variables.""" + def decorate(func): + setattr(func, varname, value) + return func + return decorate + +# Rate limit to emit log message once a second +_DO_LOG_RL_INTERVAL = 1.0 + + +@static_var("counter", defaultdict(int)) +@static_var("last_called", defaultdict(repeat(0.0).next)) +def do_log_rl(msg, *args, **kwargs): + """ + Rate limited logger. + + :param msg: String or message to be logged + :param log_level: Possible values- error, warning, info, debug, critical + """ + log_level = kwargs.get('log_level', "error") + if log_level not in ("error", "warning", "info", "debug", "critical"): + log_level = "error" + + do_log_rl.counter[msg] += 1 # Increment msg counter + interval = time.time() - do_log_rl.last_called[msg] + + if interval >= _DO_LOG_RL_INTERVAL: + # Prefix PID of process and message count to original log msg + emit_msg = "[PID:" + str(os.getpid()) + "]" \ + + "[RateLimitedLog;Count:" + str(do_log_rl.counter[msg]) + "] " \ + + msg + # log_level is a param for do_log_rl and not for logging.* methods + try: + del kwargs['log_level'] + except KeyError: + pass + + getattr(logging, log_level)(emit_msg, *args, **kwargs) # Emit msg + do_log_rl.counter[msg] = 0 # Reset msg counter when message is emitted + do_log_rl.last_called[msg] = time.time() # Reset msg time diff --git a/gluster/swift/common/utils.py b/gluster/swift/common/utils.py index 6773836..0b2ad28 100644 --- a/gluster/swift/common/utils.py +++ b/gluster/swift/common/utils.py @@ -22,8 +22,9 @@ from hashlib import md5 from eventlet import sleep import cPickle as pickle from gluster.swift.common.exceptions import GlusterFileSystemIOError +from swift.common.exceptions import DiskFileNoSpace from gluster.swift.common.fs_utils import os_path, do_stat, do_listdir, \ - do_walk, do_rmdir, do_fstat + do_walk, do_rmdir, do_fstat, do_log_rl, get_filename_from_fd from gluster.swift.common import Glusterfs X_CONTENT_TYPE = 'Content-Type' @@ -137,9 +138,19 @@ def write_metadata(path_or_fd, metadata): '%s%s' % (METADATA_KEY, key or ''), metastr[:MAX_XATTR_SIZE]) except IOError as err: - raise GlusterFileSystemIOError( - err.errno, - 'xattr.setxattr("%s", %s, metastr)' % (path_or_fd, key)) + if err.errno in (errno.ENOSPC, errno.EDQUOT): + if isinstance(path_or_fd, int): + filename = get_filename_from_fd(path_or_fd) + do_log_rl("write_metadata(%d, metadata) failed: %s : %s", + path_or_fd, err, filename) + else: + do_log_rl("write_metadata(%s, metadata) failed: %s", + path_or_fd, err) + raise DiskFileNoSpace() + else: + raise GlusterFileSystemIOError( + err.errno, + 'xattr.setxattr("%s", %s, metastr)' % (path_or_fd, key)) metastr = metastr[MAX_XATTR_SIZE:] key += 1 diff --git a/gluster/swift/container/server.py b/gluster/swift/container/server.py index e832248..173976a 100644 --- a/gluster/swift/container/server.py +++ b/gluster/swift/container/server.py @@ -21,6 +21,9 @@ import gluster.swift.common.constraints # noqa from swift.container import server from gluster.swift.common.DiskDir import DiskDir +from swift.common.utils import public, timing_stats +from swift.common.exceptions import DiskFileNoSpace +from swift.common.swob import HTTPInsufficientStorage class ContainerController(server.ContainerController): @@ -63,6 +66,17 @@ class ContainerController(server.ContainerController): """ return None + @public + @timing_stats() + def PUT(self, req): + try: + return server.ContainerController.PUT(self, req) + except DiskFileNoSpace: + # As container=directory in gluster-swift, we might run out of + # space or exceed quota when creating containers. + drive = req.split_path(1, 1, True) + return HTTPInsufficientStorage(drive=drive, request=req) + def app_factory(global_conf, **local_conf): """paste.deploy app factory for creating WSGI container server apps.""" diff --git a/gluster/swift/obj/diskfile.py b/gluster/swift/obj/diskfile.py index b09179d..6e6856d 100644 --- a/gluster/swift/obj/diskfile.py +++ b/gluster/swift/obj/diskfile.py @@ -904,8 +904,9 @@ class DiskFile(object): fd = do_open(tmppath, os.O_WRONLY | os.O_CREAT | os.O_EXCL | O_CLOEXEC) except GlusterFileSystemOSError as gerr: - if gerr.errno == errno.ENOSPC: - # Raise DiskFileNoSpace to be handled by upper layers + if gerr.errno in (errno.ENOSPC, errno.EDQUOT): + # Raise DiskFileNoSpace to be handled by upper layers when + # there is no space on disk OR when quota is exceeded raise DiskFileNoSpace() if gerr.errno not in (errno.ENOENT, errno.EEXIST, errno.EIO): # FIXME: Other cases we should handle? diff --git a/test/unit/common/test_fs_utils.py b/test/unit/common/test_fs_utils.py index c7f969e..98f8620 100644 --- a/test/unit/common/test_fs_utils.py +++ b/test/unit/common/test_fs_utils.py @@ -20,12 +20,14 @@ import errno import unittest import eventlet from nose import SkipTest -from mock import patch +from mock import patch, Mock +from time import sleep from tempfile import mkdtemp, mkstemp from gluster.swift.common import fs_utils as fs from gluster.swift.common.exceptions import NotDirectoryError, \ FileOrDirNotFoundError, GlusterFileSystemOSError, \ GlusterFileSystemIOError +from swift.common.exceptions import DiskFileNoSpace def mock_os_fsync(fd): return True @@ -33,6 +35,12 @@ def mock_os_fsync(fd): def mock_os_fdatasync(fd): return True +def mock_os_mkdir_makedirs_enospc(path): + raise OSError(errno.ENOSPC, os.strerror(errno.ENOSPC)) + +def mock_os_mkdir_makedirs_edquot(path): + raise OSError(errno.EDQUOT, os.strerror(errno.EDQUOT)) + class TestFsUtils(unittest.TestCase): """ Tests for common.fs_utils """ @@ -235,6 +243,30 @@ class TestFsUtils(unittest.TestCase): os.close(fd) os.remove(tmpfile) + def test_do_write_DiskFileNoSpace(self): + + def mock_os_write_enospc(fd, msg): + raise OSError(errno.ENOSPC, os.strerror(errno.ENOSPC)) + + def mock_os_write_edquot(fd, msg): + raise OSError(errno.EDQUOT, os.strerror(errno.EDQUOT)) + + with patch('os.write', mock_os_write_enospc): + try: + fs.do_write(9, "blah") + except DiskFileNoSpace: + pass + else: + self.fail("Expected DiskFileNoSpace exception") + + with patch('os.write', mock_os_write_edquot): + try: + fs.do_write(9, "blah") + except DiskFileNoSpace: + pass + else: + self.fail("Expected DiskFileNoSpace exception") + def test_mkdirs(self): try: subdir = os.path.join('/tmp', str(random.random())) @@ -293,6 +325,25 @@ class TestFsUtils(unittest.TestCase): os.close(fd) shutil.rmtree(tmpdir) + def test_mkdirs_DiskFileNoSpace(self): + + with patch('os.makedirs', mock_os_mkdir_makedirs_enospc): + try: + fs.mkdirs("blah") + except DiskFileNoSpace: + pass + else: + self.fail("Expected DiskFileNoSpace exception") + + with patch('os.makedirs', mock_os_mkdir_makedirs_edquot): + try: + fs.mkdirs("blah") + except DiskFileNoSpace: + pass + else: + self.fail("Expected DiskFileNoSpace exception") + + def test_do_mkdir(self): try: path = os.path.join('/tmp', str(random.random())) @@ -311,6 +362,23 @@ class TestFsUtils(unittest.TestCase): else: self.fail("GlusterFileSystemOSError expected") + with patch('os.mkdir', mock_os_mkdir_makedirs_enospc): + try: + fs.do_mkdir("blah") + except DiskFileNoSpace: + pass + else: + self.fail("Expected DiskFileNoSpace exception") + + with patch('os.mkdir', mock_os_mkdir_makedirs_edquot): + try: + fs.do_mkdir("blah") + except DiskFileNoSpace: + pass + else: + self.fail("Expected DiskFileNoSpace exception") + + def test_do_listdir(self): tmpdir = mkdtemp() try: @@ -712,3 +780,60 @@ class TestFsUtils(unittest.TestCase): self.fail("Expected GlusterFileSystemOSError") finally: shutil.rmtree(tmpdir) + + def test_get_filename_from_fd(self): + tmpdir = mkdtemp() + try: + fd, tmpfile = mkstemp(dir=tmpdir) + result = fs.get_filename_from_fd(fd) + self.assertEqual(tmpfile, result) + result = fs.get_filename_from_fd(fd, True) + self.assertEqual(tmpfile, result) + finally: + shutil.rmtree(tmpdir) + + def test_get_filename_from_fd_err(self): + result = fs.get_filename_from_fd("blah") + self.assertIsNone(result) + result = fs.get_filename_from_fd(-1000) + self.assertIsNone(result) + result = fs.get_filename_from_fd("blah", True) + self.assertIsNone(result) + + def test_static_var(self): + @fs.static_var("counter", 0) + def test_func(): + test_func.counter += 1 + return test_func.counter + + result = test_func() + self.assertEqual(result, 1) + + def test_do_log_rl(self): + _mock = Mock() + pid = os.getpid() + with patch("logging.error", _mock): + # The first call always invokes logging.error + fs.do_log_rl("Hello %s", "world") + _mock.reset_mock() + # We call do_log_rl 3 times. Twice in immediate succession and + # then after an interval of 1.1 second. Thus, the last call will + # invoke logging.error + for i in range(2): + fs.do_log_rl("Hello %s", "world") + sleep(1.1) + fs.do_log_rl("Hello %s", "world") + + # We check if logging.error was called exactly once even though + # do_log_rl was called 3 times. + _mock.assert_called_once_with('[PID:' + str(pid) + '][RateLimitedLog;' + 'Count:3] Hello %s', 'world') + + def test_do_log_rl_err(self): + _mock = Mock() + pid = os.getpid() + sleep(1.1) + with patch("logging.error", _mock): + fs.do_log_rl("Hello %s", "world", log_level="blah") + _mock.assert_called_once_with('[PID:' + str(pid) + '][RateLimitedLog;' + 'Count:1] Hello %s', 'world') diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 4d345da..2582558 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -28,6 +28,7 @@ from collections import defaultdict from mock import patch from gluster.swift.common import utils, Glusterfs from gluster.swift.common.exceptions import GlusterFileSystemOSError +from swift.common.exceptions import DiskFileNoSpace # # Somewhat hacky way of emulating the operation of xattr calls. They are made @@ -169,6 +170,28 @@ class TestUtils(unittest.TestCase): else: self.fail("Expected an IOError exception on write") + def test_write_metadata_space_err(self): + + def _mock_xattr_setattr(item, name, value): + raise IOError(errno.ENOSPC, os.strerror(errno.ENOSPC)) + + with patch('xattr.setxattr', _mock_xattr_setattr): + path = "/tmp/foo/w" + orig_d = {'bar': 'foo'} + try: + utils.write_metadata(path, orig_d) + except DiskFileNoSpace: + pass + else: + self.fail("Expected DiskFileNoSpace exception") + fd = 0 + try: + utils.write_metadata(fd, orig_d) + except DiskFileNoSpace: + pass + else: + self.fail("Expected DiskFileNoSpace exception") + def test_write_metadata_multiple(self): # At 64 KB an xattr key/value pair, this should generate three keys. path = "/tmp/foo/w"