From 9e6fbbcee91105ea8e5fa67a86b4b5d3054e32aa Mon Sep 17 00:00:00 2001 From: Luis Pabon Date: Fri, 26 Jul 2013 15:56:26 -0400 Subject: [PATCH] perf: Container and account performance inc * Container and accounts performance increase by removing the need to update either the object count or the container count, respectively. New hidden configurations added to re-enable the functionality. * object_only configuration removed from fs.conf and replaced with a hidden configuration. The new hidden configuration reports gratuitously created directories as objects, to support a compatibility behavior with previous version 1.4.8 (even though it was incorrect). BUG 988969: https://bugzilla.redhat.com/show_bug.cgi?id=988969 Change-Id: Idca20b8629ec38606ff5692fe62bc2cadabffc86 Signed-off-by: Luis Pabon Reviewed-on: http://review.gluster.org/5403 Reviewed-by: Peter Portante Tested-by: Peter Portante Reviewed-on: http://review.gluster.org/5504 --- etc/fs.conf-gluster | 5 ---- gluster/swift/common/DiskDir.py | 21 +++------------ gluster/swift/common/Glusterfs.py | 44 ++++++++++++++++++++++++++----- gluster/swift/common/utils.py | 5 +++- test/functional/conf/fs.conf | 9 +++---- test/unit/common/test_diskdir.py | 10 +++++++ test/unit/common/test_utils.py | 9 ++----- 7 files changed, 61 insertions(+), 42 deletions(-) diff --git a/etc/fs.conf-gluster b/etc/fs.conf-gluster index 44ad5f7..6d2a791 100644 --- a/etc/fs.conf-gluster +++ b/etc/fs.conf-gluster @@ -4,11 +4,6 @@ # volumes to be served via Swift API. mount_ip = localhost -# By default it is assumed the Gluster volumes can be accessed using other -# methods besides UFO (not object only), which disables a caching -# optimizations in order to keep in sync with file system changes. -object_only = yes - # Performance optimization parameter. 
When turned off, the filesystem will # see a reduced number of stat calls, resulting in substantially faster # response time for GET and HEAD container requests on containers with large diff --git a/gluster/swift/common/DiskDir.py b/gluster/swift/common/DiskDir.py index 4c835d3..556907f 100644 --- a/gluster/swift/common/DiskDir.py +++ b/gluster/swift/common/DiskDir.py @@ -373,7 +373,7 @@ class DiskDir(DiskCommon): # test cases working for now. if e.errno != errno.ENOENT: raise - if Glusterfs.OBJECT_ONLY and metadata \ + if not Glusterfs._implicit_dir_objects and metadata \ and metadata[X_CONTENT_TYPE] == DIR_TYPE \ and not dir_is_object(metadata): continue @@ -412,16 +412,8 @@ class DiskDir(DiskCommon): reported_put_timestamp, reported_delete_timestamp, reported_object_count, and reported_bytes_used. """ - if self._dir_exists: - if not Glusterfs.OBJECT_ONLY: - # If we are not configured for object only environments, - # we should update the object counts in case they changed - # behind our back. - self._update_object_count() - else: - # FIXME: to facilitate testing, we need to update all - # the time - self._update_object_count() + if self._dir_exists and Glusterfs._container_update_object_count: + self._update_object_count() data = {'account': self.account, 'container': self.container, 'object_count': self.metadata.get( @@ -697,12 +689,7 @@ class DiskAccount(DiskCommon): delete_timestamp, container_count, object_count, bytes_used, hash, id """ - if not Glusterfs.OBJECT_ONLY: - # If we are not configured for object only environments, we should - # update the container counts in case they changed behind our back. 
- self._update_container_count() - else: - # FIXME: to facilitate testing, we need to update all the time + if Glusterfs._account_update_container_count: self._update_container_count() data = {'account': self.account, 'created_at': '1', diff --git a/gluster/swift/common/Glusterfs.py b/gluster/swift/common/Glusterfs.py index 01cfcc0..9ff54ba 100644 --- a/gluster/swift/common/Glusterfs.py +++ b/gluster/swift/common/Glusterfs.py @@ -31,23 +31,19 @@ from gluster.swift.common.exceptions import GlusterfsException, \ # _fs_conf = ConfigParser() MOUNT_IP = 'localhost' -OBJECT_ONLY = True RUN_DIR = '/var/run/swift' SWIFT_DIR = '/etc/swift' _do_getsize = False _allow_mount_per_server = False +_implicit_dir_objects = False +_container_update_object_count = False +_account_update_container_count = False if _fs_conf.read(os.path.join(SWIFT_DIR, 'fs.conf')): try: MOUNT_IP = _fs_conf.get('DEFAULT', 'mount_ip', MOUNT_IP) except (NoSectionError, NoOptionError): pass - try: - OBJECT_ONLY = _fs_conf.get('DEFAULT', - 'object_only', - "yes") in TRUE_VALUES - except (NoSectionError, NoOptionError): - pass try: RUN_DIR = _fs_conf.get('DEFAULT', 'run_dir', RUN_DIR) except (NoSectionError, NoOptionError): @@ -68,6 +64,40 @@ if _fs_conf.read(os.path.join(SWIFT_DIR, 'fs.conf')): except (NoSectionError, NoOptionError): pass + # -- Hidden configuration option -- + # Report gratuitously created directories as objects + # Directories can be gratuitously created on the path to a given + # object. This option turns on or off the reporting of those directories. 
+ # It defaults to False so that only those directories explicitly + # created by the object server PUT REST API are reported + try: + _implicit_dir_objects = \ + _fs_conf.get('DEFAULT', + 'implicit_dir_objects', + "no") in TRUE_VALUES + except (NoSectionError, NoOptionError): + pass + + # -- Hidden configuration option -- + # Due to the impact on performance, this option is disabled by default + try: + _container_update_object_count = \ + _fs_conf.get('DEFAULT', + 'container_update_object_count', + "no") in TRUE_VALUES + except (NoSectionError, NoOptionError): + pass + + # -- Hidden configuration option -- + # Due to the impact on performance, this option is disabled by default + try: + _account_update_container_count = \ + _fs_conf.get('DEFAULT', + 'account_update_container_count', + "no") in TRUE_VALUES + except (NoSectionError, NoOptionError): + pass + NAME = 'glusterfs' diff --git a/gluster/swift/common/utils.py b/gluster/swift/common/utils.py index 5152861..522d307 100644 --- a/gluster/swift/common/utils.py +++ b/gluster/swift/common/utils.py @@ -243,7 +243,10 @@ def _update_list(path, cont_path, src_list, reg_file=True, object_count=0, obj_path = path.replace(cont_path, '').strip(os.path.sep) for obj_name in src_list: - if not reg_file and Glusterfs.OBJECT_ONLY: + # If it is not a reg_file then it is a directory. + if not reg_file and not Glusterfs._implicit_dir_objects: + # Now check if this is a dir object or a gratuitously created + # directory metadata = \ read_metadata(os.path.join(cont_path, obj_path, obj_name)) if not dir_is_object(metadata): diff --git a/test/functional/conf/fs.conf b/test/functional/conf/fs.conf index 43f9b45..b06a854 100644 --- a/test/functional/conf/fs.conf +++ b/test/functional/conf/fs.conf @@ -4,11 +4,6 @@ # volumes to be served via Swift API. 
mount_ip = localhost -# By default it is assumed the Gluster volumes can be accessed using other -# methods besides UFO (not object only), which disables a caching -# optimizations in order to keep in sync with file system changes. -object_only = yes - # Performance optimization parameter. When turned off, the filesystem will # see a reduced number of stat calls, resulting in substantially faster # response time for GET and HEAD container requests on containers with large @@ -18,3 +13,7 @@ object_only = yes # # *** Keep on for Functional Tests *** accurate_size_in_listing = on + +# *** Keep on for Functional Tests *** +container_update_object_count = on +account_update_container_count = on diff --git a/test/unit/common/test_diskdir.py b/test/unit/common/test_diskdir.py index be0c922..bbdb168 100644 --- a/test/unit/common/test_diskdir.py +++ b/test/unit/common/test_diskdir.py @@ -468,6 +468,9 @@ class TestContainerBroker(unittest.TestCase): def test_get_info(self): # Test swift.common.db.ContainerBroker.get_info + __save_config = \ + gluster.swift.common.Glusterfs._container_update_object_count + gluster.swift.common.Glusterfs._container_update_object_count = True broker = self._get_broker(account='test1', container='test2') broker.initialize(self.initial_ts) @@ -513,6 +516,8 @@ class TestContainerBroker(unittest.TestCase): info = broker.get_info() self.assertEquals(info['x_container_sync_point1'], -1) self.assertEquals(info['x_container_sync_point2'], -1) + gluster.swift.common.Glusterfs._container_update_object_count = \ + __save_config def test_get_info_nonexistent_container(self): broker = dd.DiskDir(self.path, self.drive, account='no_account', @@ -1046,6 +1051,9 @@ class TestAccountBroker(unittest.TestCase): def test_get_info(self): # Test swift.common.db.AccountBroker.get_info + __save_config = \ + gluster.swift.common.Glusterfs._account_update_container_count + gluster.swift.common.Glusterfs._account_update_container_count = True broker = 
self._get_broker(account='test1') broker.initialize(self.initial_ts) @@ -1074,6 +1082,8 @@ class TestAccountBroker(unittest.TestCase): os.rmdir(c2) info = broker.get_info() self.assertEquals(info['container_count'], 0) + gluster.swift.common.Glusterfs._account_update_container_count = \ + __save_config def test_list_containers_iter(self): # Test swift.common.db.AccountBroker.list_containers_iter diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 6d6319d..4aae4c3 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -585,23 +585,19 @@ class TestUtils(unittest.TestCase): os.chdir(orig_cwd) shutil.rmtree(td) - def test_get_container_details_ufo(self): + def test_get_container_details(self): orig_cwd = os.getcwd() - __obj_only = Glusterfs.OBJECT_ONLY td = tempfile.mkdtemp() try: tf = tarfile.open("common/data/container_tree.tar.bz2", "r:bz2") os.chdir(td) tf.extractall() - Glusterfs.OBJECT_ONLY = False - obj_list, object_count, bytes_used = \ utils.get_container_details(td) assert bytes_used == 0, repr(bytes_used) - assert object_count == 8, repr(object_count) + assert object_count == 5, repr(object_count) assert set(obj_list) == set(['file1', 'file3', 'file2', - 'dir3', 'dir1', 'dir2', 'dir1/file1', 'dir1/file2' ]), repr(obj_list) @@ -616,7 +612,6 @@ class TestUtils(unittest.TestCase): finally: os.chdir(orig_cwd) shutil.rmtree(td) - Glusterfs.OBJECT_ONLY = __obj_only def test_get_container_details_from_fs_do_getsize_true(self): orig_cwd = os.getcwd()