From df22032d79c74c047974c360331463a0e062c0ea Mon Sep 17 00:00:00 2001 From: Clay Gerrard Date: Tue, 15 Oct 2024 10:00:53 -0500 Subject: [PATCH] object-expirer: add round_robin_cache_size option Drive-Bys: * DRY out redundant configuration examples in expiring objects overview documentation. * Add missing delay_reaping man page docs. Co-Authored-By: Alistair Coles Change-Id: I8879dbd13527233c878dff764ec411ce9619ee39 --- doc/manpages/object-expirer.conf.5 | 9 +++ doc/manpages/object-server.conf.5 | 9 +++ doc/source/config/object_server_config.rst | 1 + doc/source/overview_expiring_objects.rst | 82 ++-------------------- etc/object-expirer.conf-sample | 7 +- etc/object-server.conf-sample | 7 +- swift/obj/expirer.py | 4 +- test/unit/obj/test_expirer.py | 18 ++++- 8 files changed, 56 insertions(+), 81 deletions(-) diff --git a/doc/manpages/object-expirer.conf.5 b/doc/manpages/object-expirer.conf.5 index 40406824d1..24a4d0ddf2 100644 --- a/doc/manpages/object-expirer.conf.5 +++ b/doc/manpages/object-expirer.conf.5 @@ -88,6 +88,15 @@ The default is 1. The default is 1. .IP \fBlog_statsd_metric_prefix\fR The default is empty. +.IP \fBdelay_reaping_account/container\fR +Normally, the expirer begins reaping expired objects immediately. You can add +options prefixed with "delay_reaping_" in the form of +"delay_reaping_<ACCT>[/<CNTR>]" to cause the expirer to delay processing of +tasks in those account or account/container. The <ACCT>[/<CNTR>] part of the +config option names should url-quote the paths. The value is in seconds. The +default is no delay for any tasks. +.IP \fBround_robin_task_cache_size\fR +Number of task objects to cache before processing. .IP \fBnice_priority\fR Modify scheduling priority of server processes. Niceness values range from -20 (most favorable to the process) to 19 (least favorable to the process). 
diff --git a/doc/manpages/object-server.conf.5 b/doc/manpages/object-server.conf.5 index f00d2d805a..f193304d84 100644 --- a/doc/manpages/object-server.conf.5 +++ b/doc/manpages/object-server.conf.5 @@ -623,6 +623,15 @@ to use 3 processes, you should run processes with process set to 0, 1, and 2. Th The expirer will re-attempt expiring if the source object is not available up to reclaim_age seconds before it gives up and deletes the task in the queue. The default is 604800 seconds (= 1 week). +.IP \fBdelay_reaping_account/container\fR +Normally, the expirer begins reaping expired objects immediately. You can add +options prefixed with "delay_reaping_" in the form of +"delay_reaping_<ACCT>[/<CNTR>]" to cause the expirer to delay processing of +tasks in those account or account/container. The <ACCT>[/<CNTR>] part of the +config option names should url-quote the paths. The value is in seconds. The +default is no delay for any tasks. +.IP \fBround_robin_task_cache_size\fR +Number of task objects to cache before processing. .IP \fBrecon_cache_path\fR Path to recon cache directory. The default is /var/cache/swift .IP \fBnice_priority\fR diff --git a/doc/source/config/object_server_config.rst b/doc/source/config/object_server_config.rst index b6b6b0991a..b9d7cadd1e 100644 --- a/doc/source/config/object_server_config.rst +++ b/doc/source/config/object_server_config.rst @@ -705,6 +705,7 @@ concurrency 1 Level of concurren this value must be set to at least 1 expiring_objects_account_name expiring_objects name for legacy expirer task queue dequeue_from_legacy False This service will look for jobs on the legacy expirer task queue. +round_robin_task_cache_size 100000 Number of task objects to cache before processing. processes 0 How many parts to divide the legacy work into, one part per process that will be doing the work. 
When set 0 means that a single legacy diff --git a/doc/source/overview_expiring_objects.rst b/doc/source/overview_expiring_objects.rst index b52c6e1a32..9fe0fefe5e 100644 --- a/doc/source/overview_expiring_objects.rst +++ b/doc/source/overview_expiring_objects.rst @@ -136,6 +136,11 @@ Upgrading impact: General Task Queue vs Legacy Queue The expirer daemon will be moving to a new general task-queue based design that will divide the work across all object servers, as such only expirers defined in the object-server config will be able to use the new system. + +The legacy object expirer config is documented in +``etc/object-expirer.conf-sample``. The alternative object-server config +section is documented in ``etc/object-server.conf-sample``. + The parameters in both files are identical except for a new option in the object-server ``[object-expirer]`` section, ``dequeue_from_legacy`` which when set to ``True`` will tell the expirer that in addition to using @@ -176,83 +181,6 @@ the concurrency level for the legacy queue. containers. On a large cluster one may inadvertently overload the acccount/container servers handling the legacy expirer queue. -Here is a quick sample of the ``object-expirer`` section required in the -``object-server.conf``:: - - [object-expirer] - # log_name = object-expirer - # log_facility = LOG_LOCAL0 - # log_level = INFO - # log_address = /dev/log - # - interval = 300 - - # If this true, expirer execute tasks in legacy expirer task queue - dequeue_from_legacy = false - - # processes can only be used in conjunction with `dequeue_from_legacy`. - # So this option is ignored if dequeue_from_legacy=false. 
- # processes is how many parts to divide the legacy work into, one part per - # process that will be doing the work - # processes set 0 means that a single legacy process will be doing all the work - # processes can also be specified on the command line and will override the - # config value - # processes = 0 - - # process can only be used in conjunction with `dequeue_from_legacy`. - # So this option is ignored if dequeue_from_legacy=false. - # process is which of the parts a particular legacy process will work on - # process can also be specified on the command line and will override the config - # value - # process is "zero based", if you want to use 3 processes, you should run - # processes with process set to 0, 1, and 2 - # process = 0 - - report_interval = 300 - - # request_tries is the number of times the expirer's internal client will - # attempt any given request in the event of failure. The default is 3. - # request_tries = 3 - - # concurrency is the level of concurrency to use to do the work, this value - # must be set to at least 1 - # concurrency = 1 - - # The expirer will re-attempt expiring if the source object is not available - # up to reclaim_age seconds before it gives up and deletes the entry in the - # queue. - # reclaim_age = 604800 - -And for completeness, here is a quick sample of the legacy -``object-expirer.conf`` file:: - - [DEFAULT] - # swift_dir = /etc/swift - # user = swift - # You can specify default log routing here if you want: - # log_name = swift - # log_facility = LOG_LOCAL0 - # log_level = INFO - - [object-expirer] - interval = 300 - - [pipeline:main] - pipeline = catch_errors cache proxy-server - - [app:proxy-server] - use = egg:swift#proxy - # See proxy-server.conf-sample for options - - [filter:cache] - use = egg:swift#memcache - # See proxy-server.conf-sample for options - - [filter:catch_errors] - use = egg:swift#catch_errors - # See proxy-server.conf-sample for options - - .. 
note:: When running legacy expirers, the daemon needs to run on a machine with access to all the backend servers in the cluster, but does not need proxy diff --git a/etc/object-expirer.conf-sample b/etc/object-expirer.conf-sample index 1e8762d6bc..16dc004039 100644 --- a/etc/object-expirer.conf-sample +++ b/etc/object-expirer.conf-sample @@ -93,7 +93,12 @@ # delay_reaping_AUTH_test/special%0Achars%3Dshould%20be%20quoted # N.B. By default no delay_reaping value is configured for any accounts or # containers. -# + +# Number of task objects to cache before processing. With many nodes it may +# take some time to fill a larger cache_size but may also have a better chance +# to distribute DELETEs to multiple target containers. +# round_robin_task_cache_size = 100000 + # recon_cache_path = /var/cache/swift # # You can set scheduling priority of processes. Niceness values range from -20 diff --git a/etc/object-server.conf-sample b/etc/object-server.conf-sample index 9bcef96ec0..10c050906f 100644 --- a/etc/object-server.conf-sample +++ b/etc/object-server.conf-sample @@ -680,7 +680,12 @@ use = egg:swift#backend_ratelimit # up to reclaim_age seconds before it gives up and deletes the entry in the # queue. # reclaim_age = 604800 -# + +# Number of task objects to cache before processing. With many nodes it may +# take some time to fill a larger cache_size but may also have a better chance +# to distribute DELETEs to multiple target containers. +# round_robin_task_cache_size = 100000 + # recon_cache_path = /var/cache/swift # # You can set scheduling priority of processes. 
Niceness values range from -20 diff --git a/swift/obj/expirer.py b/swift/obj/expirer.py index 6a1cf386cd..5d903b32a1 100644 --- a/swift/obj/expirer.py +++ b/swift/obj/expirer.py @@ -184,6 +184,8 @@ class ObjectExpirer(Daemon): self.reclaim_age = int(conf.get('reclaim_age', 604800)) self.delay_reaping_times = read_conf_for_delay_reaping_times(conf) + self.round_robin_task_cache_size = int( + conf.get('round_robin_task_cache_size', MAX_OBJECTS_TO_CACHE)) def _make_internal_client(self, is_legacy_conf): default_ic_conf_path = '/etc/swift/internal-client.conf' @@ -268,7 +270,7 @@ class ObjectExpirer(Daemon): obj_cache[cache_key].append(delete_task) cnt += 1 - if cnt > MAX_OBJECTS_TO_CACHE: + if cnt > self.round_robin_task_cache_size: for task in dump_obj_cache_in_round_robin(): yield task cnt = 0 diff --git a/test/unit/obj/test_expirer.py b/test/unit/obj/test_expirer.py index effe07efd5..0902260dc0 100644 --- a/test/unit/obj/test_expirer.py +++ b/test/unit/obj/test_expirer.py @@ -324,6 +324,21 @@ class TestObjectExpirer(TestCase): self.assertEqual(x.expiring_objects_account, '.expiring_objects') self.assertIs(x.swift, self.fake_swift) + def test_init_default_round_robin_cache_default(self): + conf = {} + x = expirer.ObjectExpirer(conf, logger=self.logger, + swift=self.fake_swift) + self.assertEqual(x.round_robin_task_cache_size, + expirer.MAX_OBJECTS_TO_CACHE) + + def test_init_large_round_robin_cache(self): + conf = { + 'round_robin_task_cache_size': '1000000', + } + x = expirer.ObjectExpirer(conf, logger=self.logger, + swift=self.fake_swift) + self.assertEqual(x.round_robin_task_cache_size, 1000000) + def test_init_internal_client_path_from_expirer_conf(self): # conf read from object-expirer.conf, no internal_client_conf_path conf = {'__file__': '/etc/swift/object-expirer.conf'} @@ -1874,7 +1889,8 @@ class TestObjectExpirer(TestCase): def test_success_gets_counted(self): self.assertEqual(self.expirer.report_objects, 0) - with 
mock.patch('swift.obj.expirer.MAX_OBJECTS_TO_CACHE', 0), \ + with mock.patch.object(self.expirer, + 'round_robin_task_cache_size', 0), \ mock.patch.object(self.expirer, 'delete_actual_object', lambda o, t, b: None), \ mock.patch.object(self.expirer, 'pop_queue',