From f39aae0c95567d78e9a29dbb7e74abe1104c97d0 Mon Sep 17 00:00:00 2001 From: Ruby Loo Date: Thu, 17 May 2018 15:22:42 +0000 Subject: [PATCH] Disable periodic tasks if interval set to 0 For periodic tasks that are specified with the decorator @periodics.periodic(), a ValueError exception was raised if a value <= 0 was specified for any of the spacing values (taken from configuration options). Specifying a value <=0 used to work, to disable the task altogether. It broke when we switched to using the futurist package (some time in the Mitaka cycle). This fixes it so that setting these configuration options to 0 (or a negative value) will disable the periodic tasks: - [conductor]sync_power_state_interval: sync power states for the nodes - [conductor]check_provision_state_interval: - check deployments and time out if the deployment takes too long - check the status of cleaning a node and time out if it takes too long - check the status of inspecting a node and time out if it takes too long - check for and handle nodes that are taken over by new conductors (if an old conductor disappeared) - [conductor]send_sensor_data_interval: send sensor data to ceilometer - [conductor]sync_local_state_interval: refresh a conductor's copy of the consistent hash ring. If any mappings have changed, determines which, if any, nodes need to be "taken over". The ensuing actions could include preparing a PXE environment, updating the DHCP server, and so on. 
- [oneview]periodic_check_interval: - check for nodes taken over by OneView users - check for nodes freed by OneView users Change-Id: I62708e239295344d0dcf0bff7dd68ec8c34ab9a0 Story: #2002059 Task: #19708 --- ironic/conductor/manager.py | 34 ++++++++++--- ironic/conf/conductor.py | 10 ++-- ironic/conf/drac.py | 1 + ironic/drivers/modules/oneview/deploy.py | 6 ++- ...sable_periodic_tasks-0ea39fa7a8a108c6.yaml | 49 +++++++++++++++++++ 5 files changed, 87 insertions(+), 13 deletions(-) create mode 100644 releasenotes/notes/disable_periodic_tasks-0ea39fa7a8a108c6.yaml diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 8b9e4c1c8b..274965f4c6 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -1494,7 +1494,8 @@ class ConductorManager(base_manager.BaseConductorManager): state=node.provision_state) @METRICS.timer('ConductorManager._sync_power_states') - @periodics.periodic(spacing=CONF.conductor.sync_power_state_interval) + @periodics.periodic(spacing=CONF.conductor.sync_power_state_interval, + enabled=CONF.conductor.sync_power_state_interval > 0) def _sync_power_states(self, context): """Periodic task to sync power states for the nodes. @@ -1564,7 +1565,10 @@ class ConductorManager(base_manager.BaseConductorManager): eventlet.sleep(0) @METRICS.timer('ConductorManager._check_deploy_timeouts') - @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) + @periodics.periodic( + spacing=CONF.conductor.check_provision_state_interval, + enabled=CONF.conductor.check_provision_state_interval > 0 + and CONF.conductor.deploy_callback_timeout != 0) def _check_deploy_timeouts(self, context): """Periodically checks whether a deploy RPC call has timed out. @@ -1572,6 +1576,8 @@ class ConductorManager(base_manager.BaseConductorManager): :param context: request context. """ + # FIXME(rloo): If the value is < 0, it will be enabled. That doesn't + # seem right. 
callback_timeout = CONF.conductor.deploy_callback_timeout if not callback_timeout: return @@ -1587,7 +1593,9 @@ class ConductorManager(base_manager.BaseConductorManager): sort_key, callback_method, err_handler) @METRICS.timer('ConductorManager._check_orphan_nodes') - @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) + @periodics.periodic( + spacing=CONF.conductor.check_provision_state_interval, + enabled=CONF.conductor.check_provision_state_interval > 0) def _check_orphan_nodes(self, context): """Periodically checks the status of nodes that were taken over. @@ -1774,7 +1782,10 @@ class ConductorManager(base_manager.BaseConductorManager): task, 'console_restore', fields.NotificationStatus.ERROR) @METRICS.timer('ConductorManager._check_cleanwait_timeouts') - @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) + @periodics.periodic( + spacing=CONF.conductor.check_provision_state_interval, + enabled=CONF.conductor.check_provision_state_interval > 0 + and CONF.conductor.clean_callback_timeout != 0) def _check_cleanwait_timeouts(self, context): """Periodically checks for nodes being cleaned. @@ -1783,6 +1794,8 @@ class ConductorManager(base_manager.BaseConductorManager): :param context: request context. """ + # FIXME(rloo): If the value is < 0, it will be enabled. That doesn't + # seem right. callback_timeout = CONF.conductor.clean_callback_timeout if not callback_timeout: return @@ -1818,7 +1831,8 @@ class ConductorManager(base_manager.BaseConductorManager): ) @METRICS.timer('ConductorManager._sync_local_state') - @periodics.periodic(spacing=CONF.conductor.sync_local_state_interval) + @periodics.periodic(spacing=CONF.conductor.sync_local_state_interval, + enabled=CONF.conductor.sync_local_state_interval > 0) def _sync_local_state(self, context): """Perform any actions necessary to sync local state. 
@@ -2574,7 +2588,8 @@ class ConductorManager(base_manager.BaseConductorManager): eventlet.sleep(0) @METRICS.timer('ConductorManager._send_sensor_data') - @periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval) + @periodics.periodic(spacing=CONF.conductor.send_sensor_data_interval, + enabled=CONF.conductor.send_sensor_data) def _send_sensor_data(self, context): """Periodically sends sensor data to Ceilometer.""" @@ -2806,13 +2821,18 @@ class ConductorManager(base_manager.BaseConductorManager): state=task.node.provision_state) @METRICS.timer('ConductorManager._check_inspect_wait_timeouts') - @periodics.periodic(spacing=CONF.conductor.check_provision_state_interval) + @periodics.periodic( + spacing=CONF.conductor.check_provision_state_interval, + enabled=CONF.conductor.check_provision_state_interval > 0 + and CONF.conductor.inspect_wait_timeout != 0) def _check_inspect_wait_timeouts(self, context): """Periodically checks inspect_wait_timeout and fails upon reaching it. :param: context: request context """ + # FIXME(rloo): If the value is < 0, it will be enabled. That doesn't + # seem right. callback_timeout = CONF.conductor.inspect_wait_timeout if not callback_timeout: return diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index 1d1175c31c..7b1ebf6246 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -47,11 +47,12 @@ opts = [ cfg.IntOpt('sync_power_state_interval', default=60, help=_('Interval between syncing the node power state to the ' - 'database, in seconds.')), + 'database, in seconds. Set to 0 to disable syncing.')), cfg.IntOpt('check_provision_state_interval', default=60, + min=0, help=_('Interval between checks of provision timeouts, ' - 'in seconds.')), + 'in seconds. 
Set to 0 to disable checks.')), cfg.IntOpt('check_rescue_state_interval', default=60, min=1, @@ -90,6 +91,7 @@ opts = [ 'notification bus')), cfg.IntOpt('send_sensor_data_interval', default=600, + min=1, help=_('Seconds between conductor sending sensor data message ' 'to ceilometer via the notification bus.')), cfg.IntOpt('send_sensor_data_workers', @@ -115,8 +117,8 @@ opts = [ 'local state as nodes are moved around the cluster. ' 'This option controls how often, in seconds, each ' 'conductor will check for nodes that it should ' - '"take over". Set it to a negative value to disable ' - 'the check entirely.')), + '"take over". Set it to 0 (or a negative value) to ' + 'disable the check entirely.')), cfg.StrOpt('configdrive_swift_container', default='ironic_configdrive_container', help=_('Name of the Swift container to store config drive ' diff --git a/ironic/conf/drac.py b/ironic/conf/drac.py index fcc193012c..f132574bec 100644 --- a/ironic/conf/drac.py +++ b/ironic/conf/drac.py @@ -18,6 +18,7 @@ from ironic.common.i18n import _ opts = [ cfg.IntOpt('query_raid_config_job_status_interval', default=120, + min=1, help=_('Interval (in seconds) between periodic RAID job status ' 'checks to determine whether the asynchronous RAID ' 'configuration was successfully finished or not.')) diff --git a/ironic/drivers/modules/oneview/deploy.py b/ironic/drivers/modules/oneview/deploy.py index d3cfcf3bbb..2f4051bc61 100644 --- a/ironic/drivers/modules/oneview/deploy.py +++ b/ironic/drivers/modules/oneview/deploy.py @@ -41,7 +41,8 @@ class OneViewPeriodicTasks(object): pass @periodics.periodic(spacing=CONF.oneview.periodic_check_interval, - enabled=CONF.oneview.enable_periodic_tasks) + enabled=CONF.oneview.enable_periodic_tasks + and CONF.oneview.periodic_check_interval > 0) def _periodic_check_nodes_taken_by_oneview(self, manager, context): """Checks if nodes in Ironic were taken by OneView users. 
@@ -98,7 +99,8 @@ class OneViewPeriodicTasks(object): manager.do_provisioning_action(context, node.uuid, 'manage') @periodics.periodic(spacing=CONF.oneview.periodic_check_interval, - enabled=CONF.oneview.enable_periodic_tasks) + enabled=CONF.oneview.enable_periodic_tasks + and CONF.oneview.periodic_check_interval > 0) def _periodic_check_nodes_freed_by_oneview(self, manager, context): """Checks if nodes taken by OneView users were freed. diff --git a/releasenotes/notes/disable_periodic_tasks-0ea39fa7a8a108c6.yaml b/releasenotes/notes/disable_periodic_tasks-0ea39fa7a8a108c6.yaml new file mode 100644 index 0000000000..a2811537e0 --- /dev/null +++ b/releasenotes/notes/disable_periodic_tasks-0ea39fa7a8a108c6.yaml @@ -0,0 +1,49 @@ +--- +features: + - | + Setting these configuration options to 0 will disable the periodic tasks: + + * [conductor]sync_power_state_interval: sync power states for the nodes + * [conductor]check_provision_state_interval: + + * check deployments and time out if the deployment takes too long + * check the status of cleaning a node and time out if it takes too long + * check the status of inspecting a node and time out if it takes too long + * check for and handle nodes that are taken over by new conductors (if an + old conductor disappeared) + + * [conductor]send_sensor_data_interval: send sensor data to ceilometer + * [conductor]sync_local_state_interval: refresh a conductor's copy of the + consistent hash ring. If any mappings have changed, determines which, + if any, nodes need to be "taken over". The ensuing actions could include + preparing a PXE environment, updating the DHCP server, and so on. + * [oneview]periodic_check_interval: + + * check for nodes taken over by OneView users + * check for nodes freed by OneView users + +fixes: + - | + Fixes an issue where setting these configuration options to 0 caused a + ValueError exception to be raised. You can now set them to 0 to disable the + associated periodic tasks. 
(For more information, see `story 2002059 + <https://storyboard.openstack.org/#!/story/2002059>`_.): + + * [conductor]sync_power_state_interval: sync power states for the nodes + * [conductor]check_provision_state_interval: + + * check deployments and time out if the deployment takes too long + * check the status of cleaning a node and time out if it takes too long + * check the status of inspecting a node and time out if it takes too long + * check for and handle nodes that are taken over by new conductors (if an + old conductor disappeared) + + * [conductor]send_sensor_data_interval: send sensor data to ceilometer + * [conductor]sync_local_state_interval: refresh a conductor's copy of the + consistent hash ring. If any mappings have changed, determines which, + if any, nodes need to be "taken over". The ensuing actions could include + preparing a PXE environment, updating the DHCP server, and so on. + * [oneview]periodic_check_interval: + + * check for nodes taken over by OneView users + * check for nodes freed by OneView users