diff --git a/ironic/common/async_steps.py b/ironic/common/async_steps.py new file mode 100644 index 0000000000..8bec847627 --- /dev/null +++ b/ironic/common/async_steps.py @@ -0,0 +1,168 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from ironic.common import states + +# These flags tell the conductor that we're rebooting inside or after a step. + +CLEANING_REBOOT = "cleaning_reboot" +DEPLOYMENT_REBOOT = "deployment_reboot" +SERVICING_REBOOT = "servicing_reboot" + +# These flags tell the conductor whether the currently running step should be +# skipped after the agent heartbeats again. Setting them to false causes +# the conductor to re-enter the previously running step after a reboot. + +SKIP_CURRENT_CLEAN_STEP = "skip_current_clean_step" +SKIP_CURRENT_DEPLOY_STEP = "skip_current_deploy_step" +SKIP_CURRENT_SERVICE_STEP = "skip_current_service_step" + +# These flags tell the conductor that something else (most likely, a periodic +# task in some hardware interface) is polling the step for completion. + +CLEANING_POLLING = "cleaning_polling" +DEPLOYMENT_POLLING = "deployment_polling" +SERVICING_POLLING = "servicing_polling" + +_ALL_FLAGS = [CLEANING_REBOOT, DEPLOYMENT_REBOOT, SERVICING_REBOOT, + SKIP_CURRENT_CLEAN_STEP, SKIP_CURRENT_DEPLOY_STEP, + SKIP_CURRENT_SERVICE_STEP, + CLEANING_POLLING, DEPLOYMENT_POLLING, SERVICING_POLLING] + + +def get_return_state(node): + """Returns state based on operation being invoked. + + :param node: an ironic node object. + :returns: states.CLEANWAIT if cleaning operation in progress, + or states.DEPLOYWAIT if deploy operation in progress, + or states.SERVICEWAIT if servicing in progress. + """ + # FIXME(dtantsur): this distinction is rather useless, create a new + # constant to use for all step types? + if node.clean_step: + return states.CLEANWAIT + elif node.service_step: + return states.SERVICEWAIT + else: + # TODO(dtantsur): ideally, check for node.deploy_step and raise + # something if this function is called without any step field set. + # Unfortunately, a lot of unit tests rely on exactly this. + return states.DEPLOYWAIT + + +def _check_agent_token_prior_to_agent_reboot(node): + """Removes the agent token if it was not pregenerated. + + Removal of the agent token in cases where it is not pregenerated + is a vital action prior to rebooting the agent, as without doing + so the agent will be unable to establish communication with + the ironic API after the reboot. Effectively locking itself out + as in cases where the value is not pregenerated, it is not + already included in the payload and must be generated again + upon lookup. + + :param node: The Node object. + """ + if not node.driver_internal_info.get('agent_secret_token_pregenerated', + False): + node.del_driver_internal_info('agent_secret_token') + + +def _step_type(node, step_type): + if step_type: + return step_type + if node.clean_step: + return 'clean' + elif node.service_step: + return 'service' + else: + return 'deploy' + + +def set_node_flags(node, reboot=None, skip_current_step=None, polling=None, + step_type=None): + """Sets appropriate reboot flags in driver_internal_info based on operation + + :param node: an ironic node object. + :param reboot: Boolean value to set for node's driver_internal_info flag + cleaning_reboot, servicing_reboot or deployment_reboot based on the + operation in progress. If it is None, corresponding reboot flag is + not set in node's driver_internal_info. + :param skip_current_step: Boolean value to set for node's + driver_internal_info flag skip_current_clean_step, + skip_current_service_step or skip_current_deploy_step based on the + operation in progress. If it is None, corresponding skip step flag is + not set in node's driver_internal_info. + :param polling: Boolean value to set for node's driver_internal_info flag + deployment_polling, servicing_polling or cleaning_polling. If it is + None, the corresponding polling flag is not set in the node's + driver_internal_info. + :param step_type: The type of steps to process: 'clean', 'service' + or 'deploy'. If None, detected from the node. + """ + step_type = _step_type(node, step_type) + if step_type == 'clean': + reboot_field = CLEANING_REBOOT + skip_field = SKIP_CURRENT_CLEAN_STEP + polling_field = CLEANING_POLLING + elif step_type == 'service': + reboot_field = SERVICING_REBOOT + skip_field = SKIP_CURRENT_SERVICE_STEP + polling_field = SERVICING_POLLING + else: + reboot_field = DEPLOYMENT_REBOOT + skip_field = SKIP_CURRENT_DEPLOY_STEP + polling_field = DEPLOYMENT_POLLING + + if reboot is not None: + node.set_driver_internal_info(reboot_field, reboot) + if reboot: + # If rebooting, we must ensure that we check and remove + # an agent token if necessary. + _check_agent_token_prior_to_agent_reboot(node) + if skip_current_step is not None: + node.set_driver_internal_info(skip_field, skip_current_step) + if polling is not None: + node.set_driver_internal_info(polling_field, polling) + node.save() + + +def remove_node_flags(node): + """Remove all flags for the node. + + :param node: A Node object + """ + for flag in _ALL_FLAGS: + node.del_driver_internal_info(flag) + + +def prepare_node_for_next_step(node, step_type=None): + """Remove the flags responsible for the next step. + + Cleans the polling and the skip-next step flags. + + :param node: A Node object + :param step_type: The type of steps to process: 'clean', 'service' + or 'deploy'. If None, detected from the node. + :returns: The last value of the skip-next flag. + """ + step_type = _step_type(node, step_type) + skip_current_step = node.del_driver_internal_info( + 'skip_current_%s_step' % step_type, True) + if step_type == 'clean': + node.del_driver_internal_info(CLEANING_POLLING) + elif step_type == 'service': + node.del_driver_internal_info(SERVICING_POLLING) + else: + node.del_driver_internal_info(DEPLOYMENT_POLLING) + return skip_current_step diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py index dc3c3e9a17..7fea76263b 100644 --- a/ironic/conductor/cleaning.py +++ b/ironic/conductor/cleaning.py @@ -14,6 +14,7 @@ from oslo_log import log +from ironic.common import async_steps from ironic.common import exception from ironic.common.i18n import _ from ironic.common import states @@ -192,13 +193,14 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None): except Exception as e: if isinstance(e, exception.AgentConnectionFailed): - if task.node.driver_internal_info.get('cleaning_reboot'): + if task.node.driver_internal_info.get( + async_steps.CLEANING_REBOOT): LOG.info('Agent is not yet running on node %(node)s ' 'after cleaning reboot, waiting for agent to ' 'come up to run next clean step %(step)s.', {'node': node.uuid, 'step': step}) - node.set_driver_internal_info('skip_current_clean_step', - False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_CLEAN_STEP, False) target_state = (states.MANAGEABLE if manual_clean else None) task.process_event('wait', target_state=target_state) @@ -209,7 +211,8 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None): 'executing a command. Error: %(error)s', {'node': task.node.uuid, 'error': e}) - node.set_driver_internal_info('skip_current_clean_step', False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_CLEAN_STEP, False) target_state = states.MANAGEABLE if manual_clean else None task.process_event('wait', target_state=target_state) return diff --git a/ironic/conductor/deployments.py b/ironic/conductor/deployments.py index 1bc867f843..980baad03e 100644 --- a/ironic/conductor/deployments.py +++ b/ironic/conductor/deployments.py @@ -19,6 +19,7 @@ from oslo_db import exception as db_exception from oslo_log import log from oslo_utils import excutils +from ironic.common import async_steps from ironic.common import exception from ironic.common.glance_service import service_utils as glance_utils from ironic.common.i18n import _ @@ -302,19 +303,20 @@ def do_next_deploy_step(task, step_index): 'executing a command. Error: %(error)s', {'node': task.node.uuid, 'error': e}) - node.set_driver_internal_info('skip_current_deploy_step', - False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_DEPLOY_STEP, False) task.process_event('wait') return except exception.IronicException as e: if isinstance(e, exception.AgentConnectionFailed): - if task.node.driver_internal_info.get('deployment_reboot'): + if task.node.driver_internal_info.get( + async_steps.DEPLOYMENT_REBOOT): LOG.info('Agent is not yet running on node %(node)s after ' 'deployment reboot, waiting for agent to come up ' 'to run next deploy step %(step)s.', {'node': node.uuid, 'step': step}) - node.set_driver_internal_info('skip_current_deploy_step', - False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_DEPLOY_STEP, False) task.process_event('wait') return diff --git a/ironic/conductor/servicing.py b/ironic/conductor/servicing.py index 598038a18a..1dd929d39b 100644 --- a/ironic/conductor/servicing.py +++ b/ironic/conductor/servicing.py @@ -14,6 +14,7 @@ from oslo_log import log +from ironic.common import async_steps from ironic.common import exception from ironic.common.i18n import _ from ironic.common import states @@ -156,13 +157,14 @@ def do_next_service_step(task, step_index, disable_ramdisk=None): except Exception as e: if isinstance(e, exception.AgentConnectionFailed): - if task.node.driver_internal_info.get('service_reboot'): + if task.node.driver_internal_info.get( + async_steps.SERVICING_REBOOT): LOG.info('Agent is not yet running on node %(node)s ' 'after service reboot, waiting for agent to ' 'come up to run next service step %(step)s.', {'node': node.uuid, 'step': step}) - node.set_driver_internal_info('skip_current_service_step', - False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_SERVICE_STEP, False) task.process_event('wait') return if isinstance(e, exception.AgentInProgress): @@ -172,7 +174,7 @@ def do_next_service_step(task, step_index, disable_ramdisk=None): {'node': task.node.uuid, 'error': e}) node.set_driver_internal_info( - 'skip_current_service_step', False) + async_steps.SKIP_CURRENT_SERVICE_STEP, False) task.process_event('wait') return diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 6757c90065..761b19fd75 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -29,6 +29,7 @@ from oslo_utils import excutils from oslo_utils import strutils from oslo_utils import timeutils +from ironic.common import async_steps from ironic.common import boot_devices from ironic.common import exception from ironic.common import faults @@ -493,9 +494,7 @@ def cleaning_error_handler(task, logmsg, errmsg=None, traceback=False, node.clean_step = {} # Clear any leftover metadata about cleaning node.del_driver_internal_info('clean_step_index') - node.del_driver_internal_info('cleaning_reboot') - node.del_driver_internal_info('cleaning_polling') - node.del_driver_internal_info('skip_current_clean_step') + async_steps.remove_node_flags(node) # We don't need to keep the old agent URL, or token # as it should change upon the next cleaning attempt. wipe_token_and_url(task) @@ -556,10 +555,8 @@ def wipe_deploy_internal_info(task): node.del_driver_internal_info('user_deploy_steps') node.del_driver_internal_info('agent_cached_deploy_steps') node.del_driver_internal_info('deploy_step_index') - node.del_driver_internal_info('deployment_reboot') - node.del_driver_internal_info('deployment_polling') - node.del_driver_internal_info('skip_current_deploy_step') node.del_driver_internal_info('steps_validated') + async_steps.remove_node_flags(node) def wipe_cleaning_internal_info(task): @@ -570,11 +567,9 @@ def wipe_cleaning_internal_info(task): node.set_driver_internal_info('clean_steps', None) node.del_driver_internal_info('agent_cached_clean_steps') node.del_driver_internal_info('clean_step_index') - node.del_driver_internal_info('cleaning_reboot') - node.del_driver_internal_info('cleaning_polling') node.del_driver_internal_info('cleaning_disable_ramdisk') - node.del_driver_internal_info('skip_current_clean_step') node.del_driver_internal_info('steps_validated') + async_steps.remove_node_flags(node) def wipe_service_internal_info(task): @@ -584,11 +579,9 @@ def wipe_service_internal_info(task): node.set_driver_internal_info('service_steps', None) node.del_driver_internal_info('agent_cached_service_steps') node.del_driver_internal_info('service_step_index') - node.del_driver_internal_info('service_reboot') - node.del_driver_internal_info('service_polling') node.del_driver_internal_info('service_disable_ramdisk') - node.del_driver_internal_info('skip_current_service_step') node.del_driver_internal_info('steps_validated') + async_steps.remove_node_flags(node) def deploying_error_handler(task, logmsg, errmsg=None, traceback=False, @@ -1268,14 +1261,8 @@ def update_next_step_index(task, step_type): :param step_type: The type of steps to process: 'clean' or 'deploy'. :returns: Index of the next step. """ - skip_current_step = task.node.del_driver_internal_info( - 'skip_current_%s_step' % step_type, True) - if step_type == 'clean': - task.node.del_driver_internal_info('cleaning_polling') - else: - task.node.del_driver_internal_info('deployment_polling') - task.node.save() - + skip_current_step = async_steps.prepare_node_for_next_step( + task.node, step_type) return _get_node_next_steps(task, step_type, skip_current_step=skip_current_step) @@ -1810,9 +1797,7 @@ def servicing_error_handler(task, logmsg, errmsg=None, traceback=False, node.service_step = {} # Clear any leftover metadata about cleaning node.del_driver_internal_info('service_step_index') - node.del_driver_internal_info('servicing_reboot') - node.del_driver_internal_info('servicing_polling') - node.del_driver_internal_info('skip_current_service_step') + async_steps.remove_node_flags(node) # We don't need to keep the old agent URL, or token # as it should change upon the next cleaning attempt. wipe_token_and_url(task) diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 52a672f94f..a9822b026a 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -18,6 +18,7 @@ from ironic_lib import metrics_utils from oslo_log import log from oslo_utils import units +from ironic.common import async_steps from ironic.common import exception from ironic.common.glance_service import service_utils from ironic.common.i18n import _ @@ -266,10 +267,11 @@ class CustomAgentDeploy(agent_base.AgentBaseMixin, agent_base.AgentDeployMixin, elif task.driver.storage.should_write_image(task): # Check if the driver has already performed a reboot in a previous # deploy step. - if not task.node.driver_internal_info.get('deployment_reboot'): - manager_utils.node_power_action(task, states.REBOOT) - task.node.del_driver_internal_info('deployment_reboot') + already_rebooted = task.node.del_driver_internal_info( + async_steps.DEPLOYMENT_REBOOT) task.node.save() + if not already_rebooted: + manager_utils.node_power_action(task, states.REBOOT) return states.DEPLOYWAIT @METRICS.timer('CustomAgentDeployMixin.prepare_instance_boot') diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index c99fdeb767..936691a24c 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -23,6 +23,7 @@ from oslo_log import log from oslo_utils import strutils import tenacity +from ironic.common import async_steps from ironic.common import boot_devices from ironic.common import dhcp_factory from ironic.common import exception @@ -217,18 +218,7 @@ def _post_step_reboot(task, step_type): return # Signify that we've rebooted - if step_type == 'clean': - task.node.set_driver_internal_info('cleaning_reboot', True) - elif step_type == 'deploy': - task.node.set_driver_internal_info('deployment_reboot', True) - elif step_type == 'service': - task.node.set_driver_internal_info('servicing_reboot', True) - - if not task.node.driver_internal_info.get( - 'agent_secret_token_pregenerated', False): - # Wipes out the existing recorded token because the machine will - # need to re-establish the token. - task.node.del_driver_internal_info('agent_secret_token') + async_steps.set_node_flags(task.node, reboot=True, step_type=step_type) task.node.save() @@ -531,7 +521,7 @@ class HeartbeatMixin(object): # Check if the driver is polling for completion of # a step, via the 'deployment_polling' flag. polling = node.driver_internal_info.get( - 'deployment_polling', False) + async_steps.DEPLOYMENT_POLLING, False) if not polling: msg = _('Failed to process the next deploy step') self.process_next_step(task, 'deploy') @@ -567,7 +557,7 @@ class HeartbeatMixin(object): # Check if the driver is polling for completion of a step, # via the 'cleaning_polling' flag. polling = node.driver_internal_info.get( - 'cleaning_polling', False) + async_steps.CLEANING_POLLING, False) if not polling: self.continue_cleaning(task) except Exception as e: @@ -608,9 +598,9 @@ class HeartbeatMixin(object): else: msg = _('Node failed to check service progress') # Check if the driver is polling for completion of a step, - # via the 'cleaning_polling' flag. + # via the 'servicing_polling' flag. polling = node.driver_internal_info.get( - 'service_polling', False) + async_steps.SERVICING_POLLING, False) if not polling: self.continue_servicing(task) except Exception as e: @@ -1003,7 +993,8 @@ class AgentBaseMixin(object): 'continuing from current step %(step)s.', {'node': node.uuid, 'step': node.clean_step}) - node.set_driver_internal_info('skip_current_clean_step', False) + node.set_driver_internal_info( + async_steps.SKIP_CURRENT_CLEAN_STEP, False) node.save() else: # Restart the process, agent must have rebooted to new version @@ -1054,14 +1045,14 @@ class AgentBaseMixin(object): agent_commands = client.get_commands_status(task.node) if _freshly_booted(agent_commands, step_type): if step_type == 'clean': - field = 'cleaning_reboot' + field = async_steps.CLEANING_REBOOT elif step_type == 'service': - field = 'servicing_reboot' + field = async_steps.SERVICING_REBOOT else: # TODO(TheJulia): One day we should standardize the field # names here, but we also need to balance human ability # to understand what is going on so *shrug*. - field = 'deployment_reboot' + field = async_steps.DEPLOYMENT_REBOOT utils.pop_node_nested_field(node, 'driver_internal_info', field) node.save() return _continue_steps(task, step_type) diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 523567731f..c2b799727c 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -25,6 +25,7 @@ from oslo_utils import excutils from oslo_utils import fileutils from oslo_utils import strutils +from ironic.common import async_steps from ironic.common import exception from ironic.common import faults from ironic.common.glance_service import service_utils @@ -1449,85 +1450,9 @@ parse_instance_info_capabilities = ( utils.parse_instance_info_capabilities ) - -def get_async_step_return_state(node): - """Returns state based on operation being invoked. - - :param node: an ironic node object. - :returns: states.CLEANWAIT if cleaning operation in progress, - or states.DEPLOYWAIT if deploy operation in progress, - or states.SERVICEWAIT if servicing in progress. - """ - # FIXME(dtantsur): this distinction is rather useless, create a new - # constant to use for all step types? - if node.clean_step: - return states.CLEANWAIT - elif node.service_step: - return states.SERVICEWAIT - else: - # TODO(dtantsur): ideally, check for node.deploy_step and raise - # something if this function is called without any step field set. - # Unfortunately, a lot of unit tests rely on exactly this. - return states.DEPLOYWAIT - - -def _check_agent_token_prior_to_agent_reboot(node): - """Removes the agent token if it was not pregenerated. - - Removal of the agent token in cases where it is not pregenerated - is a vital action prior to rebooting the agent, as without doing - so the agent will be unable to establish communication with - the ironic API after the reboot. Effectively locking itself out - as in cases where the value is not pregenerated, it is not - already included in the payload and must be generated again - upon lookup. - - :param node: The Node object. - """ - if not node.driver_internal_info.get('agent_secret_token_pregenerated', - False): - node.del_driver_internal_info('agent_secret_token') - - -def set_async_step_flags(node, reboot=None, skip_current_step=None, - polling=None): - """Sets appropriate reboot flags in driver_internal_info based on operation - - :param node: an ironic node object. - :param reboot: Boolean value to set for node's driver_internal_info flag - cleaning_reboot or deployment_reboot based on cleaning or deployment - operation in progress. If it is None, corresponding reboot flag is - not set in node's driver_internal_info. - :param skip_current_step: Boolean value to set for node's - driver_internal_info flag skip_current_clean_step or - skip_current_deploy_step based on cleaning or deployment operation - in progress. If it is None, corresponding skip step flag is not set - in node's driver_internal_info. - :param polling: Boolean value to set for node's driver_internal_info flag - deployment_polling or cleaning_polling. If it is None, the - corresponding polling flag is not set in the node's - driver_internal_info. - """ - if node.clean_step: - reboot_field = 'cleaning_reboot' - skip_field = 'skip_current_clean_step' - polling_field = 'cleaning_polling' - else: - reboot_field = 'deployment_reboot' - skip_field = 'skip_current_deploy_step' - polling_field = 'deployment_polling' - - if reboot is not None: - node.set_driver_internal_info(reboot_field, reboot) - if reboot: - # If rebooting, we must ensure that we check and remove - # an agent token if necessary. - _check_agent_token_prior_to_agent_reboot(node) - if skip_current_step is not None: - node.set_driver_internal_info(skip_field, skip_current_step) - if polling is not None: - node.set_driver_internal_info(polling_field, polling) - node.save() +# NOTE(dtantsur): backward compatibility, do not use +get_async_step_return_state = async_steps.get_return_state +set_async_step_flags = async_steps.set_node_flags def prepare_agent_boot(task): @@ -1558,7 +1483,7 @@ def reboot_to_finish_step(task, timeout=None): prepare_agent_boot(task) manager_utils.node_power_action(task, states.REBOOT, timeout) - return get_async_step_return_state(task.node) + return async_steps.get_return_state(task.node) def step_error_handler(task, logmsg, errmsg=None): diff --git a/ironic/tests/unit/conductor/test_servicing.py b/ironic/tests/unit/conductor/test_servicing.py index dda03784c4..4604a9ad82 100644 --- a/ironic/tests/unit/conductor/test_servicing.py +++ b/ironic/tests/unit/conductor/test_servicing.py @@ -609,7 +609,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase): last_error=None, driver_internal_info={'service_steps': self.service_steps, 'service_step_index': None, - 'service_reboot': True}, + 'servicing_reboot': True}, service_step={}) mock_execute.side_effect = exception.AgentConnectionFailed( reason='failed') @@ -642,7 +642,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase): last_error=None, driver_internal_info={'service_steps': self.service_steps, 'service_step_index': None, - 'service_reboot': True}, + 'servicing_reboot': True}, service_step={}) mock_execute.side_effect = exception.AgentInProgress( reason='still meowing') @@ -667,7 +667,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase): # Resume where last_step is the last service step tgt_prov_state = states.ACTIVE info = {'service_steps': self.service_steps, - 'service_reboot': True, + 'servicing_reboot': True, 'service_step_index': len(self.service_steps) - 1} node = obj_utils.create_test_node( @@ -689,7 +689,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase): self.assertEqual(states.NOSTATE, node.target_provision_state) self.assertEqual({}, node.service_step) self.assertNotIn('service_step_index', node.driver_internal_info) - self.assertNotIn('service_reboot', node.driver_internal_info) + self.assertNotIn('servicing_reboot', node.driver_internal_info) self.assertIsNone(node.driver_internal_info['service_steps']) self.assertFalse(mock_execute.called) @@ -933,8 +933,8 @@ class DoNodeServiceAbortTestCase(db_base.DbTestCase): 'agent_url': 'some url', 'agent_secret_token': 'token', 'service_step_index': 2, - 'service_reboot': True, - 'service_polling': True, + 'servicing_reboot': True, + 'servicing_polling': True, 'skip_current_service_step': True}) with task_manager.acquire(self.context, node.uuid) as task: @@ -948,9 +948,9 @@ class DoNodeServiceAbortTestCase(db_base.DbTestCase): self.assertEqual({}, task.node.service_step) self.assertNotIn('service_step_index', task.node.driver_internal_info) - self.assertNotIn('service_reboot', + self.assertNotIn('servicing_reboot', task.node.driver_internal_info) - self.assertNotIn('service_polling', + self.assertNotIn('servicing_polling', task.node.driver_internal_info) self.assertNotIn('skip_current_service_step', task.node.driver_internal_info) diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index e99784bd11..ee651e0701 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -1497,8 +1497,8 @@ class ErrorHandlersTestCase(db_base.DbTestCase): target = 'baz' self.node.target_provision_state = target self.node.service_step = {'key': 'val'} - self.node.set_driver_internal_info('service_reboot', True) - self.node.set_driver_internal_info('service_polling', True) + self.node.set_driver_internal_info('servicing_reboot', True) + self.node.set_driver_internal_info('servicing_polling', True) self.node.set_driver_internal_info('skip_current_service_step', True) self.node.set_driver_internal_info('service_step_index', 0) self.node.set_driver_internal_info('agent_url', 'url') @@ -1513,8 +1513,8 @@ class ErrorHandlersTestCase(db_base.DbTestCase): self.node.save.assert_called_once_with() self.assertEqual({}, self.node.service_step) self.assertNotIn('service_step_index', self.node.driver_internal_info) - self.assertNotIn('service_reboot', self.node.driver_internal_info) - self.assertNotIn('service_polling', self.node.driver_internal_info) + self.assertNotIn('servicing_reboot', self.node.driver_internal_info) + self.assertNotIn('servicing_polling', self.node.driver_internal_info) self.assertNotIn('skip_current_service_step', self.node.driver_internal_info) self.assertNotIn('agent_secret_token', self.node.driver_internal_info) @@ -2842,16 +2842,16 @@ class ServiceUtilsTestCase(db_base.DbTestCase): self.node.driver_internal_info = { 'service_steps': {'foo': 'bar'}, 'agent_cached_service_steps': {'more_foo': None}, - 'service_reboot': False, - 'service_polling': 1, + 'servicing_reboot': False, + 'servicing_polling': 1, 'service_disable_ramdisk': False, 'skip_current_service_step': False, 'steps_validated': 'meow' 'agent_secret_token'} self.node.save() not_in_list = ['agent_cached_service_steps', - 'serivce_reboot', - 'service_polling', + 'servicing_reboot', + 'servicing_polling', 'service_disable_ramdisk', 'skip_current_service_step', 'steps_validated', diff --git a/ironic/tests/unit/drivers/modules/drac/test_management.py b/ironic/tests/unit/drivers/modules/drac/test_management.py index ea425e2c54..6d693fcf0b 100644 --- a/ironic/tests/unit/drivers/modules/drac/test_management.py +++ b/ironic/tests/unit/drivers/modules/drac/test_management.py @@ -930,8 +930,6 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest): self.assertRaises(exception.InvalidParameterValue, self.management.import_configuration, task, 'edge') - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @@ -941,11 +939,9 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest): def test_import_configuration_success( self, mock_get_system, mock_get_configuration, mock_log, mock_power, mock_build_agent_options, - mock_set_async_step_flags, mock_get_async_step_return_state): + mock_set_async_step_flags): deploy_opts = mock.Mock() mock_build_agent_options.return_value = deploy_opts - step_result = mock.Mock() - mock_get_async_step_return_state.return_value = step_result task = mock.Mock(node=self.node, context=self.context) fake_manager_oem1 = mock.Mock() fake_manager_oem1.import_system_configuration.side_effect = ( @@ -961,6 +957,7 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest): '"data": {"prop1": "value1", "prop2": 2}}}') result = self.management.import_configuration(task, 'edge') + self.assertEqual(states.DEPLOYWAIT, result) fake_manager_oem2.import_system_configuration.assert_called_once_with( '{"prop1": "value1", "prop2": 2}') @@ -971,8 +968,6 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest): mock_build_agent_options.assert_called_once_with(task.node) task.driver.boot.prepare_ramdisk.assert_called_once_with( task, deploy_opts) - mock_get_async_step_return_state.assert_called_once_with(task.node) - self.assertEqual(step_result, result) @mock.patch.object(drac_mgmt.DracRedfishManagement, 'import_configuration', autospec=True) diff --git a/ironic/tests/unit/drivers/modules/redfish/test_management.py b/ironic/tests/unit/drivers/modules/redfish/test_management.py index b1176476bf..bf782520f0 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_management.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_management.py @@ -1012,13 +1012,10 @@ class RedfishManagementTestCase(db_base.DbTestCase): @mock.patch.object(redfish_boot.RedfishVirtualMediaBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) def test_update_firmware(self, mock_get_update_service, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_prepare, build_mock): build_mock.return_value = {'a': 'b'} @@ -1032,12 +1029,13 @@ class RedfishManagementTestCase(db_base.DbTestCase): shared=False) as task: task.node.save = mock.Mock() - task.driver.management.update_firmware( + result = task.driver.management.update_firmware( task, [{'url': 'http://test1', 'checksum': 'aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d'}, {'url': 'http://test2', 'checksum': '9f6227549221920e312fed2cfc6586ee832cc546'}]) + self.assertEqual(states.DEPLOYWAIT, result) mock_get_update_service.assert_called_once_with(task.node) mock_update_service.simple_update.assert_called_once_with( @@ -1054,8 +1052,6 @@ class RedfishManagementTestCase(db_base.DbTestCase): task.node.driver_internal_info.get('firmware_cleanup')) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) @@ -1066,14 +1062,11 @@ class RedfishManagementTestCase(db_base.DbTestCase): @mock.patch.object(redfish_boot.RedfishVirtualMediaBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) def test_update_firmware_stage( self, mock_get_update_service, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, - mock_prepare, build_mock, mock_stage): + mock_node_power_action, mock_prepare, build_mock, mock_stage): build_mock.return_value = {'a': 'b'} mock_task_monitor = mock.Mock() mock_task_monitor.task_monitor_uri = '/task/123' @@ -1109,8 +1102,6 @@ class RedfishManagementTestCase(db_base.DbTestCase): ['http'], task.node.driver_internal_info['firmware_cleanup']) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) @@ -1121,14 +1112,11 @@ class RedfishManagementTestCase(db_base.DbTestCase): @mock.patch.object(redfish_boot.RedfishVirtualMediaBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) def test_update_firmware_stage_both( self, mock_get_update_service, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, - mock_prepare, build_mock, mock_stage): + mock_node_power_action, mock_prepare, build_mock, mock_stage): build_mock.return_value = {'a': 'b'} mock_task_monitor = mock.Mock() mock_task_monitor.task_monitor_uri = '/task/123' @@ -1170,8 +1158,6 @@ class RedfishManagementTestCase(db_base.DbTestCase): task.node.driver_internal_info['firmware_cleanup']) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) diff --git a/ironic/tests/unit/drivers/modules/redfish/test_raid.py b/ironic/tests/unit/drivers/modules/redfish/test_raid.py index 026c3c0802..17c90e79a6 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_raid.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_raid.py @@ -181,13 +181,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_1a( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -215,13 +212,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_1b( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -264,13 +258,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_1b_apply_time_immediate( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -305,7 +296,7 @@ class RedfishRAIDTestCase(db_base.DbTestCase): self.node.save() with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: - task.driver.raid.create_configuration(task) + self.assertIsNone(task.driver.raid.create_configuration(task)) pre = '/redfish/v1/Systems/1/Storage/1/Drives/' expected_payload = { 'RAIDType': 'RAID5', @@ -323,7 +314,6 @@ class RedfishRAIDTestCase(db_base.DbTestCase): expected_payload, apply_time=sushy.APPLY_TIME_IMMEDIATE) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=False, skip_current_step=True, polling=True) - self.assertEqual(mock_get_async_step_return_state.call_count, 0) self.assertEqual(mock_node_power_action.call_count, 0) self.assertEqual(mock_build_agent_options.call_count, 0) self.assertEqual(mock_prepare_ramdisk.call_count, 0) @@ -341,13 +331,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_1b_apply_time_on_reset( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -376,7 +363,8 @@ class RedfishRAIDTestCase(db_base.DbTestCase): self.node.save() with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: - task.driver.raid.create_configuration(task) + result = task.driver.raid.create_configuration(task) + self.assertEqual(states.DEPLOYWAIT, result) pre = '/redfish/v1/Systems/1/Storage/1/Drives/' expected_payload = { 'RAIDType': 'RAID5', @@ -393,8 +381,6 @@ class RedfishRAIDTestCase(db_base.DbTestCase): expected_payload, apply_time=sushy.APPLY_TIME_ON_RESET) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) mock_build_agent_options.assert_called_once_with(task.node) @@ -406,13 +392,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_2( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -497,13 +480,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_2_on_reset( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -579,13 +559,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_3( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -635,13 +612,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_4( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -756,13 +730,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_5a( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -795,13 +766,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_5b( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -863,13 +831,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_case_6( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -903,13 +868,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_create_config_interface_type( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -996,13 +958,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_delete_config_immediate( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -1031,12 +990,11 @@ class RedfishRAIDTestCase(db_base.DbTestCase): 'raid_level': '1', 'size_gb': 100}], 'last_updated': last_updated} - task.driver.raid.delete_configuration(task) + self.assertIsNone(task.driver.raid.delete_configuration(task)) self.assertEqual(mock_volumes[0].delete.call_count, 1) self.assertEqual(mock_volumes[1].delete.call_count, 1) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=False, skip_current_step=True, polling=True) - self.assertEqual(mock_get_async_step_return_state.call_count, 0) self.assertEqual(mock_node_power_action.call_count, 0) self.assertEqual(mock_build_agent_options.call_count, 0) self.assertEqual(mock_prepare_ramdisk.call_count, 0) @@ -1050,13 +1008,10 @@ class RedfishRAIDTestCase(db_base.DbTestCase): spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - @mock.patch.object(deploy_utils, 'get_async_step_return_state', - autospec=True) @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) def test_delete_config_on_reset( self, mock_set_async_step_flags, - mock_get_async_step_return_state, mock_node_power_action, mock_build_agent_options, mock_prepare_ramdisk, @@ -1088,13 +1043,12 @@ class RedfishRAIDTestCase(db_base.DbTestCase): 'size_gb': 100}], 'last_updated': '2022-05-18 08:49:17.585443'} task.node.raid_config = raid_config - task.driver.raid.delete_configuration(task) + result = task.driver.raid.delete_configuration(task) + self.assertEqual(states.DEPLOYWAIT, result) self.assertEqual(mock_volumes[0].delete.call_count, 1) self.assertEqual(mock_volumes[1].delete.call_count, 0) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) - mock_get_async_step_return_state.assert_called_once_with( - task.node) mock_node_power_action.assert_called_once_with( task, states.REBOOT, None) mock_build_agent_options.assert_called_once_with(task.node) diff --git a/releasenotes/notes/servicing-reboot-502f474a01f937a8.yaml b/releasenotes/notes/servicing-reboot-502f474a01f937a8.yaml new file mode 100644 index 0000000000..02a358d6d7 --- /dev/null +++ b/releasenotes/notes/servicing-reboot-502f474a01f937a8.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Fixes service steps that rely on a reboot. Previously, the reboot was not + properly recognized in the conductor logic.