diff --git a/doc/source/contributor/deploy-steps.rst b/doc/source/contributor/deploy-steps.rst index be5b960f07..e6407d41e3 100644 --- a/doc/source/contributor/deploy-steps.rst +++ b/doc/source/contributor/deploy-steps.rst @@ -114,6 +114,80 @@ Just as deploy steps, in-band clean steps have to be implemented in a custom :ironic-python-agent-doc:`IPA hardware manager `. +Asynchronous steps +------------------ + +If the step returns ``None``, ironic assumes its execution is finished and +proceeds to the next step. Many steps are executed asynchronously; in this case +you need to inform ironic that the step is not finished. There are several +possibilities: + +Combined in-band and out-of-band step +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If your step starts as out-of-band and then proceeds as in-band (i.e. inside +the agent), you only need to return ``CLEANWAIT``/``DEPLOYWAIT`` from +the step. + +.. code-block:: python + + from ironic.drivers import base + from ironic.drivers.modules import agent + from ironic.drivers.modules import agent_base + from ironic.drivers.modules import agent_client + from ironic.drivers.modules import deploy_utils + + class MyDeploy(agent.CustomAgentDeploy): + ... + + @base.deploy_step(priority=80) + def my_deploy(self, task): + ... + return deploy_utils.get_async_step_return_state(task.node) + + # Usually you can use a more high-level pattern: + + @base.deploy_step(priority=60) + def my_deploy2(self, task): + new_step = {'interface': 'deploy', + 'step': 'my_deploy2', + 'args': {...}} + client = agent_client.get_client(task) + return agent_base.execute_step(task, new_step, 'deploy', + client=client) + +.. warning:: + This approach only works for steps implemented on a ``deploy`` + interface that inherits agent deploy. + +Execution on reboot +~~~~~~~~~~~~~~~~~~~ + +Some steps are executed out-of-band, but require a reboot to complete. Use the +following pattern: + +.. 
code-block:: python + + from ironic.drivers import base + from ironic.drivers.modules import deploy_utils + + class MyManagement(base.ManagementInterface): + ... + + @base.clean_step(priority=0) + def my_action(self, task): + ... + + # Tell ironic that... + deploy_utils.set_async_step_flags( + task.node, + # ... we're waiting for IPA to come back after reboot + reboot=True, + # ... the current step is done + skip_current_step=True) + + return deploy_utils.reboot_to_finish_step(task) + Implementing RAID ----------------- diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 776b0186d8..e692d9eadb 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -401,8 +401,7 @@ class CustomAgentDeploy(agent_base.AgentBaseMixin, agent_base.AgentDeployMixin, states.RESCUE, states.RESCUEFAIL): self._update_instance_info(task) if CONF.agent.manage_agent_boot: - deploy_opts = deploy_utils.build_agent_options(node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) + deploy_utils.prepare_agent_boot(task) @METRICS.timer('CustomAgentDeploy.clean_up') @task_manager.require_exclusive_lock @@ -856,9 +855,8 @@ class AgentRescue(base.RescueInterface): task.driver.network.unconfigure_tenant_networks(task) task.driver.network.add_rescuing_network(task) if CONF.agent.manage_agent_boot: - ramdisk_opts = deploy_utils.build_agent_options(task.node) # prepare_ramdisk will set the boot device - task.driver.boot.prepare_ramdisk(task, ramdisk_opts) + deploy_utils.prepare_agent_boot(task) manager_utils.node_power_action(task, states.POWER_ON) return states.RESCUEWAIT diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 99a49a7a8c..43b2ba8539 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -186,11 +186,9 @@ def _post_step_reboot(task, step_type): current_step = (task.node.clean_step if step_type == 'clean' else task.node.deploy_step) try: - # 
NOTE(fellypefca): Call prepare_ramdisk on ensure that the - # baremetal node boots back into the ramdisk after reboot. - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) - manager_utils.node_power_action(task, states.REBOOT) + # NOTE(fellypefca): ensure that the baremetal node boots back into + # the ramdisk after reboot. + deploy_utils.reboot_to_finish_step(task) except Exception as e: msg = (_('Reboot requested by %(type)s step %(step)s failed for ' 'node %(node)s: %(err)s') % diff --git a/ironic/drivers/modules/ansible/deploy.py b/ironic/drivers/modules/ansible/deploy.py index 8e7b1a3a61..cd3f4c68f6 100644 --- a/ironic/drivers/modules/ansible/deploy.py +++ b/ironic/drivers/modules/ansible/deploy.py @@ -509,8 +509,7 @@ class AnsibleDeploy(agent_base.HeartbeatMixin, node.instance_info = deploy_utils.build_instance_info_for_deploy( task) node.save() - boot_opt = deploy_utils.build_agent_options(node) - task.driver.boot.prepare_ramdisk(task, boot_opt) + deploy_utils.prepare_agent_boot(task) @METRICS.timer('AnsibleDeploy.clean_up') def clean_up(self, task): @@ -595,8 +594,7 @@ class AnsibleDeploy(agent_base.HeartbeatMixin, task.driver.network.add_cleaning_network(task) manager_utils.restore_power_state_if_needed( task, power_state_to_restore) - boot_opt = deploy_utils.build_agent_options(node) - task.driver.boot.prepare_ramdisk(task, boot_opt) + deploy_utils.prepare_agent_boot(task) if not fast_track: manager_utils.node_power_action(task, states.REBOOT) return states.CLEANWAIT diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 0124516512..8dff2bf0ee 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -721,8 +721,7 @@ def prepare_inband_cleaning(task, manage_boot=True): agent_add_clean_params(task) if manage_boot: - ramdisk_opts = build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, 
ramdisk_opts) + prepare_agent_boot(task) # NOTE(dtantsur): calling prepare_ramdisk may power off the node, so we # need to check fast-track again and reboot if needed. @@ -1386,6 +1385,27 @@ def set_async_step_flags(node, reboot=None, skip_current_step=None, node.save() +def prepare_agent_boot(task): + """Prepare booting the agent on the node. + + :param task: a TaskManager instance. + """ + deploy_opts = build_agent_options(task.node) + task.driver.boot.prepare_ramdisk(task, deploy_opts) + + +def reboot_to_finish_step(task): + """Reboot the node into IPA to finish a deploy/clean step. + + :param task: a TaskManager instance. + :returns: states.CLEANWAIT if cleaning operation in progress + or states.DEPLOYWAIT if deploy operation in progress. + """ + prepare_agent_boot(task) + manager_utils.node_power_action(task, states.REBOOT) + return get_async_step_return_state(task.node) + + def get_root_device_for_deploy(node): """Get a root device requested for deployment or None. diff --git a/ironic/drivers/modules/drac/management.py b/ironic/drivers/modules/drac/management.py index 637f837d19..dd614b42a9 100644 --- a/ironic/drivers/modules/drac/management.py +++ b/ironic/drivers/modules/drac/management.py @@ -34,7 +34,6 @@ from ironic.common import boot_devices from ironic.common import exception from ironic.common.i18n import _ from ironic.common import molds -from ironic.common import states from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils from ironic.conf import CONF @@ -455,11 +454,7 @@ class DracRedfishManagement(redfish_management.RedfishManagement): reboot=True, skip_current_step=True, polling=True) - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) - manager_utils.node_power_action(task, states.REBOOT) - - return deploy_utils.get_async_step_return_state(task.node) + return deploy_utils.reboot_to_finish_step(task) @base.clean_step(priority=0, 
argsinfo=IMPORT_EXPORT_CONFIGURATION_ARGSINFO) diff --git a/ironic/drivers/modules/drac/raid.py b/ironic/drivers/modules/drac/raid.py index dd405fc471..1bdd36d85d 100644 --- a/ironic/drivers/modules/drac/raid.py +++ b/ironic/drivers/modules/drac/raid.py @@ -1470,8 +1470,7 @@ class DracRedfishRAID(redfish_raid.RedfishRAID): reboot=True, skip_current_step=True, polling=True) - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) + deploy_utils.prepare_agent_boot(task) # Reboot already done by non real time task task.upgrade_lock() info = task.node.driver_internal_info diff --git a/ironic/drivers/modules/ilo/bios.py b/ironic/drivers/modules/ilo/bios.py index 01927407af..a602554238 100644 --- a/ironic/drivers/modules/ilo/bios.py +++ b/ironic/drivers/modules/ilo/bios.py @@ -21,8 +21,6 @@ from oslo_utils import importutils from ironic.common import exception from ironic.common.i18n import _ -from ironic.common import states -from ironic.conductor import utils as manager_utils from ironic.drivers import base from ironic.drivers.modules import deploy_utils from ironic.drivers.modules.ilo import common as ilo_common @@ -98,9 +96,7 @@ class IloBIOS(base.BIOSInterface): raise exception.NodeCleaningFailure(errmsg) raise exception.InstanceDeployFailure(reason=errmsg) - deploy_opts = deploy_utils.build_agent_options(node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) - manager_utils.node_power_action(task, states.REBOOT) + return_state = deploy_utils.reboot_to_finish_step(task) deploy_utils.set_async_step_flags(node, reboot=True, skip_current_step=False) @@ -112,7 +108,7 @@ class IloBIOS(base.BIOSInterface): node.driver_internal_info = driver_internal_info node.save() - return deploy_utils.get_async_step_return_state(node) + return return_state def _execute_post_boot_bios_step(self, task, step): """Perform operations required after the reboot. 
diff --git a/ironic/drivers/modules/ilo/management.py b/ironic/drivers/modules/ilo/management.py index e78db25ef5..3e2eb5622b 100644 --- a/ironic/drivers/modules/ilo/management.py +++ b/ironic/drivers/modules/ilo/management.py @@ -395,8 +395,7 @@ class IloManagement(base.ManagementInterface): # Reset iLO ejects virtual media # Re-create the environment for agent boot, if required task.driver.boot.clean_up_ramdisk(task) - deploy_opts = deploy_utils.build_agent_options(node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) + deploy_utils.prepare_agent_boot(task) @METRICS.timer('IloManagement.reset_ilo_credential') @base.deploy_step(priority=0, argsinfo=_RESET_ILO_CREDENTIALS_ARGSINFO) @@ -657,8 +656,7 @@ class IloManagement(base.ManagementInterface): # Firmware might have ejected the virtual media, if it was used. # Re-create the environment for agent boot, if required task.driver.boot.clean_up_ramdisk(task) - deploy_opts = deploy_utils.build_agent_options(node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) + deploy_utils.prepare_agent_boot(task) LOG.info("All Firmware update operations completed successfully " "for node: %s.", node.uuid) @@ -1066,10 +1064,7 @@ class Ilo5Management(IloManagement): 'ilo_disk_erase_hdd_check') self._set_driver_internal_value( task, False, 'skip_current_clean_step') - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) - manager_utils.node_power_action(task, states.REBOOT) - return states.CLEANWAIT + return deploy_utils.reboot_to_finish_step(task) if not driver_internal_info.get( 'ilo_disk_erase_ssd_check') and ('SSD' in disk_types): @@ -1079,10 +1074,7 @@ class Ilo5Management(IloManagement): 'ilo_disk_erase_ssd_check', 'cleaning_reboot') self._set_driver_internal_value( task, False, 'skip_current_clean_step') - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) - manager_utils.node_power_action(task, 
states.REBOOT) - return states.CLEANWAIT + return deploy_utils.reboot_to_finish_step(task) # It will wait until disk erase will complete if self._wait_for_disk_erase_status(task.node): diff --git a/ironic/drivers/modules/ilo/raid.py b/ironic/drivers/modules/ilo/raid.py index 32278844e3..ae701cc550 100644 --- a/ironic/drivers/modules/ilo/raid.py +++ b/ironic/drivers/modules/ilo/raid.py @@ -99,9 +99,7 @@ class Ilo5RAID(base.RAIDInterface): task.node.save() def _prepare_for_read_raid(self, task, raid_step): - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) - manager_utils.node_power_action(task, states.REBOOT) + deploy_utils.reboot_to_finish_step(task) if raid_step == 'create_raid': self._set_driver_internal_true_value( task, 'ilo_raid_create_in_progress') diff --git a/ironic/drivers/modules/redfish/bios.py b/ironic/drivers/modules/redfish/bios.py index c1af56f248..3ac7fcd6ea 100644 --- a/ironic/drivers/modules/redfish/bios.py +++ b/ironic/drivers/modules/redfish/bios.py @@ -267,8 +267,7 @@ class RedfishBIOS(base.BIOSInterface): :param task: a TaskManager instance containing the node to act on. """ - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) + deploy_utils.prepare_agent_boot(task) self._reboot(task) def post_configuration(self, task, settings): @@ -282,8 +281,7 @@ class RedfishBIOS(base.BIOSInterface): :param task: a TaskManager instance containing the node to act on. :param settings: a list of BIOS settings to be updated. 
""" - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) + deploy_utils.prepare_agent_boot(task) self._reboot(task) def get_properties(self): diff --git a/ironic/drivers/modules/redfish/management.py b/ironic/drivers/modules/redfish/management.py index 5e572f84a8..9a68d99754 100644 --- a/ironic/drivers/modules/redfish/management.py +++ b/ironic/drivers/modules/redfish/management.py @@ -760,11 +760,7 @@ class RedfishManagement(base.ManagementInterface): skip_current_step=True, polling=True) - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) - manager_utils.node_power_action(task, states.REBOOT) - - return deploy_utils.get_async_step_return_state(task.node) + return deploy_utils.reboot_to_finish_step(task) def _apply_firmware_update(self, node, update_service, firmware_updates): """Applies the next firmware update to the node diff --git a/ironic/drivers/modules/redfish/raid.py b/ironic/drivers/modules/redfish/raid.py index 9d0a947127..c01d08a9cf 100644 --- a/ironic/drivers/modules/redfish/raid.py +++ b/ironic/drivers/modules/redfish/raid.py @@ -853,10 +853,7 @@ class RedfishRAID(base.RAIDInterface): skip_current_step=True, polling=True) if reboot_required: - return_state = deploy_utils.get_async_step_return_state(task.node) - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) - manager_utils.node_power_action(task, states.REBOOT) + return_state = deploy_utils.reboot_to_finish_step(task) return self.post_create_configuration( task, raid_configs, return_state=return_state) @@ -929,10 +926,7 @@ class RedfishRAID(base.RAIDInterface): skip_current_step=True, polling=True) if reboot_required: - return_state = deploy_utils.get_async_step_return_state(task.node) - deploy_opts = deploy_utils.build_agent_options(task.node) - task.driver.boot.prepare_ramdisk(task, deploy_opts) - 
manager_utils.node_power_action(task, states.REBOOT) + return_state = deploy_utils.reboot_to_finish_step(task) return self.post_delete_configuration( task, raid_configs, return_state=return_state)