Add and document high-level helpers for async steps

The pattern we use right now is too verbose to even put in
documentation. This change adds two more helpers:
* deploy_utils.prepare_agent_boot replaces the omnipresent 2-line
  snippet around task.boot.prepare_ramdisk.
* deploy_utils.reboot_to_finish_step adds rebooting to it.

Change-Id: I88540268d3bacebda775a0e94015c29a82c2c6a0
This commit is contained in:
Dmitry Tantsur 2021-09-03 10:38:28 +02:00
parent efe5a2cf54
commit 7f9badb543
13 changed files with 117 additions and 61 deletions

View File

@ -114,6 +114,80 @@ Just as deploy steps, in-band clean steps have to be
implemented in a custom :ironic-python-agent-doc:`IPA hardware manager
<contributor/hardware_managers.html#custom-hardwaremanagers-and-cleaning>`.
Asynchronous steps
------------------
If the step returns ``None``, ironic assumes its execution is finished and
proceeds to the next step. Many steps are executed asynchronously; in this case
you need to inform ironic that the step is not finished. There are several
possibilities:
Combined in-band and out-of-band step
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If your step starts as out-of-band and then proceeds as in-band (i.e. inside
the agent), you only need to return ``CLEANWAIT``/``DEPLOYWAIT`` from
the step.
.. code-block:: python
from ironic.drivers import base
from ironic.drivers.modules import agent
from ironic.drivers.modules import agent_base
from ironic.drivers.modules import agent_client
from ironic.drivers.modules import deploy_utils
class MyDeploy(agent.CustomAgentDeploy):
...
@base.deploy_step(priority=80)
def my_deploy(self, task):
...
return deploy_utils.get_async_step_return_state(task.node)
# Usually you can use a more high-level pattern:
@base.deploy_step(priority=60)
def my_deploy2(self, task):
new_step = {'interface': 'deploy',
'step': 'my_deploy2',
'args': {...}}
client = agent_client.get_client(task)
return agent_base.execute_step(task, new_step, 'deploy',
client=client)
.. warning::
This approach only works for steps implemented on a ``deploy``
interface that inherits from the agent deploy interface.
Execution on reboot
~~~~~~~~~~~~~~~~~~~
Some steps are executed out-of-band, but require a reboot to complete. Use the
following pattern:
.. code-block:: python
from ironic.drivers import base
from ironic.drivers.modules import deploy_utils
class MyManagement(base.ManagementInterface):
...
@base.clean_step(priority=0)
def my_action(self, task):
...
# Tell ironic that...
deploy_utils.set_async_step_flags(
task.node,
# ... we're waiting for IPA to come back after reboot
reboot=True,
# ... the current step is done
skip_current_step=True)
return deploy_utils.reboot_to_finish_step(task)
Implementing RAID
-----------------

View File

@ -401,8 +401,7 @@ class CustomAgentDeploy(agent_base.AgentBaseMixin, agent_base.AgentDeployMixin,
states.RESCUE, states.RESCUEFAIL):
self._update_instance_info(task)
if CONF.agent.manage_agent_boot:
deploy_opts = deploy_utils.build_agent_options(node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
deploy_utils.prepare_agent_boot(task)
@METRICS.timer('CustomAgentDeploy.clean_up')
@task_manager.require_exclusive_lock
@ -856,9 +855,8 @@ class AgentRescue(base.RescueInterface):
task.driver.network.unconfigure_tenant_networks(task)
task.driver.network.add_rescuing_network(task)
if CONF.agent.manage_agent_boot:
ramdisk_opts = deploy_utils.build_agent_options(task.node)
# prepare_ramdisk will set the boot device
task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
deploy_utils.prepare_agent_boot(task)
manager_utils.node_power_action(task, states.POWER_ON)
return states.RESCUEWAIT

View File

@ -186,11 +186,9 @@ def _post_step_reboot(task, step_type):
current_step = (task.node.clean_step if step_type == 'clean'
else task.node.deploy_step)
try:
# NOTE(fellypefca): Call prepare_ramdisk on ensure that the
# baremetal node boots back into the ramdisk after reboot.
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
manager_utils.node_power_action(task, states.REBOOT)
# NOTE(fellypefca): ensure that the baremetal node boots back into
# the ramdisk after reboot.
deploy_utils.reboot_to_finish_step(task)
except Exception as e:
msg = (_('Reboot requested by %(type)s step %(step)s failed for '
'node %(node)s: %(err)s') %

View File

@ -509,8 +509,7 @@ class AnsibleDeploy(agent_base.HeartbeatMixin,
node.instance_info = deploy_utils.build_instance_info_for_deploy(
task)
node.save()
boot_opt = deploy_utils.build_agent_options(node)
task.driver.boot.prepare_ramdisk(task, boot_opt)
deploy_utils.prepare_agent_boot(task)
@METRICS.timer('AnsibleDeploy.clean_up')
def clean_up(self, task):
@ -595,8 +594,7 @@ class AnsibleDeploy(agent_base.HeartbeatMixin,
task.driver.network.add_cleaning_network(task)
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
boot_opt = deploy_utils.build_agent_options(node)
task.driver.boot.prepare_ramdisk(task, boot_opt)
deploy_utils.prepare_agent_boot(task)
if not fast_track:
manager_utils.node_power_action(task, states.REBOOT)
return states.CLEANWAIT

View File

@ -721,8 +721,7 @@ def prepare_inband_cleaning(task, manage_boot=True):
agent_add_clean_params(task)
if manage_boot:
ramdisk_opts = build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
prepare_agent_boot(task)
# NOTE(dtantsur): calling prepare_ramdisk may power off the node, so we
# need to check fast-track again and reboot if needed.
@ -1386,6 +1385,27 @@ def set_async_step_flags(node, reboot=None, skip_current_step=None,
node.save()
def prepare_agent_boot(task):
    """Get the node ready to boot into the agent ramdisk.

    Builds the agent options for the task's node and hands them to the
    ``prepare_ramdisk`` method of the task's boot interface.

    :param task: a TaskManager instance.
    """
    # Build the options first, then let the boot interface consume them.
    agent_options = build_agent_options(task.node)
    task.driver.boot.prepare_ramdisk(task, agent_options)
def reboot_to_finish_step(task):
    """Reboot the node into the agent so a deploy/clean step can complete.

    Prepares the agent boot for the node, issues a reboot power action and
    reports the wait state that matches the operation in progress.

    :param task: a TaskManager instance.
    :returns: states.CLEANWAIT if a cleaning operation is in progress
        or states.DEPLOYWAIT if a deploy operation is in progress.
    """
    # The ramdisk must be prepared before the reboot is requested.
    prepare_agent_boot(task)
    manager_utils.node_power_action(task, states.REBOOT)
    wait_state = get_async_step_return_state(task.node)
    return wait_state
def get_root_device_for_deploy(node):
"""Get a root device requested for deployment or None.

View File

@ -34,7 +34,6 @@ from ironic.common import boot_devices
from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import molds
from ironic.common import states
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.conf import CONF
@ -455,11 +454,7 @@ class DracRedfishManagement(redfish_management.RedfishManagement):
reboot=True,
skip_current_step=True,
polling=True)
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
manager_utils.node_power_action(task, states.REBOOT)
return deploy_utils.get_async_step_return_state(task.node)
return deploy_utils.reboot_to_finish_step(task)
@base.clean_step(priority=0,
argsinfo=IMPORT_EXPORT_CONFIGURATION_ARGSINFO)

View File

@ -1470,8 +1470,7 @@ class DracRedfishRAID(redfish_raid.RedfishRAID):
reboot=True,
skip_current_step=True,
polling=True)
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
deploy_utils.prepare_agent_boot(task)
# Reboot already done by non real time task
task.upgrade_lock()
info = task.node.driver_internal_info

View File

@ -21,8 +21,6 @@ from oslo_utils import importutils
from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import states
from ironic.conductor import utils as manager_utils
from ironic.drivers import base
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules.ilo import common as ilo_common
@ -98,9 +96,7 @@ class IloBIOS(base.BIOSInterface):
raise exception.NodeCleaningFailure(errmsg)
raise exception.InstanceDeployFailure(reason=errmsg)
deploy_opts = deploy_utils.build_agent_options(node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
manager_utils.node_power_action(task, states.REBOOT)
return_state = deploy_utils.reboot_to_finish_step(task)
deploy_utils.set_async_step_flags(node, reboot=True,
skip_current_step=False)
@ -112,7 +108,7 @@ class IloBIOS(base.BIOSInterface):
node.driver_internal_info = driver_internal_info
node.save()
return deploy_utils.get_async_step_return_state(node)
return return_state
def _execute_post_boot_bios_step(self, task, step):
"""Perform operations required after the reboot.

View File

@ -395,8 +395,7 @@ class IloManagement(base.ManagementInterface):
# Reset iLO ejects virtual media
# Re-create the environment for agent boot, if required
task.driver.boot.clean_up_ramdisk(task)
deploy_opts = deploy_utils.build_agent_options(node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
deploy_utils.prepare_agent_boot(task)
@METRICS.timer('IloManagement.reset_ilo_credential')
@base.deploy_step(priority=0, argsinfo=_RESET_ILO_CREDENTIALS_ARGSINFO)
@ -657,8 +656,7 @@ class IloManagement(base.ManagementInterface):
# Firmware might have ejected the virtual media, if it was used.
# Re-create the environment for agent boot, if required
task.driver.boot.clean_up_ramdisk(task)
deploy_opts = deploy_utils.build_agent_options(node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
deploy_utils.prepare_agent_boot(task)
LOG.info("All Firmware update operations completed successfully "
"for node: %s.", node.uuid)
@ -1066,10 +1064,7 @@ class Ilo5Management(IloManagement):
'ilo_disk_erase_hdd_check')
self._set_driver_internal_value(
task, False, 'skip_current_clean_step')
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
manager_utils.node_power_action(task, states.REBOOT)
return states.CLEANWAIT
return deploy_utils.reboot_to_finish_step(task)
if not driver_internal_info.get(
'ilo_disk_erase_ssd_check') and ('SSD' in disk_types):
@ -1079,10 +1074,7 @@ class Ilo5Management(IloManagement):
'ilo_disk_erase_ssd_check', 'cleaning_reboot')
self._set_driver_internal_value(
task, False, 'skip_current_clean_step')
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
manager_utils.node_power_action(task, states.REBOOT)
return states.CLEANWAIT
return deploy_utils.reboot_to_finish_step(task)
# It will wait until disk erase will complete
if self._wait_for_disk_erase_status(task.node):

View File

@ -99,9 +99,7 @@ class Ilo5RAID(base.RAIDInterface):
task.node.save()
def _prepare_for_read_raid(self, task, raid_step):
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
manager_utils.node_power_action(task, states.REBOOT)
deploy_utils.reboot_to_finish_step(task)
if raid_step == 'create_raid':
self._set_driver_internal_true_value(
task, 'ilo_raid_create_in_progress')

View File

@ -267,8 +267,7 @@ class RedfishBIOS(base.BIOSInterface):
:param task: a TaskManager instance containing the node to act on.
"""
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
deploy_utils.prepare_agent_boot(task)
self._reboot(task)
def post_configuration(self, task, settings):
@ -282,8 +281,7 @@ class RedfishBIOS(base.BIOSInterface):
:param task: a TaskManager instance containing the node to act on.
:param settings: a list of BIOS settings to be updated.
"""
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
deploy_utils.prepare_agent_boot(task)
self._reboot(task)
def get_properties(self):

View File

@ -760,11 +760,7 @@ class RedfishManagement(base.ManagementInterface):
skip_current_step=True,
polling=True)
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
manager_utils.node_power_action(task, states.REBOOT)
return deploy_utils.get_async_step_return_state(task.node)
return deploy_utils.reboot_to_finish_step(task)
def _apply_firmware_update(self, node, update_service, firmware_updates):
"""Applies the next firmware update to the node

View File

@ -853,10 +853,7 @@ class RedfishRAID(base.RAIDInterface):
skip_current_step=True,
polling=True)
if reboot_required:
return_state = deploy_utils.get_async_step_return_state(task.node)
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
manager_utils.node_power_action(task, states.REBOOT)
return_state = deploy_utils.reboot_to_finish_step(task)
return self.post_create_configuration(
task, raid_configs, return_state=return_state)
@ -929,10 +926,7 @@ class RedfishRAID(base.RAIDInterface):
skip_current_step=True,
polling=True)
if reboot_required:
return_state = deploy_utils.get_async_step_return_state(task.node)
deploy_opts = deploy_utils.build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)
manager_utils.node_power_action(task, states.REBOOT)
return_state = deploy_utils.reboot_to_finish_step(task)
return self.post_delete_configuration(
task, raid_configs, return_state=return_state)