diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index 5f2185b654..776b0186d8 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -201,7 +201,7 @@ def validate_http_provisioning_configuration(node): deploy_utils.check_for_missing_params(params, error_msg) -class CustomAgentDeploy(agent_base.AgentDeployMixin, agent_base.AgentBaseMixin, +class CustomAgentDeploy(agent_base.AgentBaseMixin, agent_base.AgentDeployMixin, base.DeployInterface): """A deploy interface that relies on a custom agent to deploy. diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 1263b2df35..95570c61cc 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -639,19 +639,16 @@ class HeartbeatMixin(object): class AgentBaseMixin(object): - """Mixin with base methods not relying on any deploy steps.""" + """Mixin with base methods not relying on any deploy steps. + + Provides full support for in-band and out-of-band cleaning and + the machinery to support both deploy and clean in-band steps. + """ def should_manage_boot(self, task): """Whether agent boot is managed by ironic.""" return True - def refresh_steps(self, task, step_type): - """Refresh the node's cached steps. - - :param task: a TaskManager instance - :param step_type: "clean" or "deploy" - """ - @METRICS.timer('AgentBaseMixin.tear_down') @task_manager.require_exclusive_lock def tear_down(self, task): @@ -701,7 +698,7 @@ class AgentBaseMixin(object): """ pass - @METRICS.timer('AgentDeployMixin.prepare_cleaning') + @METRICS.timer('AgentBaseMixin.prepare_cleaning') def prepare_cleaning(self, task): """Boot into the agent to prepare for cleaning. @@ -719,7 +716,7 @@ class AgentBaseMixin(object): self.refresh_steps(task, 'clean') return result - @METRICS.timer('AgentDeployMixin.tear_down_cleaning') + @METRICS.timer('AgentBaseMixin.tear_down_cleaning') def tear_down_cleaning(self, task): """Clean up the PXE and DHCP files after cleaning. @@ -730,64 +727,7 @@ class AgentBaseMixin(object): deploy_utils.tear_down_inband_cleaning( task, manage_boot=self.should_manage_boot(task)) - -class AgentOobStepsMixin(object): - """Mixin with out-of-band deploy steps.""" - - @METRICS.timer('AgentDeployMixin.switch_to_tenant_network') - @base.deploy_step(priority=30) - @task_manager.require_exclusive_lock - def switch_to_tenant_network(self, task): - """Deploy step to switch the node to the tenant network. - - :param task: a TaskManager object containing the node - """ - try: - with manager_utils.power_state_for_network_configuration(task): - task.driver.network.remove_provisioning_network(task) - task.driver.network.configure_tenant_networks(task) - except Exception as e: - msg = (_('Error changing node %(node)s to tenant networks after ' - 'deploy. %(cls)s: %(error)s') % - {'node': task.node.uuid, 'cls': e.__class__.__name__, - 'error': e}) - # NOTE(mgoddard): Don't collect logs since the node has been - # powered off. - log_and_raise_deployment_error(task, msg, collect_logs=False, - exc=e) - - @METRICS.timer('AgentDeployMixin.boot_instance') - @base.deploy_step(priority=20) - @task_manager.require_exclusive_lock - def boot_instance(self, task): - """Deploy step to boot the final instance. - - :param task: a TaskManager object containing the node - """ - can_power_on = (states.POWER_ON in - task.driver.power.get_supported_power_states(task)) - try: - if can_power_on: - manager_utils.node_power_action(task, states.POWER_ON) - else: - LOG.debug('Not trying to power on node %s that does not ' - 'support powering on, assuming already running', - task.node.uuid) - except Exception as e: - msg = (_('Error booting node %(node)s after deploy. ' - '%(cls)s: %(error)s') % - {'node': task.node.uuid, 'cls': e.__class__.__name__, - 'error': e}) - # NOTE(mgoddard): Don't collect logs since the node has been - # powered off. - log_and_raise_deployment_error(task, msg, collect_logs=False, - exc=e) - - -class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): - """Mixin with deploy methods.""" - - @METRICS.timer('AgentDeployMixin.get_clean_steps') + @METRICS.timer('AgentBaseMixin.get_clean_steps') def get_clean_steps(self, task): """Get the list of clean steps from the agent. @@ -806,26 +746,7 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): task, 'clean', interface='deploy', override_priorities=new_priorities) - @METRICS.timer('AgentDeployMixin.get_deploy_steps') - def get_deploy_steps(self, task): - """Get the list of deploy steps from the agent. - - :param task: a TaskManager object containing the node - :raises InstanceDeployFailure: if the deploy steps are not yet - available (cached), for example, when a node has just been - enrolled and has not been deployed yet. - :returns: A list of deploy step dictionaries - """ - steps = super(AgentDeployMixin, self).get_deploy_steps(task)[:] - ib_steps = get_steps(task, 'deploy', interface='deploy') - # NOTE(dtantsur): we allow in-band steps to be shadowed by out-of-band - # ones, see the docstring of execute_deploy_step for details. - steps += [step for step in ib_steps - # FIXME(dtantsur): nested loops are not too efficient - if not conductor_steps.find_step(steps, step)] - return steps - - @METRICS.timer('AgentDeployMixin.refresh_steps') + @METRICS.timer('AgentBaseMixin.refresh_steps') def refresh_steps(self, task, step_type): """Refresh the node's cached clean/deploy steps from the booted agent. @@ -911,7 +832,7 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): '%(steps)s', {'node': node.uuid, 'steps': steps, 'type': step_type}) - @METRICS.timer('AgentDeployMixin.execute_clean_step') + @METRICS.timer('AgentBaseMixin.execute_clean_step') def execute_clean_step(self, task, step): """Execute a clean step asynchronously on the agent. @@ -923,37 +844,6 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): """ return execute_step(task, step, 'clean') - @METRICS.timer('AgentDeployMixin.execute_deploy_step') - def execute_deploy_step(self, task, step): - """Execute a deploy step. - - We're trying to find a step among both out-of-band and in-band steps. - In case of duplicates, out-of-band steps take priority. This property - allows having an out-of-band deploy step that calls into - a corresponding in-band step after some preparation (e.g. with - additional input). - - :param task: a TaskManager object containing the node - :param step: a deploy step dictionary to execute - :raises: InstanceDeployFailure if the agent does not return a command - status - :returns: states.DEPLOYWAIT to signify the step will be completed async - """ - agent_running = task.node.driver_internal_info.get( - 'agent_cached_deploy_steps') - oob_steps = self.deploy_steps - - if conductor_steps.find_step(oob_steps, step): - return super(AgentDeployMixin, self).execute_deploy_step( - task, step) - elif not agent_running: - raise exception.InstanceDeployFailure( - _('Deploy step %(step)s has not been found. Available ' - 'out-of-band steps: %(oob)s. Agent is not running.') % - {'step': step, 'oob': oob_steps}) - else: - return execute_step(task, step, 'deploy') - def _process_version_mismatch(self, task, step_type): node = task.node # For manual clean, the target provision state is MANAGEABLE, whereas @@ -1015,7 +905,7 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): manager_utils.notify_conductor_resume_operation(task, step_type) - @METRICS.timer('AgentDeployMixin.process_next_step') + @METRICS.timer('AgentBaseMixin.process_next_step') def process_next_step(self, task, step_type, **kwargs): """Start the next clean/deploy step if the previous one is complete. @@ -1113,6 +1003,113 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): 'type': step_type}) return _step_failure_handler(task, msg, step_type) + +class AgentOobStepsMixin(object): + """Mixin with out-of-band deploy steps.""" + + @METRICS.timer('AgentOobStepsMixin.switch_to_tenant_network') + @base.deploy_step(priority=30) + @task_manager.require_exclusive_lock + def switch_to_tenant_network(self, task): + """Deploy step to switch the node to the tenant network. + + :param task: a TaskManager object containing the node + """ + try: + with manager_utils.power_state_for_network_configuration(task): + task.driver.network.remove_provisioning_network(task) + task.driver.network.configure_tenant_networks(task) + except Exception as e: + msg = (_('Error changing node %(node)s to tenant networks after ' + 'deploy. %(cls)s: %(error)s') % + {'node': task.node.uuid, 'cls': e.__class__.__name__, + 'error': e}) + # NOTE(mgoddard): Don't collect logs since the node has been + # powered off. + log_and_raise_deployment_error(task, msg, collect_logs=False, + exc=e) + + @METRICS.timer('AgentOobStepsMixin.boot_instance') + @base.deploy_step(priority=20) + @task_manager.require_exclusive_lock + def boot_instance(self, task): + """Deploy step to boot the final instance. + + :param task: a TaskManager object containing the node + """ + can_power_on = (states.POWER_ON in + task.driver.power.get_supported_power_states(task)) + try: + if can_power_on: + manager_utils.node_power_action(task, states.POWER_ON) + else: + LOG.debug('Not trying to power on node %s that does not ' + 'support powering on, assuming already running', + task.node.uuid) + except Exception as e: + msg = (_('Error booting node %(node)s after deploy. ' + '%(cls)s: %(error)s') % + {'node': task.node.uuid, 'cls': e.__class__.__name__, + 'error': e}) + # NOTE(mgoddard): Don't collect logs since the node has been + # powered off. + log_and_raise_deployment_error(task, msg, collect_logs=False, + exc=e) + + +class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): + """Mixin with deploy methods.""" + + @METRICS.timer('AgentDeployMixin.get_deploy_steps') + def get_deploy_steps(self, task): + """Get the list of deploy steps from the agent. + + :param task: a TaskManager object containing the node + :raises InstanceDeployFailure: if the deploy steps are not yet + available (cached), for example, when a node has just been + enrolled and has not been deployed yet. + :returns: A list of deploy step dictionaries + """ + steps = super(AgentDeployMixin, self).get_deploy_steps(task)[:] + ib_steps = get_steps(task, 'deploy', interface='deploy') + # NOTE(dtantsur): we allow in-band steps to be shadowed by out-of-band + # ones, see the docstring of execute_deploy_step for details. + steps += [step for step in ib_steps + # FIXME(dtantsur): nested loops are not too efficient + if not conductor_steps.find_step(steps, step)] + return steps + + @METRICS.timer('AgentDeployMixin.execute_deploy_step') + def execute_deploy_step(self, task, step): + """Execute a deploy step. + + We're trying to find a step among both out-of-band and in-band steps. + In case of duplicates, out-of-band steps take priority. This property + allows having an out-of-band deploy step that calls into + a corresponding in-band step after some preparation (e.g. with + additional input). + + :param task: a TaskManager object containing the node + :param step: a deploy step dictionary to execute + :raises: InstanceDeployFailure if the agent does not return a command + status + :returns: states.DEPLOYWAIT to signify the step will be completed async + """ + agent_running = task.node.driver_internal_info.get( + 'agent_cached_deploy_steps') + oob_steps = self.deploy_steps + + if conductor_steps.find_step(oob_steps, step): + return super(AgentDeployMixin, self).execute_deploy_step( + task, step) + elif not agent_running: + raise exception.InstanceDeployFailure( + _('Deploy step %(step)s has not been found. Available ' + 'out-of-band steps: %(oob)s. Agent is not running.') % + {'step': step, 'oob': oob_steps}) + else: + return execute_step(task, step, 'deploy') + @METRICS.timer('AgentDeployMixin.tear_down_agent') @base.deploy_step(priority=40) @task_manager.require_exclusive_lock diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py index 23d45feb1e..99e7166fd4 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base.py @@ -48,6 +48,11 @@ DRIVER_INFO = db_utils.get_test_agent_driver_info() DRIVER_INTERNAL_INFO = db_utils.get_test_agent_driver_internal_info() +class FakeAgentDeploy(agent_base.AgentBaseMixin, agent_base.AgentDeployMixin, + fake.FakeDeploy): + pass + + class AgentDeployMixinBaseTest(db_base.DbTestCase): def setUp(self): @@ -66,7 +71,7 @@ class AgentDeployMixinBaseTest(db_base.DbTestCase): 'default_%s_interface' % iface: impl} self.config(**config_kwarg) self.config(enabled_hardware_types=['fake-hardware']) - self.deploy = agent_base.AgentDeployMixin() + self.deploy = FakeAgentDeploy() n = { 'driver': 'fake-hardware', 'instance_info': INSTANCE_INFO, @@ -1712,8 +1717,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): autospec=True) @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) - @mock.patch.object(agent_base.AgentDeployMixin, - 'refresh_steps', autospec=True) + @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', + autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def _test_continue_cleaning_clean_version_mismatch( @@ -1752,8 +1757,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): autospec=True) @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) - @mock.patch.object(agent_base.AgentDeployMixin, - 'refresh_steps', autospec=True) + @mock.patch.object(agent_base.AgentBaseMixin, 'refresh_steps', + autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def test_continue_cleaning_clean_version_mismatch_fail( @@ -1847,6 +1852,7 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): def setUp(self): super(TestRefreshCleanSteps, self).setUp() + self.deploy = agent_base.AgentBaseMixin() self.node.driver_internal_info['agent_url'] = 'http://127.0.0.1:9999' self.ports = [object_utils.create_test_port(self.context, node_id=self.node.id)] @@ -1984,10 +1990,6 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): task.ports) -class FakeAgentDeploy(agent_base.AgentDeployMixin, fake.FakeDeploy): - pass - - class StepMethodsTestCase(db_base.DbTestCase): def setUp(self): diff --git a/ironic/tests/unit/drivers/modules/test_pxe.py b/ironic/tests/unit/drivers/modules/test_pxe.py index ea78e6ddf8..430930780d 100644 --- a/ironic/tests/unit/drivers/modules/test_pxe.py +++ b/ironic/tests/unit/drivers/modules/test_pxe.py @@ -1041,6 +1041,58 @@ class PXERamdiskDeployTestCase(db_base.DbTestCase): self.assertIsNone(task.driver.deploy.deploy(task)) self.assertTrue(mock_warning.called) + @mock.patch.object(agent_base, 'get_steps', autospec=True) + def test_get_clean_steps(self, mock_get_steps): + # Test getting clean steps + mock_steps = [{'priority': 10, 'interface': 'deploy', + 'step': 'erase_devices'}] + mock_get_steps.return_value = mock_steps + with task_manager.acquire(self.context, self.node.uuid) as task: + steps = task.driver.deploy.get_clean_steps(task) + mock_get_steps.assert_called_once_with( + task, 'clean', interface='deploy', + override_priorities={'erase_devices': None, + 'erase_devices_metadata': None}) + self.assertEqual(mock_steps, steps) + + def test_get_deploy_steps(self): + # Only the default deploy step exists in the ramdisk deploy + expected = [{'argsinfo': None, 'interface': 'deploy', 'priority': 100, + 'step': 'deploy'}] + with task_manager.acquire(self.context, self.node.uuid) as task: + steps = task.driver.deploy.get_deploy_steps(task) + self.assertEqual(expected, steps) + + @mock.patch.object(agent_base, 'execute_step', autospec=True) + def test_execute_clean_step(self, mock_execute_step): + step = { + 'priority': 10, + 'interface': 'deploy', + 'step': 'erase_devices', + 'reboot_requested': False + } + with task_manager.acquire(self.context, self.node.uuid) as task: + result = task.driver.deploy.execute_clean_step(task, step) + self.assertIs(result, mock_execute_step.return_value) + mock_execute_step.assert_called_once_with(task, step, 'clean') + + @mock.patch.object(deploy_utils, 'prepare_inband_cleaning', autospec=True) + def test_prepare_cleaning(self, prepare_inband_cleaning_mock): + prepare_inband_cleaning_mock.return_value = states.CLEANWAIT + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertEqual( + states.CLEANWAIT, task.driver.deploy.prepare_cleaning(task)) + prepare_inband_cleaning_mock.assert_called_once_with( + task, manage_boot=True) + + @mock.patch.object(deploy_utils, 'tear_down_inband_cleaning', + autospec=True) + def test_tear_down_cleaning(self, tear_down_cleaning_mock): + with task_manager.acquire(self.context, self.node.uuid) as task: + task.driver.deploy.tear_down_cleaning(task) + tear_down_cleaning_mock.assert_called_once_with( + task, manage_boot=True) + class PXEAnacondaDeployTestCase(db_base.DbTestCase): diff --git a/releasenotes/notes/ramdisk-cleaning-f4e061f978bd6ac4.yaml b/releasenotes/notes/ramdisk-cleaning-f4e061f978bd6ac4.yaml new file mode 100644 index 0000000000..568a100ceb --- /dev/null +++ b/releasenotes/notes/ramdisk-cleaning-f4e061f978bd6ac4.yaml @@ -0,0 +1,17 @@ +--- +upgrade: + - | + In-band cleaning has been fixed for ``ramdisk`` and ``anaconda`` + deploy interfaces. If you rely on actual clean steps not running, + you need to disable cleaning instead for the relevant nodes:: + + baremetal node set --no-automated-clean +fixes: + - | + Fixes in-band cleaning for the ``ramdisk`` and ``anaconda`` deploy + interfaces. Previously no in-band steps were fetched from the ramdisk. +other: + - | + The cleaning code has been moved from ``AgentDeployMixin`` to + ``AgentBaseMixin``. Most of 3rd party deploy interfaces will need to + include both anyway.