From 8e472a07b533d821b0f0493b16796b9192f25af9 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Wed, 18 Mar 2020 09:36:30 +0100 Subject: [PATCH] Raise human-friendly messages on attempt to use pre-deploy steps drivers This is a follow up to commit 529c3ff06693d0a1aa13081acff92010fe855c50. It adds a proper exception on missing deploy steps and provides a proper deprecation for the compatibility code when trying to continue deploy for a node in the DEPLOYING state. Change-Id: I6dc176c12a913cb481164a90881bb1c3107b36eb Story: #2006963 --- ironic/conductor/deployments.py | 10 +++++++++- ironic/conductor/manager.py | 11 +++++++++-- ironic/drivers/modules/agent_base.py | 4 ++++ ironic/drivers/modules/ansible/deploy.py | 3 +++ .../tests/unit/conductor/test_deployments.py | 19 +++++++++++++++++++ .../tests/unit/drivers/modules/test_agent.py | 10 +++++----- .../unit/drivers/modules/test_agent_base.py | 14 +++++++------- ...ue-node-deploy-state-63d9dc9cdcf8e37a.yaml | 9 +++++++++ 8 files changed, 65 insertions(+), 15 deletions(-) create mode 100644 releasenotes/notes/continue-node-deploy-state-63d9dc9cdcf8e37a.yaml diff --git a/ironic/conductor/deployments.py b/ironic/conductor/deployments.py index c04b578d62..7bf621f52f 100644 --- a/ironic/conductor/deployments.py +++ b/ironic/conductor/deployments.py @@ -185,7 +185,7 @@ def do_node_deploy(task, conductor_id=None, configdrive=None): traceback=True, clean_up=False) try: - # This gets the deploy steps (if any) and puts them in the node's + # This gets the deploy steps and puts them in the node's # driver_internal_info['deploy_steps']. conductor_steps.set_node_deployment_steps(task) except exception.InstanceDeployFailure as e: @@ -196,6 +196,14 @@ def do_node_deploy(task, conductor_id=None, configdrive=None): '%(node)s. Error: %(err)s' % {'node': node.uuid, 'err': e}, _("Cannot get deploy steps; failed to deploy: %s") % e) + if not node.driver_internal_info.get('deploy_steps'): + msg = _('Error while getting deploy steps: no steps returned for ' + 'node %s') % node.uuid + utils.deploying_error_handler( + task, msg, + _("No deploy steps returned by the driver")) + raise exception.InstanceDeployFailure(msg) + do_next_deploy_step(task, 0, conductor_id) diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 6dd31b5345..bfd2f84506 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -886,8 +886,15 @@ class ConductorManager(base_manager.BaseConductorManager): task, skip_current_step=skip_current_step) # TODO(rloo): When deprecation period is over and node is in - # states.DEPLOYWAIT only, delete the 'if' and always 'resume'. - if node.provision_state != states.DEPLOYING: + # states.DEPLOYWAIT only, delete the check and always 'resume'. + if node.provision_state == states.DEPLOYING: + LOG.warning('Node %(node)s was found in the state %(state)s ' + 'in the continue_node_deploy RPC call. This is ' + 'deprecated, the driver must be updated to leave ' + 'nodes in %(new)s state instead.', + {'node': node.uuid, 'state': states.DEPLOYING, + 'new': states.DEPLOYWAIT}) + else: task.process_event('resume') task.set_spawn_error_hook(utils.spawn_deploying_error_handler, diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 5c4f94f046..c5f4871d42 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -846,6 +846,10 @@ class AgentDeployMixin(HeartbeatMixin): # powered off. log_and_raise_deployment_error(task, msg, collect_logs=False, exc=e) + + # TODO(dtantsur): remove these two calls when this function becomes a + # real deploy step. + task.process_event('wait') manager_utils.notify_conductor_resume_deploy(task) @METRICS.timer('AgentDeployMixin.prepare_instance_to_boot') diff --git a/ironic/drivers/modules/ansible/deploy.py b/ironic/drivers/modules/ansible/deploy.py index 4a1a478d8f..cbecdc9760 100644 --- a/ironic/drivers/modules/ansible/deploy.py +++ b/ironic/drivers/modules/ansible/deploy.py @@ -608,6 +608,9 @@ class AnsibleDeploy(agent_base.HeartbeatMixin, base.DeployInterface): self.reboot_and_finish_deploy(task) task.driver.boot.clean_up_ramdisk(task) + # TODO(dtantsur): remove these two calls when this function becomes a + # real deploy step. + task.process_event('wait') manager_utils.notify_conductor_resume_deploy(task) @METRICS.timer('AnsibleDeploy.reboot_and_finish_deploy') diff --git a/ironic/tests/unit/conductor/test_deployments.py b/ironic/tests/unit/conductor/test_deployments.py index b81b529a3c..f41797c1e6 100644 --- a/ironic/tests/unit/conductor/test_deployments.py +++ b/ironic/tests/unit/conductor/test_deployments.py @@ -341,6 +341,25 @@ class DoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.assertEqual(fake_deploy_steps, task.node.driver_internal_info['deploy_steps']) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy', autospec=True) + def test__do_node_deploy_driver_raises_error_old(self, mock_deploy): + # Mocking FakeDeploy.deploy before starting the service, causes + # it not to be a deploy_step. + self._start_service() + node = obj_utils.create_test_node(self.context, driver='fake-hardware', + provision_state=states.DEPLOYING, + target_provision_state=states.ACTIVE) + task = task_manager.TaskManager(self.context, node.uuid) + + self.assertRaises(exception.InstanceDeployFailure, + deployments.do_node_deploy, task, + self.service.conductor.id) + node.refresh() + self.assertEqual(states.DEPLOYFAIL, node.provision_state) + self.assertEqual(states.ACTIVE, node.target_provision_state) + self.assertIsNotNone(node.last_error) + self.assertFalse(mock_deploy.called) + @mgr_utils.mock_record_keepalive class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index 81790a28ae..3918f9e376 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -1280,7 +1280,7 @@ class TestAgentDeploy(db_base.DbTestCase): get_power_state_mock.assert_called_once_with(task) node_power_action_mock.assert_called_once_with( task, states.POWER_ON) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) self.assertTrue(remove_symlink_mock.called) resume_mock.assert_called_once_with(task) @@ -1330,7 +1330,7 @@ class TestAgentDeploy(db_base.DbTestCase): get_power_state_mock.assert_called_once_with(task) node_power_action_mock.assert_called_once_with( task, states.POWER_ON) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) resume_mock.assert_called_once_with(task) @@ -1391,7 +1391,7 @@ class TestAgentDeploy(db_base.DbTestCase): get_power_state_mock.assert_called_once_with(task) node_power_action_mock.assert_called_once_with( task, states.POWER_ON) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) resume_mock.assert_called_once_with(task) @@ -1455,7 +1455,7 @@ class TestAgentDeploy(db_base.DbTestCase): get_power_state_mock.assert_called_once_with(task) node_power_action_mock.assert_called_once_with( task, states.POWER_ON) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) @mock.patch.object(agent.LOG, 'warning', spec_set=True, autospec=True) @@ -1554,7 +1554,7 @@ class TestAgentDeploy(db_base.DbTestCase): get_power_state_mock.assert_called_once_with(task) node_power_action_mock.assert_called_once_with( task, states.POWER_ON) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) resume_mock.assert_called_once_with(task) diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py index de8c671521..5eb64e6e0a 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base.py @@ -789,7 +789,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertEqual(2, get_power_state_mock.call_count) node_power_action_mock.assert_called_once_with( task, states.POWER_ON) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) collect_mock.assert_called_once_with(task.node) resume_mock.assert_called_once_with(task) @@ -830,7 +830,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): remove_provisioning_net_mock.assert_called_once_with(mock.ANY, task) configure_tenant_net_mock.assert_called_once_with(mock.ANY, task) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) self.assertFalse(mock_collect.called) resume_mock.assert_called_once_with(task) @@ -862,7 +862,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): remove_provisioning_net_mock.assert_called_once_with(mock.ANY, task) configure_tenant_net_mock.assert_called_once_with(mock.ANY, task) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) self.assertFalse(mock_collect.called) @@ -900,7 +900,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): remove_provisioning_net_mock.assert_called_once_with(mock.ANY, task) configure_tenant_net_mock.assert_called_once_with(mock.ANY, task) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) self.assertFalse(mock_collect.called) @@ -1036,7 +1036,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): mock.call(task, states.POWER_OFF), mock.call(task, states.POWER_ON), ]) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) self.assertFalse(mock_collect.called) resume_mock.assert_called_once_with(task) @@ -1069,7 +1069,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): mock.call(task, states.POWER_OFF), mock.call(task, states.POWER_ON), ]) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) log_error = ('The version of the IPA ramdisk used in the ' 'deployment do not support the command "sync"') @@ -1947,7 +1947,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertEqual(2, get_power_state_mock.call_count) node_power_action_mock.assert_called_once_with( task, states.POWER_ON) - self.assertEqual(states.DEPLOYING, task.node.provision_state) + self.assertEqual(states.DEPLOYWAIT, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) collect_mock.assert_called_once_with(task.node) resume_mock.assert_called_once_with(task) diff --git a/releasenotes/notes/continue-node-deploy-state-63d9dc9cdcf8e37a.yaml b/releasenotes/notes/continue-node-deploy-state-63d9dc9cdcf8e37a.yaml new file mode 100644 index 0000000000..2bfacf358f --- /dev/null +++ b/releasenotes/notes/continue-node-deploy-state-63d9dc9cdcf8e37a.yaml @@ -0,0 +1,9 @@ +--- +deprecations: + - | + Some deploy interfaces use the ``continue_node_deploy`` RPC call to notify + the conductor when they're ready to leave the ``deploy`` core deploy step. + Currently ironic allows a node to be either in ``wait call-back`` or + ``deploying`` state when entering this call. This is deprecated, and in + the next release a node will have to be in the ``wait call-back`` + (``DEPLOYWAIT``) state for this call.