diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index 33acd031f0..13cc94b8b4 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -206,8 +206,8 @@ def log_and_raise_deployment_error(task, msg): raise exception.InstanceDeployFailure(msg) -class AgentDeployMixin(object): - """Mixin with deploy methods.""" +class HeartbeatMixin(object): + """Mixin class implementing heartbeat processing.""" def __init__(self): self._client = _get_client() @@ -240,8 +240,104 @@ class AgentDeployMixin(object): """ - @METRICS.timer('AgentDeployMixin._refresh_clean_steps') - def _refresh_clean_steps(self, task): + def refresh_clean_steps(self, task): + """Refresh the node's cached clean steps + + :param task: a TaskManager instance + + """ + + def continue_cleaning(self, task): + """Start the next cleaning step if the previous one is complete. + + :param task: a TaskManager instance + + """ + + @METRICS.timer('HeartbeatMixin.heartbeat') + def heartbeat(self, task, callback_url): + """Process a heartbeat. + + :param task: task to work with. + :param callback_url: agent HTTP API URL. + """ + # TODO(dtantsur): upgrade lock only if we actually take action other + # than updating the last timestamp. + task.upgrade_lock() + + node = task.node + LOG.debug('Heartbeat from node %s', node.uuid) + + driver_internal_info = node.driver_internal_info + driver_internal_info['agent_url'] = callback_url + + # TODO(rloo): 'agent_last_heartbeat' was deprecated since it wasn't + # being used so remove that entry if it exists. + # Hopefully all nodes will have been updated by Pike, so + # we can delete this code then. + driver_internal_info.pop('agent_last_heartbeat', None) + + node.driver_internal_info = driver_internal_info + node.save() + + # Async call backs don't set error state on their own + # TODO(jimrollenhagen) improve error messages here + msg = _('Failed checking if deploy is done.') + try: + if node.maintenance: + # this shouldn't happen often, but skip the rest if it does. + LOG.debug('Heartbeat from node %(node)s in maintenance mode; ' + 'not taking any action.', {'node': node.uuid}) + return + elif (node.provision_state == states.DEPLOYWAIT and + not self.deploy_has_started(task)): + msg = _('Node failed to deploy.') + self.continue_deploy(task) + elif (node.provision_state == states.DEPLOYWAIT and + self.deploy_is_done(task)): + msg = _('Node failed to move to active state.') + self.reboot_to_instance(task) + elif (node.provision_state == states.DEPLOYWAIT and + self.deploy_has_started(task)): + node.touch_provisioning() + elif node.provision_state == states.CLEANWAIT: + node.touch_provisioning() + try: + if not node.clean_step: + LOG.debug('Node %s just booted to start cleaning.', + node.uuid) + msg = _('Node failed to start the first cleaning ' + 'step.') + # First, cache the clean steps + self.refresh_clean_steps(task) + # Then set/verify node clean steps and start cleaning + manager_utils.set_node_cleaning_steps(task) + _notify_conductor_resume_clean(task) + else: + msg = _('Node failed to check cleaning progress.') + self.continue_cleaning(task) + except exception.NoFreeConductorWorker: + # waiting for the next heartbeat, node.last_error and + # logging message is filled already via conductor's hook + pass + + except Exception as e: + err_info = {'node': node.uuid, 'msg': msg, 'e': e} + last_error = _('Asynchronous exception for node %(node)s: ' + '%(msg)s Exception: %(e)s') % err_info + LOG.exception(last_error) + if node.provision_state in (states.CLEANING, states.CLEANWAIT): + manager_utils.cleaning_error_handler(task, last_error) + elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT): + deploy_utils.set_failed_state( + task, last_error, collect_logs=bool(self._client)) + + +class AgentDeployMixin(HeartbeatMixin): + """Mixin with deploy methods.""" + + @METRICS.timer('AgentDeployMixin.refresh_clean_steps') + def refresh_clean_steps(self, task): """Refresh the node's cached clean steps from the booted agent. Gets the node's clean steps from the booted agent and caches them. @@ -353,7 +449,7 @@ class AgentDeployMixin(object): elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH': # Cache the new clean steps (and 'hardware_manager_version') try: - self._refresh_clean_steps(task) + self.refresh_clean_steps(task) except exception.NodeCleaningFailure as e: msg = (_('Could not continue cleaning on node ' '%(node)s: %(err)s.') % @@ -431,83 +527,6 @@ class AgentDeployMixin(object): LOG.error(msg) return manager_utils.cleaning_error_handler(task, msg) - @METRICS.timer('AgentDeployMixin.heartbeat') - def heartbeat(self, task, callback_url): - """Process a heartbeat. - - :param task: task to work with. - :param callback_url: agent HTTP API URL. - """ - # TODO(dtantsur): upgrade lock only if we actually take action other - # than updating the last timestamp. - task.upgrade_lock() - - node = task.node - LOG.debug('Heartbeat from node %s', node.uuid) - - driver_internal_info = node.driver_internal_info - driver_internal_info['agent_url'] = callback_url - - # TODO(rloo): 'agent_last_heartbeat' was deprecated since it wasn't - # being used so remove that entry if it exists. - # Hopefully all nodes will have been updated by Pike, so - # we can delete this code then. - driver_internal_info.pop('agent_last_heartbeat', None) - - node.driver_internal_info = driver_internal_info - node.save() - - # Async call backs don't set error state on their own - # TODO(jimrollenhagen) improve error messages here - msg = _('Failed checking if deploy is done.') - try: - if node.maintenance: - # this shouldn't happen often, but skip the rest if it does. - LOG.debug('Heartbeat from node %(node)s in maintenance mode; ' - 'not taking any action.', {'node': node.uuid}) - return - elif (node.provision_state == states.DEPLOYWAIT and - not self.deploy_has_started(task)): - msg = _('Node failed to deploy.') - self.continue_deploy(task) - elif (node.provision_state == states.DEPLOYWAIT and - self.deploy_is_done(task)): - msg = _('Node failed to move to active state.') - self.reboot_to_instance(task) - elif (node.provision_state == states.DEPLOYWAIT and - self.deploy_has_started(task)): - node.touch_provisioning() - elif node.provision_state == states.CLEANWAIT: - node.touch_provisioning() - try: - if not node.clean_step: - LOG.debug('Node %s just booted to start cleaning.', - node.uuid) - msg = _('Node failed to start the first cleaning ' - 'step.') - # First, cache the clean steps - self._refresh_clean_steps(task) - # Then set/verify node clean steps and start cleaning - manager_utils.set_node_cleaning_steps(task) - _notify_conductor_resume_clean(task) - else: - msg = _('Node failed to check cleaning progress.') - self.continue_cleaning(task) - except exception.NoFreeConductorWorker: - # waiting for the next heartbeat, node.last_error and - # logging message is filled already via conductor's hook - pass - - except Exception as e: - err_info = {'node': node.uuid, 'msg': msg, 'e': e} - last_error = _('Asynchronous exception for node %(node)s: ' - '%(msg)s Exception: %(e)s') % err_info - LOG.exception(last_error) - if node.provision_state in (states.CLEANING, states.CLEANWAIT): - manager_utils.cleaning_error_handler(task, last_error) - elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT): - deploy_utils.set_failed_state(task, last_error) - @METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy') def reboot_and_finish_deploy(self, task): """Helper method to trigger reboot on the node and finish deploy. diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py index 46b8e325d2..183814abd9 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py @@ -58,12 +58,16 @@ class AgentDeployMixinBaseTest(db_base.DbTestCase): self.node = object_utils.create_test_node(self.context, **n) -class TestHeartbeat(AgentDeployMixinBaseTest): +class HeartbeatMixinTest(AgentDeployMixinBaseTest): - @mock.patch.object(agent_base_vendor.AgentDeployMixin, + def setUp(self): + super(HeartbeatMixinTest, self).setUp() + self.deploy = agent_base_vendor.HeartbeatMixin() + + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_has_started', autospec=True) @mock.patch.object(deploy_utils, 'set_failed_state', autospec=True) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, 'deploy_is_done', + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_is_done', autospec=True) @mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True) def test_heartbeat_deploy_done_fails(self, log_mock, done_mock, @@ -75,16 +79,17 @@ class TestHeartbeat(AgentDeployMixinBaseTest): task.node.provision_state = states.DEPLOYWAIT task.node.target_provision_state = states.ACTIVE self.deploy.heartbeat(task, 'http://127.0.0.1:8080') - failed_mock.assert_called_once_with(task, mock.ANY) + failed_mock.assert_called_once_with( + task, mock.ANY, collect_logs=True) log_mock.assert_called_once_with( 'Asynchronous exception for node ' '1be26c0b-03f2-4d2e-ae87-c02d7f33c123: Failed checking if deploy ' 'is done. Exception: LlamaException') - @mock.patch.object(agent_base_vendor.AgentDeployMixin, + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_has_started', autospec=True) @mock.patch.object(deploy_utils, 'set_failed_state', autospec=True) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, 'deploy_is_done', + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_is_done', autospec=True) @mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True) def test_heartbeat_deploy_done_raises_with_event(self, log_mock, done_mock, @@ -114,8 +119,8 @@ class TestHeartbeat(AgentDeployMixinBaseTest): 'is done. Exception: LlamaException') @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, - '_refresh_clean_steps', autospec=True) + @mock.patch.object(agent_base_vendor.HeartbeatMixin, + 'refresh_clean_steps', autospec=True) @mock.patch.object(manager_utils, 'set_node_cleaning_steps', autospec=True) @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', autospec=True) @@ -135,8 +140,8 @@ class TestHeartbeat(AgentDeployMixinBaseTest): @mock.patch.object(manager_utils, 'cleaning_error_handler') @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, - '_refresh_clean_steps', autospec=True) + @mock.patch.object(agent_base_vendor.HeartbeatMixin, + 'refresh_clean_steps', autospec=True) @mock.patch.object(manager_utils, 'set_node_cleaning_steps', autospec=True) @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', autospec=True) @@ -169,7 +174,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): failed_mock.side_effect = None @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_cleaning', autospec=True) def test_heartbeat_continue_cleaning(self, mock_continue, mock_touch): self.node.clean_step = { @@ -188,7 +193,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): mock_continue.assert_called_once_with(mock.ANY, task) @mock.patch.object(manager_utils, 'cleaning_error_handler') - @mock.patch.object(agent_base_vendor.AgentDeployMixin, + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_cleaning', autospec=True) def test_heartbeat_continue_cleaning_fails(self, mock_continue, mock_handler): @@ -211,7 +216,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): mock_handler.assert_called_once_with(task, mock.ANY) @mock.patch.object(manager_utils, 'cleaning_error_handler') - @mock.patch.object(agent_base_vendor.AgentDeployMixin, + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_cleaning', autospec=True) def test_heartbeat_continue_cleaning_no_worker(self, mock_continue, mock_handler): @@ -233,9 +238,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest): mock_continue.assert_called_once_with(mock.ANY, task) self.assertFalse(mock_handler.called) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, 'continue_deploy', + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy', autospec=True) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'reboot_to_instance', autospec=True) @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', autospec=True) @@ -258,9 +263,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest): node.refresh() self.assertNotIn('agent_last_heartbeat', node.driver_internal_info) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, 'continue_deploy', + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy', autospec=True) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'reboot_to_instance', autospec=True) @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', autospec=True) @@ -281,7 +286,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): self.assertEqual(0, cd_mock.call_count) @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) - @mock.patch.object(agent_base_vendor.AgentDeployMixin, + @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_has_started', autospec=True) def test_heartbeat_touch_provisioning(self, mock_deploy_started, mock_touch): @@ -295,6 +300,9 @@ class TestHeartbeat(AgentDeployMixinBaseTest): mock_touch.assert_called_once_with(mock.ANY) + +class AgentDeployMixinTest(AgentDeployMixinBaseTest): + @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) @@ -966,7 +974,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', autospec=True) @mock.patch.object(agent_base_vendor.AgentDeployMixin, - '_refresh_clean_steps', autospec=True) + 'refresh_clean_steps', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def _test_continue_cleaning_clean_version_mismatch( @@ -1005,7 +1013,7 @@ class TestHeartbeat(AgentDeployMixinBaseTest): @mock.patch.object(agent_base_vendor, '_notify_conductor_resume_clean', autospec=True) @mock.patch.object(agent_base_vendor.AgentDeployMixin, - '_refresh_clean_steps', autospec=True) + 'refresh_clean_steps', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def test_continue_cleaning_clean_version_mismatch_fail( @@ -1155,13 +1163,13 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): @mock.patch.object(agent_client.AgentClient, 'get_clean_steps', autospec=True) - def test__refresh_clean_steps(self, client_mock): + def test_refresh_clean_steps(self, client_mock): client_mock.return_value = { 'command_result': self.clean_steps} with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: - self.deploy._refresh_clean_steps(task) + self.deploy.refresh_clean_steps(task) client_mock.assert_called_once_with(mock.ANY, task.node, task.ports) @@ -1182,7 +1190,7 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): @mock.patch.object(agent_client.AgentClient, 'get_clean_steps', autospec=True) - def test__refresh_clean_steps_missing_steps(self, client_mock): + def test_refresh_clean_steps_missing_steps(self, client_mock): del self.clean_steps['clean_steps'] client_mock.return_value = { 'command_result': self.clean_steps} @@ -1191,14 +1199,14 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): self.context, self.node.uuid, shared=False) as task: self.assertRaisesRegex(exception.NodeCleaningFailure, 'invalid result', - self.deploy._refresh_clean_steps, + self.deploy.refresh_clean_steps, task) client_mock.assert_called_once_with(mock.ANY, task.node, task.ports) @mock.patch.object(agent_client.AgentClient, 'get_clean_steps', autospec=True) - def test__refresh_clean_steps_missing_interface(self, client_mock): + def test_refresh_clean_steps_missing_interface(self, client_mock): step = self.clean_steps['clean_steps']['SpecificHardwareManager'][1] del step['interface'] client_mock.return_value = { @@ -1208,7 +1216,7 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): self.context, self.node.uuid, shared=False) as task: self.assertRaisesRegex(exception.NodeCleaningFailure, 'invalid clean step', - self.deploy._refresh_clean_steps, + self.deploy.refresh_clean_steps, task) client_mock.assert_called_once_with(mock.ANY, task.node, task.ports)