diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index f75029eaaf..5839013df8 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -606,12 +606,13 @@ class ConductorManager(base_manager.BaseConductorManager): node_id, purpose='node rescue') as task: node = task.node - # Record of any pre-existing agent_url should be removed. - utils.remove_agent_url(node) if node.maintenance: raise exception.NodeInMaintenance(op=_('rescuing'), node=node.uuid) + # Record of any pre-existing agent_url should be removed. + utils.wipe_token_and_url(task) + # driver validation may check rescue_password, so save it on the # node early i_info = node.instance_info @@ -758,6 +759,9 @@ class ConductorManager(base_manager.BaseConductorManager): handle_failure(e, _('Failed to unrescue. Exception: %s'), log_func=LOG.exception) + + utils.wipe_token_and_url(task) + if next_state == states.ACTIVE: task.process_event('done') else: diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index c64dc9f5ab..b30fdee5e1 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -451,16 +451,23 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True, task.process_event('fail', target_state=target_state) +def wipe_token_and_url(task): + """Remove agent URL and token from the task.""" + info = task.node.driver_internal_info + info.pop('agent_secret_token', None) + info.pop('agent_secret_token_pregenerated', None) + # Remove agent_url since it will be re-asserted + # upon the next deployment attempt. + info.pop('agent_url', None) + task.node.driver_internal_info = info + + def wipe_deploy_internal_info(task): """Remove temporary deployment fields from driver_internal_info.""" - info = task.node.driver_internal_info if not fast_track_able(task): - info.pop('agent_secret_token', None) - info.pop('agent_secret_token_pregenerated', None) - # Remove agent_url since it will be re-asserted - # upon the next deployment attempt. - info.pop('agent_url', None) + wipe_token_and_url(task) # Clear any leftover metadata about deployment. + info = task.node.driver_internal_info info['deploy_steps'] = None info.pop('agent_cached_deploy_steps', None) info.pop('deploy_step_index', None) @@ -473,11 +480,9 @@ def wipe_deploy_internal_info(task): def wipe_cleaning_internal_info(task): """Remove temporary cleaning fields from driver_internal_info.""" - info = task.node.driver_internal_info if not fast_track_able(task): - info.pop('agent_url', None) - info.pop('agent_secret_token', None) - info.pop('agent_secret_token_pregenerated', None) + wipe_token_and_url(task) + info = task.node.driver_internal_info info['clean_steps'] = None info.pop('agent_cached_clean_steps', None) info.pop('clean_step_index', None) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index f1b0990bff..74f334068d 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -2767,11 +2767,14 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, @mock.patch('ironic.conductor.task_manager.acquire', autospec=True) def test_do_node_rescue(self, mock_acquire): self._start_service() + dii = {'agent_secret_token': 'token', + 'agent_url': 'http://url', + 'other field': 'value'} task = self._create_task( node_attrs=dict(driver='fake-hardware', provision_state=states.ACTIVE, instance_info={}, - driver_internal_info={'agent_url': 'url'})) + driver_internal_info=dii)) mock_acquire.side_effect = self._get_acquire_side_effect(task) self.service.do_node_rescue(self.context, task.node.uuid, "password") @@ -2782,7 +2785,8 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, err_handler=conductor_utils.spawn_rescue_error_handler) self.assertIn('rescue_password', task.node.instance_info) self.assertIn('hashed_rescue_password', task.node.instance_info) - self.assertNotIn('agent_url', task.node.driver_internal_info) + self.assertEqual({'other field': 'value'}, + task.node.driver_internal_info) def test_do_node_rescue_invalid_state(self): self._start_service() @@ -2985,16 +2989,22 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, autospec=True) def test__do_node_unrescue(self, mock_unrescue): self._start_service() + dii = {'agent_url': 'http://url', + 'agent_secret_token': 'token', + 'other field': 'value'} node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.UNRESCUING, target_provision_state=states.ACTIVE, - instance_info={}) + instance_info={}, + driver_internal_info=dii) with task_manager.TaskManager(self.context, node.uuid) as task: mock_unrescue.return_value = states.ACTIVE self.service._do_node_unrescue(task) node.refresh() self.assertEqual(states.ACTIVE, node.provision_state) self.assertEqual(states.NOSTATE, node.target_provision_state) + self.assertEqual({'other field': 'value'}, + node.driver_internal_info) @mock.patch.object(manager, 'LOG', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakeRescue.unrescue', diff --git a/releasenotes/notes/unrescue-token-ae664a17343e0610.yaml b/releasenotes/notes/unrescue-token-ae664a17343e0610.yaml new file mode 100644 index 0000000000..7ce3273e77 --- /dev/null +++ b/releasenotes/notes/unrescue-token-ae664a17343e0610.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + Removes stale agent token on rescue and unrescue operations. Previously it + would cause subsequent rescue operations to fail.