Fix double transition to INSPECTFAIL on aborting in-band inspection

Both the driver and the conductor code try to transition the node to
INSPECTFAIL, with the 2nd attempt failing. Rework the driver code to
only do implementation-specific clean-up. Also safeguard the conductor
code against this case.

Change-Id: Ie1c64b4807ecf29fa0da54501798d363675977c8
This commit is contained in:
Dmitry Tantsur 2024-09-24 10:27:18 +02:00
parent 75927a8673
commit 8a6b5eb8c3
No known key found for this signature in database
GPG Key ID: 315B2AF9FD216C60
5 changed files with 84 additions and 16 deletions

View File

@ -104,7 +104,10 @@ def abort_inspection(task):
error=True, error=True,
user=task.context.user_id) user=task.context.user_id)
utils.wipe_token_and_url(task) utils.wipe_token_and_url(task)
if task.node.provision_state != states.INSPECTFAIL:
task.process_event('abort') task.process_event('abort')
else:
task.node.save()
LOG.info('Successfully aborted inspection of node %(node)s', LOG.info('Successfully aborted inspection of node %(node)s',
{'node': node.uuid}) {'node': node.uuid})

View File

@ -76,10 +76,10 @@ class AgentInspect(common.Common):
:param task: a task from TaskManager. :param task: a task from TaskManager.
""" """
error = _("By request, the inspection operation has been aborted") if inspect_utils.clear_lookup_addresses(task.node):
inspect_utils.clear_lookup_addresses(task.node) task.node.save()
common.inspection_error_handler(task, error, raise_exc=False,
clean_up=True) common.clean_up(task, finish=False)
def continue_inspection(self, task, inventory, plugin_data): def continue_inspection(self, task, inventory, plugin_data):
"""Continue in-band hardware inspection. """Continue in-band hardware inspection.

View File

@ -8500,14 +8500,14 @@ class NodeTraitsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
@mgr_utils.mock_record_keepalive @mgr_utils.mock_record_keepalive
@mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True)
@mock.patch('ironic.conductor.task_manager.acquire', autospec=True)
class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn, class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn,
mgr_utils.ServiceSetUpMixin, mgr_utils.ServiceSetUpMixin,
db_base.DbTestCase): db_base.DbTestCase):
@mock.patch.object(inspection, 'LOG', autospec=True) @mock.patch.object(inspection, 'LOG', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True) def test_do_inspect_abort_interface_not_support(self, mock_log,
@mock.patch('ironic.conductor.task_manager.acquire', autospec=True) mock_acquire, mock_abort):
def test_do_inspect_abort_interface_not_support(self, mock_acquire,
mock_abort, mock_log):
node = obj_utils.create_test_node(self.context, node = obj_utils.create_test_node(self.context,
driver='fake-hardware', driver='fake-hardware',
provision_state=states.INSPECTWAIT) provision_state=states.INSPECTWAIT)
@ -8525,10 +8525,9 @@ class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn,
self.assertTrue(mock_log.error.called) self.assertTrue(mock_log.error.called)
@mock.patch.object(inspection, 'LOG', autospec=True) @mock.patch.object(inspection, 'LOG', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True) def test_do_inspect_abort_interface_return_failed(self, mock_log,
@mock.patch('ironic.conductor.task_manager.acquire', autospec=True) mock_acquire,
def test_do_inspect_abort_interface_return_failed(self, mock_acquire, mock_abort):
mock_abort, mock_log):
mock_abort.side_effect = exception.IronicException('Oops') mock_abort.side_effect = exception.IronicException('Oops')
self._start_service() self._start_service()
node = obj_utils.create_test_node(self.context, node = obj_utils.create_test_node(self.context,
@ -8544,8 +8543,6 @@ class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn,
self.assertTrue(mock_log.exception.called) self.assertTrue(mock_log.exception.called)
self.assertIn('Failed to abort inspection', node.last_error) self.assertIn('Failed to abort inspection', node.last_error)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.abort', autospec=True)
@mock.patch('ironic.conductor.task_manager.acquire', autospec=True)
def test_do_inspect_abort_succeeded(self, mock_acquire, mock_abort): def test_do_inspect_abort_succeeded(self, mock_acquire, mock_abort):
self._start_service() self._start_service()
node = obj_utils.create_test_node( node = obj_utils.create_test_node(
@ -8559,7 +8556,30 @@ class DoNodeInspectAbortTestCase(mgr_utils.CommonMixIn,
self.service.do_provisioning_action(self.context, task.node.uuid, self.service.do_provisioning_action(self.context, task.node.uuid,
"abort") "abort")
node.refresh() node.refresh()
self.assertEqual('inspect failed', node.provision_state) self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertIn('Inspection was aborted', node.last_error)
self.assertNotIn('agent_url', node.driver_internal_info)
self.assertNotIn('agent_secret_token', node.driver_internal_info)
def test_do_inspect_abort_state_set_by_driver(self, mock_acquire,
mock_abort):
def abort(driver, task):
task.process_event('abort')
mock_abort.side_effect = abort
self._start_service()
node = obj_utils.create_test_node(
self.context,
driver='fake-hardware',
provision_state=states.INSPECTWAIT,
driver_internal_info={'agent_url': 'url',
'agent_secret_token': 'token'})
task = task_manager.TaskManager(self.context, node.uuid)
mock_acquire.side_effect = self._get_acquire_side_effect(task)
self.service.do_provisioning_action(self.context, task.node.uuid,
"abort")
node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertIn('Inspection was aborted', node.last_error) self.assertIn('Inspection was aborted', node.last_error)
self.assertNotIn('agent_url', node.driver_internal_info) self.assertNotIn('agent_url', node.driver_internal_info)
self.assertNotIn('agent_secret_token', node.driver_internal_info) self.assertNotIn('agent_secret_token', node.driver_internal_info)

View File

@ -136,3 +136,43 @@ class ContinueInspectionTestCase(db_base.DbTestCase):
self.assertEqual(states.INSPECTING, task.node.provision_state) self.assertEqual(states.INSPECTING, task.node.provision_state)
self.assertIsNone(result) self.assertIsNone(result)
@mock.patch.object(common, 'tear_down_managed_boot', autospec=True)
class AbortInspectionTestCase(db_base.DbTestCase):
def setUp(self):
super().setUp()
CONF.set_override('enabled_inspect_interfaces',
['agent', 'no-inspect'])
self.node = obj_utils.create_test_node(
self.context,
driver_internal_info={
'lookup_bmc_addresses': [42],
},
inspect_interface='agent',
provision_state=states.INSPECTWAIT)
self.iface = inspector.AgentInspect()
def test_success(self, mock_tear_down):
mock_tear_down.return_value = None
with task_manager.acquire(self.context, self.node.id) as task:
self.iface.abort(task)
mock_tear_down.assert_called_once_with(task)
self.node.refresh()
# Keep the state - it will be changed on the conductor level
self.assertEqual(states.INSPECTWAIT, self.node.provision_state)
self.assertNotIn('lookup_bmc_addresses',
self.node.driver_internal_info)
def test_cleanup_failed(self, mock_tear_down):
mock_tear_down.return_value = ['boom']
with task_manager.acquire(self.context, self.node.id) as task:
self.iface.abort(task)
mock_tear_down.assert_called_once_with(task)
self.node.refresh()
self.assertEqual(states.INSPECTFAIL, self.node.provision_state)
self.assertIn('boom', self.node.last_error)
self.assertNotIn('lookup_bmc_addresses',
self.node.driver_internal_info)

View File

@ -0,0 +1,5 @@
---
fixes:
- |
Fixes aborting in-band inspection. Previously, it would fail with
``Can not transition from state 'inspect failed' on event 'abort'``.