Fix idrac-wsman RAID step async error handling
Instead of calling process_event('fail'), use the conductor error handlers (cleaning_error_handler or deploying_error_handler). Otherwise, when a RAID configuration job fails, the node gets stuck and only fails later because of a timeout, instead of failing right away due to the step failure. Story: 2008307 Task: 41194 Change-Id: Ieec0173f57367587985d2baad77205bb83e8b69a
This commit is contained in:
parent
101fc29686
commit
23951f4b44
@ -1491,13 +1491,18 @@ class DracWSManRAID(base.RAIDInterface):
|
||||
node.save()
|
||||
|
||||
def _set_failed(self, task, config_job):
|
||||
LOG.error("RAID configuration job failed for node %(node)s. "
|
||||
"Failed config job: %(config_job_id)s. "
|
||||
"Message: '%(message)s'.",
|
||||
{'node': task.node.uuid, 'config_job_id': config_job.id,
|
||||
'message': config_job.message})
|
||||
task.node.last_error = config_job.message
|
||||
task.process_event('fail')
|
||||
error_msg = (_("Failed config job: %(config_job_id)s. "
|
||||
"Message: '%(message)s'.") %
|
||||
{'config_job_id': config_job.id,
|
||||
'message': config_job.message})
|
||||
log_msg = (_("RAID configuration job failed for node %(node)s. "
|
||||
"%(error)s") %
|
||||
{'node': task.node.uuid, 'error': error_msg})
|
||||
if task.node.clean_step:
|
||||
LOG.error(log_msg)
|
||||
manager_utils.cleaning_error_handler(task, error_msg)
|
||||
else:
|
||||
manager_utils.deploying_error_handler(task, log_msg, error_msg)
|
||||
|
||||
def _resume(self, task):
|
||||
raid_common.update_raid_info(
|
||||
|
@ -203,9 +203,11 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
|
||||
self._test__check_node_raid_jobs_with_completed_job(
|
||||
mock_notify_conductor_resume)
|
||||
|
||||
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
|
||||
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
|
||||
autospec=True)
|
||||
def test__check_node_raid_jobs_with_failed_job(self, mock_get_drac_client):
|
||||
def test__check_node_raid_jobs_with_failed_job(
|
||||
self, mock_get_drac_client, mock_cleaning_error_handler):
|
||||
# mock node.driver_internal_info and node.clean_step
|
||||
driver_internal_info = {'raid_config_job_ids': ['42']}
|
||||
self.node.driver_internal_info = driver_internal_info
|
||||
@ -232,15 +234,18 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
|
||||
self.assertEqual([],
|
||||
self.node.driver_internal_info['raid_config_job_ids'])
|
||||
self.assertEqual({}, self.node.raid_config)
|
||||
task.process_event.assert_called_once_with('fail')
|
||||
mock_cleaning_error_handler.assert_called_once_with(task, mock.ANY)
|
||||
|
||||
@mock.patch.object(manager_utils, 'deploying_error_handler', autospec=True)
|
||||
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
|
||||
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
|
||||
autospec=True)
|
||||
@mock.patch.object(drac_raid.DracRAID, 'get_logical_disks',
|
||||
spec_set=True, autospec=True)
|
||||
def _test__check_node_raid_jobs_with_completed_job_already_failed(
|
||||
self, mock_notify_conductor_resume,
|
||||
mock_get_logical_disks, mock_get_drac_client):
|
||||
mock_get_logical_disks, mock_get_drac_client,
|
||||
mock_cleaning_error_handler, mock_deploying_error_handler):
|
||||
expected_logical_disk = {'size_gb': 558,
|
||||
'raid_level': '1',
|
||||
'name': 'disk 0'}
|
||||
@ -271,7 +276,12 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
|
||||
self.assertNotIn('raid_config_job_failure',
|
||||
self.node.driver_internal_info)
|
||||
self.assertNotIn('logical_disks', self.node.raid_config)
|
||||
task.process_event.assert_called_once_with('fail')
|
||||
if self.node.clean_step:
|
||||
mock_cleaning_error_handler.assert_called_once_with(task, mock.ANY)
|
||||
else:
|
||||
mock_deploying_error_handler.assert_called_once_with(task,
|
||||
mock.ANY,
|
||||
mock.ANY)
|
||||
self.assertFalse(mock_notify_conductor_resume.called)
|
||||
|
||||
@mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
|
||||
@ -346,13 +356,16 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
|
||||
self._test__check_node_raid_jobs_with_multiple_jobs_completed(
|
||||
mock_notify_conductor_resume)
|
||||
|
||||
@mock.patch.object(manager_utils, 'deploying_error_handler', autospec=True)
|
||||
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
|
||||
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
|
||||
autospec=True)
|
||||
@mock.patch.object(drac_raid.DracRAID, 'get_logical_disks',
|
||||
spec_set=True, autospec=True)
|
||||
def _test__check_node_raid_jobs_with_multiple_jobs_failed(
|
||||
self, mock_notify_conductor_resume,
|
||||
mock_get_logical_disks, mock_get_drac_client):
|
||||
mock_get_logical_disks, mock_get_drac_client,
|
||||
mock_cleaning_error_handler, mock_deploying_error_handler):
|
||||
expected_logical_disk = {'size_gb': 558,
|
||||
'raid_level': '1',
|
||||
'name': 'disk 0'}
|
||||
@ -387,7 +400,12 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
|
||||
self.assertNotIn('raid_config_job_failure',
|
||||
self.node.driver_internal_info)
|
||||
self.assertNotIn('logical_disks', self.node.raid_config)
|
||||
task.process_event.assert_called_once_with('fail')
|
||||
if self.node.clean_step:
|
||||
mock_cleaning_error_handler.assert_called_once_with(task, mock.ANY)
|
||||
else:
|
||||
mock_deploying_error_handler.assert_called_once_with(task,
|
||||
mock.ANY,
|
||||
mock.ANY)
|
||||
self.assertFalse(mock_notify_conductor_resume.called)
|
||||
|
||||
@mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
|
||||
|
@ -0,0 +1,8 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes ``idrac-wsman`` RAID ``create_configuration`` clean step,
|
||||
``apply_configuration`` deploy step and ``delete_configuration`` clean and
|
||||
deploy step to fail correctly in case of error when checking completed
|
||||
jobs. Before the fix, when a RAID job failed, node cleaning or deploying
|
||||
failed with a timeout instead of the actual clean or deploy step error.
|
Loading…
Reference in New Issue
Block a user