Fix idrac-wsman RAID step async error handling
Use the conductor error handlers instead of process_event('fail'); otherwise, when a RAID config job fails, the node gets stuck and only fails later with a timeout instead of failing right away with the step's actual error.

Story: 2008307
Task: 41194
Change-Id: Ieec0173f57367587985d2baad77205bb83e8b69a
parent 101fc29686
commit 23951f4b44
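At a glance, the change swaps a bare state-machine event for the conductor error handlers. Below is a minimal sketch of the two code paths, condensed from the diff that follows; the handler names and signatures are the ones used in the patch, while the function names and simplified arguments here are illustrative only.

# Sketch only: condensed from the _set_failed change below, not a drop-in.

def _set_failed_before(task, config_job):
    # Old behaviour: record the message and fire a bare 'fail' event. The
    # running clean/deploy step is never failed, so the node stays stuck and
    # only errors out once the clean or deploy timeout expires.
    task.node.last_error = config_job.message
    task.process_event('fail')


def _set_failed_after(task, error_msg, log_msg):
    # New behaviour: report the failure through the conductor error handlers,
    # which fail the current step immediately with the real error message.
    if task.node.clean_step:
        manager_utils.cleaning_error_handler(task, error_msg)
    else:
        manager_utils.deploying_error_handler(task, log_msg, error_msg)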
@@ -1491,13 +1491,18 @@ class DracWSManRAID(base.RAIDInterface):
         node.save()
 
     def _set_failed(self, task, config_job):
-        LOG.error("RAID configuration job failed for node %(node)s. "
-                  "Failed config job: %(config_job_id)s. "
-                  "Message: '%(message)s'.",
-                  {'node': task.node.uuid, 'config_job_id': config_job.id,
-                   'message': config_job.message})
-        task.node.last_error = config_job.message
-        task.process_event('fail')
+        error_msg = (_("Failed config job: %(config_job_id)s. "
+                       "Message: '%(message)s'.") %
+                     {'config_job_id': config_job.id,
+                      'message': config_job.message})
+        log_msg = (_("RAID configuration job failed for node %(node)s. "
+                     "%(error)s") %
+                   {'node': task.node.uuid, 'error': error_msg})
+        if task.node.clean_step:
+            LOG.error(log_msg)
+            manager_utils.cleaning_error_handler(task, error_msg)
+        else:
+            manager_utils.deploying_error_handler(task, log_msg, error_msg)
 
     def _resume(self, task):
         raid_common.update_raid_info(
@@ -203,9 +203,11 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
         self._test__check_node_raid_jobs_with_completed_job(
             mock_notify_conductor_resume)
 
+    @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
     @mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
                        autospec=True)
-    def test__check_node_raid_jobs_with_failed_job(self, mock_get_drac_client):
+    def test__check_node_raid_jobs_with_failed_job(
+            self, mock_get_drac_client, mock_cleaning_error_handler):
         # mock node.driver_internal_info and node.clean_step
         driver_internal_info = {'raid_config_job_ids': ['42']}
         self.node.driver_internal_info = driver_internal_info
@@ -232,15 +234,18 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
         self.assertEqual([],
                          self.node.driver_internal_info['raid_config_job_ids'])
         self.assertEqual({}, self.node.raid_config)
-        task.process_event.assert_called_once_with('fail')
+        mock_cleaning_error_handler.assert_called_once_with(task, mock.ANY)
 
+    @mock.patch.object(manager_utils, 'deploying_error_handler', autospec=True)
+    @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
     @mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
                        autospec=True)
     @mock.patch.object(drac_raid.DracRAID, 'get_logical_disks',
                        spec_set=True, autospec=True)
     def _test__check_node_raid_jobs_with_completed_job_already_failed(
             self, mock_notify_conductor_resume,
-            mock_get_logical_disks, mock_get_drac_client):
+            mock_get_logical_disks, mock_get_drac_client,
+            mock_cleaning_error_handler, mock_deploying_error_handler):
         expected_logical_disk = {'size_gb': 558,
                                  'raid_level': '1',
                                  'name': 'disk 0'}
@@ -271,7 +276,12 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
         self.assertNotIn('raid_config_job_failure',
                          self.node.driver_internal_info)
         self.assertNotIn('logical_disks', self.node.raid_config)
-        task.process_event.assert_called_once_with('fail')
+        if self.node.clean_step:
+            mock_cleaning_error_handler.assert_called_once_with(task, mock.ANY)
+        else:
+            mock_deploying_error_handler.assert_called_once_with(task,
+                                                                 mock.ANY,
+                                                                 mock.ANY)
         self.assertFalse(mock_notify_conductor_resume.called)
 
     @mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
@@ -346,13 +356,16 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
         self._test__check_node_raid_jobs_with_multiple_jobs_completed(
             mock_notify_conductor_resume)
 
+    @mock.patch.object(manager_utils, 'deploying_error_handler', autospec=True)
+    @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
     @mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
                        autospec=True)
     @mock.patch.object(drac_raid.DracRAID, 'get_logical_disks',
                        spec_set=True, autospec=True)
     def _test__check_node_raid_jobs_with_multiple_jobs_failed(
             self, mock_notify_conductor_resume,
-            mock_get_logical_disks, mock_get_drac_client):
+            mock_get_logical_disks, mock_get_drac_client,
+            mock_cleaning_error_handler, mock_deploying_error_handler):
         expected_logical_disk = {'size_gb': 558,
                                  'raid_level': '1',
                                  'name': 'disk 0'}
@@ -387,7 +400,12 @@ class DracPeriodicTaskTestCase(db_base.DbTestCase):
         self.assertNotIn('raid_config_job_failure',
                          self.node.driver_internal_info)
         self.assertNotIn('logical_disks', self.node.raid_config)
-        task.process_event.assert_called_once_with('fail')
+        if self.node.clean_step:
+            mock_cleaning_error_handler.assert_called_once_with(task, mock.ANY)
+        else:
+            mock_deploying_error_handler.assert_called_once_with(task,
+                                                                 mock.ANY,
+                                                                 mock.ANY)
         self.assertFalse(mock_notify_conductor_resume.called)
 
     @mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
@@ -0,0 +1,8 @@
+---
+fixes:
+  - |
+    Fixes ``idrac-wsman`` RAID ``create_configuration`` clean step,
+    ``apply_configuration`` deploy step and ``delete_configuration`` clean and
+    deploy step to fail correctly in case of error when checking completed
+    jobs. Before the fix when RAID job failed, then node cleaning or deploying
+    failed with timeout instead of actual error in clean or deploy step.
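As a rough way to see the effect described in this release note (hypothetical snippet, not part of the patch; the cloud name and node UUID are placeholders): with the fix, a failed RAID config job should surface its message in the node's last_error and move the node to a failed state promptly, instead of leaving it to wait for the clean or deploy timeout.

import openstack

# Hypothetical check after a RAID config job failure; 'mycloud' and the node
# UUID below are placeholders.
conn = openstack.connect(cloud='mycloud')
node = conn.baremetal.get_node('NODE_UUID')

# With the fix the step fails right away with the job's error...
print(node.provision_state)   # e.g. 'clean failed' or 'deploy failed'
print(node.last_error)        # contains the failed config job message
# ...rather than the node sitting in a wait state until the timeout fires.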