Add ability to power off nodes in clean failed
We have seen duplicate IP issues when leaving clean failed nodes powered on. This patch allows operators to power down nodes that enter the clean failed state. Change-Id: Iecb402227485fe0ba787a262121c9d6a048b0e13
This commit is contained in:
parent
8ef9db1570
commit
510a612eed
@ -499,6 +499,11 @@ def cleaning_error_handler(task, logmsg, errmsg=None, traceback=False,
|
|||||||
# NOTE(dtantsur): avoid overwriting existing maintenance_reason
|
# NOTE(dtantsur): avoid overwriting existing maintenance_reason
|
||||||
if not node.maintenance_reason and set_maintenance:
|
if not node.maintenance_reason and set_maintenance:
|
||||||
node.maintenance_reason = errmsg
|
node.maintenance_reason = errmsg
|
||||||
|
|
||||||
|
if CONF.conductor.poweroff_in_cleanfail:
|
||||||
|
# NOTE(NobodyCam): Power off node in clean fail
|
||||||
|
node_power_action(task, states.POWER_OFF)
|
||||||
|
|
||||||
node.save()
|
node.save()
|
||||||
|
|
||||||
if set_fail_state and node.provision_state != states.CLEANFAIL:
|
if set_fail_state and node.provision_state != states.CLEANFAIL:
|
||||||
|
@ -349,6 +349,14 @@ opts = [
|
|||||||
'is a global setting applying to all requests this '
|
'is a global setting applying to all requests this '
|
||||||
'conductor receives, regardless of access rights. '
|
'conductor receives, regardless of access rights. '
|
||||||
'The concurrent clean limit cannot be disabled.')),
|
'The concurrent clean limit cannot be disabled.')),
|
||||||
|
|
||||||
|
cfg.BoolOpt('poweroff_in_cleanfail',
|
||||||
|
default=False,
|
||||||
|
help=_('If True power off nodes in the ``clean failed`` '
|
||||||
|
'state. Default False. Option may be unsafe '
|
||||||
|
'when using Cleaning to perform '
|
||||||
|
'hardware-transformative actions such as '
|
||||||
|
'firmware upgrade.')),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -436,6 +436,36 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
|
|||||||
self.assertFalse(node.maintenance)
|
self.assertFalse(node.maintenance)
|
||||||
self.assertIsNone(node.fault)
|
self.assertIsNone(node.fault)
|
||||||
|
|
||||||
|
@mock.patch('ironic.drivers.modules.fake.FakePower.set_power_state',
|
||||||
|
autospec=True)
|
||||||
|
@mock.patch.object(n_flat.FlatNetwork, 'validate', autospec=True)
|
||||||
|
@mock.patch.object(conductor_steps, 'set_node_cleaning_steps',
|
||||||
|
autospec=True)
|
||||||
|
def test_do_node_clean_steps_fail_poweroff(self, mock_steps, mock_validate,
|
||||||
|
mock_power, clean_steps=None,
|
||||||
|
invalid_exc=True):
|
||||||
|
if invalid_exc:
|
||||||
|
mock_steps.side_effect = exception.InvalidParameterValue('invalid')
|
||||||
|
else:
|
||||||
|
mock_steps.side_effect = exception.NodeCleaningFailure('failure')
|
||||||
|
tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE
|
||||||
|
self.config(poweroff_in_cleanfail=True, group='conductor')
|
||||||
|
node = obj_utils.create_test_node(
|
||||||
|
self.context, driver='fake-hardware',
|
||||||
|
uuid=uuidutils.generate_uuid(),
|
||||||
|
provision_state=states.CLEANING,
|
||||||
|
power_state=states.POWER_ON,
|
||||||
|
target_provision_state=tgt_prov_state)
|
||||||
|
with task_manager.acquire(
|
||||||
|
self.context, node.uuid, shared=False) as task:
|
||||||
|
cleaning.do_node_clean(task, clean_steps=clean_steps)
|
||||||
|
mock_validate.assert_called_once_with(mock.ANY, task)
|
||||||
|
node.refresh()
|
||||||
|
self.assertEqual(states.CLEANFAIL, node.provision_state)
|
||||||
|
self.assertEqual(tgt_prov_state, node.target_provision_state)
|
||||||
|
mock_steps.assert_called_once_with(mock.ANY, disable_ramdisk=False)
|
||||||
|
self.assertTrue(mock_power.called)
|
||||||
|
|
||||||
def test__do_node_clean_automated_steps_fail(self):
|
def test__do_node_clean_automated_steps_fail(self):
|
||||||
for invalid in (True, False):
|
for invalid in (True, False):
|
||||||
self.__do_node_clean_steps_fail(invalid_exc=invalid)
|
self.__do_node_clean_steps_fail(invalid_exc=invalid)
|
||||||
|
@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
Add new conductor conf option: [conductor]poweroff_in_cleanfail
|
||||||
|
(default: False). When True, nodes entering the clean failed state
|
||||||
|
will be powered off. This option may be unsafe when using
|
||||||
|
Cleaning to perform hardware-transformative actions such as
|
||||||
|
firmware upgrade.
|
Loading…
Reference in New Issue
Block a user