Fix idrac-wsman set_power_state to wait on HW
set_power_state has returned to the caller immediately without confirming the system has reached the requested state. This fixes that by synchronously waiting until the target state has been read before returning. That bug can cause instance workload deployments to fail on Dell EMC PowerEdge server models on which IPA ramdisk soft power off fails and ironic employs its OOB fallback strategy. After an otherwise successful deployment, the node is active, but is powered off. No error is reported in last_error. If the subsequent instance workflow expects the system to be powered on into the operating system, it fails. Story: 2009204 Task: 43261 Change-Id: I3112a22149c07e5508f26c79f33d09aeb905c308
This commit is contained in:
parent
743f206d07
commit
2a0fd1d13f
@ -24,6 +24,7 @@ from oslo_utils import importutils
|
||||
from ironic.common import exception
|
||||
from ironic.common import states
|
||||
from ironic.conductor import task_manager
|
||||
from ironic.conductor import utils as cond_utils
|
||||
from ironic.drivers import base
|
||||
from ironic.drivers.modules.drac import common as drac_common
|
||||
from ironic.drivers.modules.drac import management as drac_management
|
||||
@ -87,15 +88,17 @@ def _commit_boot_list_change(node):
|
||||
node.save()
|
||||
|
||||
|
||||
def _set_power_state(node, power_state):
|
||||
def _set_power_state(task, power_state, timeout=None):
|
||||
"""Turns the server power on/off or do a reboot.
|
||||
|
||||
:param node: an ironic node object.
|
||||
:param task: a TaskManager instance containing the node to act on.
|
||||
:param power_state: a power state from :mod:`ironic.common.states`.
|
||||
:param timeout: Time to wait for the node to reach the requested state.
|
||||
When requested state is reboot, not used as not waiting then.
|
||||
:raises: InvalidParameterValue if required DRAC credentials are missing.
|
||||
:raises: DracOperationError on an error from python-dracclient
|
||||
"""
|
||||
|
||||
node = task.node
|
||||
# NOTE(ifarkas): DRAC interface doesn't allow changing the boot device
|
||||
# multiple times in a row without a reboot. This is
|
||||
# because a change need to be committed via a
|
||||
@ -131,6 +134,20 @@ def _set_power_state(node, power_state):
|
||||
|
||||
try:
|
||||
client.set_power_state(target_power_state)
|
||||
if calc_power_state == states.REBOOT:
|
||||
# TODO(rloo): Support timeouts!
|
||||
if timeout is not None:
|
||||
LOG.warning("The 'idrac-wsman' Power Interface does not "
|
||||
"support 'timeout' parameter when setting "
|
||||
"power state to reboot. Ignoring "
|
||||
"timeout=%(timeout)s",
|
||||
{'timeout': timeout})
|
||||
else:
|
||||
# Skipped for reboot as can't match reboot with on/off.
|
||||
# Reboot so far has been part of workflow that is not followed
|
||||
# by another power state change that could break the flow.
|
||||
cond_utils.node_wait_for_power_state(
|
||||
task, calc_power_state, timeout)
|
||||
break
|
||||
except drac_exceptions.BaseClientException as exc:
|
||||
if (power_state == states.REBOOT
|
||||
@ -214,20 +231,13 @@ class DracWSManPower(base.PowerInterface):
|
||||
|
||||
:param task: a TaskManager instance containing the node to act on.
|
||||
:param power_state: a power state from :mod:`ironic.common.states`.
|
||||
:param timeout: timeout (in seconds). Unsupported by this interface.
|
||||
:param timeout: Time to wait for the node to reach the requested state.
|
||||
When requested state is reboot, not used as not waiting then.
|
||||
:raises: InvalidParameterValue if required DRAC credentials are
|
||||
missing.
|
||||
:raises: DracOperationError on an error from python-dracclient.
|
||||
"""
|
||||
# TODO(rloo): Support timeouts!
|
||||
if timeout is not None:
|
||||
LOG.warning(
|
||||
"The 'idrac' Power Interface's 'set_power_state' method "
|
||||
"doesn't support the 'timeout' parameter. Ignoring "
|
||||
"timeout=%(timeout)s",
|
||||
{'timeout': timeout})
|
||||
|
||||
_set_power_state(task.node, power_state)
|
||||
_set_power_state(task, power_state, timeout)
|
||||
|
||||
@METRICS.timer('DracPower.reboot')
|
||||
@task_manager.require_exclusive_lock
|
||||
@ -240,14 +250,7 @@ class DracWSManPower(base.PowerInterface):
|
||||
missing.
|
||||
:raises: DracOperationError on an error from python-dracclient.
|
||||
"""
|
||||
# TODO(rloo): Support timeouts!
|
||||
if timeout is not None:
|
||||
LOG.warning("The 'idrac' Power Interface's 'reboot' method "
|
||||
"doesn't support the 'timeout' parameter. Ignoring "
|
||||
"timeout=%(timeout)s",
|
||||
{'timeout': timeout})
|
||||
|
||||
_set_power_state(task.node, states.REBOOT)
|
||||
_set_power_state(task, states.REBOOT, timeout)
|
||||
|
||||
|
||||
class DracPower(DracWSManPower):
|
||||
|
@ -72,7 +72,8 @@ class DracPowerTestCase(test_utils.BaseDracTest):
|
||||
@mock.patch.object(drac_power.LOG, 'warning', autospec=True)
|
||||
def test_set_power_state(self, mock_log, mock_get_drac_client):
|
||||
mock_client = mock_get_drac_client.return_value
|
||||
|
||||
mock_client.get_power_state.side_effect = [drac_constants.POWER_ON,
|
||||
drac_constants.POWER_OFF]
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=False) as task:
|
||||
task.driver.power.set_power_state(task, states.POWER_OFF)
|
||||
@ -98,6 +99,8 @@ class DracPowerTestCase(test_utils.BaseDracTest):
|
||||
@mock.patch.object(drac_power.LOG, 'warning', autospec=True)
|
||||
def test_set_power_state_timeout(self, mock_log, mock_get_drac_client):
|
||||
mock_client = mock_get_drac_client.return_value
|
||||
mock_client.get_power_state.side_effect = [drac_constants.POWER_ON,
|
||||
drac_constants.POWER_OFF]
|
||||
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=False) as task:
|
||||
@ -106,7 +109,7 @@ class DracPowerTestCase(test_utils.BaseDracTest):
|
||||
|
||||
drac_power_state = drac_power.REVERSE_POWER_STATES[states.POWER_OFF]
|
||||
mock_client.set_power_state.assert_called_once_with(drac_power_state)
|
||||
self.assertTrue(mock_log.called)
|
||||
self.assertFalse(mock_log.called)
|
||||
|
||||
@mock.patch.object(drac_power.LOG, 'warning', autospec=True)
|
||||
def test_reboot_while_powered_on(self, mock_log, mock_get_drac_client):
|
||||
@ -137,7 +140,8 @@ class DracPowerTestCase(test_utils.BaseDracTest):
|
||||
|
||||
def test_reboot_while_powered_off(self, mock_get_drac_client):
|
||||
mock_client = mock_get_drac_client.return_value
|
||||
mock_client.get_power_state.return_value = drac_constants.POWER_OFF
|
||||
mock_client.get_power_state.side_effect = [drac_constants.POWER_OFF,
|
||||
drac_constants.POWER_ON]
|
||||
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=False) as task:
|
||||
@ -149,7 +153,9 @@ class DracPowerTestCase(test_utils.BaseDracTest):
|
||||
@mock.patch('time.sleep', autospec=True)
|
||||
def test_reboot_retries_success(self, mock_sleep, mock_get_drac_client):
|
||||
mock_client = mock_get_drac_client.return_value
|
||||
mock_client.get_power_state.return_value = drac_constants.POWER_OFF
|
||||
mock_client.get_power_state.side_effect = [drac_constants.POWER_OFF,
|
||||
drac_constants.POWER_OFF,
|
||||
drac_constants.POWER_ON]
|
||||
exc = drac_exceptions.DRACOperationFailed(
|
||||
drac_messages=['The command failed to set RequestedState'])
|
||||
mock_client.set_power_state.side_effect = [exc, None]
|
||||
|
@ -0,0 +1,10 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Fixes ``idrac-wsman`` power interface to wait for the hardware to reach the
|
||||
target state before returning. For systems where soft power off at the end
|
||||
of deployment to boot to instance failed and forced hard power off was
|
||||
used, this left node successfully deployed in off state without any errors.
|
||||
This broke other workflows expecting node to be on booted into
|
||||
OS at the end of deployment. Additional information can be found in
|
||||
`story 2009204 <https://storyboard.openstack.org/#!/story/2009204>`_.
|
Loading…
Reference in New Issue
Block a user