Merge "Agent: Out-of-band power off on deploy"
This commit is contained in:
commit
907e6a26b6
@ -104,3 +104,23 @@ Steps to enable proxies
|
|||||||
``image_no_proxy`` to driver_info properties in each node that will use the
|
``image_no_proxy`` to driver_info properties in each node that will use the
|
||||||
proxy. Please refer to ``ironic driver-properties`` output of the
|
proxy. Please refer to ``ironic driver-properties`` output of the
|
||||||
``agent_*`` driver you're using for descriptions of these properties.
|
``agent_*`` driver you're using for descriptions of these properties.
|
||||||
|
|
||||||
|
Advanced configuration
|
||||||
|
======================
|
||||||
|
|
||||||
|
Out-of-band Vs. in-band power off on deploy
|
||||||
|
-------------------------------------------
|
||||||
|
|
||||||
|
After deploying an image onto the node's hard disk, Ironic will reboot
|
||||||
|
the machine into the new image. By default this power action happens
|
||||||
|
``in-band``, meaning that the ironic-conductor will instruct the IPA
|
||||||
|
ramdisk to power itself off.
|
||||||
|
|
||||||
|
Some hardware may have a problem with the default approach and
|
||||||
|
would require Ironic to talk directly to the management controller
|
||||||
|
to switch the power off and on again. In order to tell Ironic to do
|
||||||
|
that, you have to update the node's ``driver_info`` field and set the
|
||||||
|
``deploy_forces_oob_reboot`` parameter to **True**. For
|
||||||
|
example, the command below sets this configuration on a specific node::
|
||||||
|
|
||||||
|
ironic node-update <UUID or name> add driver_info/deploy_forces_oob_reboot=True
|
||||||
|
@ -22,6 +22,7 @@ import time
|
|||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
from oslo_log import log
|
from oslo_log import log
|
||||||
from oslo_utils import excutils
|
from oslo_utils import excutils
|
||||||
|
from oslo_utils import strutils
|
||||||
from oslo_utils import timeutils
|
from oslo_utils import timeutils
|
||||||
import retrying
|
import retrying
|
||||||
|
|
||||||
@ -82,6 +83,13 @@ LOG = log.getLogger(__name__)
|
|||||||
# completing 'delete_configuration' of raid interface.
|
# completing 'delete_configuration' of raid interface.
|
||||||
POST_CLEAN_STEP_HOOKS = {}
|
POST_CLEAN_STEP_HOOKS = {}
|
||||||
|
|
||||||
|
VENDOR_PROPERTIES = {
|
||||||
|
'deploy_forces_oob_reboot': _(
|
||||||
|
'Whether Ironic should force a reboot of the Node via the out-of-band '
|
||||||
|
'channel after deployment is complete. Provides compatibility with '
|
||||||
|
'older deploy ramdisks. Defaults to False. Optional.')
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _get_client():
|
def _get_client():
|
||||||
client = agent_client.AgentClient()
|
client = agent_client.AgentClient()
|
||||||
@ -178,9 +186,7 @@ class BaseAgentVendor(base.VendorInterface):
|
|||||||
|
|
||||||
:returns: dictionary of <property name>:<property description> entries.
|
:returns: dictionary of <property name>:<property description> entries.
|
||||||
"""
|
"""
|
||||||
# NOTE(jroll) all properties are set by the driver,
|
return VENDOR_PROPERTIES
|
||||||
# not by the operator.
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def validate(self, task, method, **kwargs):
|
def validate(self, task, method, **kwargs):
|
||||||
"""Validate the driver-specific Node deployment info.
|
"""Validate the driver-specific Node deployment info.
|
||||||
@ -688,18 +694,38 @@ class BaseAgentVendor(base.VendorInterface):
|
|||||||
return task.driver.power.get_power_state(task)
|
return task.driver.power.get_power_state(task)
|
||||||
|
|
||||||
node = task.node
|
node = task.node
|
||||||
|
# Whether ironic should power off the node via out-of-band or
|
||||||
|
# in-band methods
|
||||||
|
oob_power_off = strutils.bool_from_string(
|
||||||
|
node.driver_info.get('deploy_forces_oob_reboot', False))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if not oob_power_off:
|
||||||
try:
|
try:
|
||||||
self._client.power_off(node)
|
self._client.power_off(node)
|
||||||
_wait_until_powered_off(task)
|
_wait_until_powered_off(task)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
_LW('Failed to soft power off node %(node_uuid)s '
|
_LW('Failed to soft power off node %(node_uuid)s '
|
||||||
'in at least %(timeout)d seconds. Error: %(error)s'),
|
'in at least %(timeout)d seconds. '
|
||||||
|
'Error: %(error)s'),
|
||||||
{'node_uuid': node.uuid,
|
{'node_uuid': node.uuid,
|
||||||
'timeout': (wait * (attempts - 1)) / 1000,
|
'timeout': (wait * (attempts - 1)) / 1000,
|
||||||
'error': e})
|
'error': e})
|
||||||
|
else:
|
||||||
|
# Flush the file system prior to hard rebooting the node
|
||||||
|
result = self._client.sync(node)
|
||||||
|
error = result.get('faultstring')
|
||||||
|
if error:
|
||||||
|
if 'Unknown command' in error:
|
||||||
|
error = _('The version of the IPA ramdisk used in '
|
||||||
|
'the deployment do not support the '
|
||||||
|
'command "sync"')
|
||||||
|
LOG.warning(_LW(
|
||||||
|
'Failed to flush the file system prior to hard '
|
||||||
|
'rebooting the node %(node)s. Error: %(error)s'),
|
||||||
|
{'node': node.uuid, 'error': error})
|
||||||
|
|
||||||
manager_utils.node_power_action(task, states.REBOOT)
|
manager_utils.node_power_action(task, states.REBOOT)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
msg = (_('Error rebooting node %(node)s after deploy. '
|
msg = (_('Error rebooting node %(node)s after deploy. '
|
||||||
|
@ -174,3 +174,10 @@ class AgentClient(object):
|
|||||||
return self._command(node=node,
|
return self._command(node=node,
|
||||||
method='standby.power_off',
|
method='standby.power_off',
|
||||||
params={})
|
params={})
|
||||||
|
|
||||||
|
def sync(self, node):
|
||||||
|
"""Flush file system buffers forcing changed blocks to disk."""
|
||||||
|
return self._command(node=node,
|
||||||
|
method='standby.sync',
|
||||||
|
params={},
|
||||||
|
wait=True)
|
||||||
|
@ -57,9 +57,6 @@ class IloVirtualMediaAgentVendorInterface(agent.AgentVendorInterface):
|
|||||||
class VendorPassthru(iscsi_deploy.VendorPassthru):
|
class VendorPassthru(iscsi_deploy.VendorPassthru):
|
||||||
"""Vendor-specific interfaces for iLO deploy drivers."""
|
"""Vendor-specific interfaces for iLO deploy drivers."""
|
||||||
|
|
||||||
def get_properties(self):
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def validate(self, task, method, **kwargs):
|
def validate(self, task, method, **kwargs):
|
||||||
"""Validate vendor-specific actions.
|
"""Validate vendor-specific actions.
|
||||||
|
|
||||||
|
@ -701,9 +701,6 @@ class ISCSIDeploy(base.DeployInterface):
|
|||||||
class VendorPassthru(agent_base_vendor.BaseAgentVendor):
|
class VendorPassthru(agent_base_vendor.BaseAgentVendor):
|
||||||
"""Interface to mix IPMI and PXE vendor-specific interfaces."""
|
"""Interface to mix IPMI and PXE vendor-specific interfaces."""
|
||||||
|
|
||||||
def get_properties(self):
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def validate(self, task, method, **kwargs):
|
def validate(self, task, method, **kwargs):
|
||||||
"""Validates the inputs for a vendor passthru.
|
"""Validates the inputs for a vendor passthru.
|
||||||
|
|
||||||
|
@ -3803,7 +3803,8 @@ class ManagerTestProperties(tests_db_base.DbTestCase):
|
|||||||
self._check_driver_properties("fake_ssh", expected)
|
self._check_driver_properties("fake_ssh", expected)
|
||||||
|
|
||||||
def test_driver_properties_fake_pxe(self):
|
def test_driver_properties_fake_pxe(self):
|
||||||
expected = ['deploy_kernel', 'deploy_ramdisk']
|
expected = ['deploy_kernel', 'deploy_ramdisk',
|
||||||
|
'deploy_forces_oob_reboot']
|
||||||
self._check_driver_properties("fake_pxe", expected)
|
self._check_driver_properties("fake_pxe", expected)
|
||||||
|
|
||||||
def test_driver_properties_fake_seamicro(self):
|
def test_driver_properties_fake_seamicro(self):
|
||||||
@ -3824,34 +3825,37 @@ class ManagerTestProperties(tests_db_base.DbTestCase):
|
|||||||
'ipmi_transit_address', 'ipmi_target_channel',
|
'ipmi_transit_address', 'ipmi_target_channel',
|
||||||
'ipmi_target_address', 'ipmi_local_address',
|
'ipmi_target_address', 'ipmi_local_address',
|
||||||
'deploy_kernel', 'deploy_ramdisk', 'ipmi_protocol_version',
|
'deploy_kernel', 'deploy_ramdisk', 'ipmi_protocol_version',
|
||||||
'ipmi_force_boot_device'
|
'ipmi_force_boot_device', 'deploy_forces_oob_reboot']
|
||||||
]
|
|
||||||
self._check_driver_properties("pxe_ipmitool", expected)
|
self._check_driver_properties("pxe_ipmitool", expected)
|
||||||
|
|
||||||
def test_driver_properties_pxe_ipminative(self):
|
def test_driver_properties_pxe_ipminative(self):
|
||||||
expected = ['ipmi_address', 'ipmi_password', 'ipmi_username',
|
expected = ['ipmi_address', 'ipmi_password', 'ipmi_username',
|
||||||
'deploy_kernel', 'deploy_ramdisk',
|
'deploy_kernel', 'deploy_ramdisk',
|
||||||
'ipmi_terminal_port', 'ipmi_force_boot_device']
|
'ipmi_terminal_port', 'ipmi_force_boot_device',
|
||||||
|
'deploy_forces_oob_reboot']
|
||||||
self._check_driver_properties("pxe_ipminative", expected)
|
self._check_driver_properties("pxe_ipminative", expected)
|
||||||
|
|
||||||
def test_driver_properties_pxe_ssh(self):
|
def test_driver_properties_pxe_ssh(self):
|
||||||
expected = ['deploy_kernel', 'deploy_ramdisk',
|
expected = ['deploy_kernel', 'deploy_ramdisk',
|
||||||
'ssh_address', 'ssh_username', 'ssh_virt_type',
|
'ssh_address', 'ssh_username', 'ssh_virt_type',
|
||||||
'ssh_key_contents', 'ssh_key_filename',
|
'ssh_key_contents', 'ssh_key_filename',
|
||||||
'ssh_password', 'ssh_port', 'ssh_terminal_port']
|
'ssh_password', 'ssh_port', 'ssh_terminal_port',
|
||||||
|
'deploy_forces_oob_reboot']
|
||||||
self._check_driver_properties("pxe_ssh", expected)
|
self._check_driver_properties("pxe_ssh", expected)
|
||||||
|
|
||||||
def test_driver_properties_pxe_seamicro(self):
|
def test_driver_properties_pxe_seamicro(self):
|
||||||
expected = ['deploy_kernel', 'deploy_ramdisk',
|
expected = ['deploy_kernel', 'deploy_ramdisk',
|
||||||
'seamicro_api_endpoint', 'seamicro_password',
|
'seamicro_api_endpoint', 'seamicro_password',
|
||||||
'seamicro_server_id', 'seamicro_username',
|
'seamicro_server_id', 'seamicro_username',
|
||||||
'seamicro_api_version', 'seamicro_terminal_port']
|
'seamicro_api_version', 'seamicro_terminal_port',
|
||||||
|
'deploy_forces_oob_reboot']
|
||||||
self._check_driver_properties("pxe_seamicro", expected)
|
self._check_driver_properties("pxe_seamicro", expected)
|
||||||
|
|
||||||
def test_driver_properties_pxe_snmp(self):
|
def test_driver_properties_pxe_snmp(self):
|
||||||
expected = ['deploy_kernel', 'deploy_ramdisk',
|
expected = ['deploy_kernel', 'deploy_ramdisk',
|
||||||
'snmp_driver', 'snmp_address', 'snmp_port', 'snmp_version',
|
'snmp_driver', 'snmp_address', 'snmp_port', 'snmp_version',
|
||||||
'snmp_community', 'snmp_security', 'snmp_outlet']
|
'snmp_community', 'snmp_security', 'snmp_outlet',
|
||||||
|
'deploy_forces_oob_reboot']
|
||||||
self._check_driver_properties("pxe_snmp", expected)
|
self._check_driver_properties("pxe_snmp", expected)
|
||||||
|
|
||||||
def test_driver_properties_fake_ilo(self):
|
def test_driver_properties_fake_ilo(self):
|
||||||
@ -3862,13 +3866,15 @@ class ManagerTestProperties(tests_db_base.DbTestCase):
|
|||||||
def test_driver_properties_ilo_iscsi(self):
|
def test_driver_properties_ilo_iscsi(self):
|
||||||
expected = ['ilo_address', 'ilo_username', 'ilo_password',
|
expected = ['ilo_address', 'ilo_username', 'ilo_password',
|
||||||
'client_port', 'client_timeout', 'ilo_deploy_iso',
|
'client_port', 'client_timeout', 'ilo_deploy_iso',
|
||||||
'console_port', 'ilo_change_password']
|
'console_port', 'ilo_change_password',
|
||||||
|
'deploy_forces_oob_reboot']
|
||||||
self._check_driver_properties("iscsi_ilo", expected)
|
self._check_driver_properties("iscsi_ilo", expected)
|
||||||
|
|
||||||
def test_driver_properties_agent_ilo(self):
|
def test_driver_properties_agent_ilo(self):
|
||||||
expected = ['ilo_address', 'ilo_username', 'ilo_password',
|
expected = ['ilo_address', 'ilo_username', 'ilo_password',
|
||||||
'client_port', 'client_timeout', 'ilo_deploy_iso',
|
'client_port', 'client_timeout', 'ilo_deploy_iso',
|
||||||
'console_port', 'ilo_change_password']
|
'console_port', 'ilo_change_password',
|
||||||
|
'deploy_forces_oob_reboot']
|
||||||
self._check_driver_properties("agent_ilo", expected)
|
self._check_driver_properties("agent_ilo", expected)
|
||||||
|
|
||||||
def test_driver_properties_fail(self):
|
def test_driver_properties_fail(self):
|
||||||
|
@ -684,6 +684,60 @@ class TestBaseAgentVendor(db_base.DbTestCase):
|
|||||||
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
|
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
|
||||||
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
||||||
|
|
||||||
|
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||||
|
@mock.patch.object(agent_client.AgentClient, 'sync',
|
||||||
|
spec=types.FunctionType)
|
||||||
|
def test_reboot_and_finish_deploy_power_action_oob_power_off(
|
||||||
|
self, sync_mock, node_power_action_mock):
|
||||||
|
# Enable force power off
|
||||||
|
driver_info = self.node.driver_info
|
||||||
|
driver_info['deploy_forces_oob_reboot'] = True
|
||||||
|
self.node.driver_info = driver_info
|
||||||
|
|
||||||
|
self.node.provision_state = states.DEPLOYING
|
||||||
|
self.node.target_provision_state = states.ACTIVE
|
||||||
|
self.node.save()
|
||||||
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
|
shared=True) as task:
|
||||||
|
self.passthru.reboot_and_finish_deploy(task)
|
||||||
|
|
||||||
|
sync_mock.assert_called_once_with(task.node)
|
||||||
|
node_power_action_mock.assert_called_once_with(
|
||||||
|
task, states.REBOOT)
|
||||||
|
self.assertEqual(states.ACTIVE, task.node.provision_state)
|
||||||
|
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
|
||||||
|
|
||||||
|
@mock.patch.object(agent_base_vendor.LOG, 'warning', autospec=True)
|
||||||
|
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||||
|
@mock.patch.object(agent_client.AgentClient, 'sync',
|
||||||
|
spec=types.FunctionType)
|
||||||
|
def test_reboot_and_finish_deploy_power_action_oob_power_off_failed(
|
||||||
|
self, sync_mock, node_power_action_mock, log_mock):
|
||||||
|
# Enable force power off
|
||||||
|
driver_info = self.node.driver_info
|
||||||
|
driver_info['deploy_forces_oob_reboot'] = True
|
||||||
|
self.node.driver_info = driver_info
|
||||||
|
|
||||||
|
self.node.provision_state = states.DEPLOYING
|
||||||
|
self.node.target_provision_state = states.ACTIVE
|
||||||
|
self.node.save()
|
||||||
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
|
shared=True) as task:
|
||||||
|
sync_mock.return_value = {'faultstring': 'Unknown command: blah'}
|
||||||
|
self.passthru.reboot_and_finish_deploy(task)
|
||||||
|
|
||||||
|
sync_mock.assert_called_once_with(task.node)
|
||||||
|
node_power_action_mock.assert_called_once_with(
|
||||||
|
task, states.REBOOT)
|
||||||
|
self.assertEqual(states.ACTIVE, task.node.provision_state)
|
||||||
|
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
|
||||||
|
log_error = ('The version of the IPA ramdisk used in the '
|
||||||
|
'deployment do not support the command "sync"')
|
||||||
|
log_mock.assert_called_once_with(
|
||||||
|
'Failed to flush the file system prior to hard rebooting the '
|
||||||
|
'node %(node)s. Error: %(error)s',
|
||||||
|
{'node': task.node.uuid, 'error': log_error})
|
||||||
|
|
||||||
@mock.patch.object(agent_client.AgentClient, 'install_bootloader',
|
@mock.patch.object(agent_client.AgentClient, 'install_bootloader',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
@mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True)
|
@mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True)
|
||||||
@ -1276,3 +1330,7 @@ class TestRefreshCleanSteps(TestBaseAgentVendor):
|
|||||||
task)
|
task)
|
||||||
client_mock.assert_called_once_with(mock.ANY, task.node,
|
client_mock.assert_called_once_with(mock.ANY, task.node,
|
||||||
task.ports)
|
task.ports)
|
||||||
|
|
||||||
|
def test_get_properties(self):
|
||||||
|
expected = agent_base_vendor.VENDOR_PROPERTIES
|
||||||
|
self.assertEqual(expected, self.passthru.get_properties())
|
||||||
|
@ -236,3 +236,9 @@ class TestAgentClient(base.TestCase):
|
|||||||
self.client.power_off(self.node)
|
self.client.power_off(self.node)
|
||||||
self.client._command.assert_called_once_with(
|
self.client._command.assert_called_once_with(
|
||||||
node=self.node, method='standby.power_off', params={})
|
node=self.node, method='standby.power_off', params={})
|
||||||
|
|
||||||
|
def test_sync(self):
|
||||||
|
self.client._command = mock.MagicMock(spec_set=[])
|
||||||
|
self.client.sync(self.node)
|
||||||
|
self.client._command.assert_called_once_with(
|
||||||
|
node=self.node, method='standby.sync', params={}, wait=True)
|
||||||
|
@ -32,6 +32,7 @@ from ironic.common.glance_service import base_image_service
|
|||||||
from ironic.common import pxe_utils
|
from ironic.common import pxe_utils
|
||||||
from ironic.common import states
|
from ironic.common import states
|
||||||
from ironic.conductor import task_manager
|
from ironic.conductor import task_manager
|
||||||
|
from ironic.drivers.modules import agent_base_vendor
|
||||||
from ironic.drivers.modules import deploy_utils
|
from ironic.drivers.modules import deploy_utils
|
||||||
from ironic.drivers.modules import pxe
|
from ironic.drivers.modules import pxe
|
||||||
from ironic.tests.unit.conductor import mgr_utils
|
from ironic.tests.unit.conductor import mgr_utils
|
||||||
@ -549,6 +550,7 @@ class PXEBootTestCase(db_base.DbTestCase):
|
|||||||
|
|
||||||
def test_get_properties(self):
|
def test_get_properties(self):
|
||||||
expected = pxe.COMMON_PROPERTIES
|
expected = pxe.COMMON_PROPERTIES
|
||||||
|
expected.update(agent_base_vendor.VENDOR_PROPERTIES)
|
||||||
with task_manager.acquire(self.context, self.node.uuid,
|
with task_manager.acquire(self.context, self.node.uuid,
|
||||||
shared=True) as task:
|
shared=True) as task:
|
||||||
self.assertEqual(expected, task.driver.get_properties())
|
self.assertEqual(expected, task.driver.get_properties())
|
||||||
|
7
releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml
Normal file
7
releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- Fixes a problem where some hardware/firmware (especially faulty ones)
|
||||||
|
won't come back online after an in-band ACPI soft power off by adding
|
||||||
|
a new driver property called "deploy_forces_oob_reboot" that can be set
|
||||||
|
on the nodes being deployed by the IPA ramdisk. If the value of this
|
||||||
|
property is True, Ironic will power cycle the node via out-of-band.
|
Loading…
Reference in New Issue
Block a user