Merge "Agent: Out-of-band power off on deploy"
This commit is contained in:
commit
907e6a26b6
@ -104,3 +104,23 @@ Steps to enable proxies
|
||||
``image_no_proxy`` to driver_info properties in each node that will use the
|
||||
proxy. Please refer to ``ironic driver-properties`` output of the
|
||||
``agent_*`` driver you're using for descriptions of these properties.
|
||||
|
||||
Advanced configuration
|
||||
======================
|
||||
|
||||
Out-of-band Vs. in-band power off on deploy
|
||||
-------------------------------------------
|
||||
|
||||
After deploying an image onto the node's hard disk Ironic will reboot
|
||||
the machine into the new image. By default this power action happens
|
||||
``in-band``, meaning that the ironic-conductor will instruct the IPA
|
||||
ramdisk to power itself off.
|
||||
|
||||
Some hardware may have a problem with the default approach and
|
||||
would require Ironic to talk directly to the management controller
|
||||
to switch the power off and on again. In order to tell Ironic to do
|
||||
that you have to update the node's ``driver_info`` field and set the
|
||||
``deploy_forces_oob_reboot`` parameter with the value of **True**. For
|
||||
example, the below command sets this configuration in a specific node::
|
||||
|
||||
ironic node-update <UUID or name> add driver_info/deploy_forces_oob_reboot=True
|
||||
|
@ -22,6 +22,7 @@ import time
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log
|
||||
from oslo_utils import excutils
|
||||
from oslo_utils import strutils
|
||||
from oslo_utils import timeutils
|
||||
import retrying
|
||||
|
||||
@ -82,6 +83,13 @@ LOG = log.getLogger(__name__)
|
||||
# completing 'delete_configuration' of raid interface.
|
||||
POST_CLEAN_STEP_HOOKS = {}
|
||||
|
||||
VENDOR_PROPERTIES = {
|
||||
'deploy_forces_oob_reboot': _(
|
||||
'Whether Ironic should force a reboot of the Node via the out-of-band '
|
||||
'channel after deployment is complete. Provides compatiblity with '
|
||||
'older deploy ramdisks. Defaults to False. Optional.')
|
||||
}
|
||||
|
||||
|
||||
def _get_client():
|
||||
client = agent_client.AgentClient()
|
||||
@ -178,9 +186,7 @@ class BaseAgentVendor(base.VendorInterface):
|
||||
|
||||
:returns: dictionary of <property name>:<property description> entries.
|
||||
"""
|
||||
# NOTE(jroll) all properties are set by the driver,
|
||||
# not by the operator.
|
||||
return {}
|
||||
return VENDOR_PROPERTIES
|
||||
|
||||
def validate(self, task, method, **kwargs):
|
||||
"""Validate the driver-specific Node deployment info.
|
||||
@ -688,18 +694,38 @@ class BaseAgentVendor(base.VendorInterface):
|
||||
return task.driver.power.get_power_state(task)
|
||||
|
||||
node = task.node
|
||||
# Whether ironic should power off the node via out-of-band or
|
||||
# in-band methods
|
||||
oob_power_off = strutils.bool_from_string(
|
||||
node.driver_info.get('deploy_forces_oob_reboot', False))
|
||||
|
||||
try:
|
||||
try:
|
||||
self._client.power_off(node)
|
||||
_wait_until_powered_off(task)
|
||||
except Exception as e:
|
||||
LOG.warning(
|
||||
_LW('Failed to soft power off node %(node_uuid)s '
|
||||
'in at least %(timeout)d seconds. Error: %(error)s'),
|
||||
{'node_uuid': node.uuid,
|
||||
'timeout': (wait * (attempts - 1)) / 1000,
|
||||
'error': e})
|
||||
if not oob_power_off:
|
||||
try:
|
||||
self._client.power_off(node)
|
||||
_wait_until_powered_off(task)
|
||||
except Exception as e:
|
||||
LOG.warning(
|
||||
_LW('Failed to soft power off node %(node_uuid)s '
|
||||
'in at least %(timeout)d seconds. '
|
||||
'Error: %(error)s'),
|
||||
{'node_uuid': node.uuid,
|
||||
'timeout': (wait * (attempts - 1)) / 1000,
|
||||
'error': e})
|
||||
else:
|
||||
# Flush the file system prior to hard rebooting the node
|
||||
result = self._client.sync(node)
|
||||
error = result.get('faultstring')
|
||||
if error:
|
||||
if 'Unknown command' in error:
|
||||
error = _('The version of the IPA ramdisk used in '
|
||||
'the deployment do not support the '
|
||||
'command "sync"')
|
||||
LOG.warning(_LW(
|
||||
'Failed to flush the file system prior to hard '
|
||||
'rebooting the node %(node)s. Error: %(error)s'),
|
||||
{'node': node.uuid, 'error': error})
|
||||
|
||||
manager_utils.node_power_action(task, states.REBOOT)
|
||||
except Exception as e:
|
||||
msg = (_('Error rebooting node %(node)s after deploy. '
|
||||
|
@ -174,3 +174,10 @@ class AgentClient(object):
|
||||
return self._command(node=node,
|
||||
method='standby.power_off',
|
||||
params={})
|
||||
|
||||
def sync(self, node):
|
||||
"""Flush file system buffers forcing changed blocks to disk."""
|
||||
return self._command(node=node,
|
||||
method='standby.sync',
|
||||
params={},
|
||||
wait=True)
|
||||
|
@ -57,9 +57,6 @@ class IloVirtualMediaAgentVendorInterface(agent.AgentVendorInterface):
|
||||
class VendorPassthru(iscsi_deploy.VendorPassthru):
|
||||
"""Vendor-specific interfaces for iLO deploy drivers."""
|
||||
|
||||
def get_properties(self):
|
||||
return {}
|
||||
|
||||
def validate(self, task, method, **kwargs):
|
||||
"""Validate vendor-specific actions.
|
||||
|
||||
|
@ -701,9 +701,6 @@ class ISCSIDeploy(base.DeployInterface):
|
||||
class VendorPassthru(agent_base_vendor.BaseAgentVendor):
|
||||
"""Interface to mix IPMI and PXE vendor-specific interfaces."""
|
||||
|
||||
def get_properties(self):
|
||||
return {}
|
||||
|
||||
def validate(self, task, method, **kwargs):
|
||||
"""Validates the inputs for a vendor passthru.
|
||||
|
||||
|
@ -3803,7 +3803,8 @@ class ManagerTestProperties(tests_db_base.DbTestCase):
|
||||
self._check_driver_properties("fake_ssh", expected)
|
||||
|
||||
def test_driver_properties_fake_pxe(self):
|
||||
expected = ['deploy_kernel', 'deploy_ramdisk']
|
||||
expected = ['deploy_kernel', 'deploy_ramdisk',
|
||||
'deploy_forces_oob_reboot']
|
||||
self._check_driver_properties("fake_pxe", expected)
|
||||
|
||||
def test_driver_properties_fake_seamicro(self):
|
||||
@ -3824,34 +3825,37 @@ class ManagerTestProperties(tests_db_base.DbTestCase):
|
||||
'ipmi_transit_address', 'ipmi_target_channel',
|
||||
'ipmi_target_address', 'ipmi_local_address',
|
||||
'deploy_kernel', 'deploy_ramdisk', 'ipmi_protocol_version',
|
||||
'ipmi_force_boot_device'
|
||||
]
|
||||
'ipmi_force_boot_device', 'deploy_forces_oob_reboot']
|
||||
self._check_driver_properties("pxe_ipmitool", expected)
|
||||
|
||||
def test_driver_properties_pxe_ipminative(self):
|
||||
expected = ['ipmi_address', 'ipmi_password', 'ipmi_username',
|
||||
'deploy_kernel', 'deploy_ramdisk',
|
||||
'ipmi_terminal_port', 'ipmi_force_boot_device']
|
||||
'ipmi_terminal_port', 'ipmi_force_boot_device',
|
||||
'deploy_forces_oob_reboot']
|
||||
self._check_driver_properties("pxe_ipminative", expected)
|
||||
|
||||
def test_driver_properties_pxe_ssh(self):
|
||||
expected = ['deploy_kernel', 'deploy_ramdisk',
|
||||
'ssh_address', 'ssh_username', 'ssh_virt_type',
|
||||
'ssh_key_contents', 'ssh_key_filename',
|
||||
'ssh_password', 'ssh_port', 'ssh_terminal_port']
|
||||
'ssh_password', 'ssh_port', 'ssh_terminal_port',
|
||||
'deploy_forces_oob_reboot']
|
||||
self._check_driver_properties("pxe_ssh", expected)
|
||||
|
||||
def test_driver_properties_pxe_seamicro(self):
|
||||
expected = ['deploy_kernel', 'deploy_ramdisk',
|
||||
'seamicro_api_endpoint', 'seamicro_password',
|
||||
'seamicro_server_id', 'seamicro_username',
|
||||
'seamicro_api_version', 'seamicro_terminal_port']
|
||||
'seamicro_api_version', 'seamicro_terminal_port',
|
||||
'deploy_forces_oob_reboot']
|
||||
self._check_driver_properties("pxe_seamicro", expected)
|
||||
|
||||
def test_driver_properties_pxe_snmp(self):
|
||||
expected = ['deploy_kernel', 'deploy_ramdisk',
|
||||
'snmp_driver', 'snmp_address', 'snmp_port', 'snmp_version',
|
||||
'snmp_community', 'snmp_security', 'snmp_outlet']
|
||||
'snmp_community', 'snmp_security', 'snmp_outlet',
|
||||
'deploy_forces_oob_reboot']
|
||||
self._check_driver_properties("pxe_snmp", expected)
|
||||
|
||||
def test_driver_properties_fake_ilo(self):
|
||||
@ -3862,13 +3866,15 @@ class ManagerTestProperties(tests_db_base.DbTestCase):
|
||||
def test_driver_properties_ilo_iscsi(self):
|
||||
expected = ['ilo_address', 'ilo_username', 'ilo_password',
|
||||
'client_port', 'client_timeout', 'ilo_deploy_iso',
|
||||
'console_port', 'ilo_change_password']
|
||||
'console_port', 'ilo_change_password',
|
||||
'deploy_forces_oob_reboot']
|
||||
self._check_driver_properties("iscsi_ilo", expected)
|
||||
|
||||
def test_driver_properties_agent_ilo(self):
|
||||
expected = ['ilo_address', 'ilo_username', 'ilo_password',
|
||||
'client_port', 'client_timeout', 'ilo_deploy_iso',
|
||||
'console_port', 'ilo_change_password']
|
||||
'console_port', 'ilo_change_password',
|
||||
'deploy_forces_oob_reboot']
|
||||
self._check_driver_properties("agent_ilo", expected)
|
||||
|
||||
def test_driver_properties_fail(self):
|
||||
|
@ -684,6 +684,60 @@ class TestBaseAgentVendor(db_base.DbTestCase):
|
||||
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
|
||||
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
|
||||
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(agent_client.AgentClient, 'sync',
|
||||
spec=types.FunctionType)
|
||||
def test_reboot_and_finish_deploy_power_action_oob_power_off(
|
||||
self, sync_mock, node_power_action_mock):
|
||||
# Enable force power off
|
||||
driver_info = self.node.driver_info
|
||||
driver_info['deploy_forces_oob_reboot'] = True
|
||||
self.node.driver_info = driver_info
|
||||
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.save()
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
self.passthru.reboot_and_finish_deploy(task)
|
||||
|
||||
sync_mock.assert_called_once_with(task.node)
|
||||
node_power_action_mock.assert_called_once_with(
|
||||
task, states.REBOOT)
|
||||
self.assertEqual(states.ACTIVE, task.node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
|
||||
|
||||
@mock.patch.object(agent_base_vendor.LOG, 'warning', autospec=True)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(agent_client.AgentClient, 'sync',
|
||||
spec=types.FunctionType)
|
||||
def test_reboot_and_finish_deploy_power_action_oob_power_off_failed(
|
||||
self, sync_mock, node_power_action_mock, log_mock):
|
||||
# Enable force power off
|
||||
driver_info = self.node.driver_info
|
||||
driver_info['deploy_forces_oob_reboot'] = True
|
||||
self.node.driver_info = driver_info
|
||||
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.save()
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
sync_mock.return_value = {'faultstring': 'Unknown command: blah'}
|
||||
self.passthru.reboot_and_finish_deploy(task)
|
||||
|
||||
sync_mock.assert_called_once_with(task.node)
|
||||
node_power_action_mock.assert_called_once_with(
|
||||
task, states.REBOOT)
|
||||
self.assertEqual(states.ACTIVE, task.node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, task.node.target_provision_state)
|
||||
log_error = ('The version of the IPA ramdisk used in the '
|
||||
'deployment do not support the command "sync"')
|
||||
log_mock.assert_called_once_with(
|
||||
'Failed to flush the file system prior to hard rebooting the '
|
||||
'node %(node)s. Error: %(error)s',
|
||||
{'node': task.node.uuid, 'error': log_error})
|
||||
|
||||
@mock.patch.object(agent_client.AgentClient, 'install_bootloader',
|
||||
autospec=True)
|
||||
@mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True)
|
||||
@ -1276,3 +1330,7 @@ class TestRefreshCleanSteps(TestBaseAgentVendor):
|
||||
task)
|
||||
client_mock.assert_called_once_with(mock.ANY, task.node,
|
||||
task.ports)
|
||||
|
||||
def test_get_properties(self):
|
||||
expected = agent_base_vendor.VENDOR_PROPERTIES
|
||||
self.assertEqual(expected, self.passthru.get_properties())
|
||||
|
@ -236,3 +236,9 @@ class TestAgentClient(base.TestCase):
|
||||
self.client.power_off(self.node)
|
||||
self.client._command.assert_called_once_with(
|
||||
node=self.node, method='standby.power_off', params={})
|
||||
|
||||
def test_sync(self):
|
||||
self.client._command = mock.MagicMock(spec_set=[])
|
||||
self.client.sync(self.node)
|
||||
self.client._command.assert_called_once_with(
|
||||
node=self.node, method='standby.sync', params={}, wait=True)
|
||||
|
@ -32,6 +32,7 @@ from ironic.common.glance_service import base_image_service
|
||||
from ironic.common import pxe_utils
|
||||
from ironic.common import states
|
||||
from ironic.conductor import task_manager
|
||||
from ironic.drivers.modules import agent_base_vendor
|
||||
from ironic.drivers.modules import deploy_utils
|
||||
from ironic.drivers.modules import pxe
|
||||
from ironic.tests.unit.conductor import mgr_utils
|
||||
@ -549,6 +550,7 @@ class PXEBootTestCase(db_base.DbTestCase):
|
||||
|
||||
def test_get_properties(self):
|
||||
expected = pxe.COMMON_PROPERTIES
|
||||
expected.update(agent_base_vendor.VENDOR_PROPERTIES)
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
self.assertEqual(expected, task.driver.get_properties())
|
||||
|
7
releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml
Normal file
7
releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
---
|
||||
fixes:
|
||||
- Fixes a problem where some hardware/firmware (specially faulty ones)
|
||||
won't come back online after an in-band ACPI soft power off by adding
|
||||
a new driver property called "deploy_forces_oob_reboot" that can be set
|
||||
to the nodes being deployed by the IPA ramdisk. If the value of this
|
||||
property is True, Ironic will power cycle the node via out-of-band.
|
Loading…
Reference in New Issue
Block a user