diff --git a/doc/source/drivers/ipa.rst b/doc/source/drivers/ipa.rst index aa5ecf7a0e..bebbafba5d 100644 --- a/doc/source/drivers/ipa.rst +++ b/doc/source/drivers/ipa.rst @@ -104,3 +104,23 @@ Steps to enable proxies ``image_no_proxy`` to driver_info properties in each node that will use the proxy. Please refer to ``ironic driver-properties`` output of the ``agent_*`` driver you're using for descriptions of these properties. + +Advanced configuration +====================== + +Out-of-band Vs. in-band power off on deploy +------------------------------------------- + +After deploying an image onto the node's hard disk Ironic will reboot +the machine into the new image. By default this power action happens +``in-band``, meaning that the ironic-conductor will instruct the IPA +ramdisk to power itself off. + +Some hardware may have a problem with the default approach and +would require Ironic to talk directly to the management controller +to switch the power off and on again. In order to tell Ironic to do +that you have to update the node's ``driver_info`` field and set the +``deploy_forces_oob_reboot`` parameter with the value of **True**. For +example, the below command sets this configuration in a specific node:: + + ironic node-update add driver_info/deploy_forces_oob_reboot=True diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index bf10d1a35f..9df0cd392b 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -22,6 +22,7 @@ import time from oslo_config import cfg from oslo_log import log from oslo_utils import excutils +from oslo_utils import strutils from oslo_utils import timeutils import retrying @@ -82,6 +83,13 @@ LOG = log.getLogger(__name__) # completing 'delete_configuration' of raid interface. POST_CLEAN_STEP_HOOKS = {} +VENDOR_PROPERTIES = { + 'deploy_forces_oob_reboot': _( + 'Whether Ironic should force a reboot of the Node via the out-of-band ' + 'channel after deployment is complete. Provides compatiblity with ' + 'older deploy ramdisks. Defaults to False. Optional.') +} + def _get_client(): client = agent_client.AgentClient() @@ -178,9 +186,7 @@ class BaseAgentVendor(base.VendorInterface): :returns: dictionary of : entries. """ - # NOTE(jroll) all properties are set by the driver, - # not by the operator. - return {} + return VENDOR_PROPERTIES def validate(self, task, method, **kwargs): """Validate the driver-specific Node deployment info. @@ -688,18 +694,38 @@ class BaseAgentVendor(base.VendorInterface): return task.driver.power.get_power_state(task) node = task.node + # Whether ironic should power off the node via out-of-band or + # in-band methods + oob_power_off = strutils.bool_from_string( + node.driver_info.get('deploy_forces_oob_reboot', False)) try: - try: - self._client.power_off(node) - _wait_until_powered_off(task) - except Exception as e: - LOG.warning( - _LW('Failed to soft power off node %(node_uuid)s ' - 'in at least %(timeout)d seconds. Error: %(error)s'), - {'node_uuid': node.uuid, - 'timeout': (wait * (attempts - 1)) / 1000, - 'error': e}) + if not oob_power_off: + try: + self._client.power_off(node) + _wait_until_powered_off(task) + except Exception as e: + LOG.warning( + _LW('Failed to soft power off node %(node_uuid)s ' + 'in at least %(timeout)d seconds. ' + 'Error: %(error)s'), + {'node_uuid': node.uuid, + 'timeout': (wait * (attempts - 1)) / 1000, + 'error': e}) + else: + # Flush the file system prior to hard rebooting the node + result = self._client.sync(node) + error = result.get('faultstring') + if error: + if 'Unknown command' in error: + error = _('The version of the IPA ramdisk used in ' + 'the deployment do not support the ' + 'command "sync"') + LOG.warning(_LW( + 'Failed to flush the file system prior to hard ' + 'rebooting the node %(node)s. Error: %(error)s'), + {'node': node.uuid, 'error': error}) + manager_utils.node_power_action(task, states.REBOOT) except Exception as e: msg = (_('Error rebooting node %(node)s after deploy. ' diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index 86c985e599..d27c8dbd13 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -174,3 +174,10 @@ class AgentClient(object): return self._command(node=node, method='standby.power_off', params={}) + + def sync(self, node): + """Flush file system buffers forcing changed blocks to disk.""" + return self._command(node=node, + method='standby.sync', + params={}, + wait=True) diff --git a/ironic/drivers/modules/ilo/vendor.py b/ironic/drivers/modules/ilo/vendor.py index d991a145d5..981afa5999 100644 --- a/ironic/drivers/modules/ilo/vendor.py +++ b/ironic/drivers/modules/ilo/vendor.py @@ -57,9 +57,6 @@ class IloVirtualMediaAgentVendorInterface(agent.AgentVendorInterface): class VendorPassthru(iscsi_deploy.VendorPassthru): """Vendor-specific interfaces for iLO deploy drivers.""" - def get_properties(self): - return {} - def validate(self, task, method, **kwargs): """Validate vendor-specific actions. diff --git a/ironic/drivers/modules/iscsi_deploy.py b/ironic/drivers/modules/iscsi_deploy.py index 0206b738d0..bc111643e7 100644 --- a/ironic/drivers/modules/iscsi_deploy.py +++ b/ironic/drivers/modules/iscsi_deploy.py @@ -701,9 +701,6 @@ class ISCSIDeploy(base.DeployInterface): class VendorPassthru(agent_base_vendor.BaseAgentVendor): """Interface to mix IPMI and PXE vendor-specific interfaces.""" - def get_properties(self): - return {} - def validate(self, task, method, **kwargs): """Validates the inputs for a vendor passthru. diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 48cf4c038f..ffd1ec20d9 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -3803,7 +3803,8 @@ class ManagerTestProperties(tests_db_base.DbTestCase): self._check_driver_properties("fake_ssh", expected) def test_driver_properties_fake_pxe(self): - expected = ['deploy_kernel', 'deploy_ramdisk'] + expected = ['deploy_kernel', 'deploy_ramdisk', + 'deploy_forces_oob_reboot'] self._check_driver_properties("fake_pxe", expected) def test_driver_properties_fake_seamicro(self): @@ -3824,34 +3825,37 @@ class ManagerTestProperties(tests_db_base.DbTestCase): 'ipmi_transit_address', 'ipmi_target_channel', 'ipmi_target_address', 'ipmi_local_address', 'deploy_kernel', 'deploy_ramdisk', 'ipmi_protocol_version', - 'ipmi_force_boot_device' - ] + 'ipmi_force_boot_device', 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_ipmitool", expected) def test_driver_properties_pxe_ipminative(self): expected = ['ipmi_address', 'ipmi_password', 'ipmi_username', 'deploy_kernel', 'deploy_ramdisk', - 'ipmi_terminal_port', 'ipmi_force_boot_device'] + 'ipmi_terminal_port', 'ipmi_force_boot_device', + 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_ipminative", expected) def test_driver_properties_pxe_ssh(self): expected = ['deploy_kernel', 'deploy_ramdisk', 'ssh_address', 'ssh_username', 'ssh_virt_type', 'ssh_key_contents', 'ssh_key_filename', - 'ssh_password', 'ssh_port', 'ssh_terminal_port'] + 'ssh_password', 'ssh_port', 'ssh_terminal_port', + 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_ssh", expected) def test_driver_properties_pxe_seamicro(self): expected = ['deploy_kernel', 'deploy_ramdisk', 'seamicro_api_endpoint', 'seamicro_password', 'seamicro_server_id', 'seamicro_username', - 'seamicro_api_version', 'seamicro_terminal_port'] + 'seamicro_api_version', 'seamicro_terminal_port', + 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_seamicro", expected) def test_driver_properties_pxe_snmp(self): expected = ['deploy_kernel', 'deploy_ramdisk', 'snmp_driver', 'snmp_address', 'snmp_port', 'snmp_version', - 'snmp_community', 'snmp_security', 'snmp_outlet'] + 'snmp_community', 'snmp_security', 'snmp_outlet', + 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_snmp", expected) def test_driver_properties_fake_ilo(self): @@ -3862,13 +3866,15 @@ class ManagerTestProperties(tests_db_base.DbTestCase): def test_driver_properties_ilo_iscsi(self): expected = ['ilo_address', 'ilo_username', 'ilo_password', 'client_port', 'client_timeout', 'ilo_deploy_iso', - 'console_port', 'ilo_change_password'] + 'console_port', 'ilo_change_password', + 'deploy_forces_oob_reboot'] self._check_driver_properties("iscsi_ilo", expected) def test_driver_properties_agent_ilo(self): expected = ['ilo_address', 'ilo_username', 'ilo_password', 'client_port', 'client_timeout', 'ilo_deploy_iso', - 'console_port', 'ilo_change_password'] + 'console_port', 'ilo_change_password', + 'deploy_forces_oob_reboot'] self._check_driver_properties("agent_ilo", expected) def test_driver_properties_fail(self): diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py index b4c1ded179..29f4ee94b4 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py @@ -684,6 +684,60 @@ class TestBaseAgentVendor(db_base.DbTestCase): self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'sync', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_power_action_oob_power_off( + self, sync_mock, node_power_action_mock): + # Enable force power off + driver_info = self.node.driver_info + driver_info['deploy_forces_oob_reboot'] = True + self.node.driver_info = driver_info + + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + self.passthru.reboot_and_finish_deploy(task) + + sync_mock.assert_called_once_with(task.node) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + + @mock.patch.object(agent_base_vendor.LOG, 'warning', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'sync', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_power_action_oob_power_off_failed( + self, sync_mock, node_power_action_mock, log_mock): + # Enable force power off + driver_info = self.node.driver_info + driver_info['deploy_forces_oob_reboot'] = True + self.node.driver_info = driver_info + + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + sync_mock.return_value = {'faultstring': 'Unknown command: blah'} + self.passthru.reboot_and_finish_deploy(task) + + sync_mock.assert_called_once_with(task.node) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + log_error = ('The version of the IPA ramdisk used in the ' + 'deployment do not support the command "sync"') + log_mock.assert_called_once_with( + 'Failed to flush the file system prior to hard rebooting the ' + 'node %(node)s. Error: %(error)s', + {'node': task.node.uuid, 'error': log_error}) + @mock.patch.object(agent_client.AgentClient, 'install_bootloader', autospec=True) @mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True) @@ -1276,3 +1330,7 @@ class TestRefreshCleanSteps(TestBaseAgentVendor): task) client_mock.assert_called_once_with(mock.ANY, task.node, task.ports) + + def test_get_properties(self): + expected = agent_base_vendor.VENDOR_PROPERTIES + self.assertEqual(expected, self.passthru.get_properties()) diff --git a/ironic/tests/unit/drivers/modules/test_agent_client.py b/ironic/tests/unit/drivers/modules/test_agent_client.py index fe5be2dba8..aead8915a7 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_client.py +++ b/ironic/tests/unit/drivers/modules/test_agent_client.py @@ -236,3 +236,9 @@ class TestAgentClient(base.TestCase): self.client.power_off(self.node) self.client._command.assert_called_once_with( node=self.node, method='standby.power_off', params={}) + + def test_sync(self): + self.client._command = mock.MagicMock(spec_set=[]) + self.client.sync(self.node) + self.client._command.assert_called_once_with( + node=self.node, method='standby.sync', params={}, wait=True) diff --git a/ironic/tests/unit/drivers/modules/test_pxe.py b/ironic/tests/unit/drivers/modules/test_pxe.py index 7456262400..9e023ea182 100644 --- a/ironic/tests/unit/drivers/modules/test_pxe.py +++ b/ironic/tests/unit/drivers/modules/test_pxe.py @@ -32,6 +32,7 @@ from ironic.common.glance_service import base_image_service from ironic.common import pxe_utils from ironic.common import states from ironic.conductor import task_manager +from ironic.drivers.modules import agent_base_vendor from ironic.drivers.modules import deploy_utils from ironic.drivers.modules import pxe from ironic.tests.unit.conductor import mgr_utils @@ -549,6 +550,7 @@ class PXEBootTestCase(db_base.DbTestCase): def test_get_properties(self): expected = pxe.COMMON_PROPERTIES + expected.update(agent_base_vendor.VENDOR_PROPERTIES) with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: self.assertEqual(expected, task.driver.get_properties()) diff --git a/releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml b/releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml new file mode 100644 index 0000000000..ddbbf152a1 --- /dev/null +++ b/releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - Fixes a problem where some hardware/firmware (specially faulty ones) + won't come back online after an in-band ACPI soft power off by adding + a new driver property called "deploy_forces_oob_reboot" that can be set + to the nodes being deployed by the IPA ramdisk. If the value of this + property is True, Ironic will power cycle the node via out-of-band.