diff --git a/doc/source/admin/cleaning.rst b/doc/source/admin/cleaning.rst index 04e84617d1..63aa294439 100644 --- a/doc/source/admin/cleaning.rst +++ b/doc/source/admin/cleaning.rst @@ -478,6 +478,23 @@ power will be turned off via the management interface. Afterwards, the the ``ironic-python-agent`` running on any child nodes. This constraint may be changed in the future. +Power Management with Child Nodes +--------------------------------- + +The mix of child nodes and parent nodes has special power considerations, +and these devices are evolving in the industry. That being said, the Ironic +project has taken an approach of explicitly attempting to "power on" any +parent node when a request comes in to "power on" a child node. This can be +bypassed by setting a ``driver_info`` parameter ``has_dedicated_power_supply`` +set to ``True``, in recognition that some hardware vendors are working on +supplying independent power to these classes of devices to meet their customer +use cases. + +Similarly to the case of a "power on" request for a child node, when power +is requested to be turned off for a "parent node", Ironic will issue +"power off" commands for all child nodes unless the child node has the +``has_dedicated_power_supply`` option set in the node's ``driver_info`` field. + Troubleshooting =============== If cleaning fails on a node, the node will be put into ``clean failed`` state. diff --git a/doc/source/admin/steps.rst b/doc/source/admin/steps.rst index 4bd270a141..86f5664273 100644 --- a/doc/source/admin/steps.rst +++ b/doc/source/admin/steps.rst @@ -99,3 +99,31 @@ a ``clean hold`` state. Once you have completed whatever action which needed to be performed while the node was in a held state, you will need to issue an unhold provision state command, via the API or command line to inform the node to proceed. + +Set the environment +=================== + +When using steps with the functionality to execute on child nodes, +i.e. nodes who a populated ``parent_node`` field, you always want to +ensure you have set the environment appropriately for your next action. + +For example, if you are executing steps against a parent node, which then +execute against a child node via the ``execute_on_child_nodes`` step option, +and it requires power to be on, you will want to explicitly +ensure the power is on for the parent node **unless** the child node can +operate independently, as signaled through the driver_info option +``has_dedicated_power_supply`` on the child node. Power is an obvious +case because Ironic has guarding logic internally to attempt to power-on the +parent node, but it cannot be an after thought due to internal task locking. + +Power specifically aside, the general principle applies to the execution +of all steps. You need always want to build upon the prior step or existing +existing known state of the system. + +.. NOTE:: + Ironic will attempt to ensure power is active for a ``parent_node`` when + powering on a child node. Conversely, Ironic will also attempt to power + down child nodes if a parent node is requested to be turned off, unless + the ``has_dedicated_power_supply`` option is set for the child node. + This pattern of behavior prevents parent nodes from being automatically + powered back on should a child node be left online. diff --git a/ironic/common/exception.py b/ironic/common/exception.py index 6ff4494ce8..9080000ae7 100644 --- a/ironic/common/exception.py +++ b/ironic/common/exception.py @@ -931,3 +931,17 @@ class ImageChecksumFileReadFailure(InvalidImage): _msg_fmt = _("Failed to read the file from local storage " "to perform a checksum operation.") code = http_client.SERVICE_UNAVAILABLE + + +class ParentNodeLocked(Conflict): + _msg_fmt = _("Node %(node)s parent_node %(parent)s is presently locked " + "and we are unable to perform any action on it at this " + "time. Please retry after the current operation is " + "completed.") + + +class ChildNodeLocked(Conflict): + _msg_fmt = _("Node %(node)s child_node %(child)s is presently locked " + "and we are unable to perform any action on it at this " + "time. Please retry after the current operation is " + "completed.") diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 65d3a424c7..2cbf99ca80 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -290,7 +290,12 @@ def node_power_action(task, new_state, timeout=None): storage interface upon setting power on. :raises: other exceptions by the node's power driver if something wrong occurred during the power action. - + :raises: ChildNodeLocked when a child node must be acted upon due + to this request, but we cannot act upon it due to a lock + being held. + :raises: ParentNodeLocked when a parent node must be acted upon + due to this request, but we cannot act upon it due to a + lock being held. """ notify_utils.emit_power_set_notification( task, fields.NotificationLevel.INFO, fields.NotificationStatus.START, @@ -313,6 +318,15 @@ def node_power_action(task, new_state, timeout=None): node.save() return + # Parent node power required? + if task.node.parent_node: + # If we have a parent node defined for the device, we likely + # need to turn the power on. + parent_power_needed = not task.node.driver_info.get( + 'has_dedicated_power_supply', False) + else: + parent_power_needed = False + # Set the target_power_state and clear any last_error, if we're # starting a new operation. This will expose to other processes # and clients that work is in progress. Keep the last_error intact @@ -336,6 +350,12 @@ def node_power_action(task, new_state, timeout=None): task.driver.storage.attach_volumes(task) if new_state != states.REBOOT: + if new_state == states.POWER_ON and parent_power_needed: + _handle_child_power_on(task, target_state, timeout) + if new_state == states.POWER_OFF: + _handle_child_power_off(task, target_state, timeout) + # Parent/Child dependencies handled, we can take care of + # the original node now. task.driver.power.set_power_state(task, new_state, timeout=timeout) else: # TODO(TheJulia): We likely ought to consider toggling @@ -384,6 +404,68 @@ def node_power_action(task, new_state, timeout=None): {'node': node.uuid, 'error': e}) +def _handle_child_power_on(task, target_state, timeout): + """Actions related to powering on a parent for a child node.""" + with task_manager.acquire( + task.context, task.node.parent_node, + shared=True, + purpose='power on parent node') as pn_task: + if (pn_task.driver.power.get_power_state(pn_task) + != states.POWER_ON): + try: + pn_task.upgrade_lock() + except exception.NodeLocked as e: + LOG.error('Cannot power on parent_node %(pn)s ' + 'as it is locked by %(host)s. We are ' + 'unable to proceed with the current ' + 'operation for child node %(node)s. ' + 'Error: %(error)s', + {'pn': pn_task.node.uuid, + 'host': pn_task.node.reservation, + 'node': task.node.uuid, + 'error': e}) + raise exception.ParentNodeLocked( + node=task.node.uuid, parent=pn_task.node.uuid) + node_power_action(pn_task, target_state, timeout) + + +def _handle_child_power_off(task, target_state, timeout): + """Actions related to powering off child nodes.""" + child_nodes = task.node.list_child_node_ids(exclude_dedicated_power=True) + cn_tasks = [] + try: + for child in child_nodes: + # Get the task for the node, check the status, add + # the list of locks to act upon. + cn_task = task_manager.acquire( + task.context, child, shared=True, + purpose='child node power off') + if (cn_task.driver.power.get_power_state(cn_task) + != states.POWER_OFF): + cn_task.upgrade_lock() + cn_tasks.append(cn_task) + for cn_task in cn_tasks: + node_power_action(cn_task, target_state, timeout) + except exception.NodeLocked as e: + LOG.error( + 'Cannot power off child node %(cn)s ' + 'as it is locked by %(host)s. We are ' + 'unable to proceed with the current ' + 'operation for parent node %(node)s. ' + 'Error: %(error)s', + {'cn': cn_task.node.uuid, + 'host': cn_task.node.reservation, + 'node': task.node.uuid, + 'error': e}) + raise exception.ChildNodeLocked( + node=task.node.uuid, child=cn_task.node.uuid) + finally: + for cn_task in cn_tasks: + # We don't need to do anything else with the child node, + # and we need to release the child task's lock. + cn_task.release_resources() + + @task_manager.require_exclusive_lock def cleanup_after_timeout(task): """Cleanup deploy task after timeout. diff --git a/ironic/db/api.py b/ironic/db/api.py index b6f38a95d3..571c41bc01 100644 --- a/ironic/db/api.py +++ b/ironic/db/api.py @@ -1616,3 +1616,21 @@ class Connection(object, metaclass=abc.ABCMeta): :returns: A list of FirmwareComponent objects. :raises: NodeNotFound if the node is not found. """ + + @abc.abstractclassmethod + def get_child_node_ids_by_parent_uuid(self, node_uuid, + exclude_dedicated_power=False): + """Retrieve a list of child node IDs for a given parent UUID. + + This is an "internal" method, intended for use with power + management logic to facilitate power actions upon nodes, + where we obtain a list of nodes which requires additional + actions, and return *only* the required node IDs in order + to launch new tasks for power management activities. + + :param node_uuid: The uuid of the parent node, in order to + directly match the "parent_node" field. + :param exclude_dedicated_power: Boolean, False, if the list + should include child nodes with their own power supplies. + :returns: A list of tuples. + """ diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py index 6375790f7d..12bb65dc32 100644 --- a/ironic/db/sqlalchemy/api.py +++ b/ironic/db/sqlalchemy/api.py @@ -3148,3 +3148,24 @@ class Connection(api.Connection): .filter_by(node_id=node_id) .all()) return result + + def get_child_node_ids_by_parent_uuid( + self, node_uuid, exclude_dedicated_power=False): + with _session_for_read() as session: + nodes = [] + res = session.execute(sa.select( + models.Node.id, + models.Node.driver_info + ).where(models.Node.parent_node == node_uuid)).all() + for r in res: + # NOTE(TheJulia): Dtantsur has noted we don't typically + # evaluate content data inside of the db api, and that + # we typically have the caller do it. We should likely + # refactor this, at some point for consistency if + # we're okay with the likely additional overhead + # to post filter an an initial result set. + if (exclude_dedicated_power + and r[1].get('has_dedicated_power_supply')): + continue + nodes.append(r[0]) + return nodes diff --git a/ironic/objects/node.py b/ironic/objects/node.py index 3a26e97ab2..19f0105484 100644 --- a/ironic/objects/node.py +++ b/ironic/objects/node.py @@ -773,6 +773,17 @@ class Node(base.IronicObject, object_base.VersionedObjectDictCompat): self.properties[key] = value self._changed_fields.add('properties') + def list_child_node_ids(self, exclude_dedicated_power=False): + """Returns a list of node IDs for child nodes, if any. + + :param exclude_dedicated_power: Boolean, Default False, if the list + should exclude nodes which are independently powered. + :returns: A list of any node_id values discovered in the database. + """ + return self.dbapi.get_child_node_ids_by_parent_uuid( + self.uuid, + exclude_dedicated_power=exclude_dedicated_power) + @base.IronicObjectRegistry.register class NodePayload(notification.NotificationPayloadBase): diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py index a963536ec1..1906ce430b 100644 --- a/ironic/tests/unit/conductor/test_cleaning.py +++ b/ironic/tests/unit/conductor/test_cleaning.py @@ -1289,8 +1289,8 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): def setUp(self): super(DoNodeCleanTestChildNodes, self).setUp() self.config(automated_clean=True, group='conductor') - self.power_off_parent = { - 'step': 'power_off', 'priority': 4, 'interface': 'power'} + self.power_on_parent = { + 'step': 'power_on', 'priority': 4, 'interface': 'power'} self.power_on_children = { 'step': 'power_on', 'priority': 5, 'interface': 'power', 'execute_on_child_nodes': True} @@ -1303,7 +1303,7 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): self.power_on_parent = { 'step': 'power_on', 'priority': 15, 'interface': 'power'} self.clean_steps = [ - self.power_off_parent, + self.power_on_parent, self.power_on_children, self.update_firmware_on_children, self.reboot_children, @@ -1317,6 +1317,8 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): driver_internal_info={'agent_secret_token': 'old', 'clean_steps': self.clean_steps}) + @mock.patch('ironic.drivers.modules.fake.FakePower.get_power_state', + autospec=True) @mock.patch('ironic.drivers.modules.fake.FakePower.reboot', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakePower.set_power_state', @@ -1333,7 +1335,7 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): autospec=True) def test_do_next_clean_step_with_children( self, mock_deploy, mock_mgmt, mock_power, mock_pv, mock_nv, - mock_sps, mock_reboot): + mock_sps, mock_reboot, mock_gps): child_node1 = obj_utils.create_test_node( self.context, uuid=uuidutils.generate_uuid(), @@ -1348,7 +1350,12 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): last_error=None, power_state=states.POWER_OFF, parent_node=self.node.uuid) - + mock_gps.side_effect = [states.POWER_OFF, + states.POWER_ON, + states.POWER_OFF, + states.POWER_ON, + states.POWER_OFF, + states.POWER_ON] mock_deploy.return_value = None mock_mgmt.return_value = None mock_power.return_value = None @@ -1380,13 +1387,15 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): self.assertEqual(0, mock_power.call_count) self.assertEqual(0, mock_nv.call_count) self.assertEqual(0, mock_pv.call_count) - self.assertEqual(4, mock_sps.call_count) + self.assertEqual(3, mock_sps.call_count) self.assertEqual(2, mock_reboot.call_count) mock_sps.assert_has_calls([ - mock.call(mock.ANY, mock.ANY, 'power off', timeout=None), + mock.call(mock.ANY, mock.ANY, 'power on', timeout=None), mock.call(mock.ANY, mock.ANY, 'power on', timeout=None), mock.call(mock.ANY, mock.ANY, 'power on', timeout=None)]) + @mock.patch('ironic.drivers.modules.fake.FakePower.get_power_state', + autospec=True) @mock.patch('ironic.drivers.modules.fake.FakePower.set_power_state', autospec=True) @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate', @@ -1401,7 +1410,7 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): autospec=True) def test_do_next_clean_step_with_children_by_uuid( self, mock_deploy, mock_mgmt, mock_power, mock_pv, mock_nv, - mock_sps): + mock_sps, mock_gps): child_node1 = obj_utils.create_test_node( self.context, uuid=uuidutils.generate_uuid(), @@ -1414,6 +1423,10 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): driver='fake-hardware', last_error=None, parent_node=self.node.uuid) + # NOTE(TheJulia): This sets the stage for the test, + # child node power is off, parent node power is on. + mock_gps.side_effect = [states.POWER_OFF, + states.POWER_ON] power_on_children = { 'step': 'power_on', 'priority': 5, 'interface': 'power', 'execute_on_child_nodes': True, diff --git a/ironic/tests/unit/conductor/test_servicing.py b/ironic/tests/unit/conductor/test_servicing.py index 4604a9ad82..81e0d5d7e5 100644 --- a/ironic/tests/unit/conductor/test_servicing.py +++ b/ironic/tests/unit/conductor/test_servicing.py @@ -1006,8 +1006,8 @@ class DoNodeServiceAbortTestCase(db_base.DbTestCase): class DoNodeCleanTestChildNodes(db_base.DbTestCase): def setUp(self): super(DoNodeCleanTestChildNodes, self).setUp() - self.power_off_parent = { - 'step': 'power_off', 'priority': 4, 'interface': 'power'} + self.power_on_parent = { + 'step': 'power_on', 'priority': 4, 'interface': 'power'} self.power_on_children = { 'step': 'power_on', 'priority': 5, 'interface': 'power', 'execute_on_child_nodes': True} @@ -1017,14 +1017,15 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): self.reboot_children = { 'step': 'reboot', 'priority': 5, 'interface': 'power', 'execute_on_child_nodes': True} - self.power_on_parent = { - 'step': 'power_on', 'priority': 15, 'interface': 'power'} + self.power_off_children = { + 'step': 'power_off', 'priority': 15, 'interface': 'power', + 'execute_on_child_nodes': True} self.service_steps = [ - self.power_off_parent, + self.power_on_parent, self.power_on_children, self.update_firmware_on_children, self.reboot_children, - self.power_on_parent] + self.power_off_children] self.node = obj_utils.create_test_node( self.context, driver='fake-hardware', provision_state=states.SERVICING, @@ -1034,6 +1035,8 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): driver_internal_info={'agent_secret_token': 'old', 'service_steps': self.service_steps}) + @mock.patch('ironic.drivers.modules.fake.FakePower.get_power_state', + autospec=True) @mock.patch('ironic.drivers.modules.fake.FakePower.reboot', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakePower.set_power_state', @@ -1050,7 +1053,7 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): autospec=True) def test_do_next_clean_step_with_children( self, mock_deploy, mock_mgmt, mock_power, mock_pv, mock_nv, - mock_sps, mock_reboot): + mock_sps, mock_reboot, mock_gps): child_node1 = obj_utils.create_test_node( self.context, uuid=uuidutils.generate_uuid(), @@ -1066,6 +1069,13 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): power_state=states.POWER_OFF, parent_node=self.node.uuid) + mock_gps.side_effect = [ + states.POWER_OFF, + states.POWER_ON, + states.POWER_OFF, + states.POWER_ON, + states.POWER_OFF, + states.POWER_ON] mock_deploy.return_value = None mock_mgmt.return_value = None mock_power.return_value = None @@ -1097,12 +1107,12 @@ class DoNodeCleanTestChildNodes(db_base.DbTestCase): self.assertEqual(0, mock_power.call_count) self.assertEqual(0, mock_nv.call_count) self.assertEqual(0, mock_pv.call_count) - self.assertEqual(4, mock_sps.call_count) + self.assertEqual(3, mock_sps.call_count) self.assertEqual(2, mock_reboot.call_count) mock_sps.assert_has_calls([ - mock.call(mock.ANY, mock.ANY, 'power off', timeout=None), mock.call(mock.ANY, mock.ANY, 'power on', timeout=None), - mock.call(mock.ANY, mock.ANY, 'power on', timeout=None)]) + mock.call(mock.ANY, mock.ANY, 'power on', timeout=None), + mock.call(mock.ANY, mock.ANY, 'power off', timeout=None)]) @mock.patch('ironic.drivers.modules.fake.FakePower.set_power_state', autospec=True) diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index 798a29bc66..2cd80b591a 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -774,6 +774,238 @@ class NodePowerActionTestCase(db_base.DbTestCase): self.assertIsNone(node['target_power_state']) self.assertIsNone(node['last_error']) + @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True) + def test_node_power_action_power_on_with_parent(self, get_power_mock): + """Test node_power_action to turns on a parent node""" + parent = obj_utils.create_test_node(self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_OFF) + + node = obj_utils.create_test_node(self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_OFF, + last_error='failed before', + parent_node=parent.uuid) + task = task_manager.TaskManager(self.context, node.uuid) + + get_power_mock.side_effect = states.POWER_OFF + + conductor_utils.node_power_action(task, states.POWER_ON) + + node.refresh() + parent.refresh() + # NOTE(TheJulia): We independently check power state + # in this code path so we end up with 3 calls, parent, + # parent as part of node_power_action, and then child. + self.assertEqual(3, get_power_mock.call_count) + self.assertEqual(states.POWER_ON, parent['power_state']) + self.assertEqual(states.POWER_ON, node['power_state']) + self.assertIsNone(parent['target_power_state']) + self.assertIsNone(parent['last_error']) + self.assertIsNone(node['target_power_state']) + self.assertIsNone(node['last_error']) + + @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True) + def test_node_power_action_power_on_parent_off(self, get_power_mock): + """Test node_power_action to turns on a parent node""" + parent = obj_utils.create_test_node( + self.context, + name="parent_node", + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_OFF) + + node = obj_utils.create_test_node( + self.context, + name='child_node', + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_OFF, + last_error='failed before', + parent_node=parent.uuid, + driver_info={'has_dedicated_power_supply': True}) + + task = task_manager.TaskManager(self.context, node.uuid) + + get_power_mock.side_effect = states.POWER_OFF + + conductor_utils.node_power_action(task, states.POWER_ON) + + node.refresh() + parent.refresh() + self.assertEqual(1, get_power_mock.call_count) + self.assertEqual(states.POWER_OFF, parent['power_state']) + self.assertEqual(states.POWER_ON, node['power_state']) + self.assertIsNone(parent['target_power_state']) + self.assertIsNone(parent['last_error']) + self.assertIsNone(node['target_power_state']) + self.assertIsNone(node['last_error']) + + @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True) + def test_node_power_action_power_off_parent(self, get_power_mock): + """Test node_power_action to turns on a parent node""" + parent = obj_utils.create_test_node( + self.context, + name="parent", + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_ON) + + node = obj_utils.create_test_node( + self.context, + name="child", + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_ON, + last_error='failed before', + parent_node=parent.uuid, + driver_info={'has_dedicated_power_supply': False}) + + task = task_manager.TaskManager(self.context, parent.uuid) + + get_power_mock.side_effect = states.POWER_ON + + conductor_utils.node_power_action(task, states.POWER_OFF) + + node.refresh() + parent.refresh() + self.assertEqual(3, get_power_mock.call_count) + self.assertEqual(states.POWER_OFF, parent['power_state']) + self.assertEqual(states.POWER_OFF, node['power_state']) + self.assertIsNone(parent['target_power_state']) + self.assertIsNone(parent['last_error']) + self.assertIsNone(node['target_power_state']) + self.assertIsNone(node['last_error']) + + @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True) + def test_node_power_action_power_off_parent_child_remains( + self, get_power_mock): + """Test node_power_action to turns on a parent node""" + parent = obj_utils.create_test_node( + self.context, + name="parent", + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_ON) + + node = obj_utils.create_test_node( + self.context, + name="child", + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_ON, + last_error='failed before', + parent_node=parent.uuid, + driver_info={'has_dedicated_power_supply': True}) + + task = task_manager.TaskManager(self.context, parent.uuid) + + get_power_mock.side_effect = states.POWER_ON + + conductor_utils.node_power_action(task, states.POWER_OFF) + + node.refresh() + parent.refresh() + self.assertEqual(1, get_power_mock.call_count) + self.assertEqual(states.POWER_OFF, parent['power_state']) + self.assertEqual(states.POWER_ON, node['power_state']) + self.assertIsNone(parent['target_power_state']) + self.assertIsNone(parent['last_error']) + self.assertIsNone(node['target_power_state']) + self.assertIsNotNone(node['last_error']) + + @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True) + def test_node_power_action_power_on_exception_if_parent_locked( + self, + get_power_mock): + """Test node_power_action to turns on a parent node""" + parent = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_OFF, + reservation='meow') + + node = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_OFF, + last_error='failed before', + parent_node=parent.uuid) + + task = task_manager.TaskManager(self.context, node.uuid) + + get_power_mock.side_effect = states.POWER_OFF + + self.assertRaises(exception.ParentNodeLocked, + conductor_utils.node_power_action, + task, states.POWER_ON) + node.refresh() + parent.refresh() + self.assertEqual(2, get_power_mock.call_count) + self.assertEqual(states.POWER_OFF, parent['power_state']) + self.assertEqual(states.POWER_OFF, node['power_state']) + self.assertIsNone(parent['target_power_state']) + self.assertIsNone(parent['last_error']) + self.assertIn('is presently locked', node['last_error']) + self.assertIsNone(node['target_power_state']) + + @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True) + def test_node_power_action_power_off_exception_if_child_locked( + self, + get_power_mock): + """Test node_power_action to turns on a parent node""" + parent = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_ON) + + node = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_ON, + last_error='failed before', + parent_node=parent.uuid) + + locked_node = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + power_state=states.POWER_ON, + last_error='failed before', + parent_node=parent.uuid, + reservation='foo') + + task = task_manager.TaskManager(self.context, parent.uuid) + + get_power_mock.side_effect = states.POWER_ON + + self.assertRaises(exception.ChildNodeLocked, + conductor_utils.node_power_action, + task, states.POWER_OFF) + # Since we launched the task this way, we need to explicitly + # release it. + task.release_resources() + node.refresh() + parent.refresh() + self.assertEqual(3, get_power_mock.call_count) + self.assertEqual(states.POWER_ON, parent['power_state']) + self.assertEqual(states.POWER_ON, locked_node['power_state']) + self.assertEqual(states.POWER_ON, node['power_state']) + self.assertIsNone(parent['target_power_state']) + self.assertEqual('failed before', node['last_error']) + self.assertIn('is presently locked', parent['last_error']) + self.assertIsNone(node['target_power_state']) + self.assertIsNone(parent['target_power_state']) + self.assertIsNone(locked_node['target_power_state']) + self.assertIsNone(node['reservation']) + self.assertIsNone(parent['reservation']) + def test__calculate_target_state(self): for new_state in (states.POWER_ON, states.REBOOT, states.SOFT_REBOOT): self.assertEqual( diff --git a/releasenotes/notes/handle-power-ops-for-child-nodes-67a11f1900ce137a.yaml b/releasenotes/notes/handle-power-ops-for-child-nodes-67a11f1900ce137a.yaml new file mode 100644 index 0000000000..6ef2921a63 --- /dev/null +++ b/releasenotes/notes/handle-power-ops-for-child-nodes-67a11f1900ce137a.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + Fixes the power handling flow as it relates to ``child nodes``, i.e. + bare metal nodes which have a ``parent_node`` set, such that power is + turned off on those nodes when the parent node is powered off, and that + power is turned on for the parent node when the child node is explicitly + requested to be in a ``power on`` state. This does not apply if the child + node device has a dedicated power supply, as indicated through a + ``driver_info`` parameter named ``has_dedicated_power_supply`` which + can be set to a value of "true".