diff --git a/doc/source/admin/cleaning.rst b/doc/source/admin/cleaning.rst index d800be5014..a6f195fc1f 100644 --- a/doc/source/admin/cleaning.rst +++ b/doc/source/admin/cleaning.rst @@ -395,6 +395,55 @@ interrupted, such as BIOS or Firmware updates. As a result, operators are forbidden from changing power state via the ironic API while a node is cleaning. +Advanced topics +=============== + +Parent Nodes +------------ + +The concept of a ``parent_node`` is where a node is configured to have a +"parent", and allows for actions upon the parent, to in some cases take into +account child nodes. Mainly, the concept of executing clean steps in relation +to child nodes. + +In this context, a child node is primarily intended to be an embedded device +with it's own management controller. For example "SmartNIC's" or Data +Processing Units (DPUs) which may have their own management controller and +power control. + +Child Node Clean Step Execution +------------------------------- + +You can execute steps which perform actions on child nodes. For example, +turn them on (via step ``power_on``), off (via step ``power_off``), or to +signal a BMC controlled reboot (via step ``reboot``). + +For example, if you need to explicitly power off child node power, before +performing another step, you can articulate it with a step such as:: + + [{ + "interface": "power", + "step": "power_off", + "execute_on_child_nodes": True, + "limit_child_node_execution": ['f96c8601-0a62-4e99-97d6-1e0d8daf6dce'] + }, + { + "interface": "deploy", + "step": "erase_devices" + }] + +As one would imagine, this step will power off a singular child node, as +a limit has been expressed to a singular known node, and that child node's +power will be turned off via the management interface. Afterwards, the +``erase_devices`` step will be executed on the parent node. + +.. NOTE:: + While the deployment step framework also supports the + ``execute_on_child_nodes`` and ``limit_child_node_execution`` parameters, + all of the step frameworks have a fundamental limitation in that child node + step execution is indended for syncronous actions which do not rely upon + the ``ironic-python-agent`` running on any child nodes. This constraint may + be changed in the future. Troubleshooting =============== diff --git a/ironic/api/controllers/v1/node.py b/ironic/api/controllers/v1/node.py index 7c6d1745e0..1a779b8384 100644 --- a/ironic/api/controllers/v1/node.py +++ b/ironic/api/controllers/v1/node.py @@ -80,6 +80,16 @@ _CLEAN_STEPS_SCHEMA = { "type": "object", "properties": {} }, + "execute_on_child_nodes": { + "description": "Boolean if the step should be executed " + "on child nodes.", + "type": "boolean", + }, + "limit_child_node_execution": { + "description": "List of nodes upon which to execute child " + "node steps on.", + "type": "array", + } }, # interface, step and args are the only expected keys "additionalProperties": False @@ -1166,6 +1176,25 @@ def _check_steps(steps, step_type, schema): except jsonschema.ValidationError as exc: raise exception.InvalidParameterValue(_('Invalid %s_steps: %s') % (step_type, exc)) + for step in steps: + eocn = step.get('execute_on_child_nodes') + child_nodes = step.get('limit_child_node_execution') + if eocn and type(child_nodes) == list: + # Extract each element, validate permission to access node. + for entry in child_nodes: + if not uuidutils.is_uuid_like(entry): + raise exception.InvalidParameterValue(_( + 'Invalid entry detected for execute_on_child_node ' + 'parameter in step %s.' % entry.get('step'))) + api_utils.check_node_policy_and_retrieve( + 'baremetal:node:set_provision_state', entry) + elif eocn: + # Validate looks like a bool, if not raise exception + if not strutils.is_valid_boolstr(eocn): + raise exception.InvalidParameterValue(_( + 'Invalid entry detected for execute_on_child_node ' + 'parameter in step %s. A boolean or a UUID value is ' + 'expected.' % step.get('step'))) def _get_chassis_uuid(node): diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py index 9e4edb809a..1472151a79 100644 --- a/ironic/conductor/cleaning.py +++ b/ironic/conductor/cleaning.py @@ -22,6 +22,7 @@ from ironic.conductor import task_manager from ironic.conductor import utils from ironic.conf import CONF from ironic.drivers import utils as driver_utils +from ironic import objects LOG = log.getLogger(__name__) @@ -166,11 +167,21 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None): node.clean_step = step node.set_driver_internal_info('clean_step_index', step_index + ind) node.save() - interface = getattr(task.driver, step.get('interface')) - LOG.info('Executing %(step)s on node %(node)s', - {'step': step, 'node': node.uuid}) + eocn = step.get('execute_on_child_nodes', False) + result = None try: - result = interface.execute_clean_step(task, step) + if not eocn: + interface = getattr(task.driver, step.get('interface')) + LOG.info('Executing %(step)s on node %(node)s', + {'step': step, 'node': node.uuid}) + if not conductor_steps.use_reserved_step_handler(task, step): + result = interface.execute_clean_step(task, step) + else: + LOG.info('Executing %(step)s on child nodes for node ' + '%(node)s.', + {'step': step, 'node': node.uuid}) + result = execute_step_on_child_nodes(task, step) + except Exception as e: if isinstance(e, exception.AgentConnectionFailed): if task.node.driver_internal_info.get('cleaning_reboot'): @@ -241,13 +252,69 @@ def do_next_clean_step(task, step_index, disable_ramdisk=None): return utils.cleaning_error_handler(task, msg, traceback=True, tear_down_cleaning=False) - LOG.info('Node %s cleaning complete', node.uuid) event = 'manage' if manual_clean or node.retired else 'done' # NOTE(rloo): No need to specify target prov. state; we're done task.process_event(event) +def execute_step_on_child_nodes(task, step): + """Execute a requested step against a child node. + + :param task: The TaskManager object for the parent node. + :param step: The requested step to be executed. + :returns: None on Success, the resulting error message if a + failure has occured. + """ + # NOTE(TheJulia): We could just use nodeinfo list calls against + # dbapi. + # NOTE(TheJulia): We validate the data in advance in the API + # with the original request context. + eocn = step.get('execute_on_child_nodes') + child_nodes = step.get('limit_child_node_execution', []) + filters = {'parent_node': task.node.uuid} + if eocn and len(child_nodes) >= 1: + filters['uuid_in'] = child_nodes + child_nodes = objects.Node.list( + task.context, + filters=filters, + fields=['uuid'] + ) + for child_node in child_nodes: + result = None + LOG.info('Executing step %(step)s on child node %(node)s for parent ' + 'node %(parent_node)s', + {'step': step, + 'node': child_node.uuid, + 'parent_node': task.node.uuid}) + with task_manager.acquire(task.context, + child_node.uuid, + purpose='execute step') as child_task: + interface = getattr(child_task.driver, step.get('interface')) + LOG.info('Executing %(step)s on node %(node)s', + {'step': step, 'node': child_task.node.uuid}) + if not conductor_steps.use_reserved_step_handler(child_task, step): + result = interface.execute_clean_step(child_task, step) + if result is not None: + if (result == states.CLEANWAIT + and CONF.conductor.permit_child_node_step_async_result): + # Operator has chosen to permit this due to some reason + # NOTE(TheJulia): This is where we would likely wire agent + # error handling if we ever implicitly allowed child node + # deploys to take place with the agent from a parent node + # being deployed. + continue + msg = (_('While executing step %(step)s on child node ' + '%(node)s, step returned invalid value: %(val)s') + % {'step': step, 'node': child_task.node.uuid, + 'val': result}) + LOG.error(msg) + # Only None or states.CLEANWAIT are possible paths forward + # in the parent step execution code, so returning the message + # means it will be logged. + return msg + + def get_last_error(node): last_error = _('By request, the clean operation was aborted') if node.clean_step: diff --git a/ironic/conductor/deployments.py b/ironic/conductor/deployments.py index 7e2c4e489a..3ffb54c39f 100644 --- a/ironic/conductor/deployments.py +++ b/ironic/conductor/deployments.py @@ -30,6 +30,7 @@ from ironic.conductor import task_manager from ironic.conductor import utils from ironic.conf import CONF from ironic.objects import fields +from ironic.objects import Node LOG = log.getLogger(__name__) @@ -269,11 +270,21 @@ def do_next_deploy_step(task, step_index): node.deploy_step = step node.set_driver_internal_info('deploy_step_index', idx) node.save() - interface = getattr(task.driver, step.get('interface')) - LOG.info('Executing %(step)s on node %(node)s', - {'step': step, 'node': node.uuid}) + + child_node_execution = step.get('execute_on_child_nodes', False) + result = None try: - result = interface.execute_deploy_step(task, step) + if not child_node_execution: + interface = getattr(task.driver, step.get('interface')) + LOG.info('Executing %(step)s on node %(node)s', + {'step': step, 'node': node.uuid}) + if not conductor_steps.use_reserved_step_handler(task, step): + result = interface.execute_deploy_step(task, step) + else: + LOG.info('Executing %(step)s on child nodes for node ' + '%(node)s', + {'step': step, 'node': node.uuid}) + result = execute_step_on_child_nodes(task, step) except exception.AgentInProgress as e: LOG.info('Conductor attempted to process deploy step for ' 'node %(node)s. Agent indicated it is presently ' @@ -490,3 +501,61 @@ def _start_console_in_deploy(task): else: notify_utils.emit_console_notification( task, 'console_restore', fields.NotificationStatus.END) + + +def execute_step_on_child_nodes(task, step): + """Execute a requested step against a child node. + + :param task: The TaskManager object for the parent node. + :param step: The requested step to be executed. + :returns: None on Success, the resulting error message if a + failure has occured. + """ + # NOTE(TheJulia): We could just use nodeinfo list calls against + # dbapi. + # NOTE(TheJulia): We validate the data in advance in the API + # with the original request context. + eocn = step.get('execute_on_child_nodes') + child_nodes = step.get('limit_child_node_execution', []) + filters = {'parent_node': task.node.uuid} + if eocn and len(child_nodes) >= 1: + filters['uuid_in'] = child_nodes + + child_nodes = Node.list( + task.context, + filters=filters, + fields=['uuid'] + ) + for child_node in child_nodes: + result = None + LOG.info('Executing step %(step)s on child node %(node)s for parent ' + 'node %(parent_node)s', + {'step': step, + 'node': child_node.uuid, + 'parent_node': task.node.uuid}) + with task_manager.acquire(task.context, + child_node.uuid, + purpose='execute step') as child_task: + interface = getattr(child_task.driver, step.get('interface')) + LOG.info('Executing %(step)s on node %(node)s', + {'step': step, 'node': child_task.node.uuid}) + if not conductor_steps.use_reserved_step_handler(child_task, step): + result = interface.execute_clean_step(child_task, step) + if result is not None: + if (result == states.DEPLOYWAIT + and CONF.conductor.permit_child_node_step_async_result): + # Operator has chosen to permit this due to some reason + # NOTE(TheJulia): This is where we would likely wire agent + # error handling if we ever implicitly allowed child node + # deploys to take place with the agent from a parent node + # being deployed. + continue + msg = (_('While executing step %(step)s on child node ' + '%(node)s, step returned invalid value: %(val)s') + % {'step': step, 'node': child_task.node.uuid, + 'val': result}) + LOG.error(msg) + # Only None or states.DEPLOYWAIT are possible paths forward + # in the parent step execution code, so returning the message + # means it will be logged. + return msg diff --git a/ironic/conductor/steps.py b/ironic/conductor/steps.py index 5a0fdd7b49..59429bb8a1 100644 --- a/ironic/conductor/steps.py +++ b/ironic/conductor/steps.py @@ -18,6 +18,7 @@ from oslo_log import log from ironic.common import exception from ironic.common.i18n import _ from ironic.common import states +from ironic.conductor import utils from ironic.objects import deploy_template LOG = log.getLogger(__name__) @@ -69,6 +70,16 @@ VERIFYING_INTERFACE_PRIORITY = { 'rescue': 1, } +# Reserved step names to to map to methods which need to be +# called, where node conductor logic wraps a driver's internal +# logic. Example, removing tokens based upon state before +# rebooting the node. +RESERVED_STEP_HANDLER_MAPPING = { + 'power_on': [utils.node_power_action, states.POWER_ON], + 'power_off': [utils.node_power_action, states.POWER_OFF], + 'reboot': [utils.node_power_action, states.REBOOT], +} + def _clean_step_key(step): """Sort by priority, then interface priority in event of tie. @@ -646,6 +657,15 @@ def _validate_user_steps(task, user_steps, driver_steps, step_type, result = [] for user_step in user_steps: + if user_step.get('execute_on_child_nodes'): + # NOTE(TheJulia): This input is validated on the API side + # as we have the original API request context to leverage + # for RBAC validation. + continue + if user_step.get('step') in ['power_on', 'power_off', 'reboot']: + # NOTE(TheJulia): These are flow related steps the conductor + # resolves internally. + continue # Check if this user-specified step isn't supported by the driver try: driver_step = driver_steps[step_id(user_step)] @@ -788,3 +808,19 @@ def validate_user_deploy_steps_and_templates(task, deploy_steps=None, _get_validated_steps_from_templates(task, skip_missing=skip_missing) # Validate steps from passed argument or stored on the node. _get_validated_user_deploy_steps(task, deploy_steps, skip_missing) + + +def use_reserved_step_handler(task, step): + """Returns True if reserved step execution is used, otherwise False. + + :param task: a TaskManager object. + :param step: The requested step. + """ + step_name = step.get('step') + if step_name and step_name in RESERVED_STEP_HANDLER_MAPPING.keys(): + call_to_use = RESERVED_STEP_HANDLER_MAPPING[step_name] + method = call_to_use[0] + parameter = call_to_use[1] + method(task, parameter) + return True + return False diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index 2452fafe78..4661aafc0f 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -349,7 +349,6 @@ opts = [ 'is a global setting applying to all requests this ' 'conductor receives, regardless of access rights. ' 'The concurrent clean limit cannot be disabled.')), - cfg.BoolOpt('poweroff_in_cleanfail', default=False, help=_('If True power off nodes in the ``clean failed`` ' @@ -357,6 +356,16 @@ opts = [ 'when using Cleaning to perform ' 'hardware-transformative actions such as ' 'firmware upgrade.')), + cfg.BoolOpt('permit_child_node_step_async_result', + default=False, + mutable=True, + help=_('This option allows child node steps to not error if ' + 'the resulting step execution returned a "wait" ' + 'state. Under normal conditions, child nodes are not ' + 'expected to request a wait state. This option exists ' + 'for operators to use if needed to perform specific ' + 'tasks where this is known acceptable. Use at your' + 'own risk!')), ] diff --git a/ironic/tests/unit/api/controllers/v1/test_node.py b/ironic/tests/unit/api/controllers/v1/test_node.py index c21fd42b0f..b7ff02be0f 100644 --- a/ironic/tests/unit/api/controllers/v1/test_node.py +++ b/ironic/tests/unit/api/controllers/v1/test_node.py @@ -49,9 +49,9 @@ from ironic.drivers.modules import inspect_utils from ironic import objects from ironic.objects import fields as obj_fields from ironic import tests as tests_root -from ironic.tests import base from ironic.tests.unit.api import base as test_api_base from ironic.tests.unit.api import utils as test_api_utils +from ironic.tests.unit.db import base as db_base from ironic.tests.unit.objects import utils as obj_utils @@ -6826,7 +6826,7 @@ ORHMKeXMO8fcK0By7CiMKwHSXCoEQgfQhWwpMdSsO8LgHCjh87DQc= """ self.assertEqual(http_client.BAD_REQUEST, ret.status_code) -class TestCheckCleanSteps(base.TestCase): +class TestCheckCleanSteps(db_base.DbTestCase): def test__check_clean_steps_not_list(self): clean_steps = {"step": "upgrade_firmware", "interface": "deploy"} self.assertRaisesRegex(exception.InvalidParameterValue, @@ -6894,6 +6894,55 @@ class TestCheckCleanSteps(base.TestCase): step2 = {"step": "configure raid", "interface": "raid"} api_node._check_clean_steps([step1, step2]) + @mock.patch.object(api_utils, 'check_node_policy_and_retrieve', + autospec=True) + def test__check_clean_steps_child_node(self, mock_policy): + node = obj_utils.create_test_node( + self.context, + provision_state=states.AVAILABLE, + name='node-1337') + obj_utils.create_test_node( + self.context, + provision_state=states.AVAILABLE, + name='node-n071337', + parent_node=node.uuid, + uuid=uuidutils.generate_uuid()) + clean_steps = [{"step": "upgrade_firmware", "interface": "deploy", + "execute_on_child_nodes": True}] + api_node._check_clean_steps(clean_steps) + mock_policy.assert_not_called() + + @mock.patch.object(api_utils, 'check_node_policy_and_retrieve', + autospec=True) + def test__check_clean_steps_child_node_list(self, mock_policy): + node = obj_utils.create_test_node( + self.context, + provision_state=states.AVAILABLE, + name='node-1337') + child_node_1 = obj_utils.create_test_node( + self.context, + provision_state=states.AVAILABLE, + name='node-n071337', + parent_node=node.uuid, + uuid=uuidutils.generate_uuid()) + child_node_2 = obj_utils.create_test_node( + self.context, + provision_state=states.AVAILABLE, + name='node-1337-card1', + parent_node=node.uuid, + uuid=uuidutils.generate_uuid()) + + clean_steps = [{"step": "upgrade_firmware", "interface": "deploy", + "execute_on_child_nodes": True, + "limit_child_node_execution": [child_node_1.uuid, + child_node_2.uuid]}] + api_node._check_clean_steps(clean_steps) + mock_policy.assert_has_calls([ + mock.call('baremetal:node:set_provision_state', + child_node_1.uuid), + mock.call('baremetal:node:set_provision_state', + child_node_2.uuid)]) + class TestAttachDetachVif(test_api_base.BaseApiTest): diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py index cdfbf14ee6..6b3451fef6 100644 --- a/ironic/tests/unit/conductor/test_cleaning.py +++ b/ironic/tests/unit/conductor/test_cleaning.py @@ -1227,3 +1227,185 @@ class DoNodeCleanAbortTestCase(db_base.DbTestCase): self.assertIsNotNone(task.node.maintenance_reason) self.assertTrue(task.node.maintenance) self.assertEqual('clean failure', task.node.fault) + + +class DoNodeCleanTestChildNodes(db_base.DbTestCase): + def setUp(self): + super(DoNodeCleanTestChildNodes, self).setUp() + self.config(automated_clean=True, group='conductor') + self.power_off_parent = { + 'step': 'power_off', 'priority': 4, 'interface': 'power'} + self.power_on_children = { + 'step': 'power_on', 'priority': 5, 'interface': 'power', + 'execute_on_child_nodes': True} + self.update_firmware_on_children = { + 'step': 'update_firmware', 'priority': 10, + 'interface': 'management', 'execute_on_child_nodes': True} + self.reboot_children = { + 'step': 'reboot', 'priority': 5, 'interface': 'power', + 'execute_on_child_nodes': True} + self.power_on_parent = { + 'step': 'power_on', 'priority': 15, 'interface': 'power'} + self.clean_steps = [ + self.power_off_parent, + self.power_on_children, + self.update_firmware_on_children, + self.reboot_children, + self.power_on_parent] + self.node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.CLEANING, + target_provision_state=states.MANAGEABLE, + last_error=None, + power_state=states.POWER_ON, + driver_internal_info={'agent_secret_token': 'old', + 'clean_steps': self.clean_steps}) + + @mock.patch('ironic.drivers.modules.fake.FakePower.reboot', + autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.set_power_state', + autospec=True) + @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate', + autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.validate', + autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step', + autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakeManagement.' + 'execute_clean_step', autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', + autospec=True) + def test_do_next_clean_step_with_children( + self, mock_deploy, mock_mgmt, mock_power, mock_pv, mock_nv, + mock_sps, mock_reboot): + child_node1 = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + last_error=None, + power_state=states.POWER_OFF, + parent_node=self.node.uuid) + child_node2 = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + last_error=None, + power_state=states.POWER_OFF, + parent_node=self.node.uuid) + + mock_deploy.return_value = None + mock_mgmt.return_value = None + mock_power.return_value = None + child1_updated_at = str(child_node1.updated_at) + child2_updated_at = str(child_node2.updated_at) + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + + cleaning.do_next_clean_step(task, 0, + disable_ramdisk=True) + self.node.refresh() + child_node1.refresh() + child_node2.refresh() + + # Confirm the objects *did* recieve locks. + self.assertNotEqual(child1_updated_at, child_node1.updated_at) + self.assertNotEqual(child2_updated_at, child_node2.updated_at) + + # Confirm the child nodes have no errors + self.assertFalse(child_node1.maintenance) + self.assertFalse(child_node2.maintenance) + self.assertIsNone(child_node1.last_error) + self.assertIsNone(child_node2.last_error) + self.assertIsNone(self.node.last_error) + + # Confirm the call counts expected + self.assertEqual(0, mock_deploy.call_count) + self.assertEqual(2, mock_mgmt.call_count) + self.assertEqual(0, mock_power.call_count) + self.assertEqual(0, mock_nv.call_count) + self.assertEqual(0, mock_pv.call_count) + self.assertEqual(4, mock_sps.call_count) + self.assertEqual(2, mock_reboot.call_count) + mock_sps.assert_has_calls([ + mock.call(mock.ANY, mock.ANY, 'power off', timeout=None), + mock.call(mock.ANY, mock.ANY, 'power on', timeout=None), + mock.call(mock.ANY, mock.ANY, 'power on', timeout=None)]) + + @mock.patch('ironic.drivers.modules.fake.FakePower.set_power_state', + autospec=True) + @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate', + autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.validate', + autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step', + autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakeManagement.' + 'execute_clean_step', autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', + autospec=True) + def test_do_next_clean_step_with_children_by_uuid( + self, mock_deploy, mock_mgmt, mock_power, mock_pv, mock_nv, + mock_sps): + child_node1 = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + last_error=None, + parent_node=self.node.uuid) + child_node2 = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver='fake-hardware', + last_error=None, + parent_node=self.node.uuid) + power_on_children = { + 'step': 'power_on', 'priority': 5, 'interface': 'power', + 'execute_on_child_nodes': True, + 'limit_child_node_execution': [child_node1.uuid]} + update_firmware_on_children = { + 'step': 'update_firmware', 'priority': 10, + 'interface': 'management', + 'execute_on_child_nodes': True, + 'limit_child_node_execution': [child_node1.uuid]} + power_on_parent = { + 'step': 'not_power', 'priority': 15, 'interface': 'power'} + clean_steps = [power_on_children, update_firmware_on_children, + power_on_parent] + dii = self.node.driver_internal_info + dii['clean_steps'] = clean_steps + self.node.driver_internal_info = dii + self.node.save() + + mock_deploy.return_value = None + mock_mgmt.return_value = None + mock_power.return_value = None + child1_updated_at = str(child_node1.updated_at) + + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + + cleaning.do_next_clean_step(task, 0, + disable_ramdisk=True) + self.node.refresh() + child_node1.refresh() + child_node2.refresh() + + # Confirm the objects *did* recieve locks. + self.assertNotEqual(child1_updated_at, child_node1.updated_at) + self.assertIsNone(child_node2.updated_at) + + # Confirm the child nodes have no errors + self.assertFalse(child_node1.maintenance) + self.assertFalse(child_node2.maintenance) + self.assertIsNone(child_node1.last_error) + self.assertIsNone(child_node2.last_error) + self.assertIsNone(self.node.last_error) + + # Confirm the call counts expected + self.assertEqual(0, mock_deploy.call_count) + self.assertEqual(1, mock_mgmt.call_count) + self.assertEqual(1, mock_power.call_count) + self.assertEqual(0, mock_nv.call_count) + self.assertEqual(0, mock_pv.call_count) + mock_sps.assert_has_calls([ + mock.call(mock.ANY, mock.ANY, 'power on', timeout=None)]) diff --git a/ironic/tests/unit/conductor/test_steps.py b/ironic/tests/unit/conductor/test_steps.py index d96670303d..2aae428688 100644 --- a/ironic/tests/unit/conductor/test_steps.py +++ b/ironic/tests/unit/conductor/test_steps.py @@ -19,6 +19,7 @@ from ironic.common import exception from ironic.common import states from ironic.conductor import steps as conductor_steps from ironic.conductor import task_manager +from ironic.conductor import utils as conductor_utils from ironic.conductor import verify as verify_steps from ironic import objects from ironic.tests.unit.db import base as db_base @@ -971,6 +972,19 @@ class NodeCleaningStepsTestCase(db_base.DbTestCase): 'priority': 20, 'abortable': True}] self.assertEqual(expected, result) + @mock.patch.object(conductor_steps, '_validate_user_step', + autospec=True) + def test__validate_user_clean_steps_reserved_options(self, mock_steps): + node = obj_utils.create_test_node(self.context) + user_steps = [{'step': 'magic', 'execute_on_child_nodes': True}, + {'step': 'power_on', 'interface': 'power'}, + {'step': 'power_off', 'interface': 'power'}, + {'step': 'reboot', 'interface': 'power'}] + with task_manager.acquire(self.context, node.uuid) as task: + conductor_steps._validate_user_clean_steps( + task, user_steps, disable_ramdisk=True) + mock_steps.assert_not_called() + @mock.patch.object(conductor_steps, '_get_deployment_templates', autospec=True) @@ -1299,3 +1313,31 @@ class NodeVerifyStepsTestCase(db_base.DbTestCase): verify_steps.do_node_verify(task) node.refresh() self.assertTrue(mock_execute.called) + + +class ReservedStepsHandlerTestCase(db_base.DbTestCase): + def setUp(self): + super(ReservedStepsHandlerTestCase, self).setUp() + + @mock.patch.object(conductor_utils, 'node_power_action', + autospec=True) + def _test_reserved_step(self, step, mock_power_action): + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.VERIFYING, + target_provision_state=states.MANAGEABLE, + last_error=None, + clean_step=None) + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + res = conductor_steps.use_reserved_step_handler(task, step) + self.assertTrue(res) + + def test_reserved_step_power_on(self): + self._test_reserved_step({'step': 'power_on'}) + + def test_reserved_step_power_off(self): + self._test_reserved_step({'step': 'power_off'}) + + def test_reserved_step_power_reboot(self): + self._test_reserved_step({'step': 'reboot'}) diff --git a/releasenotes/notes/add-execute-on-child-node-20910aecb8f8b714.yaml b/releasenotes/notes/add-execute-on-child-node-20910aecb8f8b714.yaml new file mode 100644 index 0000000000..fdadd9ced2 --- /dev/null +++ b/releasenotes/notes/add-execute-on-child-node-20910aecb8f8b714.yaml @@ -0,0 +1,16 @@ +--- +features: + - | + Adds a capability for syncrhonous steps to be executed through the + cleaning and deployment steps framework upon child nodes, as associated + through the ``parent_node`` field. The new, optional step arguments are + a boolean value of ``execute_on_child_nodes``, and + ``limit_child_node_execution`` which consists of a list of node UUIDs. + The ability to invoke this permisison requires the ability to + set a provision state action upon the child node in the RBAC model. + - | + Adds a ``power_on``, ``power_on``, and ``reboot`` reserved step name + actions which toggles power through the conductor. This allows embedded + devices such as child nodes to have power state toggled as part + of the parent node's cleaning or deployment sequnece, if so stated + through the supplied configuration or deployment template.