From 832826f64076fa23e7a1648b0dda6dfb89ab08b3 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Tue, 2 Feb 2016 10:44:06 -0500 Subject: [PATCH] Active Node Creation via adopt state At present the ironic API explicitly sets the new state for nodes to the beginning step in the ironic workflow. As part of hardware fleet lifecycle management, an operator expects to be able to migrate inventory and control systems for their hardware fleet utilizing their existing inventory data and allocation records. Ultimately this means that an imported host MAY already be allocated and unavailable for immediate allocation. As such, a mechanism is required to permit users to put nodes into an ACTIVE state without performing a deployment operation. This adds a new API provision_state verb to allow users to move nodes from MANAGEABLE state to ACTIVE state. Partial-Bug: #1526315 Change-Id: Ib3eadf4172e93add9a9855582f56cbb3707f3d39 Depends-On: Ie114bfaab249d73ea3ca7c0edc314ca1ed0448eb --- doc/source/deploy/adoption.rst | 183 +++++++ doc/source/images/states.svg | 470 ++++++++++-------- doc/source/index.rst | 1 + doc/source/webapi/v1.rst | 8 + ironic/api/controllers/v1/node.py | 3 +- ironic/api/controllers/v1/utils.py | 1 + ironic/api/controllers/v1/versions.py | 4 +- ironic/common/states.py | 43 +- ironic/conductor/manager.py | 58 +++ ironic/conductor/utils.py | 19 +- ironic/tests/unit/api/v1/test_nodes.py | 87 ++++ ironic/tests/unit/api/v1/test_utils.py | 11 + ironic/tests/unit/conductor/test_manager.py | 150 ++++++ ironic/tests/unit/conductor/test_utils.py | 17 + ...active-node-creation-a41c9869c966c82b.yaml | 14 + 15 files changed, 846 insertions(+), 223 deletions(-) create mode 100644 doc/source/deploy/adoption.rst create mode 100644 releasenotes/notes/active-node-creation-a41c9869c966c82b.yaml diff --git a/doc/source/deploy/adoption.rst b/doc/source/deploy/adoption.rst new file mode 100644 index 0000000000..34fab413d8 --- /dev/null +++ b/doc/source/deploy/adoption.rst @@ -0,0 +1,183 @@ +.. _adoption: + +============= +Node adoption +============= + +Overview +======== +As part of hardware inventory lifecycle management, it is not an +unreasonable need to have the capability to be able to add hardware +that should be considered "in-use" by the Bare Metal service, +that may have been deployed by another Bare Metal service +installation or deployed via other means. + +As such, the node adoption feature allows a user to define a node +as ``active`` while skipping the ``available`` and ``deploying`` +states, which will prevent the node from being seen by the Compute +service as ready for use. + +This feature is leveraged as part of the state machine workflow, +where a node in ``manageable`` can be moved to ``active`` state +via the provision_state verb ``adopt``. To view the state +transition capabilities, please see :ref:`states`. + +How it works +============ + +A node initially enrolled begins in the ``enroll`` state. An operator +must then move the node to ``manageable`` state, which causes the driver's +``power`` interface to be validated. Once in ``manageable`` state, +an operator can then explicitly choose to adopt a node. + +Adoption of a node results in the validation of the driver ``boot`` interface, +and upon success the process leverages what is referred to as the "takeover" +logic. The takeover process is intended for conductors to take over the +management of nodes for a conductor that has failed. + +The takeover process involves the driver deploy ``prepare`` and ``take_over`` +methods being called. These steps take driver specific actions such as +downloading and staging the deployment kernel and ramdisk, ISO image, any +required boot image, or boot ISO image and then places any PXE or virtual +media configuration necessary for the node should it be required. + +The adoption process makes no changes to the physical node, with the +exception of operator supplied configurations where virtual media is +used to boot the node under normal circumstances. An operator should +ensure that any supplied configuration defining the node is sufficient +for the continued operation of the node moving forward. Such as, if the +node is configured to network boot via instance_info/boot_option="netboot", +then appropriate driver specific node configuration should be set to +support this capability. + +Possible Risk +============= + +The main risk with this feature is that supplied configuration may ultimately +be incorrect or invalid which could result in potential operational issues: + +* ``rebuild`` verb - Rebuild is intended to allow a user to re-deploy the node + to a fresh state. The risk with adoption is that the image defined when an + operator adopts the node may not be the valid image for the pre-existing + configuration. + + If this feature is utilized for a migration from one deployment to another, + and pristine original images are loaded and provided, then ultimately the + risk is the same with any normal use of the ``rebuild`` feature, the server + is effectively wiped. + +* When deleting a node, the deletion or cleaning processes may fail if the + incorrect deployment image is supplied in the configuration as the node + may NOT have been deployed with the supplied image and driver or + compatibility issues may exist as a result. + + Operators will need to be cognizant of that possibility and should plan + accordingly to ensure that deployment images are known to be compatible + with the hardware in their environment. + +* Networking - Adoption will assert no new networking configuration to the + newly adopted node as that would be considered modifying the node. + + Operators will need to plan accordingly and have network configuration + such that the nodes will be able to network boot. + +How to use +========== + +.. NOTE:: + The power state that the ironic-conductor observes upon the first + successful power state check, as part of the transition to the + ``manageable`` state will be enforced with a node that has been adopted. + This means a node that is in ``power off`` state will, by default, have + the power state enforced as ``power off`` moving forward, unless an + administrator actively changes the power state using the Bare Metal + service. + +Requirements +------------ + +Requirements for use are essentially the same as to deploy a node: + +* Sufficient driver information to allow for a successful + power management validation. + +* Sufficient instance_info to pass deploy driver validation. + +Each driver may have additional requirements dependent upon the +configuration that is supplied. An example of this would be defining +a node to always boot from the network, which will cause the conductor +to attempt to retrieve the pertinent files. Inability to do so will +result in the adoption failing, and the node being placed in the +``adopt failed`` state. + +agent_ipmitool example +---------------------- + +This is an example to create a new node, named ``testnode``, with +sufficient information to pass basic validation in order to be taken +from the ``manageable`` state to ``active`` state.:: + + # Explicitly set the client API version environment variable to + # 1.17, which introduces the adoption capability. + export IRONIC_API_VERSION=1.17 + + ironic node-create -n testnode \ + -d agent_ipmitool \ + -i ipmi_address= \ + -i ipmi_username= \ + -i ipmi_password= \ + -i deploy_kernel= \ + -i deploy_ramdisk= + + ironic port-create --node -a + + ironic node-update testnode add \ + instance_info/image_source="http://localhost:8080/blankimage" + + ironic node-set-provision-state testnode manage + + ironic node-set-provision-state testnode adopt + +.. NOTE:: + In the above example, the image_source setting must reference a valid + image or file, however that image or file can ultimately be empty. + +.. NOTE:: + The above example will naturally fail as a fake image is + defined, and no instance_info/image_checksum is defined so + any actual attempt to write the image out will fail. + +.. NOTE:: + A user may wish to assign an instance_uuid to a node, which could be + used to match an instance in the Compute service. Doing so is not + required for the proper operation of the Bare Metal service. + + ironic node-update add instance_uuid= + +Troubleshooting +=============== + +Should an adoption operation fail for a node, the error that caused the +failure will be logged in the node's ``last_error`` field when viewing the +node. This error, in the case of node adoption, will largely be due to +failure of a validation step. Validation steps are dependent +upon what driver is selected for the node. + +Any node that is in the ``adopt failed`` state can have the ``adopt`` verb +re-attempted. Example:: + + ironic node-set-provision-state adopt + +If a user wishes to abort their attempt at adopting, they can then move +the node back to ``manageable`` from ``adopt failed`` state by issuing the +``manage`` verb. Example:: + + ironic node-set-provision-state manage + +If all else fails the hardware node can be removed from the Bare Metal +service. The ``node-delete`` command, which is **not** the same as setting +the provision state to ``delete``, can be used while the node is in +``adopt failed`` state. This will delete the node without cleaning +occurring to preserve the node's current state. Example:: + + ironic node-delete diff --git a/doc/source/images/states.svg b/doc/source/images/states.svg index 587892f28b..2c8ceccb5a 100644 --- a/doc/source/images/states.svg +++ b/doc/source/images/states.svg @@ -1,298 +1,338 @@ - - - + + Ironic states - + enroll - -enroll + +enroll verifying - -verifying + +verifying enroll->verifying - - -manage (via API) + + +manage (via API) + + +verifying->enroll + + +fail manageable - -manageable + +manageable -verifying->manageable - - -done - - -verifying->enroll - - -fail +verifying->manageable + + +done cleaning - -cleaning + +cleaning manageable->cleaning - - -provide (via API) + + +provide (via API) manageable->cleaning - - -clean (via API) + + +clean (via API) inspecting - -inspecting + +inspecting manageable->inspecting - - -inspect (via API) + + +inspect (via API) - -available - -available + +adopting + +adopting - -cleaning->available - - -done - - -clean failed - -clean failed - - -cleaning->clean failed - - -fail - - -clean wait - -clean wait - - -cleaning->clean wait - - -wait + +manageable->adopting + + +adopt (via API) -cleaning->manageable - - -manage +cleaning->manageable + + +manage + + +available + +available + + +cleaning->available + + +done + + +clean failed + +clean failed + + +cleaning->clean failed + + +fail + + +clean wait + +clean wait + + +cleaning->clean wait + + +wait -inspecting->manageable - - -done +inspecting->manageable + + +done -inspect failed - -inspect failed +inspect failed + +inspect failed -inspecting->inspect failed - - -fail - - -deploying - -deploying - - -available->deploying - - -active (via API) - - -available->manageable - - -manage (via API) - - -deploy failed - -deploy failed - - -deploying->deploy failed - - -fail - - -wait call-back - -wait call-back - - -deploying->wait call-back - - -wait +inspecting->inspect failed + + +fail -active - -active +active + +active + + +adopting->active + + +done + + +adopt failed + +adopt failed + + +adopting->adopt failed + + +fail + + +available->manageable + + +manage (via API) + + +deploying + +deploying + + +available->deploying + + +active (via API) -deploying->active - - -done +deploying->active + + +done + + +deploy failed + +deploy failed + + +deploying->deploy failed + + +fail + + +wait call-back + +wait call-back + + +deploying->wait call-back + + +wait -active->deploying - - -rebuild (via API) +active->deploying + + +rebuild (via API) -deleting - -deleting +deleting + +deleting -active->deleting - - -deleted (via API) - - -error - -error - - -deleting->error - - -error +active->deleting + + +deleted (via API) -deleting->cleaning - - -clean +deleting->cleaning + + +clean + + +error + +error + + +deleting->error + + +error -error->deploying - - -rebuild (via API) +error->deploying + + +rebuild (via API) -error->deleting - - -deleted (via API) - - -deploy failed->deploying - - -rebuild (via API) +error->deleting + + +deleted (via API) deploy failed->deploying - - -active (via API) + + +rebuild (via API) + + +deploy failed->deploying + + +active (via API) -deploy failed->deleting - - -deleted (via API) +deploy failed->deleting + + +deleted (via API) -wait call-back->deploying - - -resume - - -wait call-back->deploy failed - - -fail +wait call-back->deploying + + +resume -wait call-back->deleting - - -deleted (via API) +wait call-back->deleting + + +deleted (via API) + + +wait call-back->deploy failed + + +fail -clean failed->manageable - - -manage (via API) +clean failed->manageable + + +manage (via API) - -clean wait->clean failed - - -fail + +clean wait->cleaning + + +resume clean wait->clean failed - - -abort (via API) + + +fail - -clean wait->cleaning - - -resume + +clean wait->clean failed + + +abort (via API) -inspect failed->manageable - - -manage (via API) +inspect failed->manageable + + +manage (via API) -inspect failed->inspecting - - -inspect (via API) +inspect failed->inspecting + + +inspect (via API) + + +adopt failed->manageable + + +manage (via API) + + +adopt failed->adopting + + +adopt (via API) diff --git a/doc/source/index.rst b/doc/source/index.rst index 77d17ffeda..9981cacd50 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -41,6 +41,7 @@ Administrator's Guide deploy/raid deploy/inspection deploy/security + deploy/adoption deploy/troubleshooting Release Notes diff --git a/doc/source/webapi/v1.rst b/doc/source/webapi/v1.rst index c0e2d6ae58..4ed0e10a0e 100644 --- a/doc/source/webapi/v1.rst +++ b/doc/source/webapi/v1.rst @@ -32,7 +32,15 @@ always requests the newest supported API version. API Versions History -------------------- +**1.17** + + Addition of provision_state verb ``adopt`` which allows an operator + to move a node from ``manageable`` state to ``active`` state without + performing a deployment operation on the node. This is intended for + nodes that have already been deployed by external means. + **1.16** + Add ability to filter nodes by driver. **1.15** diff --git a/ironic/api/controllers/v1/node.py b/ironic/api/controllers/v1/node.py index c34cfc1ec5..18a8fd7c72 100644 --- a/ironic/api/controllers/v1/node.py +++ b/ironic/api/controllers/v1/node.py @@ -94,7 +94,8 @@ _DEFAULT_RETURN_FIELDS = ('instance_uuid', 'maintenance', 'power_state', # States where calling do_provisioning_action makes sense PROVISION_ACTION_STATES = (ir_states.VERBS['manage'], ir_states.VERBS['provide'], - ir_states.VERBS['abort']) + ir_states.VERBS['abort'], + ir_states.VERBS['adopt']) _NODES_CONTROLLER_RESERVED_WORDS = None diff --git a/ironic/api/controllers/v1/utils.py b/ironic/api/controllers/v1/utils.py index 85ffd3486d..027fd69e4c 100644 --- a/ironic/api/controllers/v1/utils.py +++ b/ironic/api/controllers/v1/utils.py @@ -51,6 +51,7 @@ MIN_VERB_VERSIONS = { states.VERBS['inspect']: versions.MINOR_6_INSPECT_STATE, states.VERBS['abort']: versions.MINOR_13_ABORT_VERB, states.VERBS['clean']: versions.MINOR_15_MANUAL_CLEAN, + states.VERBS['adopt']: versions.MINOR_17_ADOPT_VERB, } diff --git a/ironic/api/controllers/v1/versions.py b/ironic/api/controllers/v1/versions.py index 0acdf67016..16d9859461 100644 --- a/ironic/api/controllers/v1/versions.py +++ b/ironic/api/controllers/v1/versions.py @@ -46,6 +46,7 @@ BASE_VERSION = 1 # 2. '/v1/drivers//properties' # v1.15: Add ability to do manual cleaning of nodes # v1.16: Add ability to filter nodes by driver. +# v1.17: Add 'adopt' verb for ADOPTING active nodes. MINOR_0_JUNO = 0 MINOR_1_INITIAL_VERSION = 1 @@ -64,11 +65,12 @@ MINOR_13_ABORT_VERB = 13 MINOR_14_LINKS_NODESTATES_DRIVERPROPERTIES = 14 MINOR_15_MANUAL_CLEAN = 15 MINOR_16_DRIVER_FILTER = 16 +MINOR_17_ADOPT_VERB = 17 # When adding another version, update MINOR_MAX_VERSION and also update # doc/source/webapi/v1.rst with a detailed explanation of what the version has # changed. -MINOR_MAX_VERSION = MINOR_16_DRIVER_FILTER +MINOR_MAX_VERSION = MINOR_17_ADOPT_VERB # String representations of the minor and maximum versions MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION) diff --git a/ironic/common/states.py b/ironic/common/states.py index 09c4aff858..e5251036c8 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -47,6 +47,7 @@ VERBS = { 'inspect': 'inspect', 'abort': 'abort', 'clean': 'clean', + 'adopt': 'adopt', } """ Mapping of state-changing events that are PUT to the REST API @@ -161,19 +162,35 @@ INSPECTING = 'inspecting' """ Node is under inspection. This is the provision state used when inspection is started. A successfully -inspected node shall transition to MANAGEABLE status. +inspected node shall transition to MANAGEABLE state. """ INSPECTFAIL = 'inspect failed' """ Node inspection failed. """ +ADOPTING = 'adopting' +""" Node is being adopted. + +This provision state is intended for use to move a node from MANAGEABLE to +ACTIVE state to permit designation of nodes as being "managed" by Ironic, +however "deployed" previously by external means. +""" + +ADOPTFAIL = 'adopt failed' +""" Node failed to complete the adoption process. + +This state is the resulting state of a node that failed to complete adoption, +potentially due to invalid or incompatible information being defined for the +node. +""" UPDATE_ALLOWED_STATES = (DEPLOYFAIL, INSPECTING, INSPECTFAIL, CLEANFAIL, ERROR, - VERIFYING) + VERIFYING, ADOPTFAIL) """Transitional states in which we allow updating a node.""" -DELETE_ALLOWED_STATES = (AVAILABLE, NOSTATE, MANAGEABLE, ENROLL) +DELETE_ALLOWED_STATES = (AVAILABLE, NOSTATE, MANAGEABLE, ENROLL, + ADOPTFAIL) """States in which node deletion is allowed.""" STABLE_STATES = (ENROLL, MANAGEABLE, AVAILABLE, ACTIVE, ERROR) @@ -243,6 +260,10 @@ machine.add_transition(AVAILABLE, DEPLOYING, 'deploy') machine.add_state(INSPECTING, target=MANAGEABLE, **watchers) machine.add_state(INSPECTFAIL, target=MANAGEABLE, **watchers) +# Add adopt* states +machine.add_state(ADOPTING, target=ACTIVE, **watchers) +machine.add_state(ADOPTFAIL, target=ACTIVE, **watchers) + # A deployment may fail machine.add_transition(DEPLOYING, DEPLOYFAIL, 'fail') @@ -346,3 +367,19 @@ machine.add_transition(VERIFYING, MANAGEABLE, 'done') # Verification can fail with setting last_error and rolling back to ENROLL machine.add_transition(VERIFYING, ENROLL, 'fail') + +# Node Adoption is being attempted +machine.add_transition(MANAGEABLE, ADOPTING, 'adopt') + +# Adoption can succeed and the node should be set to ACTIVE +machine.add_transition(ADOPTING, ACTIVE, 'done') + +# Node adoptions can fail and as such nodes shall be set +# into a dedicated state to hold the nodes. +machine.add_transition(ADOPTING, ADOPTFAIL, 'fail') + +# Node adoption can be retried when it previously failed. +machine.add_transition(ADOPTFAIL, ADOPTING, 'adopt') + +# A node that failed adoption can be moved back to manageable +machine.add_transition(ADOPTFAIL, MANAGEABLE, 'manage') diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 8cb9ba0083..69d3d73c34 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -1120,6 +1120,16 @@ class ConductorManager(base_manager.BaseConductorManager): err_handler=utils.provisioning_error_handler) return + if (action == states.VERBS['adopt'] and + node.provision_state in (states.MANAGEABLE, + states.ADOPTFAIL)): + task.process_event( + 'adopt', + callback=self._spawn_worker, + call_args=(self._do_adoption, task), + err_handler=utils.provisioning_error_handler) + return + if (action == states.VERBS['abort'] and node.provision_state == states.CLEANWAIT): @@ -1306,6 +1316,54 @@ class ConductorManager(base_manager.BaseConductorManager): callback_method=utils.cleanup_after_timeout, err_handler=utils.provisioning_error_handler) + @task_manager.require_exclusive_lock + def _do_adoption(self, task): + """Adopt the node. + + Similar to node takeover, adoption performs a driver boot + validation and then triggers node takeover in order to make the + conductor responsible for the node. Upon completion of takeover, + the node is moved to ACTIVE state. + + The goal of this method is to set the conditions for the node to + be managed by Ironic as an ACTIVE node without having performed + a deployment operation. + + :param task: a TaskManager instance + """ + + node = task.node + LOG.debug('Conductor %(cdr)s attempting to adopt node %(node)s', + {'cdr': self.host, 'node': node.uuid}) + + try: + # NOTE(TheJulia): A number of drivers expect to know if a + # whole disk image was used prior to their takeover logic + # being triggered, as such we need to populate the + # internal info based on the configuration the user has + # supplied. + iwdi = images.is_whole_disk_image(task.context, + task.node.instance_info) + node.driver_internal_info['is_whole_disk_image'] = iwdi + # Calling boot validate to ensure that sufficient information + # is supplied to allow the node to be able to boot if takeover + # writes items such as kernel/ramdisk data to disk. + task.driver.boot.validate(task) + # NOTE(TheJulia): While task.driver.boot.validate() is called + # above, and task.driver.power.validate() could be called, it + # is called as part of the transition from ENROLL to MANAGEABLE + # states. As such it is redundant to call here. + self._do_takeover(task) + LOG.info(_LI("Successfully adopted node %(node)s"), + {'node': node.uuid}) + task.process_event('done') + except Exception as err: + msg = (_('Error while attempting to adopt node %(node)s: ' + '%(err)s.') % {'node': node.uuid, 'err': err}) + LOG.error(msg) + node.last_error = msg + task.process_event('fail') + def _do_takeover(self, task): """Take over this node. diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 0aac967a09..f25340c223 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -41,6 +41,14 @@ CLEANING_INTERFACE_PRIORITY = { def node_set_boot_device(task, device, persistent=False): """Set the boot device for a node. + Sets the boot device for a node if the node's driver interface + contains a 'management' interface. + + If the node that the boot device change is being requested for + is in ADOPTING state, the boot device will not be set as that + change could potentially result in the future running state of + an adopted node being modified erroneously. + :param task: a TaskManager instance. :param device: Boot device. Values are vendor-specific. :param persistent: Whether to set next-boot, or make the change @@ -51,9 +59,14 @@ def node_set_boot_device(task, device, persistent=False): """ if getattr(task.driver, 'management', None): task.driver.management.validate(task) - task.driver.management.set_boot_device(task, - device=device, - persistent=persistent) + # NOTE(TheJulia): When a node is in the ADOPTING state, we must + # not attempt to change the default boot device as a side effect + # of a driver's node takeover process as it is modifying + # a working machine. + if task.node.provision_state != states.ADOPTING: + task.driver.management.set_boot_device(task, + device=device, + persistent=persistent) @task_manager.require_exclusive_lock diff --git a/ironic/tests/unit/api/v1/test_nodes.py b/ironic/tests/unit/api/v1/test_nodes.py index 5e26b5d6ea..5eabd00475 100644 --- a/ironic/tests/unit/api/v1/test_nodes.py +++ b/ironic/tests/unit/api/v1/test_nodes.py @@ -2184,6 +2184,93 @@ class TestPut(test_api_base.BaseApiTest): mock_rpcapi.assert_called_once_with(mock.ANY, self.node.uuid, clean_steps, 'test-topic') + def test_adopt_raises_error_before_1_17(self): + """Test that a lower API client cannot use the adopt verb""" + ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid, + {'target': states.VERBS['adopt']}, + headers={api_base.Version.string: "1.16"}, + expect_errors=True) + self.assertEqual(http_client.NOT_ACCEPTABLE, ret.status_code) + + @mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action') + def test_adopt_from_manage(self, mock_dpa): + """Test that a node can be adopted from the manageable state""" + self.node.provision_state = states.MANAGEABLE + self.node.save() + + ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid, + {'target': states.VERBS['adopt']}, + headers={api_base.Version.string: "1.17"}) + self.assertEqual(http_client.ACCEPTED, ret.status_code) + self.assertEqual(b'', ret.body) + mock_dpa.assert_called_once_with(mock.ANY, self.node.uuid, + states.VERBS['adopt'], + 'test-topic') + + @mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action') + def test_adopt_from_adoption_failed(self, mock_dpa): + self.node.provision_state = states.ADOPTFAIL + self.node.save() + + ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid, + {'target': states.VERBS['adopt']}, + headers={api_base.Version.string: "1.17"}) + self.assertEqual(http_client.ACCEPTED, ret.status_code) + self.assertEqual(b'', ret.body) + mock_dpa.assert_called_once_with(mock.ANY, self.node.uuid, + states.VERBS['adopt'], + 'test-topic') + + @mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action') + def test_adopt_from_active_fails(self, mock_dpa): + self.node.provision_state = states.ACTIVE + self.node.save() + + ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid, + {'target': states.VERBS['adopt']}, + headers={api_base.Version.string: "1.17"}, + expect_errors=True) + self.assertEqual(http_client.BAD_REQUEST, ret.status_code) + self.assertEqual(0, mock_dpa.call_count) + + @mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action') + def test_manage_from_adoption_failed(self, mock_dpa): + self.node.provision_state = states.ADOPTFAIL + self.node.save() + + ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid, + {'target': states.VERBS['manage']}, + headers={api_base.Version.string: "1.17"}) + self.assertEqual(http_client.ACCEPTED, ret.status_code) + self.assertEqual(b'', ret.body) + mock_dpa.assert_called_once_with(mock.ANY, self.node.uuid, + states.VERBS['manage'], + 'test-topic') + + @mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action') + def test_bad_requests_in_adopting_state(self, mock_dpa): + self.node.provision_state = states.ADOPTING + self.node.save() + + for state in [states.ACTIVE, states.REBUILD, states.DELETED]: + ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid, + {'target': state}, + expect_errors=True) + self.assertEqual(http_client.BAD_REQUEST, ret.status_code) + self.assertEqual(0, mock_dpa.call_count) + + @mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action') + def test_bad_requests_in_adoption_failed_state(self, mock_dpa): + self.node.provision_state = states.ADOPTFAIL + self.node.save() + + for state in [states.ACTIVE, states.REBUILD, states.DELETED]: + ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid, + {'target': state}, + expect_errors=True) + self.assertEqual(http_client.BAD_REQUEST, ret.status_code) + self.assertEqual(0, mock_dpa.call_count) + def test_set_console_mode_enabled(self): with mock.patch.object(rpcapi.ConductorAPI, 'set_console_mode') as mock_scm: diff --git a/ironic/tests/unit/api/v1/test_utils.py b/ironic/tests/unit/api/v1/test_utils.py index 3f9da1e424..7c01dba22e 100644 --- a/ironic/tests/unit/api/v1/test_utils.py +++ b/ironic/tests/unit/api/v1/test_utils.py @@ -182,6 +182,17 @@ class TestApiUtils(base.TestCase): mock_request.version.minor = 10 self.assertFalse(utils.allow_links_node_states_and_driver_properties()) + @mock.patch.object(pecan, 'request', spec_set=['version']) + def test_check_allow_adopt_verbs_fail(self, mock_request): + mock_request.version.minor = 16 + self.assertRaises(exception.NotAcceptable, + utils.check_allow_management_verbs, 'adopt') + + @mock.patch.object(pecan, 'request', spec_set=['version']) + def test_check_allow_adopt_verbs(self, mock_request): + mock_request.version.minor = 17 + utils.check_allow_management_verbs('adopt') + class TestNodeIdent(base.TestCase): diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 9f327fa8be..d53db2a753 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -2610,6 +2610,16 @@ class DestroyNodeTestCase(mgr_utils.ServiceSetUpMixin, self.dbapi.get_node_by_uuid, node.uuid) + def test_destroy_node_adopt_failed_no_power_change(self): + self._start_service() + node = obj_utils.create_test_node(self.context, + driver='fake', + provision_state=states.ADOPTFAIL) + with mock.patch.object(self.driver.power, + 'set_power_state') as mock_power: + self.service.destroy_node(self.context, node.uuid) + self.assertFalse(mock_power.called) + @mgr_utils.mock_record_keepalive class UpdatePortTestCase(mgr_utils.ServiceSetUpMixin, @@ -4760,3 +4770,143 @@ class DoNodeTakeOverTestCase(mgr_utils.ServiceSetUpMixin, mock_prepare.assert_called_once_with(mock.ANY) mock_take_over.assert_called_once_with(mock.ANY) mock_start_console.assert_called_once_with(mock.ANY) + + +@mgr_utils.mock_record_keepalive +class DoNodeAdoptionTestCase( + mgr_utils.ServiceSetUpMixin, + tests_db_base.DbTestCase): + + @mock.patch('ironic.drivers.modules.fake.FakePower.validate') + @mock.patch('ironic.drivers.modules.fake.FakeBoot.validate') + @mock.patch('ironic.drivers.modules.fake.FakeConsole.start_console') + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.take_over') + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare') + def test__do_adoption_with_takeover(self, + mock_prepare, + mock_take_over, + mock_start_console, + mock_boot_validate, + mock_power_validate): + self._start_service() + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.ADOPTING) + task = task_manager.TaskManager(self.context, node.uuid) + + self.service._do_adoption(task) + node.refresh() + + self.assertEqual(states.ACTIVE, node.provision_state) + self.assertIsNone(node.last_error) + self.assertFalse(node.console_enabled) + mock_prepare.assert_called_once_with(mock.ANY) + mock_take_over.assert_called_once_with(mock.ANY) + self.assertFalse(mock_start_console.called) + self.assertTrue(mock_boot_validate.called) + + @mock.patch('ironic.drivers.modules.fake.FakeBoot.validate') + @mock.patch('ironic.drivers.modules.fake.FakeConsole.start_console') + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.take_over') + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare') + def test__do_adoption_take_over_failure(self, + mock_prepare, + mock_take_over, + mock_start_console, + mock_boot_validate): + # Note(TheJulia): Use of an actual possible exception that + # can be raised due to a misconfiguration. + mock_take_over.side_effect = exception.IPMIFailure( + "something went wrong") + + self._start_service() + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.ADOPTING) + task = task_manager.TaskManager(self.context, node.uuid) + + self.service._do_adoption(task) + node.refresh() + + self.assertEqual(states.ADOPTFAIL, node.provision_state) + self.assertIsNotNone(node.last_error) + self.assertFalse(node.console_enabled) + mock_prepare.assert_called_once_with(mock.ANY) + mock_take_over.assert_called_once_with(mock.ANY) + self.assertFalse(mock_start_console.called) + self.assertTrue(mock_boot_validate.called) + + @mock.patch('ironic.drivers.modules.fake.FakeBoot.validate') + @mock.patch('ironic.drivers.modules.fake.FakeConsole.start_console') + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.take_over') + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare') + def test__do_adoption_boot_validate_failure(self, + mock_prepare, + mock_take_over, + mock_start_console, + mock_boot_validate): + # Note(TheJulia): Use of an actual possible exception that + # can be raised due to a misconfiguration. + mock_boot_validate.side_effect = exception.MissingParameterValue( + "something is missing") + + self._start_service() + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.ADOPTING) + task = task_manager.TaskManager(self.context, node.uuid) + + self.service._do_adoption(task) + node.refresh() + + self.assertEqual(states.ADOPTFAIL, node.provision_state) + self.assertIsNotNone(node.last_error) + self.assertFalse(node.console_enabled) + self.assertFalse(mock_prepare.called) + self.assertFalse(mock_take_over.called) + self.assertFalse(mock_start_console.called) + self.assertTrue(mock_boot_validate.called) + + @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') + def test_do_provisioning_action_adopt_node(self, mock_spawn): + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.MANAGEABLE, + target_provision_state=states.NOSTATE) + + self._start_service() + self.service.do_provisioning_action(self.context, node.uuid, 'adopt') + node.refresh() + self.assertEqual(states.ADOPTING, node.provision_state) + self.assertEqual(states.ACTIVE, node.target_provision_state) + self.assertIsNone(node.last_error) + mock_spawn.assert_called_with(self.service._do_adoption, mock.ANY) + + @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') + def test_do_provisioning_action_adopt_node_retry(self, mock_spawn): + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.ADOPTFAIL, + target_provision_state=states.ACTIVE) + + self._start_service() + self.service.do_provisioning_action(self.context, node.uuid, 'adopt') + node.refresh() + self.assertEqual(states.ADOPTING, node.provision_state) + self.assertEqual(states.ACTIVE, node.target_provision_state) + self.assertIsNone(node.last_error) + mock_spawn.assert_called_with(self.service._do_adoption, mock.ANY) + + def test_do_provisioning_action_manage_of_failed_adoption(self): + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.ADOPTFAIL, + target_provision_state=states.ACTIVE) + + self._start_service() + self.service.do_provisioning_action(self.context, node.uuid, 'manage') + node.refresh() + + self.assertEqual(states.MANAGEABLE, node.provision_state) + self.assertEqual(states.NOSTATE, node.target_provision_state) + self.assertIsNone(node.last_error) diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index a04a1c8f48..15b0117b4e 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -60,6 +60,23 @@ class NodeSetBootDeviceTestCase(base.DbTestCase): device='pxe', persistent=False) + def test_node_set_boot_device_adopting(self): + mgr_utils.mock_the_extension_manager(driver="fake_ipmitool") + self.driver = driver_factory.get_driver("fake_ipmitool") + ipmi_info = utils.get_test_ipmi_info() + node = obj_utils.create_test_node(self.context, + uuid=uuidutils.generate_uuid(), + driver='fake_ipmitool', + driver_info=ipmi_info, + provision_state=states.ADOPTING) + task = task_manager.TaskManager(self.context, node.uuid) + + with mock.patch.object(self.driver.management, + 'set_boot_device') as mock_sbd: + conductor_utils.node_set_boot_device(task, + device='pxe') + self.assertFalse(mock_sbd.called) + class NodePowerActionTestCase(base.DbTestCase): diff --git a/releasenotes/notes/active-node-creation-a41c9869c966c82b.yaml b/releasenotes/notes/active-node-creation-a41c9869c966c82b.yaml new file mode 100644 index 0000000000..91abd8e912 --- /dev/null +++ b/releasenotes/notes/active-node-creation-a41c9869c966c82b.yaml @@ -0,0 +1,14 @@ +--- +features: + - Addition of the provision state target verb of ``adopt`` + which allows an operator to move a node into an ``active`` + state from ``manageable`` state, without performing a deployment + operation on the node. This can be used to represent nodes that have + been previously deployed by other means that will now be managed by + ironic and be later released to the available hardware pool. +other: + - When a node is enrolled into ironic, upon transition to the + ``manageable`` state, the current power state of the node is + recorded. Once the node is adopted and in an ``active`` state, + that recorded power state will be enfored by ironic unless an + operator changes the power state in ironic.