diff --git a/ironic/conductor/base_manager.py b/ironic/conductor/base_manager.py index b80b41c66a..48abbbe4ba 100644 --- a/ironic/conductor/base_manager.py +++ b/ironic/conductor/base_manager.py @@ -260,7 +260,8 @@ class BaseConductorManager(periodic_task.PeriodicTasks): def _fail_if_in_state(self, context, filters, provision_state, sort_key, callback_method=None, - err_handler=None, last_error=None): + err_handler=None, last_error=None, + keep_target_state=False): """Fail nodes that are in specified state. Retrieves nodes that satisfy the criteria in 'filters'. @@ -285,6 +286,11 @@ class BaseConductorManager(periodic_task.PeriodicTasks): if an error occurs trying to spawn an thread to do the callback_method. :param: last_error: the error message to be updated in node.last_error + :param: keep_target_state: if True, a failed node will keep the same + target provision state it had before the + failure. Otherwise, the node's target + provision state will be determined by the + fsm. """ node_iter = self.iter_nodes(filters=filters, @@ -300,15 +306,19 @@ class BaseConductorManager(periodic_task.PeriodicTasks): task.node.provision_state != provision_state): continue + target_state = (None if not keep_target_state else + task.node.target_provision_state) + # timeout has been reached - process the event 'fail' if callback_method: task.process_event('fail', callback=self._spawn_worker, call_args=(callback_method, task), - err_handler=err_handler) + err_handler=err_handler, + target_state=target_state) else: task.node.last_error = last_error - task.process_event('fail') + task.process_event('fail', target_state=target_state) except exception.NoFreeConductorWorker: break except (exception.NodeLocked, exception.NodeNotFound): diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 9e15e8c5bf..8c6b209e49 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -184,7 +184,7 @@ class ConductorManager(base_manager.BaseConductorManager): """Ironic Conductor manager main class.""" # NOTE(rloo): This must be in sync with rpcapi.ConductorAPI's. - RPC_API_VERSION = '1.31' + RPC_API_VERSION = '1.32' target = messaging.Target(version=RPC_API_VERSION) @@ -652,6 +652,7 @@ class ConductorManager(base_manager.BaseConductorManager): :param task: A TaskManager object :raises: NodeCleaningFailure if an internal error occurred when getting the next clean steps + :returns: ordered list of clean step dictionaries """ node = task.node @@ -675,6 +676,71 @@ class ConductorManager(base_manager.BaseConductorManager): reason=msg) return next_steps + @messaging.expected_exceptions(exception.InvalidParameterValue, + exception.InvalidStateRequested, + exception.NodeInMaintenance, + exception.NodeLocked, + exception.NoFreeConductorWorker) + def do_node_clean(self, context, node_id, clean_steps): + """RPC method to initiate manual cleaning. + + :param context: an admin context. + :param node_id: the ID or UUID of a node. + :param clean_steps: an ordered list of clean steps that will be + performed on the node. A clean step is a dictionary with required + keys 'interface' and 'step', and optional key 'args'. If + specified, the 'args' arguments are passed to the clean step + method.:: + + { 'interface': , + 'step': , + 'args': {: , ..., : } } + + For example (this isn't a real example, this clean step + doesn't exist):: + + { 'interface': deploy', + 'step': 'upgrade_firmware', + 'args': {'force': True} } + :raises: InvalidParameterValue if power validation fails. + :raises: InvalidStateRequested if the node is not in manageable state. + :raises: NodeLocked if node is locked by another conductor. + :raises: NoFreeConductorWorker when there is no free worker to start + async task. + """ + with task_manager.acquire(context, node_id, shared=False, + purpose='node manual cleaning') as task: + node = task.node + + if node.maintenance: + raise exception.NodeInMaintenance(op=_('cleaning'), + node=node.uuid) + + # NOTE(rloo): _do_node_clean() will also make a similar call + # to validate the power, but we are doing it again here so that + # the user gets immediate feedback of any issues. This behaviour + # (of validating) is consistent with other methods like + # self.do_node_deploy(). + try: + task.driver.power.validate(task) + except exception.InvalidParameterValue as e: + msg = (_('RPC do_node_clean failed to validate power info.' + ' Cannot clean node %(node)s. Error: %(msg)s') % + {'node': node.uuid, 'msg': e}) + raise exception.InvalidParameterValue(msg) + + try: + task.process_event( + 'clean', + callback=self._spawn_worker, + call_args=(self._do_node_clean, task, clean_steps), + err_handler=utils.provisioning_error_handler, + target_state=states.MANAGEABLE) + except exception.InvalidState: + raise exception.InvalidStateRequested( + action='manual clean', node=node.uuid, + state=node.provision_state) + def continue_node_clean(self, context, node_id): """RPC method to continue cleaning a node. @@ -697,26 +763,32 @@ class ConductorManager(base_manager.BaseConductorManager): with task_manager.acquire(context, node_id, shared=False, purpose='node cleaning') as task: + node = task.node + if node.target_provision_state == states.MANAGEABLE: + target_state = states.MANAGEABLE + else: + target_state = None + # TODO(lucasagomes): CLEANING here for backwards compat # with previous code, otherwise nodes in CLEANING when this # is deployed would fail. Should be removed once the Mitaka # release starts. - if task.node.provision_state not in (states.CLEANWAIT, - states.CLEANING): + if node.provision_state not in (states.CLEANWAIT, + states.CLEANING): raise exception.InvalidStateRequested(_( 'Cannot continue cleaning on %(node)s, node is in ' '%(state)s state, should be %(clean_state)s') % - {'node': task.node.uuid, - 'state': task.node.provision_state, + {'node': node.uuid, + 'state': node.provision_state, 'clean_state': states.CLEANWAIT}) next_steps = self._get_node_next_clean_steps(task) # If this isn't the final clean step in the cleaning operation # and it is flagged to abort after the clean step that just - # finished, we abort the cleaning operaation. - if task.node.clean_step.get('abort_after'): - step_name = task.node.clean_step['step'] + # finished, we abort the cleaning operation. + if node.clean_step.get('abort_after'): + step_name = node.clean_step['step'] if next_steps: LOG.debug('The cleaning operation for node %(node)s was ' 'marked to be aborted after step "%(step)s ' @@ -727,21 +799,22 @@ class ConductorManager(base_manager.BaseConductorManager): callback=self._spawn_worker, call_args=(self._do_node_clean_abort, task, step_name), - err_handler=utils.provisioning_error_handler) + err_handler=utils.provisioning_error_handler, + target_state=target_state) return LOG.debug('The cleaning operation for node %(node)s was ' 'marked to be aborted after step "%(step)s" ' 'completed. However, since there are no more ' 'clean steps after this, the abort is not going ' - 'to be done.', {'node': task.node.uuid, + 'to be done.', {'node': node.uuid, 'step': step_name}) # TODO(lucasagomes): This conditional is here for backwards # compat with previous code. Should be removed once the Mitaka # release starts. - if task.node.provision_state == states.CLEANWAIT: - task.process_event('resume') + if node.provision_state == states.CLEANWAIT: + task.process_event('resume', target_state=target_state) task.set_spawn_error_hook(utils.cleaning_error_handler, task.node, _('Failed to run next clean step')) @@ -750,19 +823,29 @@ class ConductorManager(base_manager.BaseConductorManager): self._do_next_clean_step, task, next_steps) - def _do_node_clean(self, task): - """Internal RPC method to perform automated cleaning of a node.""" - node = task.node - LOG.debug('Starting cleaning for node %s', node.uuid) + def _do_node_clean(self, task, clean_steps=None): + """Internal RPC method to perform cleaning of a node. - if not CONF.conductor.clean_nodes: + :param task: a TaskManager instance with an exclusive lock on its node + :param clean_steps: For a manual clean, the list of clean steps to + perform. Is None For automated cleaning (default). + For more information, see the clean_steps parameter + of :func:`ConductorManager.do_node_clean`. + """ + node = task.node + manual_clean = clean_steps is not None + clean_type = 'manual' if manual_clean else 'automated' + LOG.debug('Starting %(type)s cleaning for node %(node)s', + {'type': clean_type, 'node': node.uuid}) + + if not manual_clean and not CONF.conductor.clean_nodes: # Skip cleaning, move to AVAILABLE. node.clean_step = None node.save() task.process_event('done') - LOG.info(_LI('Cleaning is disabled, node %s has been successfully ' - 'moved to AVAILABLE state.'), node.uuid) + LOG.info(_LI('Automated cleaning is disabled, node %s has been ' + 'successfully moved to AVAILABLE state.'), node.uuid) return try: @@ -776,8 +859,15 @@ class ConductorManager(base_manager.BaseConductorManager): {'node': node.uuid, 'msg': e}) return utils.cleaning_error_handler(task, msg) + if manual_clean: + node.clean_step = {} + info = node.driver_internal_info + info['clean_steps'] = clean_steps + node.driver_internal_info = info + node.save() + # Allow the deploy driver to set up the ramdisk again (necessary for - # IPA cleaning/zapping) + # IPA cleaning) try: prepare_result = task.driver.deploy.prepare_cleaning(task) except Exception as e: @@ -798,10 +888,21 @@ class ConductorManager(base_manager.BaseConductorManager): # set node.driver_internal_info['clean_steps'] and # node.clean_step and then make an RPC call to # continue_node_cleaning to start cleaning. - task.process_event('wait') + + # For manual cleaning, the target provision state is MANAGEABLE, + # whereas for automated cleaning, it is AVAILABLE (the default). + target_state = states.MANAGEABLE if manual_clean else None + task.process_event('wait', target_state=target_state) return - utils.set_node_cleaning_steps(task) + try: + utils.set_node_cleaning_steps(task) + except (exception.InvalidParameterValue, + exception.NodeCleaningFailure) as e: + msg = (_('Cannot clean node %(node)s. Error: %(msg)s') + % {'node': node.uuid, 'msg': e}) + return utils.cleaning_error_handler(task, msg) + self._do_next_clean_step( task, node.driver_internal_info.get('clean_steps', [])) @@ -810,10 +911,16 @@ class ConductorManager(base_manager.BaseConductorManager): """Start executing cleaning/zapping steps. :param task: a TaskManager instance with an exclusive lock - :param steps: The list of remaining steps that need to be executed - on the node + :param steps: The ordered list of remaining steps that need to be + executed on the node. A step is a dictionary with + required keys 'interface' and 'step'. 'args' is an + optional key. """ node = task.node + # For manual cleaning, the target provision state is MANAGEABLE, + # whereas for automated cleaning, it is AVAILABLE. + manual_clean = node.target_provision_state == states.MANAGEABLE + LOG.info(_LI('Executing %(state)s on node %(node)s, remaining steps: ' '%(steps)s'), {'node': node.uuid, 'steps': steps, 'state': node.provision_state}) @@ -854,7 +961,8 @@ class ConductorManager(base_manager.BaseConductorManager): LOG.info(_LI('Clean step %(step)s on node %(node)s being ' 'executed asynchronously, waiting for driver.') % {'node': node.uuid, 'step': step}) - task.process_event('wait') + target_state = states.MANAGEABLE if manual_clean else None + task.process_event('wait', target_state=target_state) return elif result is not None: msg = (_('While executing step %(step)s on node ' @@ -870,6 +978,7 @@ class ConductorManager(base_manager.BaseConductorManager): driver_internal_info = node.driver_internal_info driver_internal_info['clean_steps'] = None node.driver_internal_info = driver_internal_info + node.save() try: task.driver.deploy.tear_down_cleaning(task) except Exception as e: @@ -880,7 +989,9 @@ class ConductorManager(base_manager.BaseConductorManager): tear_down_cleaning=False) LOG.info(_LI('Node %s cleaning complete'), node.uuid) - task.process_event('done') + event = 'manage' if manual_clean else 'done' + # NOTE(rloo): No need to specify target prov. state; we're done + task.process_event(event) def _do_node_verify(self, task): """Internal method to perform power credentials verification.""" @@ -967,8 +1078,9 @@ class ConductorManager(base_manager.BaseConductorManager): with task_manager.acquire(context, node_id, shared=False, purpose='provision action %s' % action) as task: + node = task.node if (action == states.VERBS['provide'] and - task.node.provision_state == states.MANAGEABLE): + node.provision_state == states.MANAGEABLE): task.process_event( 'provide', callback=self._spawn_worker, @@ -977,7 +1089,7 @@ class ConductorManager(base_manager.BaseConductorManager): return if (action == states.VERBS['manage'] and - task.node.provision_state == states.ENROLL): + node.provision_state == states.ENROLL): task.process_event( 'manage', callback=self._spawn_worker, @@ -986,45 +1098,49 @@ class ConductorManager(base_manager.BaseConductorManager): return if (action == states.VERBS['abort'] and - task.node.provision_state == states.CLEANWAIT): + node.provision_state == states.CLEANWAIT): # Check if the clean step is abortable; if so abort it. # Otherwise, indicate in that clean step, that cleaning # should be aborted after that step is done. - if (task.node.clean_step and not - task.node.clean_step.get('abortable')): + if (node.clean_step and not + node.clean_step.get('abortable')): LOG.info(_LI('The current clean step "%(clean_step)s" for ' 'node %(node)s is not abortable. Adding a ' 'flag to abort the cleaning after the clean ' 'step is completed.'), - {'clean_step': task.node.clean_step['step'], - 'node': task.node.uuid}) - clean_step = task.node.clean_step + {'clean_step': node.clean_step['step'], + 'node': node.uuid}) + clean_step = node.clean_step if not clean_step.get('abort_after'): clean_step['abort_after'] = True - task.node.clean_step = clean_step - task.node.save() + node.clean_step = clean_step + node.save() return LOG.debug('Aborting the cleaning operation during clean step ' '"%(step)s" for node %(node)s in provision state ' '"%(prov)s".', - {'node': task.node.uuid, - 'prov': task.node.provision_state, - 'step': task.node.clean_step.get('step')}) + {'node': node.uuid, + 'prov': node.provision_state, + 'step': node.clean_step.get('step')}) + target_state = None + if node.target_provision_state == states.MANAGEABLE: + target_state = states.MANAGEABLE task.process_event( 'abort', callback=self._spawn_worker, call_args=(self._do_node_clean_abort, task), - err_handler=utils.provisioning_error_handler) + err_handler=utils.provisioning_error_handler, + target_state=target_state) return try: task.process_event(action) except exception.InvalidState: raise exception.InvalidStateRequested( - action=action, node=task.node.uuid, - state=task.node.provision_state) + action=action, node=node.uuid, + state=node.provision_state) @periodic_task.periodic_task( spacing=CONF.conductor.sync_power_state_interval) @@ -1225,7 +1341,8 @@ class ConductorManager(base_manager.BaseConductorManager): "running on the node.") self._fail_if_in_state(context, filters, states.CLEANWAIT, 'provision_updated_at', - last_error=last_error) + last_error=last_error, + keep_target_state=True) @periodic_task.periodic_task( spacing=CONF.conductor.sync_local_state_interval) @@ -1353,7 +1470,6 @@ class ConductorManager(base_manager.BaseConductorManager): # CLEANFAIL -> MANAGEABLE # INSPECTIONFAIL -> MANAGEABLE # DEPLOYFAIL -> DELETING - # ZAPFAIL -> MANAGEABLE (in the future) if (not node.maintenance and node.provision_state not in states.DELETE_ALLOWED_STATES): msg = (_('Can not delete node "%(node)s" while it is in ' diff --git a/ironic/conductor/rpcapi.py b/ironic/conductor/rpcapi.py index 9f7d8f3f03..4d0d6d07c4 100644 --- a/ironic/conductor/rpcapi.py +++ b/ironic/conductor/rpcapi.py @@ -78,11 +78,12 @@ class ConductorAPI(object): | 1.31 - Added Versioned Objects indirection API methods: | object_class_action_versions, object_action and | object_backport_versions + | 1.32 - Add do_node_clean """ # NOTE(rloo): This must be in sync with manager.ConductorManager's. - RPC_API_VERSION = '1.31' + RPC_API_VERSION = '1.32' def __init__(self, topic=None): super(ConductorAPI, self).__init__() @@ -593,6 +594,25 @@ class ConductorAPI(object): return cctxt.call(context, 'get_raid_logical_disk_properties', driver_name=driver_name) + def do_node_clean(self, context, node_id, clean_steps, topic=None): + """Signal to conductor service to perform manual cleaning on a node. + + :param context: request context. + :param node_id: node ID or UUID. + :param clean_steps: a list of clean step dictionaries. + :param topic: RPC topic. Defaults to self.topic. + :raises: InvalidParameterValue if validation of power driver interface + failed. + :raises: InvalidStateRequested if cleaning can not be performed. + :raises: NodeInMaintenance if node is in maintenance mode. + :raises: NodeLocked if node is locked by another conductor. + :raises: NoFreeConductorWorker when there is no free worker to start + async task. + """ + cctxt = self.client.prepare(topic=topic or self.topic, version='1.32') + return cctxt.call(context, 'do_node_clean', + node_id=node_id, clean_steps=clean_steps) + def object_class_action_versions(self, context, objname, objmethod, object_versions, args, kwargs): """Perform an action on a VersionedObject class. diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index f4e5f6095e..60f4b0b9ca 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -175,7 +175,8 @@ def provisioning_error_handler(e, node, provision_state, """Set the node's provisioning states if error occurs. This hook gets called upon an exception being raised when spawning - the worker to do the deployment or tear down of a node. + the worker to do some provisioning to a node like deployment, tear down, + or cleaning. :param e: the exception object that was raised. :param node: an Ironic node object. @@ -202,10 +203,13 @@ def provisioning_error_handler(e, node, provision_state, def cleaning_error_handler(task, msg, tear_down_cleaning=True, set_fail_state=True): - """Put a failed node in CLEANFAIL or ZAPFAIL and maintenance.""" + """Put a failed node in CLEANFAIL and maintenance.""" # Reset clean step, msg should include current step if task.node.provision_state in (states.CLEANING, states.CLEANWAIT): task.node.clean_step = {} + # For manual cleaning, the target provision state is MANAGEABLE, whereas + # for automated cleaning, it is AVAILABLE. + manual_clean = task.node.target_provision_state == states.MANAGEABLE task.node.last_error = msg task.node.maintenance = True task.node.maintenance_reason = msg @@ -219,7 +223,8 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True, LOG.exception(msg) if set_fail_state: - task.process_event('fail') + target_state = states.MANAGEABLE if manual_clean else None + task.process_event('fail', target_state=target_state) def power_state_error_handler(e, node, power_state): @@ -253,14 +258,18 @@ def _step_key(step): CLEANING_INTERFACE_PRIORITY[step.get('interface')]) -def _get_cleaning_steps(task, enabled=False): - """Get sorted cleaning steps for task.node +def _get_cleaning_steps(task, enabled=False, sort=True): + """Get cleaning steps for task.node. :param task: A TaskManager object :param enabled: If True, returns only enabled (priority > 0) steps. If False, returns all clean steps. - :returns: A list of clean steps dictionaries, sorted with largest priority - as the first item + :param sort: If True, the steps are sorted from highest priority to lowest + priority. For steps having the same priority, they are sorted from + highest interface priority to lowest. + :raises: NodeCleaningFailure if there was a problem getting the + clean steps. + :returns: A list of clean step dictionaries """ # Iterate interfaces and get clean steps from each steps = list() @@ -270,17 +279,120 @@ def _get_cleaning_steps(task, enabled=False): interface_steps = [x for x in interface.get_clean_steps(task) if not enabled or x['priority'] > 0] steps.extend(interface_steps) - # Sort the steps from higher priority to lower priority - return sorted(steps, key=_step_key, reverse=True) + if sort: + # Sort the steps from higher priority to lower priority + steps = sorted(steps, key=_step_key, reverse=True) + return steps def set_node_cleaning_steps(task): - """Get the list of clean steps, save them to the node.""" - # Get the prioritized steps, store them. + """Set up the node with clean step information for cleaning. + + For automated cleaning, get the clean steps from the driver. + For manual cleaning, the user's clean steps are known but need to be + validated against the driver's clean steps. + + :raises: InvalidParameterValue if there is a problem with the user's + clean steps. + :raises: NodeCleaningFailure if there was a problem getting the + clean steps. + """ node = task.node - driver_internal_info = node.driver_internal_info - driver_internal_info['clean_steps'] = _get_cleaning_steps(task, - enabled=True) - node.driver_internal_info = driver_internal_info + # For manual cleaning, the target provision state is MANAGEABLE, whereas + # for automated cleaning, it is AVAILABLE. + manual_clean = node.target_provision_state == states.MANAGEABLE + + if not manual_clean: + # Get the prioritized steps for automated cleaning + driver_internal_info = node.driver_internal_info + driver_internal_info['clean_steps'] = _get_cleaning_steps(task, + enabled=True) + node.driver_internal_info = driver_internal_info + else: + # For manual cleaning, the list of cleaning steps was specified by the + # user and already saved in node.driver_internal_info['clean_steps']. + # Now that we know what the driver's available clean steps are, we can + # do further checks to validate the user's clean steps. + steps = node.driver_internal_info['clean_steps'] + _validate_user_clean_steps(task, steps) + node.clean_step = {} node.save() + + +def _validate_user_clean_steps(task, user_steps): + """Validate the user-specified clean steps. + + :param task: A TaskManager object + :param user_steps: a list of clean steps. A clean step is a dictionary + with required keys 'interface' and 'step', and optional key 'args':: + + { 'interface': , + 'step': , + 'args': {: , ..., : } } + + For example:: + + { 'interface': deploy', + 'step': 'upgrade_firmware', + 'args': {'force': True} } + :raises: InvalidParameterValue if validation of clean steps fails. + :raises: NodeCleaningFailure if there was a problem getting the + clean steps from the driver. + """ + + def step_id(step): + return '.'.join([step['step'], step['interface']]) + + errors = [] + + # The clean steps from the driver. A clean step dictionary is of the form: + # { 'interface': , + # 'step': , + # 'priority': + # 'abortable': Optional. . + # 'argsinfo': Optional. A dictionary of {:} + # entries. is a dictionary with + # { 'description': , + # 'required': } + # } + driver_steps = {} + for s in _get_cleaning_steps(task, enabled=False, sort=False): + driver_steps[step_id(s)] = s + + for user_step in user_steps: + # Check if this user_specified clean step isn't supported by the driver + try: + driver_step = driver_steps[step_id(user_step)] + except KeyError: + error = (_('node does not support this clean step: %(step)s') + % {'step': user_step}) + errors.append(error) + continue + + # Check that the user-specified arguments are valid + argsinfo = driver_step.get('argsinfo') or {} + user_args = user_step.get('args') or {} + invalid = set(user_args) - set(argsinfo) + if invalid: + error = _('clean step %(step)s has these invalid arguments: ' + '%(invalid)s') % {'step': user_step, + 'invalid': ', '.join(invalid)} + errors.append(error) + + # Check that all required arguments were specified by the user + missing = [] + for (arg_name, arg_info) in argsinfo.items(): + if arg_info.get('required', False) and arg_name not in user_args: + msg = arg_name + if arg_info.get('description'): + msg += ' (%(desc)s)' % {'desc': arg_info['description']} + missing.append(msg) + if missing: + error = _('clean step %(step)s is missing these required keyword ' + 'arguments: %(miss)s') % {'step': user_step, + 'miss': ', '.join(missing)} + errors.append(error) + + if errors: + raise exception.InvalidParameterValue('; '.join(errors)) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 4210d17d73..ae0a2d58fa 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -1050,22 +1050,29 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin, self.assertIsNotNone(node.last_error) mock_cleanup.assert_called_once_with(mock.ANY) - def test__check_cleanwait_timeouts(self): + def _check_cleanwait_timeouts(self, manual=False): self._start_service() CONF.set_override('clean_callback_timeout', 1, group='conductor') + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANWAIT, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, provision_updated_at=datetime.datetime(2000, 1, 1, 0, 0)) self.service._check_cleanwait_timeouts(self.context) self.service._worker_pool.waitall() node.refresh() self.assertEqual(states.CLEANFAIL, node.provision_state) - self.assertEqual(states.AVAILABLE, node.target_provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) self.assertIsNotNone(node.last_error) + def test__check_cleanwait_timeouts_automated_clean(self): + self._check_cleanwait_timeouts() + + def test__check_cleanwait_timeouts_manual_clean(self): + self._check_cleanwait_timeouts(manual=True) + def test_do_node_tear_down_invalid_state(self): self._start_service() # test node.provision_state is incorrect for tear_down @@ -1267,22 +1274,29 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin, mock_spawn.assert_called_with(self.service._do_node_verify, mock.ANY) @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') - def test_do_provision_action_abort(self, mock_spawn): + def _do_provision_action_abort(self, mock_spawn, manual=False): + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANWAIT, - target_provision_state=states.AVAILABLE) + target_provision_state=tgt_prov_state) self._start_service() self.service.do_provisioning_action(self.context, node.uuid, 'abort') node.refresh() - # Node will be moved to AVAILABLE after cleaning, not tested here + # Node will be moved to tgt_prov_state after cleaning, not tested here self.assertEqual(states.CLEANFAIL, node.provision_state) - self.assertEqual(states.AVAILABLE, node.target_provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) self.assertIsNone(node.last_error) mock_spawn.assert_called_with(self.service._do_node_clean_abort, mock.ANY) + def test_do_provision_action_abort_automated_clean(self): + self._do_provision_action_abort() + + def test_do_provision_action_abort_manual_clean(self): + self._do_provision_action_abort(manual=True) + def test_do_provision_action_abort_clean_step_not_abortable(self): node = obj_utils.create_test_node( self.context, driver='fake', @@ -1353,14 +1367,108 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, 'step': 'update_firmware', 'priority': 10, 'interface': 'deploy'} self.deploy_erase = { 'step': 'erase_disks', 'priority': 20, 'interface': 'deploy'} - # Cleaning should be executed in this order + # Automated cleaning should be executed in this order self.clean_steps = [self.deploy_erase, self.power_update, self.deploy_update] self.next_clean_steps = self.clean_steps[1:] - # Zap step + # Manual clean step self.deploy_raid = { 'step': 'build_raid', 'priority': 0, 'interface': 'deploy'} + @mock.patch('ironic.drivers.modules.fake.FakePower.validate') + def test_do_node_clean_maintenance(self, mock_validate): + node = obj_utils.create_test_node( + self.context, driver='fake', provision_state=states.MANAGEABLE, + target_provision_state=states.NOSTATE, + maintenance=True, maintenance_reason='reason') + self._start_service() + exc = self.assertRaises(messaging.rpc.ExpectedException, + self.service.do_node_clean, + self.context, node.uuid, []) + # Compare true exception hidden by @messaging.expected_exceptions + self.assertEqual(exception.NodeInMaintenance, exc.exc_info[0]) + self.assertFalse(mock_validate.called) + + @mock.patch('ironic.conductor.task_manager.TaskManager.process_event') + @mock.patch('ironic.drivers.modules.fake.FakePower.validate') + def test_do_node_clean_validate_fail(self, mock_validate, mock_process): + # power validate fails + mock_validate.side_effect = exception.InvalidParameterValue('error') + node = obj_utils.create_test_node( + self.context, driver='fake', provision_state=states.MANAGEABLE, + target_provision_state=states.NOSTATE) + self._start_service() + exc = self.assertRaises(messaging.rpc.ExpectedException, + self.service.do_node_clean, + self.context, node.uuid, []) + # Compare true exception hidden by @messaging.expected_exceptions + self.assertEqual(exception.InvalidParameterValue, exc.exc_info[0]) + mock_validate.assert_called_once_with(mock.ANY) + self.assertFalse(mock_process.called) + + @mock.patch('ironic.drivers.modules.fake.FakePower.validate') + def test_do_node_clean_invalid_state(self, mock_validate): + # test node.provision_state is incorrect for clean + node = obj_utils.create_test_node( + self.context, driver='fake', provision_state=states.ENROLL, + target_provision_state=states.NOSTATE) + self._start_service() + exc = self.assertRaises(messaging.rpc.ExpectedException, + self.service.do_node_clean, + self.context, node.uuid, []) + # Compare true exception hidden by @messaging.expected_exceptions + self.assertEqual(exception.InvalidStateRequested, exc.exc_info[0]) + mock_validate.assert_called_once_with(mock.ANY) + node.refresh() + self.assertFalse('clean_steps' in node.driver_internal_info) + + @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') + @mock.patch('ironic.drivers.modules.fake.FakePower.validate') + def test_do_node_clean_ok(self, mock_validate, mock_spawn): + node = obj_utils.create_test_node( + self.context, driver='fake', provision_state=states.MANAGEABLE, + target_provision_state=states.NOSTATE, last_error='old error') + self._start_service() + clean_steps = [self.deploy_raid] + self.service.do_node_clean(self.context, node.uuid, clean_steps) + mock_validate.assert_called_once_with(mock.ANY) + mock_spawn.assert_called_with(self.service._do_node_clean, mock.ANY, + clean_steps) + node.refresh() + # Node will be moved to CLEANING + self.assertEqual(states.CLEANING, node.provision_state) + self.assertEqual(states.MANAGEABLE, node.target_provision_state) + self.assertIsNone(node.driver_internal_info.get('clean_steps')) + self.assertIsNone(node.last_error) + + @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') + @mock.patch('ironic.drivers.modules.fake.FakePower.validate') + def test_do_node_clean_worker_pool_full(self, mock_validate, mock_spawn): + prv_state = states.MANAGEABLE + tgt_prv_state = states.NOSTATE + node = obj_utils.create_test_node( + self.context, driver='fake', provision_state=prv_state, + target_provision_state=tgt_prv_state) + self._start_service() + clean_steps = [self.deploy_raid] + mock_spawn.side_effect = exception.NoFreeConductorWorker() + exc = self.assertRaises(messaging.rpc.ExpectedException, + self.service.do_node_clean, + self.context, node.uuid, clean_steps) + # Compare true exception hidden by @messaging.expected_exceptions + self.assertEqual(exception.NoFreeConductorWorker, exc.exc_info[0]) + self.service._worker_pool.waitall() + mock_validate.assert_called_once_with(mock.ANY) + mock_spawn.assert_called_with(self.service._do_node_clean, mock.ANY, + clean_steps) + node.refresh() + # Make sure states were rolled back + self.assertEqual(prv_state, node.provision_state) + self.assertEqual(tgt_prv_state, node.target_provision_state) + + self.assertIsNotNone(node.last_error) + self.assertIsNone(node.reservation) + @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') def test_continue_node_clean_worker_pool_full(self, mock_spawn): # Test the appropriate exception is raised if the worker pool is full @@ -1406,10 +1514,10 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self.assertIsNone(node.reservation) @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') - def _continue_node_clean(self, return_state, mock_spawn): + def _continue_node_clean(self, return_state, mock_spawn, manual=False): # test a node can continue cleaning via RPC prv_state = return_state - tgt_prv_state = states.AVAILABLE + tgt_prv_state = states.MANAGEABLE if manual else states.AVAILABLE driver_info = {'clean_steps': self.clean_steps} node = obj_utils.create_test_node(self.context, driver='fake', provision_state=prv_state, @@ -1421,23 +1529,29 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self.service.continue_node_clean(self.context, node.uuid) self.service._worker_pool.waitall() node.refresh() + self.assertEqual(states.CLEANING, node.provision_state) + self.assertEqual(tgt_prv_state, node.target_provision_state) mock_spawn.assert_called_with(self.service._do_next_clean_step, mock.ANY, self.next_clean_steps) - def test_continue_node_clean(self): + def test_continue_node_clean_automated(self): self._continue_node_clean(states.CLEANWAIT) + def test_continue_node_clean_manual(self): + self._continue_node_clean(states.CLEANWAIT, manual=True) + def test_continue_node_clean_backward_compat(self): self._continue_node_clean(states.CLEANING) - def test_continue_node_clean_abort(self): + def _continue_node_clean_abort(self, manual=False): last_clean_step = self.clean_steps[0] last_clean_step['abortable'] = False last_clean_step['abort_after'] = True driver_info = {'clean_steps': self.clean_steps} + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANWAIT, - target_provision_state=states.AVAILABLE, last_error=None, + target_provision_state=tgt_prov_state, last_error=None, driver_internal_info=driver_info, clean_step=self.clean_steps[0]) self._start_service() @@ -1445,46 +1559,68 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self.service._worker_pool.waitall() node.refresh() self.assertEqual(states.CLEANFAIL, node.provision_state) - self.assertEqual(states.AVAILABLE, node.target_provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) self.assertIsNotNone(node.last_error) # assert the clean step name is in the last error message self.assertIn(self.clean_steps[0]['step'], node.last_error) - def test_continue_node_clean_abort_last_clean_step(self): + def test_continue_node_clean_automated_abort(self): + self._continue_node_clean_abort() + + def test_continue_node_clean_manual_abort(self): + self._continue_node_clean_abort(manual=True) + + def _continue_node_clean_abort_last_clean_step(self, manual=False): last_clean_step = self.clean_steps[0] last_clean_step['abortable'] = False last_clean_step['abort_after'] = True driver_info = {'clean_steps': [self.clean_steps[0]]} + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANWAIT, - target_provision_state=states.AVAILABLE, last_error=None, + target_provision_state=tgt_prov_state, last_error=None, driver_internal_info=driver_info, clean_step=self.clean_steps[0]) self._start_service() self.service.continue_node_clean(self.context, node.uuid) self.service._worker_pool.waitall() node.refresh() - self.assertEqual(states.AVAILABLE, node.provision_state) + self.assertEqual(tgt_prov_state, node.provision_state) self.assertIsNone(node.target_provision_state) self.assertIsNone(node.last_error) + def test_continue_node_clean_automated_abort_last_clean_step(self): + self._continue_node_clean_abort_last_clean_step() + + def test_continue_node_clean_manual_abort_last_clean_step(self): + self._continue_node_clean_abort_last_clean_step(manual=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.validate') - def test__do_node_clean_validate_fail(self, mock_validate): + def __do_node_clean_validate_fail(self, mock_validate, clean_steps=None): # InvalidParameterValue should be cause node to go to CLEANFAIL self.config(clean_nodes=True, group='conductor') mock_validate.side_effect = exception.InvalidParameterValue('error') + tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE) + target_provision_state=tgt_prov_state) with task_manager.acquire( - self.context, node['id'], shared=False) as task: - self.service._do_node_clean(task) + self.context, node.uuid, shared=False) as task: + self.service._do_node_clean(task, clean_steps=clean_steps) node.refresh() self.assertEqual(states.CLEANFAIL, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) + mock_validate.assert_called_once_with(mock.ANY) + + def test__do_node_clean_automated_validate_fail(self): + self.__do_node_clean_validate_fail() + + def test__do_node_clean_manual_validate_fail(self): + self.__do_node_clean_validate_fail(clean_steps=[]) @mock.patch('ironic.drivers.modules.fake.FakePower.validate') - def test__do_node_clean_disabled(self, mock_validate): + def test__do_node_clean_automated_disabled(self, mock_validate): self.config(clean_nodes=False, group='conductor') node = obj_utils.create_test_node( self.context, driver='fake', @@ -1494,7 +1630,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: self.service._do_node_clean(task) self.service._worker_pool.waitall() node.refresh() @@ -1506,44 +1642,135 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self.assertEqual({}, node.clean_step) self.assertIsNone(node.driver_internal_info.get('clean_steps')) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_cleaning') + def __do_node_clean_prepare_clean_fail(self, mock_prep, clean_steps=None): + # Exception from task.driver.deploy.prepare_cleaning should cause node + # to go to CLEANFAIL + self.config(clean_nodes=True, group='conductor') + mock_prep.side_effect = exception.InvalidParameterValue('error') + tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.CLEANING, + target_provision_state=tgt_prov_state) + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + self.service._do_node_clean(task, clean_steps=clean_steps) + node.refresh() + self.assertEqual(states.CLEANFAIL, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) + mock_prep.assert_called_once_with(mock.ANY) + + def test__do_node_clean_automated_prepare_clean_fail(self): + self.__do_node_clean_prepare_clean_fail() + + def test__do_node_clean_manual_prepare_clean_fail(self): + self.__do_node_clean_prepare_clean_fail(clean_steps=[self.deploy_raid]) + + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_cleaning') + def __do_node_clean_prepare_clean_wait(self, mock_prep, clean_steps=None): + self.config(clean_nodes=True, group='conductor') + mock_prep.return_value = states.CLEANWAIT + tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.CLEANING, + target_provision_state=tgt_prov_state) + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + self.service._do_node_clean(task, clean_steps=clean_steps) + node.refresh() + self.assertEqual(states.CLEANWAIT, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) + mock_prep.assert_called_once_with(mock.ANY) + + def test__do_node_clean_automated_prepare_clean_wait(self): + self.__do_node_clean_prepare_clean_wait() + + def test__do_node_clean_manual_prepare_clean_wait(self): + self.__do_node_clean_prepare_clean_wait(clean_steps=[self.deploy_raid]) + + @mock.patch.object(conductor_utils, 'set_node_cleaning_steps') + def __do_node_clean_steps_fail(self, mock_steps, clean_steps=None, + invalid_exc=True): + self.config(clean_nodes=True, group='conductor') + if invalid_exc: + mock_steps.side_effect = exception.InvalidParameterValue('invalid') + else: + mock_steps.side_effect = exception.NodeCleaningFailure('failure') + tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE + node = obj_utils.create_test_node( + self.context, driver='fake', + uuid=uuidutils.generate_uuid(), + provision_state=states.CLEANING, + target_provision_state=tgt_prov_state) + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + self.service._do_node_clean(task, clean_steps=clean_steps) + node.refresh() + self.assertEqual(states.CLEANFAIL, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) + mock_steps.assert_called_once_with(mock.ANY) + + def test__do_node_clean_automated_steps_fail(self): + for invalid in (True, False): + self.__do_node_clean_steps_fail(invalid_exc=invalid) + + def test__do_node_clean_manual_steps_fail(self): + for invalid in (True, False): + self.__do_node_clean_steps_fail(clean_steps=[self.deploy_raid], + invalid_exc=invalid) + @mock.patch.object(conductor_utils, 'set_node_cleaning_steps') @mock.patch('ironic.conductor.manager.ConductorManager.' '_do_next_clean_step') @mock.patch('ironic.drivers.modules.fake.FakePower.validate') - def test__do_node_clean(self, mock_validate, mock_next_step, mock_steps): + def __do_node_clean(self, mock_validate, mock_next_step, mock_steps, + clean_steps=None): + if clean_steps is None: + clean_steps = [] + tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, last_error=None, power_state=states.POWER_OFF, - driver_internal_info={'clean_steps': []}) + driver_internal_info={'clean_steps': clean_steps}) mock_steps.return_value = self.clean_steps self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: - self.service._do_node_clean(task) + self.context, node.uuid, shared=False) as task: + self.service._do_node_clean(task, clean_steps=clean_steps) self.service._worker_pool.waitall() node.refresh() mock_validate.assert_called_once_with(task) - mock_next_step.assert_called_once_with(mock.ANY, []) + mock_next_step.assert_called_once_with(mock.ANY, clean_steps) mock_steps.assert_called_once_with(task) # Check that state didn't change self.assertEqual(states.CLEANING, node.provision_state) - self.assertEqual(states.AVAILABLE, node.target_provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) + + def test__do_node_clean_automated(self): + self.__do_node_clean() + + def test__do_node_clean_manual(self): + self.__do_node_clean(clean_steps=[self.deploy_raid]) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') - def _do_next_clean_step_first_step_async(self, return_state, mock_execute): + def _do_next_clean_step_first_step_async(self, return_state, mock_execute, + clean_steps=None): # Execute the first async clean step on a node + tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, last_error=None, clean_step={}) mock_execute.return_value = return_state @@ -1551,30 +1778,37 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: self.service._do_next_clean_step(task, self.clean_steps) self.service._worker_pool.waitall() node.refresh() self.assertEqual(states.CLEANWAIT, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) self.assertEqual(self.clean_steps[0], node.clean_step) mock_execute.assert_called_once_with(mock.ANY, self.clean_steps[0]) - def test_do_next_clean_step_first_step_async(self): + def test_do_next_clean_step_automated_first_step_async(self): self._do_next_clean_step_first_step_async(states.CLEANWAIT) def test_do_next_clean_step_first_step_async_backward_compat(self): self._do_next_clean_step_first_step_async(states.CLEANING) + def test_do_next_clean_step_manual_first_step_async(self): + self._do_next_clean_step_first_step_async( + states.CLEANWAIT, clean_steps=[self.deploy_raid]) + @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step') def _do_next_clean_step_continue_from_last_cleaning(self, return_state, - mock_execute): + mock_execute, + manual=False): # Resume an in-progress cleaning after the first async step + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, last_error=None, clean_step=self.clean_steps[0]) mock_execute.return_value = return_state @@ -1582,13 +1816,14 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: self.service._do_next_clean_step(task, self.next_clean_steps) self.service._worker_pool.waitall() node.refresh() self.assertEqual(states.CLEANWAIT, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) self.assertEqual(self.clean_steps[1], node.clean_step) mock_execute.assert_called_once_with(mock.ANY, self.clean_steps[1]) @@ -1598,39 +1833,52 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, def test_do_next_clean_step_continue_from_last_cleaning_backward_com(self): self._do_next_clean_step_continue_from_last_cleaning(states.CLEANING) + def test_do_next_clean_step_manual_continue_from_last_cleaning(self): + self._do_next_clean_step_continue_from_last_cleaning(states.CLEANWAIT, + manual=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') - def test__do_next_clean_step_last_step_noop(self, mock_execute): + def _do_next_clean_step_last_step_noop(self, mock_execute, manual=False): # Resume where last_step is the last cleaning step, should be noop + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, last_error=None, clean_step=self.clean_steps[-1]) self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: self.service._do_next_clean_step(task, []) self.service._worker_pool.waitall() node.refresh() # Cleaning should be complete without calling additional steps - self.assertEqual(states.AVAILABLE, node.provision_state) + self.assertEqual(tgt_prov_state, node.provision_state) + self.assertEqual(states.NOSTATE, node.target_provision_state) self.assertEqual({}, node.clean_step) self.assertFalse(mock_execute.called) + def test__do_next_clean_step_automated_last_step_noop(self): + self._do_next_clean_step_last_step_noop() + + def test__do_next_clean_step_manual_last_step_noop(self): + self._do_next_clean_step_last_step_noop(manual=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step') @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') - def test__do_next_clean_step_all(self, mock_deploy_execute, - mock_power_execute): + def _do_next_clean_step_all(self, mock_deploy_execute, + mock_power_execute, manual=False): # Run all steps from start to finish (all synchronous) + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, last_error=None, clean_step={}) mock_deploy_execute.return_value = None @@ -1639,14 +1887,15 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: self.service._do_next_clean_step(task, self.clean_steps) self.service._worker_pool.waitall() node.refresh() # Cleaning should be complete - self.assertEqual(states.AVAILABLE, node.provision_state) + self.assertEqual(tgt_prov_state, node.provision_state) + self.assertEqual(states.NOSTATE, node.target_provision_state) self.assertEqual({}, node.clean_step) mock_power_execute.assert_called_once_with(mock.ANY, self.clean_steps[1]) @@ -1655,14 +1904,22 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, mock.call(self.clean_steps[2]) ] + def test_do_next_clean_step_automated_all(self): + self._do_next_clean_step_all() + + def test_do_next_clean_step_manual_all(self): + self._do_next_clean_step_all(manual=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') @mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True) - def test__do_next_clean_step_fail(self, tear_mock, mock_execute): + def _do_next_clean_step_execute_fail(self, tear_mock, mock_execute, + manual=False): # When a clean step fails, go to CLEANFAIL + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, last_error=None, clean_step={}) mock_execute.side_effect = Exception() @@ -1670,7 +1927,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: self.service._do_next_clean_step(task, self.clean_steps) tear_mock.assert_called_once_with(task.driver.deploy, task) @@ -1679,19 +1936,28 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, # Make sure we go to CLEANFAIL, clear clean_steps self.assertEqual(states.CLEANFAIL, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) self.assertEqual({}, node.clean_step) self.assertIsNotNone(node.last_error) self.assertTrue(node.maintenance) mock_execute.assert_called_once_with(mock.ANY, self.clean_steps[0]) + def test__do_next_clean_step_automated_execute_fail(self): + self._do_next_clean_step_execute_fail() + + def test__do_next_clean_step_manual_execute_fail(self): + self._do_next_clean_step_execute_fail(manual=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') @mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True) - def test__do_next_clean_step_fail_in_tear_down_cleaning(self, tear_mock, - mock_execute): + def _do_next_clean_step_fail_in_tear_down_cleaning(self, tear_mock, + mock_execute, + manual=True): + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, last_error=None, clean_step={}) @@ -1701,7 +1967,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: self.service._do_next_clean_step(task, self.clean_steps) self.service._worker_pool.waitall() @@ -1709,26 +1975,34 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, # Make sure we go to CLEANFAIL, clear clean_steps self.assertEqual(states.CLEANFAIL, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) self.assertEqual({}, node.clean_step) self.assertIsNotNone(node.last_error) self.assertEqual(1, tear_mock.call_count) self.assertTrue(node.maintenance) mock_execute.assert_called_once_with(mock.ANY, self.clean_steps[0]) + def test__do_next_clean_step_automated_fail_in_tear_down_cleaning(self): + self._do_next_clean_step_fail_in_tear_down_cleaning() + + def test__do_next_clean_step_manual_fail_in_tear_down_cleaning(self): + self._do_next_clean_step_fail_in_tear_down_cleaning(manual=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') - def test__do_next_clean_step_no_steps(self, mock_execute): + def _do_next_clean_step_no_steps(self, mock_execute, manual=False): # Resume where there are no steps, should be a noop + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, last_error=None, clean_step={}) self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: self.service._do_next_clean_step( task, []) @@ -1736,19 +2010,27 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, node.refresh() # Cleaning should be complete without calling additional steps - self.assertEqual(states.AVAILABLE, node.provision_state) + self.assertEqual(tgt_prov_state, node.provision_state) + self.assertEqual(states.NOSTATE, node.target_provision_state) self.assertEqual({}, node.clean_step) self.assertFalse(mock_execute.called) + def test__do_next_clean_step_automated_no_steps(self): + self._do_next_clean_step_no_steps() + + def test__do_next_clean_step_manual_no_steps(self): + self._do_next_clean_step_no_steps(manual=True) + @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step') @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') - def test__do_next_clean_step_bad_step_return_value( - self, deploy_exec_mock, power_exec_mock): + def _do_next_clean_step_bad_step_return_value( + self, deploy_exec_mock, power_exec_mock, manual=False): # When a clean step fails, go to CLEANFAIL + tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, - target_provision_state=states.AVAILABLE, + target_provision_state=tgt_prov_state, last_error=None, clean_step={}) deploy_exec_mock.return_value = "foo" @@ -1756,7 +2038,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, self._start_service() with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: self.service._do_next_clean_step(task, self.clean_steps) self.service._worker_pool.waitall() @@ -1764,6 +2046,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, # Make sure we go to CLEANFAIL, clear clean_steps self.assertEqual(states.CLEANFAIL, node.provision_state) + self.assertEqual(tgt_prov_state, node.target_provision_state) self.assertEqual({}, node.clean_step) self.assertIsNotNone(node.last_error) self.assertTrue(node.maintenance) @@ -1772,6 +2055,12 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, # Make sure we don't execute any other step and return self.assertFalse(power_exec_mock.called) + def test__do_next_clean_step_automated_bad_step_return_value(self): + self._do_next_clean_step_bad_step_return_value() + + def test__do_next_clean_step_manual_bad_step_return_value(self): + self._do_next_clean_step_bad_step_return_value(manual=True) + def test__get_node_next_clean_steps(self): driver_internal_info = {'clean_steps': self.clean_steps} node = obj_utils.create_test_node( @@ -3105,7 +3394,8 @@ class ManagerCheckDeployTimeoutsTestCase(mgr_utils.CommonMixIn, 'fail', callback=self.service._spawn_worker, call_args=(conductor_utils.cleanup_after_timeout, self.task), - err_handler=conductor_utils.provisioning_error_handler) + err_handler=conductor_utils.provisioning_error_handler, + target_state=None) def test_acquire_node_disappears(self, get_nodeinfo_mock, mapped_mock, acquire_mock): @@ -3192,7 +3482,8 @@ class ManagerCheckDeployTimeoutsTestCase(mgr_utils.CommonMixIn, 'fail', callback=self.service._spawn_worker, call_args=(conductor_utils.cleanup_after_timeout, self.task2), - err_handler=conductor_utils.provisioning_error_handler) + err_handler=conductor_utils.provisioning_error_handler, + target_state=None) def test_exiting_no_worker_avail(self, get_nodeinfo_mock, mapped_mock, acquire_mock): @@ -3217,7 +3508,8 @@ class ManagerCheckDeployTimeoutsTestCase(mgr_utils.CommonMixIn, 'fail', callback=self.service._spawn_worker, call_args=(conductor_utils.cleanup_after_timeout, self.task), - err_handler=conductor_utils.provisioning_error_handler) + err_handler=conductor_utils.provisioning_error_handler, + target_state=None) def test_exiting_with_other_exception(self, get_nodeinfo_mock, mapped_mock, acquire_mock): @@ -3243,7 +3535,8 @@ class ManagerCheckDeployTimeoutsTestCase(mgr_utils.CommonMixIn, 'fail', callback=self.service._spawn_worker, call_args=(conductor_utils.cleanup_after_timeout, self.task), - err_handler=conductor_utils.provisioning_error_handler) + err_handler=conductor_utils.provisioning_error_handler, + target_state=None) def test_worker_limit(self, get_nodeinfo_mock, mapped_mock, acquire_mock): self.config(periodic_max_workers=2, group='conductor') @@ -3269,7 +3562,8 @@ class ManagerCheckDeployTimeoutsTestCase(mgr_utils.CommonMixIn, 'fail', callback=self.service._spawn_worker, call_args=(conductor_utils.cleanup_after_timeout, self.task), - err_handler=conductor_utils.provisioning_error_handler) + err_handler=conductor_utils.provisioning_error_handler, + target_state=None) self.assertEqual([process_event_call] * 2, self.task.process_event.call_args_list) @@ -3806,7 +4100,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, mapped_mock.assert_called_once_with(self.node.uuid, self.node.driver) acquire_mock.assert_called_once_with(self.context, self.node.uuid, purpose=mock.ANY) - self.task.process_event.assert_called_with('fail') + self.task.process_event.assert_called_with('fail', target_state=None) def test__check_inspect_timeouts_acquire_node_disappears(self, get_nodeinfo_mock, @@ -3895,7 +4189,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, # First node skipped self.assertFalse(task.process_event.called) # Second node spawned - self.task2.process_event.assert_called_with('fail') + self.task2.process_event.assert_called_with('fail', target_state=None) def test__check_inspect_timeouts_exiting_no_worker_avail( self, get_nodeinfo_mock, mapped_mock, acquire_mock): @@ -3916,7 +4210,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, acquire_mock.assert_called_once_with(self.context, self.node.uuid, purpose=mock.ANY) - self.task.process_event.assert_called_with('fail') + self.task.process_event.assert_called_with('fail', target_state=None) def test__check_inspect_timeouts_exit_with_other_exception( self, get_nodeinfo_mock, mapped_mock, acquire_mock): @@ -3939,7 +4233,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, acquire_mock.assert_called_once_with(self.context, self.node.uuid, purpose=mock.ANY) - self.task.process_event.assert_called_with('fail') + self.task.process_event.assert_called_with('fail', target_state=None) def test__check_inspect_timeouts_worker_limit(self, get_nodeinfo_mock, mapped_mock, acquire_mock): @@ -3962,7 +4256,7 @@ class ManagerCheckInspectTimeoutsTestCase(mgr_utils.CommonMixIn, self.assertEqual([mock.call(self.context, self.node.uuid, purpose=mock.ANY)] * 2, acquire_mock.call_args_list) - process_event_call = mock.call('fail') + process_event_call = mock.call('fail', target_state=None) self.assertEqual([process_event_call] * 2, self.task.process_event.call_args_list) diff --git a/ironic/tests/unit/conductor/test_rpcapi.py b/ironic/tests/unit/conductor/test_rpcapi.py index 0362ef4386..798e17feae 100644 --- a/ironic/tests/unit/conductor/test_rpcapi.py +++ b/ironic/tests/unit/conductor/test_rpcapi.py @@ -318,6 +318,15 @@ class RPCAPITestCase(base.DbTestCase): node_id=self.fake_node['uuid'], target_raid_config='config') + def test_do_node_clean(self): + clean_steps = [{'step': 'upgrade_firmware', 'interface': 'deploy'}, + {'step': 'upgrade_bmc', 'interface': 'management'}] + self._test_rpcapi('do_node_clean', + 'call', + version='1.32', + node_id=self.fake_node['uuid'], + clean_steps=clean_steps) + def test_object_action(self): self._test_rpcapi('object_action', 'call', diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index 496231c48f..96b07a9ae9 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -346,10 +346,14 @@ class NodeCleaningStepsTestCase(base.DbTestCase): 'step': 'update_firmware', 'priority': 10, 'interface': 'deploy'} self.deploy_erase = { 'step': 'erase_disks', 'priority': 20, 'interface': 'deploy'} + # Automated cleaning should be executed in this order self.clean_steps = [self.deploy_erase, self.power_update, self.deploy_update] + # Manual clean step self.deploy_raid = { - 'step': 'build_raid', 'priority': 0, 'interface': 'deploy'} + 'step': 'build_raid', 'priority': 0, 'interface': 'deploy', + 'argsinfo': {'arg1': {'description': 'desc1', 'required': True}, + 'arg2': {'description': 'desc2'}}} @mock.patch('ironic.drivers.modules.fake.FakeDeploy.get_clean_steps') @mock.patch('ironic.drivers.modules.fake.FakePower.get_clean_steps') @@ -366,18 +370,36 @@ class NodeCleaningStepsTestCase(base.DbTestCase): self.deploy_update] with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: steps = conductor_utils._get_cleaning_steps(task, enabled=False) self.assertEqual(self.clean_steps, steps) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.get_clean_steps') + @mock.patch('ironic.drivers.modules.fake.FakePower.get_clean_steps') + def test__get_cleaning_steps_unsorted(self, mock_power_steps, + mock_deploy_steps): + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.CLEANING, + target_provision_state=states.MANAGEABLE) + + mock_deploy_steps.return_value = [self.deploy_raid, + self.deploy_update, + self.deploy_erase] + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + steps = conductor_utils._get_cleaning_steps(task, enabled=False, + sort=False) + self.assertEqual(mock_deploy_steps.return_value, steps) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.get_clean_steps') @mock.patch('ironic.drivers.modules.fake.FakePower.get_clean_steps') def test__get_cleaning_steps_only_enabled(self, mock_power_steps, mock_deploy_steps): # Test getting only cleaning steps, with one driver returning None, two # conflicting priorities, and asserting they are ordered properly. - # Should discard zap step + # Should discard zero-priority (manual) clean step node = obj_utils.create_test_node( self.context, driver='fake', provision_state=states.CLEANING, @@ -389,13 +411,15 @@ class NodeCleaningStepsTestCase(base.DbTestCase): self.deploy_raid] with task_manager.acquire( - self.context, node['id'], shared=True) as task: + self.context, node.uuid, shared=True) as task: steps = conductor_utils._get_cleaning_steps(task, enabled=True) self.assertEqual(self.clean_steps, steps) + @mock.patch.object(conductor_utils, '_validate_user_clean_steps') @mock.patch.object(conductor_utils, '_get_cleaning_steps') - def test_set_node_cleaning_steps(self, mock_steps): + def test_set_node_cleaning_steps_automated(self, mock_steps, + mock_validate_user_steps): mock_steps.return_value = self.clean_steps node = obj_utils.create_test_node( @@ -406,9 +430,111 @@ class NodeCleaningStepsTestCase(base.DbTestCase): clean_step=None) with task_manager.acquire( - self.context, node['id'], shared=False) as task: + self.context, node.uuid, shared=False) as task: conductor_utils.set_node_cleaning_steps(task) node.refresh() self.assertEqual(self.clean_steps, task.node.driver_internal_info['clean_steps']) self.assertEqual({}, node.clean_step) + mock_steps.assert_called_once_with(task, enabled=True) + self.assertFalse(mock_validate_user_steps.called) + + @mock.patch.object(conductor_utils, '_validate_user_clean_steps') + @mock.patch.object(conductor_utils, '_get_cleaning_steps') + def test_set_node_cleaning_steps_manual(self, mock_steps, + mock_validate_user_steps): + clean_steps = [self.deploy_raid] + mock_steps.return_value = self.clean_steps + + node = obj_utils.create_test_node( + self.context, driver='fake', + provision_state=states.CLEANING, + target_provision_state=states.MANAGEABLE, + last_error=None, + clean_step=None, + driver_internal_info={'clean_steps': clean_steps}) + + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + conductor_utils.set_node_cleaning_steps(task) + node.refresh() + self.assertEqual(clean_steps, + task.node.driver_internal_info['clean_steps']) + self.assertEqual({}, node.clean_step) + self.assertFalse(mock_steps.called) + mock_validate_user_steps.assert_called_once_with(task, clean_steps) + + @mock.patch.object(conductor_utils, '_get_cleaning_steps') + def test__validate_user_clean_steps(self, mock_steps): + node = obj_utils.create_test_node(self.context) + mock_steps.return_value = self.clean_steps + + user_steps = [{'step': 'update_firmware', 'interface': 'power'}, + {'step': 'erase_disks', 'interface': 'deploy'}] + + with task_manager.acquire(self.context, node.uuid) as task: + conductor_utils._validate_user_clean_steps(task, user_steps) + mock_steps.assert_called_once_with(task, enabled=False, sort=False) + + @mock.patch.object(conductor_utils, '_get_cleaning_steps') + def test__validate_user_clean_steps_no_steps(self, mock_steps): + node = obj_utils.create_test_node(self.context) + mock_steps.return_value = self.clean_steps + + with task_manager.acquire(self.context, node.uuid) as task: + conductor_utils._validate_user_clean_steps(task, []) + mock_steps.assert_called_once_with(task, enabled=False, sort=False) + + @mock.patch.object(conductor_utils, '_get_cleaning_steps') + def test__validate_user_clean_steps_get_steps_exception(self, mock_steps): + node = obj_utils.create_test_node(self.context) + mock_steps.side_effect = exception.NodeCleaningFailure('bad') + + with task_manager.acquire(self.context, node.uuid) as task: + self.assertRaises(exception.NodeCleaningFailure, + conductor_utils._validate_user_clean_steps, + task, []) + mock_steps.assert_called_once_with(task, enabled=False, sort=False) + + @mock.patch.object(conductor_utils, '_get_cleaning_steps') + def test__validate_user_clean_steps_not_supported(self, mock_steps): + node = obj_utils.create_test_node(self.context) + mock_steps.return_value = [self.power_update, self.deploy_raid] + user_steps = [{'step': 'update_firmware', 'interface': 'power'}, + {'step': 'bad_step', 'interface': 'deploy'}] + + with task_manager.acquire(self.context, node.uuid) as task: + self.assertRaisesRegexp(exception.InvalidParameterValue, + "does not support.*bad_step", + conductor_utils._validate_user_clean_steps, + task, user_steps) + mock_steps.assert_called_once_with(task, enabled=False, sort=False) + + @mock.patch.object(conductor_utils, '_get_cleaning_steps') + def test__validate_user_clean_steps_invalid_arg(self, mock_steps): + node = obj_utils.create_test_node(self.context) + mock_steps.return_value = self.clean_steps + user_steps = [{'step': 'update_firmware', 'interface': 'power', + 'args': {'arg1': 'val1', 'arg2': 'val2'}}, + {'step': 'erase_disks', 'interface': 'deploy'}] + + with task_manager.acquire(self.context, node.uuid) as task: + self.assertRaisesRegexp(exception.InvalidParameterValue, + "update_firmware.*invalid.*arg1", + conductor_utils._validate_user_clean_steps, + task, user_steps) + mock_steps.assert_called_once_with(task, enabled=False, sort=False) + + @mock.patch.object(conductor_utils, '_get_cleaning_steps') + def test__validate_user_clean_steps_missing_required_arg(self, mock_steps): + node = obj_utils.create_test_node(self.context) + mock_steps.return_value = [self.power_update, self.deploy_raid] + user_steps = [{'step': 'update_firmware', 'interface': 'power'}, + {'step': 'build_raid', 'interface': 'deploy'}] + + with task_manager.acquire(self.context, node.uuid) as task: + self.assertRaisesRegexp(exception.InvalidParameterValue, + "build_raid.*missing.*arg1", + conductor_utils._validate_user_clean_steps, + task, user_steps) + mock_steps.assert_called_once_with(task, enabled=False, sort=False)