From 3c3ecd83f3c33692981d1c66dc3986dfdaf8bffa Mon Sep 17 00:00:00 2001 From: Scott Hussey Date: Sat, 17 Jun 2017 10:15:26 -0500 Subject: [PATCH] DRYD-2 MVP Final Phase - Node deployment Add driver task for DeployNode Add an OOB driver for manual operation to aid in using VMs for testing Drydock Add boot_mac field to YAML schema for identifying a VM (no IPMI IP) --- .../drivers/node/maasdriver/api_client.py | 2 +- .../drivers/node/maasdriver/driver.py | 130 ++++++++++- .../node/maasdriver/models/interface.py | 2 +- .../drivers/node/maasdriver/models/machine.py | 112 ++++++++-- drydock_provisioner/drivers/oob/__init__.py | 15 ++ .../drivers/oob/manual_driver/__init__.py | 13 ++ .../drivers/oob/manual_driver/driver.py | 74 +++++++ .../drivers/oob/pyghmi_driver/__init__.py | 2 + drydock_provisioner/ingester/plugins/yaml.py | 12 +- drydock_provisioner/objects/hostprofile.py | 9 +- drydock_provisioner/objects/node.py | 3 +- drydock_provisioner/orchestrator/__init__.py | 209 ++++++++++++------ setup.py | 1 + 13 files changed, 487 insertions(+), 97 deletions(-) create mode 100644 drydock_provisioner/drivers/oob/manual_driver/__init__.py create mode 100644 drydock_provisioner/drivers/oob/manual_driver/driver.py diff --git a/drydock_provisioner/drivers/node/maasdriver/api_client.py b/drydock_provisioner/drivers/node/maasdriver/api_client.py index fa463109..01930a4e 100644 --- a/drydock_provisioner/drivers/node/maasdriver/api_client.py +++ b/drydock_provisioner/drivers/node/maasdriver/api_client.py @@ -100,7 +100,7 @@ class MaasRequestFactory(object): if 'Accept' not in headers.keys(): headers['Accept'] = 'application/json' - if 'files' in kwargs.keys(): + if kwargs.get('files', None) is not None: files = kwargs.pop('files') files_tuples = {} diff --git a/drydock_provisioner/drivers/node/maasdriver/driver.py b/drydock_provisioner/drivers/node/maasdriver/driver.py index 9da0f9c3..90cc3c76 100644 --- a/drydock_provisioner/drivers/node/maasdriver/driver.py +++ 
b/drydock_provisioner/drivers/node/maasdriver/driver.py @@ -362,6 +362,77 @@ class MaasNodeDriver(NodeDriver): status=hd_fields.TaskStatus.Complete, result=result, result_detail=result_detail) + elif task.action ==hd_fields.OrchestratorAction.DeployNode: + self.orchestrator.task_field_update(task.get_id(), + status=hd_fields.TaskStatus.Running) + + self.logger.debug("Starting subtask to deploy %s nodes." % (len(task.node_list))) + + subtasks = [] + + result_detail = { + 'detail': [], + 'failed_nodes': [], + 'successful_nodes': [], + } + + for n in task.node_list: + subtask = self.orchestrator.create_task(task_model.DriverTask, + parent_task_id=task.get_id(), design_id=design_id, + action=hd_fields.OrchestratorAction.DeployNode, + site_name=task.site_name, + task_scope={'site': task.site_name, 'node_names': [n]}) + runner = MaasTaskRunner(state_manager=self.state_manager, + orchestrator=self.orchestrator, + task_id=subtask.get_id(),config=self.config) + + self.logger.info("Starting thread for task %s to deploy node %s" % (subtask.get_id(), n)) + + runner.start() + subtasks.append(subtask.get_id()) + + running_subtasks = len(subtasks) + attempts = 0 + worked = failed = False + + #TODO Add timeout to config + while running_subtasks > 0 and attempts < 30: + for t in subtasks: + subtask = self.state_manager.get_task(t) + + if subtask.status == hd_fields.TaskStatus.Complete: + self.logger.info("Task %s to deploy node %s complete - status %s" % + (subtask.get_id(), n, subtask.get_result())) + running_subtasks = running_subtasks - 1 + + if subtask.result == hd_fields.ActionResult.Success: + result_detail['successful_nodes'].extend(subtask.node_list) + worked = True + elif subtask.result == hd_fields.ActionResult.Failure: + result_detail['failed_nodes'].extend(subtask.node_list) + failed = True + elif subtask.result == hd_fields.ActionResult.PartialSuccess: + worked = failed = True + + time.sleep(1 * 60) + attempts = attempts + 1 + + if running_subtasks > 0: + 
self.logger.warning("Time out for task %s before all subtask threads complete" % (task.get_id())) + result = hd_fields.ActionResult.DependentFailure + result_detail['detail'].append('Some subtasks did not complete before the timeout threshold') + elif worked and failed: + result = hd_fields.ActionResult.PartialSuccess + elif worked: + result = hd_fields.ActionResult.Success + else: + result = hd_fields.ActionResult.Failure + + self.orchestrator.task_field_update(task.get_id(), + status=hd_fields.TaskStatus.Complete, + result=result, + result_detail=result_detail) + class MaasTaskRunner(drivers.DriverTaskRunner): def __init__(self, config=None, **kwargs): @@ -469,7 +540,7 @@ class MaasTaskRunner(drivers.DriverTaskRunner): % (vlan.resource_id, n.name)) else: # Create a new VLAN in this fabric and assign subnet to it - vlan = maas_vlan.Vlan(self.maas_client, name=n.name, vid=vlan_id, + vlan = maas_vlan.Vlan(self.maas_client, name=n.name, vid=n.vlan_id, mtu=getattr(n, 'mtu', None),fabric_id=fabric.resource_id) vlan = vlan_list.add(vlan) @@ -660,7 +731,7 @@ class MaasTaskRunner(drivers.DriverTaskRunner): node = site_design.get_baremetal_node(n) machine = machine_list.identify_baremetal_node(node, update_name=False) if machine is not None: - if machine.status_name == ['New', 'Broken']: + if machine.status_name in ['New', 'Broken']: self.logger.debug("Located node %s in MaaS, starting commissioning" % (n)) machine.commission() @@ -854,3 +925,58 @@ class MaasTaskRunner(drivers.DriverTaskRunner): status=hd_fields.TaskStatus.Complete, result=final_result, result_detail=result_detail) + elif task_action == hd_fields.OrchestratorAction.DeployNode: + try: + machine_list = maas_machine.Machines(self.maas_client) + machine_list.refresh() + + fabrics = maas_fabric.Fabrics(self.maas_client) + fabrics.refresh() + + subnets = maas_subnet.Subnets(self.maas_client) + subnets.refresh() + except Exception as ex: + self.logger.error("Error deploying node, cannot access MaaS: %s" % 
str(ex)) + traceback.print_tb(sys.last_traceback) + self.orchestrator.task_field_update(self.task.get_id(), + status=hd_fields.TaskStatus.Complete, + result=hd_fields.ActionResult.Failure, + result_detail={'detail': 'Error accessing MaaS API', 'retry': True}) + return + + nodes = self.task.node_list + + result_detail = {'detail': []} + + worked = failed = False + + for n in nodes: + self.logger.info("Acquiring node %s for deployment" % (n)) + + try: + machine = machine_list.acquire_node(n) + except DriverError as dex: + self.logger.warning("Error acquiring node %s, skipping" % n) + failed = True + continue + + self.logger.info("Deploying node %s" % (n)) + + try: + machine.deploy() + except DriverError as dex: + self.logger.warning("Error deploying node %s, skipping" % n) + failed = True + continue + + self.logger.info("Node %s deployed" % (n)) + + if failed: + final_result = hd_fields.ActionResult.Failure + else: + final_result = hd_fields.ActionResult.Success + + self.orchestrator.task_field_update(self.task.get_id(), + status=hd_fields.TaskStatus.Complete, + result=final_result, + result_detail=result_detail) diff --git a/drydock_provisioner/drivers/node/maasdriver/models/interface.py b/drydock_provisioner/drivers/node/maasdriver/models/interface.py index db2a9d85..135f1204 100644 --- a/drydock_provisioner/drivers/node/maasdriver/models/interface.py +++ b/drydock_provisioner/drivers/node/maasdriver/models/interface.py @@ -225,7 +225,7 @@ class Interfaces(model_base.ResourceCollectionBase): if exists is not None: self.logger.info("Interface for VLAN %s already exists on node %s, skipping" % (vlan_tag, self.system_id)) - return None + return exists url = self.interpolate_url() diff --git a/drydock_provisioner/drivers/node/maasdriver/models/machine.py b/drydock_provisioner/drivers/node/maasdriver/models/machine.py index 088ca986..aca826fd 100644 --- a/drydock_provisioner/drivers/node/maasdriver/models/machine.py +++ 
b/drydock_provisioner/drivers/node/maasdriver/models/machine.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import drydock_provisioner.error as errors import drydock_provisioner.drivers.node.maasdriver.models.base as model_base import drydock_provisioner.drivers.node.maasdriver.models.interface as maas_interface @@ -22,7 +23,7 @@ class Machine(model_base.ResourceBase): resource_url = 'machines/{resource_id}/' fields = ['resource_id', 'hostname', 'power_type', 'power_state', 'power_parameters', 'interfaces', - 'boot_interface', 'memory', 'cpu_count', 'tag_names', 'status_name'] + 'boot_interface', 'memory', 'cpu_count', 'tag_names', 'status_name', 'boot_mac'] json_fields = ['hostname', 'power_type'] def __init__(self, api_client, **kwargs): @@ -55,7 +56,30 @@ class Machine(model_base.ResourceBase): # Need to sort out how to handle exceptions if not resp.ok: - raise Exception() + self.logger.error("Error commissioning node, received HTTP %s from MaaS" % resp.status_code) + self.logger.debug("MaaS response: %s" % resp.text) + raise errors.DriverError("Error commissioning node, received HTTP %s from MaaS" % resp.status_code) + + def deploy(self, user_data=None, platform=None, kernel=None): + deploy_options = {} + + if user_data is not None: + deploy_options['user_data'] = user_data + + if platform is not None: + deploy_options['distro_series'] = platform + + if kernel is not None: + deploy_options['hwe_kernel'] = kernel + + url = self.interpolate_url() + resp = self.api_client.post(url, op='deploy', + files=deploy_options if len(deploy_options) > 0 else None) + + if not resp.ok: + self.logger.error("Error deploying node, received HTTP %s from MaaS" % resp.status_code) + self.logger.debug("MaaS response: %s" % resp.text) + raise errors.DriverError("Error deploying node, received HTTP %s from MaaS" % resp.status_code) def get_network_interface(self, iface_name): if self.interfaces is not None: 
@@ -106,6 +130,11 @@ class Machine(model_base.ResourceBase): if 'system_id' in obj_dict.keys(): refined_dict['resource_id'] = obj_dict.get('system_id') + # Capture the boot interface MAC to allow for node id of VMs + if 'boot_interface' in obj_dict.keys(): + if isinstance(obj_dict['boot_interface'], dict): + refined_dict['boot_mac'] = obj_dict['boot_interface']['mac_address'] + i = cls(api_client, **refined_dict) return i @@ -122,6 +151,37 @@ class Machines(model_base.ResourceCollectionBase): for k, v in self.resources.items(): v.get_power_params() + def acquire_node(self, node_name): + """ + Acquire a commissioned node for deployment + + :param node_name: The hostname of a node to acquire + """ + + self.refresh() + + node = self.singleton({'hostname': node_name}) + + if node is None: + self.logger.info("Node %s not found" % (node_name)) + raise errors.DriverError("Node %s not found" % (node_name)) + + if node.status_name != 'Ready': + self.logger.info("Node %s status '%s' does not allow deployment, should be 'Ready'." % + (node_name, node.status_name)) + raise errors.DriverError("Node %s status '%s' does not allow deployment, should be 'Ready'." 
% + (node_name, node.status_name)) + + url = self.interpolate_url() + + resp = self.api_client.post(url, op='allocate', files={'system_id': node.resource_id}) + + if not resp.ok: + self.logger.error("Error acquiring node, MaaS returned %s" % resp.status_code) + self.logger.debug("MaaS response: %s" % resp.text) + raise errors.DriverError("Error acquiring node, MaaS returned %s" % resp.status_code) + + return node def identify_baremetal_node(self, node_model, update_name=True): """ @@ -132,30 +192,44 @@ class Machines(model_base.ResourceCollectionBase): :param node_model: Instance of objects.node.BaremetalNode to search MaaS for matching resource :param update_name: Whether Drydock should update the MaaS resource name to match the Drydock design """ - node_oob_network = node_model.oob_network - node_oob_ip = node_model.get_network_address(node_oob_network) + + maas_node = None - if node_oob_ip is None: - self.logger.warn("Node model missing OOB IP address") - raise ValueError('Node model missing OOB IP address') + if node_model.oob_type == 'ipmi': + node_oob_network = node_model.oob_network + node_oob_ip = node_model.get_network_address(node_oob_network) - try: - self.collect_power_params() + if node_oob_ip is None: + self.logger.warn("Node model missing OOB IP address") + raise ValueError('Node model missing OOB IP address') - maas_node = self.singleton({'power_params.power_address': node_oob_ip}) + try: + self.collect_power_params() - self.logger.debug("Found MaaS resource %s matching Node %s" % (maas_node.resource_id, node_model.get_id())) + maas_node = self.singleton({'power_params.power_address': node_oob_ip}) + except ValueError as ve: + self.logger.warn("Error locating matching MaaS resource for OOB IP %s" % (node_oob_ip)) + return None + else: + # Use boot_mac for nodes not using IPMI + node_boot_mac = node_model.boot_mac - if maas_node.hostname != node_model.name and update_name: - maas_node.hostname = node_model.name - maas_node.update() - 
self.logger.debug("Updated MaaS resource %s hostname to %s" % (maas_node.resource_id, node_model.name)) + if node_boot_mac is not None: + maas_node = self.singleton({'boot_mac': node_model.boot_mac}) - return maas_node - except ValueError as ve: - self.logger.warn("Error locating matching MaaS resource for OOB IP %s" % (node_oob_ip)) + if maas_node is None: + self.logger.info("Could not locate node %s in MaaS" % node_model.name) return None + self.logger.debug("Found MaaS resource %s matching Node %s" % (maas_node.resource_id, node_model.get_id())) + + if maas_node.hostname != node_model.name and update_name: + maas_node.hostname = node_model.name + maas_node.update() + self.logger.debug("Updated MaaS resource %s hostname to %s" % (maas_node.resource_id, node_model.name)) + + return maas_node + def query(self, query): """ Custom query method to deal with complex fields @@ -190,4 +264,4 @@ class Machines(model_base.ResourceCollectionBase): return res raise errors.DriverError("Failed updating MAAS url %s - return code %s" - % (url, resp.status_code)) \ No newline at end of file + % (url, resp.status_code)) diff --git a/drydock_provisioner/drivers/oob/__init__.py b/drydock_provisioner/drivers/oob/__init__.py index 8e7a9c26..2dc1d13f 100644 --- a/drydock_provisioner/drivers/oob/__init__.py +++ b/drydock_provisioner/drivers/oob/__init__.py @@ -19,6 +19,8 @@ from drydock_provisioner.drivers import ProviderDriver class OobDriver(ProviderDriver): + oob_types_supported = [''] + def __init__(self, **kwargs): super(OobDriver, self).__init__(**kwargs) @@ -43,3 +45,16 @@ class OobDriver(ProviderDriver): else: raise DriverError("Unsupported action %s for driver %s" % (task_action, self.driver_desc)) + + @classmethod + def oob_type_support(cls, type_string): + """ + Does this driver support a particular OOB type + + :param type_string: OOB type to check + """ + + if type_string in cls.oob_types_supported: + return True + + return False \ No newline at end of file diff --git 
a/drydock_provisioner/drivers/oob/manual_driver/__init__.py b/drydock_provisioner/drivers/oob/manual_driver/__init__.py new file mode 100644 index 00000000..2a385a45 --- /dev/null +++ b/drydock_provisioner/drivers/oob/manual_driver/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/drydock_provisioner/drivers/oob/manual_driver/driver.py b/drydock_provisioner/drivers/oob/manual_driver/driver.py new file mode 100644 index 00000000..e53f9e30 --- /dev/null +++ b/drydock_provisioner/drivers/oob/manual_driver/driver.py @@ -0,0 +1,74 @@ +# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import time +import logging + +import drydock_provisioner.error as errors +import drydock_provisioner.config as config + +import drydock_provisioner.objects.fields as hd_fields +import drydock_provisioner.objects.task as task_model + +import drydock_provisioner.drivers.oob as oob +import drydock_provisioner.drivers as drivers + + +class ManualDriver(oob.OobDriver): + + oob_types_supported = ['manual'] + + def __init__(self, **kwargs): + super(ManualDriver, self).__init__(**kwargs) + + self.driver_name = "manual_driver" + self.driver_key = "manual_driver" + self.driver_desc = "Manual (Noop) OOB Driver" + + self.logger = logging.getLogger('drydock.oobdriver.pyghmi') + self.config = config.DrydockConfig.node_driver.get(self.driver_key, {}) + + def execute_task(self, task_id): + task = self.state_manager.get_task(task_id) + + if task is None: + self.logger.error("Invalid task %s" % (task_id)) + raise errors.DriverError("Invalid task %s" % (task_id)) + + if task.action not in self.supported_actions: + self.logger.error("Driver %s doesn't support task action %s" + % (self.driver_desc, task.action)) + raise errors.DriverError("Driver %s doesn't support task action %s" + % (self.driver_desc, task.action)) + + design_id = getattr(task, 'design_id', None) + + if design_id is None: + raise errors.DriverError("No design ID specified in task %s" % + (task_id)) + + + if task.site_name is None: + raise errors.DriverError("Not site specified for task %s." 
% + (task_id)) + + self.orchestrator.task_field_update(task.get_id(), + status=hd_fields.TaskStatus.Running) + + self.logger.info("Sleeping 60s to allow time for manual OOB %s action" % task.action) + + time.sleep(60) + + self.orchestrator.task_field_update(task.get_id(), + status=hd_fields.TaskStatus.Complete, + result=hd_fields.ActionResult.Success) diff --git a/drydock_provisioner/drivers/oob/pyghmi_driver/__init__.py b/drydock_provisioner/drivers/oob/pyghmi_driver/__init__.py index 9010fbcf..655241ac 100644 --- a/drydock_provisioner/drivers/oob/pyghmi_driver/__init__.py +++ b/drydock_provisioner/drivers/oob/pyghmi_driver/__init__.py @@ -28,6 +28,8 @@ import drydock_provisioner.drivers as drivers class PyghmiDriver(oob.OobDriver): + oob_types_supported = ['ipmi'] + def __init__(self, **kwargs): super(PyghmiDriver, self).__init__(**kwargs) diff --git a/drydock_provisioner/ingester/plugins/yaml.py b/drydock_provisioner/ingester/plugins/yaml.py index 060ce38a..b179bb99 100644 --- a/drydock_provisioner/ingester/plugins/yaml.py +++ b/drydock_provisioner/ingester/plugins/yaml.py @@ -247,10 +247,12 @@ class YamlIngester(IngesterPlugin): oob = spec.get('oob', {}) - model.oob_type = oob.get('type', None) - model.oob_network = oob.get('network', None) - model.oob_account = oob.get('account', None) - model.oob_credential = oob.get('credential', None) + model.oob_parameters = {} + for k,v in oob.items(): + if k == 'type': + model.oob_type = oob.get('type', None) + else: + model.oob_parameters[k] = v storage = spec.get('storage', {}) model.storage_layout = storage.get('layout', 'lvm') @@ -320,6 +322,8 @@ class YamlIngester(IngesterPlugin): model.rack = node_metadata.get('rack', None) if kind == 'BaremetalNode': + model.boot_mac = node_metadata.get('boot_mac', None) + addresses = spec.get('addressing', []) if len(addresses) == 0: diff --git a/drydock_provisioner/objects/hostprofile.py b/drydock_provisioner/objects/hostprofile.py index 083a0cfc..bc367932 100644 --- 
a/drydock_provisioner/objects/hostprofile.py +++ b/drydock_provisioner/objects/hostprofile.py @@ -33,9 +33,7 @@ class HostProfile(base.DrydockPersistentObject, base.DrydockObject): 'parent_profile': obj_fields.StringField(nullable=True), 'hardware_profile': obj_fields.StringField(nullable=True), 'oob_type': obj_fields.StringField(nullable=True), - 'oob_network': obj_fields.StringField(nullable=True), - 'oob_account': obj_fields.StringField(nullable=True), - 'oob_credential': obj_fields.StringField(nullable=True), + 'oob_parameters': obj_fields.DictOfStringsField(nullable=True), 'storage_layout': obj_fields.StringField(nullable=True), 'bootdisk_device': obj_fields.StringField(nullable=True), # Consider a custom field for storage size @@ -91,8 +89,7 @@ class HostProfile(base.DrydockPersistentObject, base.DrydockObject): # First compute inheritance for simple fields inheritable_field_list = [ - 'hardware_profile', 'oob_type', 'oob_network', - 'oob_credential', 'oob_account', 'storage_layout', + 'hardware_profile', 'oob_type', 'storage_layout', 'bootdisk_device', 'bootdisk_root_size', 'bootdisk_boot_size', 'rack', 'base_os', 'kernel', 'kernel_params', 'primary_network'] @@ -105,6 +102,8 @@ class HostProfile(base.DrydockPersistentObject, base.DrydockObject): getattr(parent, f, None))) # Now compute inheritance for complex types + self.oob_parameters = objects.Utils.merge_dicts(self.oob_parameters, parent.oob_parameters) + self.tags = objects.Utils.merge_lists(self.tags, parent.tags) self.owner_data = objects.Utils.merge_dicts(self.owner_data, parent.owner_data) diff --git a/drydock_provisioner/objects/node.py b/drydock_provisioner/objects/node.py index 7854b346..7f7c1f57 100644 --- a/drydock_provisioner/objects/node.py +++ b/drydock_provisioner/objects/node.py @@ -31,7 +31,8 @@ class BaremetalNode(drydock_provisioner.objects.hostprofile.HostProfile): VERSION = '1.0' fields = { - 'addressing': ovo_fields.ObjectField('IpAddressAssignmentList') + 'addressing': 
ovo_fields.ObjectField('IpAddressAssignmentList'), + 'boot_mac': ovo_fields.StringField(nullable=True), } # A BaremetalNode is really nothing more than a physical diff --git a/drydock_provisioner/orchestrator/__init__.py b/drydock_provisioner/orchestrator/__init__.py index d893fafe..8d86cff9 100644 --- a/drydock_provisioner/orchestrator/__init__.py +++ b/drydock_provisioner/orchestrator/__init__.py @@ -37,14 +37,18 @@ class Orchestrator(object): self.logger = logging.getLogger('drydock.orchestrator') if enabled_drivers is not None: - oob_driver_name = enabled_drivers.get('oob', None) - if oob_driver_name is not None: - m, c = oob_driver_name.rsplit('.', 1) - oob_driver_class = \ - getattr(importlib.import_module(m), c, None) - if oob_driver_class is not None: - self.enabled_drivers['oob'] = oob_driver_class(state_manager=state_manager, - orchestrator=self) + oob_drivers = enabled_drivers.get('oob', []) + + for d in oob_drivers: + if d is not None: + m, c = d.rsplit('.', 1) + oob_driver_class = \ + getattr(importlib.import_module(m), c, None) + if oob_driver_class is not None: + if self.enabled_drivers.get('oob', None) is None: + self.enabled_drivers['oob'] = [] + self.enabled_drivers['oob'].append(oob_driver_class(state_manager=state_manager, + orchestrator=self)) node_driver_name = enabled_drivers.get('node', None) if node_driver_name is not None: @@ -174,39 +178,75 @@ class Orchestrator(object): self.task_field_update(task_id, status=hd_fields.TaskStatus.Running) - oob_driver = self.enabled_drivers['oob'] - - if oob_driver is None: - self.task_field_update(task_id, - status=hd_fields.TaskStatus.Errored, - result=hd_fields.ActionResult.Failure, - result_detail={'detail': 'Error: No oob driver configured', 'retry': False}) - return - site_design = self.get_effective_site(design_id) node_filter = task.node_filter + oob_type_partition = {} + target_nodes = self.process_node_filter(node_filter, site_design) - target_names = [x.get_name() for x in target_nodes] + for n 
in target_nodes: + if n.oob_type not in oob_type_partition.keys(): + oob_type_partition[n.oob_type] = [] - task_scope = {'site' : task_site, - 'node_names' : target_names} + oob_type_partition[n.oob_type].append(n) - oob_driver_task = self.create_task(tasks.DriverTask, + result_detail = {'detail': []} + worked = failed = False + + # TODO Need to multithread tasks for different OOB types + for oob_type, oob_nodes in oob_type_partition.items(): + oob_driver = None + for d in self.enabled_drivers['oob']: + if d.oob_type_support(oob_type): + oob_driver = d + break + + if oob_driver is None: + self.logger.warning("Node OOB type %s has no enabled driver." % oob_type) + result_detail['detail'].append("Error: No oob driver configured for type %s" % oob_type) + continue + + + target_names = [x.get_name() for x in oob_nodes] + + task_scope = {'site' : task_site, + 'node_names' : target_names} + + oob_driver_task = self.create_task(tasks.DriverTask, parent_task_id=task.get_id(), design_id=design_id, action=hd_fields.OrchestratorAction.InterrogateOob, task_scope=task_scope) - oob_driver.execute_task(oob_driver_task.get_id()) + self.logger.info("Starting task %s for node verification via OOB type %s" % + (oob_driver_task.get_id(), oob_type)) - oob_driver_task = self.state_manager.get_task(oob_driver_task.get_id()) + oob_driver.execute_task(oob_driver_task.get_id()) + + oob_driver_task = self.state_manager.get_task(oob_driver_task.get_id()) + + if oob_driver_task.get_result() in [hd_fields.ActionResult.Success, + hd_fields.ActionResult.PartialSuccess]: + worked = True + if oob_driver_task.get_result() in [hd_fields.ActionResult.Failure, + hd_fields.ActionResult.PartialSuccess]: + failed = True + + final_result = None + + if worked and failed: + final_result = hd_fields.ActionResult.PartialSuccess + elif worked: + final_result = hd_fields.ActionResult.Success + else: + final_result = hd_fields.ActionResult.Failure self.task_field_update(task_id, 
status=hd_fields.TaskStatus.Complete, - result=oob_driver_task.get_result()) + result=final_result, + result_detail=result_detail) return elif task.action == hd_fields.OrchestratorAction.PrepareNode: failed = worked = False @@ -217,15 +257,6 @@ class Orchestrator(object): # NOTE Should we attempt to interrogate the node via Node Driver to see if # it is in a deployed state before we start rebooting? Or do we just leverage # Drydock internal state via site build data (when implemented)? - oob_driver = self.enabled_drivers['oob'] - - if oob_driver is None: - self.task_field_update(task_id, - status=hd_fields.TaskStatus.Errored, - result=hd_fields.ActionResult.Failure, - result_detail={'detail': 'Error: No oob driver configured', 'retry': False}) - return - node_driver = self.enabled_drivers['node'] if node_driver is None: @@ -241,53 +272,79 @@ class Orchestrator(object): target_nodes = self.process_node_filter(node_filter, site_design) - target_names = [x.get_name() for x in target_nodes] + oob_type_partition = {} - task_scope = {'site' : task_site, - 'node_names' : target_names} + for n in target_nodes: + if n.oob_type not in oob_type_partition.keys(): + oob_type_partition[n.oob_type] = [] - setboot_task = self.create_task(tasks.DriverTask, - parent_task_id=task.get_id(), - design_id=design_id, - action=hd_fields.OrchestratorAction.SetNodeBoot, - task_scope=task_scope) + oob_type_partition[n.oob_type].append(n) - self.logger.info("Starting OOB driver task %s to set PXE boot" % (setboot_task.get_id())) + result_detail = {'detail': []} + worked = failed = False - oob_driver.execute_task(setboot_task.get_id()) + # TODO Need to multithread tasks for different OOB types + for oob_type, oob_nodes in oob_type_partition.items(): + oob_driver = None + for d in self.enabled_drivers['oob']: + if d.oob_type_support(oob_type): + oob_driver = d + break - self.logger.info("OOB driver task %s complete" % (setboot_task.get_id())) + if oob_driver is None: + self.logger.warning("Node 
OOB type %s has no enabled driver." % oob_type) + result_detail['detail'].append("Error: No oob driver configured for type %s" % oob_type) + continue + - setboot_task = self.state_manager.get_task(setboot_task.get_id()) + target_names = [x.get_name() for x in oob_nodes] - if setboot_task.get_result() == hd_fields.ActionResult.Success: - worked = True - elif setboot_task.get_result() == hd_fields.ActionResult.PartialSuccess: - worked = failed = True - elif setboot_task.get_result() == hd_fields.ActionResult.Failure: - failed = True + task_scope = {'site' : task_site, + 'node_names' : target_names} - cycle_task = self.create_task(tasks.DriverTask, + setboot_task = self.create_task(tasks.DriverTask, + parent_task_id=task.get_id(), + design_id=design_id, + action=hd_fields.OrchestratorAction.SetNodeBoot, + task_scope=task_scope) + + self.logger.info("Starting OOB driver task %s to set PXE boot for OOB type %s" % + (setboot_task.get_id(), oob_type)) + + oob_driver.execute_task(setboot_task.get_id()) + + self.logger.info("OOB driver task %s complete" % (setboot_task.get_id())) + + setboot_task = self.state_manager.get_task(setboot_task.get_id()) + + if setboot_task.get_result() == hd_fields.ActionResult.Success: + worked = True + elif setboot_task.get_result() == hd_fields.ActionResult.PartialSuccess: + worked = failed = True + elif setboot_task.get_result() == hd_fields.ActionResult.Failure: + failed = True + + cycle_task = self.create_task(tasks.DriverTask, parent_task_id=task.get_id(), design_id=design_id, action=hd_fields.OrchestratorAction.PowerCycleNode, task_scope=task_scope) - self.logger.info("Starting OOB driver task %s to power cycle nodes" % (cycle_task.get_id())) + self.logger.info("Starting OOB driver task %s to power cycle nodes for OOB type %s" % + (cycle_task.get_id(), oob_type)) - oob_driver.execute_task(cycle_task.get_id()) + oob_driver.execute_task(cycle_task.get_id()) - self.logger.info("OOB driver task %s complete" % (cycle_task.get_id())) + 
self.logger.info("OOB driver task %s complete" % (cycle_task.get_id())) - cycle_task = self.state_manager.get_task(cycle_task.get_id()) - - if cycle_task.get_result() == hd_fields.ActionResult.Success: - worked = True - elif cycle_task.get_result() == hd_fields.ActionResult.PartialSuccess: - worked = failed = True - elif cycle_task.get_result() == hd_fields.ActionResult.Failure: - failed = True + cycle_task = self.state_manager.get_task(cycle_task.get_id()) + if cycle_task.get_result() == hd_fields.ActionResult.Success: + worked = True + elif cycle_task.get_result() == hd_fields.ActionResult.PartialSuccess: + worked = failed = True + elif cycle_task.get_result() == hd_fields.ActionResult.Failure: + failed = True # IdentifyNode success will take some time after PowerCycleNode finishes # Retry the operation a few times if it fails before considering it a final failure @@ -402,7 +459,31 @@ class Orchestrator(object): elif node_networking_task.get_result() in [hd_fields.ActionResult.Failure, hd_fields.ActionResult.PartialSuccess]: failed = True - + + + if len(node_networking_task.result_detail['successful_nodes']) > 0: + self.logger.info("Found %s successfully networked nodes, starting deployment." % + (len(node_networking_task.result_detail['successful_nodes']))) + node_deploy_task = self.create_task(tasks.DriverTask, + parent_task_id=task.get_id(), design_id=design_id, + action=hd_fields.OrchestratorAction.DeployNode, + task_scope={'site': task_site, + 'node_names': node_networking_task.result_detail['successful_nodes']}) + + self.logger.info("Starting node driver task %s to deploy nodes." 
% (node_deploy_task.get_id())) + node_driver.execute_task(node_deploy_task.get_id()) + + node_deploy_task = self.state_manager.get_task(node_deploy_task.get_id()) + + if node_deploy_task.get_result() in [hd_fields.ActionResult.Success, + hd_fields.ActionResult.PartialSuccess]: + worked = True + elif node_deploy_task.get_result() in [hd_fields.ActionResult.Failure, + hd_fields.ActionResult.PartialSuccess]: + failed = True + else: + self.logger.warning("No nodes successfully networked, skipping deploy subtask") + final_result = None if worked and failed: final_result = hd_fields.ActionResult.PartialSuccess diff --git a/setup.py b/setup.py index b090c19c..3aa5c1ae 100644 --- a/setup.py +++ b/setup.py @@ -49,6 +49,7 @@ setup(name='drydock_provisioner', 'drydock_provisioner.drivers', 'drydock_provisioner.drivers.oob', 'drydock_provisioner.drivers.oob.pyghmi_driver', + 'drydock_provisioner.drivers.oob.manual_driver', 'drydock_provisioner.drivers.node', 'drydock_provisioner.drivers.node.maasdriver', 'drydock_provisioner.drivers.node.maasdriver.models',