From 1ae3c813fa7918da57c1127fd94b2d3cdb89e05f Mon Sep 17 00:00:00 2001
From: Scott Hussey
Date: Thu, 15 Jun 2017 16:59:54 -0500
Subject: [PATCH] DRYD-2 MVP Phase 3 - Network interface configuration

Implemented Orchestrator task DeployNode (only network config for now)
Implemented Driver task ApplyNodeNetworking
Refactored Driver task CreateNetworkTemplate to fix a design that left
networks in a state that wouldn't support node configs
Updated the YAML example with some changes to support network refactoring

- HostProfile field 'primary_network' specifies the network a node
  should use for its default gateway
- NetworkLinks now must list all allowed networks for that link, and a
  Network is allowed on only a single link

Updated YAML ingester to accept schema changes
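
For illustration, a minimal sketch of the two new schema fields
(abridged from examples/designparts_v1.0.yaml in this patch; values
are placeholders):

    kind: NetworkLink
    spec:
      allowed_networks:
        - 'oob'
    ---
    kind: HostProfile
    spec:
      primary_network: 'mgmt'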
---
 .../drivers/node/maasdriver/driver.py         | 447 ++++++++++++++----
 .../drivers/node/maasdriver/models/base.py    |   4 +-
 .../node/maasdriver/models/interface.py       | 228 ++++++++-
 .../drivers/node/maasdriver/models/iprange.py |  73 +++
 .../drivers/node/maasdriver/models/machine.py |   1 +
 .../drivers/node/maasdriver/models/subnet.py  |  53 ++-
 .../drivers/node/maasdriver/models/vlan.py    |   5 +-
 drydock_provisioner/ingester/plugins/yaml.py  |   7 +-
 drydock_provisioner/objects/hostprofile.py    |   8 +-
 drydock_provisioner/objects/network.py        |   3 +-
 drydock_provisioner/orchestrator/__init__.py  |  58 +++
 drydock_provisioner/orchestrator/readme.md    |   3 +-
 examples/designparts_v1.0.yaml                |   9 +-
 13 files changed, 774 insertions(+), 125 deletions(-)
 create mode 100644 drydock_provisioner/drivers/node/maasdriver/models/iprange.py

diff --git a/drydock_provisioner/drivers/node/maasdriver/driver.py b/drydock_provisioner/drivers/node/maasdriver/driver.py
index e7800529..9da0f9c3 100644
--- a/drydock_provisioner/drivers/node/maasdriver/driver.py
+++ b/drydock_provisioner/drivers/node/maasdriver/driver.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import time
 import logging
+import traceback
 
 import drydock_provisioner.error as errors
 import drydock_provisioner.config as config
@@ -290,7 +292,76 @@ class MaasNodeDriver(NodeDriver):
                                             status=hd_fields.TaskStatus.Complete,
                                             result=result,
                                             result_detail=result_detail)
+        elif task.action == hd_fields.OrchestratorAction.ApplyNodeNetworking:
+            self.orchestrator.task_field_update(task.get_id(),
+                                status=hd_fields.TaskStatus.Running)
+            self.logger.debug("Starting subtask to configure networking on %s nodes." % (len(task.node_list)))
+
+            subtasks = []
+
+            result_detail = {
+                'detail': [],
+                'failed_nodes': [],
+                'successful_nodes': [],
+            }
+
+            for n in task.node_list:
+                subtask = self.orchestrator.create_task(task_model.DriverTask,
+                                    parent_task_id=task.get_id(), design_id=design_id,
+                                    action=hd_fields.OrchestratorAction.ApplyNodeNetworking,
+                                    site_name=task.site_name,
+                                    task_scope={'site': task.site_name, 'node_names': [n]})
+                runner = MaasTaskRunner(state_manager=self.state_manager,
+                                        orchestrator=self.orchestrator,
+                                        task_id=subtask.get_id(), config=self.config)
+
+                self.logger.info("Starting thread for task %s to configure networking on node %s" % (subtask.get_id(), n))
+
+                runner.start()
+                subtasks.append(subtask.get_id())
+
+            running_subtasks = len(subtasks)
+            attempts = 0
+            worked = failed = False
+
+            # TODO Add timeout to config
+            while running_subtasks > 0 and attempts < 2:
+                for t in list(subtasks):
+                    subtask = self.state_manager.get_task(t)
+
+                    if subtask.status == hd_fields.TaskStatus.Complete:
+                        self.logger.info("Task %s to apply networking complete - status %s" %
+                                         (subtask.get_id(), subtask.get_result()))
+                        # Drop the completed subtask so it is not counted again on the next poll
+                        subtasks.remove(t)
+                        running_subtasks = running_subtasks - 1
+
+                        if subtask.result == hd_fields.ActionResult.Success:
+                            result_detail['successful_nodes'].extend(subtask.node_list)
+                            worked = True
+                        elif subtask.result == hd_fields.ActionResult.Failure:
+                            result_detail['failed_nodes'].extend(subtask.node_list)
+                            failed = True
+                        elif subtask.result == hd_fields.ActionResult.PartialSuccess:
+                            worked = failed = True
+
+                time.sleep(1 * 60)
+                attempts = attempts + 1
+
+            if running_subtasks > 0:
+                self.logger.warning("Timeout for task %s before all subtask threads completed" % (task.get_id()))
+                result = hd_fields.ActionResult.DependentFailure
+                result_detail['detail'].append('Some subtasks did not complete before the timeout threshold')
+            elif worked and failed:
+                result = hd_fields.ActionResult.PartialSuccess
+            elif worked:
+                result = hd_fields.ActionResult.Success
+            else:
+                result = hd_fields.ActionResult.Failure
+
+            self.orchestrator.task_field_update(task.get_id(),
+                                status=hd_fields.TaskStatus.Complete,
+                                result=result,
+                                result_detail=result_detail)
 
 class MaasTaskRunner(drivers.DriverTaskRunner):
 
     def __init__(self, config=None, **kwargs):
@@ -315,6 +386,10 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
             # Try to true up MaaS definitions of fabrics/vlans/subnets
             # with the networks defined in Drydock
             design_networks = site_design.networks
+            design_links = site_design.network_links
+
+            fabrics = maas_fabric.Fabrics(self.maas_client)
+            fabrics.refresh()
 
             subnets = maas_subnet.Subnets(self.maas_client)
             subnets.refresh()
@@ -323,128 +398,171 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
                 'detail': []
             }
 
-            for n in design_networks:
-                try:
-                    subnet = subnets.singleton({'cidr': n.cidr})
+            for l in design_links:
+                fabrics_found = set()
 
-                if subnet is not None:
-                    subnet.name = n.name
-                    subnet.dns_servers = n.dns_servers
+                # First loop through the possible Networks on this NetworkLink
+                # and validate that MaaS's self-discovered networking matches
+                # our design. This means all self-discovered networks that are matched
+                # to a link need to all be part of the same fabric. Otherwise there is no
+                # way to reconcile the discovered topology with the designed topology
+                for net_name in l.allowed_networks:
+                    n = site_design.get_network(net_name)
 
-                    vlan_list = maas_vlan.Vlans(self.maas_client, fabric_id=subnet.fabric)
-                    vlan_list.refresh()
+                    if n is None:
+                        self.logger.warning("Network %s allowed on link %s, but not defined." % (net_name, l.name))
+                        continue
 
-                    vlan = vlan_list.select(subnet.vlan)
+                    maas_net = subnets.singleton({'cidr': n.cidr})
 
-                    if vlan is not None:
-                        if ((n.vlan_id is None and vlan.vid != 0) or
-                            (n.vlan_id is not None and vlan.vid != n.vlan_id)):
+                    if maas_net is not None:
+                        fabrics_found.add(maas_net.fabric)
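+
+                # Reconcile this link to a single MaaS fabric: adopt the one
+                # self-discovered fabric if it is unambiguous, otherwise find
+                # or create a fabric named for this NetworkLink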
+                if len(fabrics_found) > 1:
+                    self.logger.warning("MaaS self-discovered network incompatible with NetworkLink %s" % l.name)
+                    continue
+                elif len(fabrics_found) == 1:
+                    link_fabric_id = fabrics_found.pop()
+                    link_fabric = fabrics.select(link_fabric_id)
+                    link_fabric.name = l.name
+                    link_fabric.update()
+                else:
+                    link_fabric = fabrics.singleton({'name': l.name})
+
+                    if link_fabric is None:
+                        link_fabric = maas_fabric.Fabric(self.maas_client, name=l.name)
+                        fabrics.add(link_fabric)
+
+
+                # Now that we have the fabrics sorted out, check
+                # that VLAN tags and subnet attributes are correct
+                for net_name in l.allowed_networks:
+                    n = site_design.get_network(net_name)
+
+                    if n is None:
+                        continue
+
+                    try:
+                        subnet = subnets.singleton({'cidr': n.cidr})
+
+                        if subnet is None:
+                            self.logger.info("Subnet for network %s not found, creating..." % (n.name))
+
+                            fabric_list = maas_fabric.Fabrics(self.maas_client)
+                            fabric_list.refresh()
+                            fabric = fabric_list.singleton({'name': l.name})
+
+                            if fabric is not None:
+                                vlan_list = maas_vlan.Vlans(self.maas_client, fabric_id=fabric.resource_id)
+                                vlan_list.refresh()
+
+                                vlan = vlan_list.singleton({'vid': n.vlan_id if n.vlan_id is not None else 0})
+
+                                if vlan is not None:
+                                    vlan.name = n.name
+
+                                    if getattr(n, 'mtu', None) is not None:
+                                        vlan.mtu = n.mtu
 
-                            # if the VLAN name matches, assume this is the correct resource
-                            # and it needs to be updated
-                            if vlan.name == n.name:
-                                vlan.set_vid(n.vlan_id)
-                                vlan.mtu = n.mtu
                                     vlan.update()
                                     result_detail['detail'].append("VLAN %s found for network %s, updated attributes"
-                                                                % (vlan.resource_id, n.name))
+                                                                    % (vlan.resource_id, n.name))
                                 else:
-                                # Found a VLAN with the correct VLAN tag, update subnet to use it
-                                target_vlan = vlan_list.singleton({'vid': n.vlan_id if n.vlan_id is not None else 0})
-                                if target_vlan is not None:
-                                    subnet.vlan = target_vlan.resource_id
-                                else:
-                                    # This is a flag that after creating a fabric and
-                                    # VLAN below, update the subnet
-                                    subnet.vlan = None
-                        else:
-                            subnet.vlan = None
-
-                    # Check if the routes have a default route
-                    subnet.gateway_ip = n.get_default_gateway()
+                                    # Create a new VLAN in this fabric and assign subnet to it
+                                    vlan = maas_vlan.Vlan(self.maas_client, name=n.name,
+                                                          vid=n.vlan_id if n.vlan_id is not None else 0,
+                                                          mtu=getattr(n, 'mtu', None), fabric_id=fabric.resource_id)
+                                    vlan = vlan_list.add(vlan)
 
-
-                    result_detail['detail'].append("Subnet %s found for network %s, updated attributes"
-                                                    % (subnet.resource_id, n.name))
-
-                # Need to find or create a Fabric/Vlan for this subnet
-                if (subnet is None or (subnet is not None and subnet.vlan is None)):
-                    fabric_list = maas_fabric.Fabrics(self.maas_client)
-                    fabric_list.refresh()
-                    fabric = fabric_list.singleton({'name': n.name})
-
-                    vlan = None
+                                    result_detail['detail'].append("VLAN %s created for network %s"
+                                                                    % (vlan.resource_id, n.name))
 
-                    if fabric is not None:
-                        vlan_list = maas_vlan.Vlans(self.maas_client, fabric_id=fabric.resource_id)
+                                # If subnet did not exist, create it here and attach it to the fabric/VLAN
+                                subnet = maas_subnet.Subnet(self.maas_client, name=n.name, cidr=n.cidr, fabric=fabric.resource_id,
+                                                            vlan=vlan.resource_id, gateway_ip=n.get_default_gateway())
+
+                                subnet_list = maas_subnet.Subnets(self.maas_client)
+                                subnet = subnet_list.add(subnet)
+                                self.logger.info("Created subnet %s for CIDR %s on VLAN %s" %
+                                                 (subnet.resource_id, subnet.cidr, subnet.vlan))
+
+                                result_detail['detail'].append("Subnet %s created for network %s" % (subnet.resource_id, n.name))
+                            else:
+                                self.logger.error("Fabric %s should have been created, but cannot be located." % (l.name))
+                        else:
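+                            # MaaS already discovered this subnet; true up its
+                            # name, DNS servers, VLAN tag, and gateway rather
+                            # than creating a new resource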
+                            subnet.name = n.name
+                            subnet.dns_servers = n.dns_servers
+
+                            result_detail['detail'].append("Subnet %s found for network %s, updated attributes"
+                                                            % (subnet.resource_id, n.name))
+                            self.logger.info("Updating existing MaaS subnet %s" % (subnet.resource_id))
+
+                            vlan_list = maas_vlan.Vlans(self.maas_client, fabric_id=subnet.fabric)
                             vlan_list.refresh()
-
+                            vlan = vlan_list.select(subnet.vlan)
 
                             if vlan is not None:
-                            vlan = matching_vlans[0]
-                            vlan.name = n.name
+                                vlan.set_vid(n.vlan_id)
+
                                 if getattr(n, 'mtu', None) is not None:
                                     vlan.mtu = n.mtu
 
-                            if subnet is not None:
-                                subnet.vlan = vlan.resource_id
-                                subnet.update()
-
                                 vlan.update()
                                 result_detail['detail'].append("VLAN %s found for network %s, updated attributes"
-                                                            % (vlan.resource_id, n.name))
+                                                                % (vlan.resource_id, n.name))
                             else:
-                            # Create a new VLAN in this fabric and assign subnet to it
-                            vlan = maas_vlan.Vlan(self.maas_client, name=n.name, vid=vlan_id,
-                                                    mtu=getattr(n, 'mtu', None),fabric_id=fabric.resource_id)
-                            vlan = vlan_list.add(vlan)
+                                self.logger.error("MaaS subnet %s does not have a matching VLAN" % (subnet.resource_id))
+                                continue
 
-                            result_detail['detail'].append("VLAN %s created for network %s"
-                                                            % (vlan.resource_id, n.name))
-                            if subnet is not None:
-                                subnet.vlan = vlan.resource_id
-                                subnet.update()
+                        # Check if the routes have a default route
+                        subnet.gateway_ip = n.get_default_gateway()
+                        subnet.update()
 
-                    else:
-                        # Create new fabric and VLAN
-                        fabric = maas_fabric.Fabric(self.maas_client, name=n.name)
-                        fabric = fabric_list.add(fabric)
-                        fabric_list.refresh()
+                        dhcp_on = False
 
-                        result_detail['detail'].append("Fabric %s created for network %s"
-                                                        % (fabric.resource_id, n.name))
+                        for r in n.ranges:
+                            subnet.add_address_range(r)
+                            if r.get('type', None) == 'dhcp':
+                                dhcp_on = True
 
-                        vlan_list = maas_vlan.Vlans(self.maas_client, fabric_id=new_fabric.resource_id)
-                        vlan_list.refresh()
+                        vlan_list = maas_vlan.Vlans(self.maas_client, fabric_id=subnet.fabric)
+                        vlan_list.refresh()
+                        vlan = vlan_list.select(subnet.vlan)
 
+                        if dhcp_on and not vlan.dhcp_on:
+                            self.logger.info("DHCP enabled for subnet %s, activating in MaaS" % (subnet.name))
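+                            # MaaS serves DHCP from a rack controller, so one
+                            # must be assigned to the VLAN before enabling it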
-                        # A new fabric comes with a single default VLAN. Retrieve it and update attributes
-                        vlan = vlan_list.single()
-                        vlan.name = n.name
-                        vlan.vid = n.vlan_id if n.vlan_id is not None else 0
-                        if getattr(n, 'mtu', None) is not None:
-                            vlan.mtu = n.mtu
+                            # TODO Ugly hack assuming a single rack controller for now until we implement multirack
+                            resp = self.maas_client.get("rackcontrollers/")
 
-                        vlan.update()
-                        result_detail['detail'].append("VLAN %s updated for network %s"
-                                                        % (vlan.resource_id, n.name))
-                        if subnet is not None:
-                            # If subnet was found above, but needed attached to a new fabric/vlan then
-                            # attach it
-                            subnet.vlan = vlan.resource_id
-                            subnet.update()
+                            if resp.ok:
+                                resp_json = resp.json()
 
-                        if subnet is None:
-                            # If subnet did not exist, create it here and attach it to the fabric/VLAN
-                            subnet = maas_subnet.Subnet(self.maas_client, name=n.name, cidr=n.cidr, fabric=fabric.resource_id,
-                                                        vlan=vlan.resource_id, gateway_ip=n.get_default_gateway())
+                                if not isinstance(resp_json, list):
+                                    self.logger.warning("Unexpected response when querying list of rack controllers")
+                                    self.logger.debug("%s" % resp.text)
+                                else:
+                                    if len(resp_json) > 1:
+                                        self.logger.warning("Received more than one rack controller, defaulting to first")
 
-                            subnet_list = maas_subnet.Subnets(self.maas_client)
-                            subnet = subnet_list.add(subnet)
-            except ValueError as vex:
-                raise errors.DriverError("Inconsistent data from MaaS")
+                                    rackctl_id = resp_json[0]['system_id']
+
+                                    vlan.dhcp_on = True
+                                    vlan.primary_rack = rackctl_id
+                                    vlan.update()
+                                    self.logger.debug("Enabling DHCP on VLAN %s managed by rack ctlr %s" %
+                                                      (vlan.resource_id, rackctl_id))
+                        elif dhcp_on and vlan.dhcp_on:
+                            self.logger.info("DHCP already enabled for subnet %s" % (subnet.resource_id))
+
+
+                        # TODO sort out static route support as MaaS seems to require the destination
+                        # network be defined in MaaS as well
+
+                    except ValueError as vex:
+                        raise errors.DriverError("Inconsistent data from MaaS")
 
             subnet_list = maas_subnet.Subnets(self.maas_client)
             subnet_list.refresh()
@@ -542,7 +660,7 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
                     node = site_design.get_baremetal_node(n)
                     machine = machine_list.identify_baremetal_node(node, update_name=False)
                     if machine is not None:
-                        if machine.status_name == 'New':
+                        if machine.status_name in ['New', 'Broken']:
                             self.logger.debug("Located node %s in MaaS, starting commissioning" % (n))
                             machine.commission()
@@ -595,3 +713,144 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
                                             status=hd_fields.TaskStatus.Complete,
                                             result=result,
                                             result_detail=result_detail)
+        elif task_action == hd_fields.OrchestratorAction.ApplyNodeNetworking:
+            try:
+                machine_list = maas_machine.Machines(self.maas_client)
+                machine_list.refresh()
+
+                fabrics = maas_fabric.Fabrics(self.maas_client)
+                fabrics.refresh()
+
+                subnets = maas_subnet.Subnets(self.maas_client)
+                subnets.refresh()
+            except Exception as ex:
+                self.logger.error("Error applying node networking, cannot access MaaS: %s" % str(ex))
+                traceback.print_exc()
+                self.orchestrator.task_field_update(self.task.get_id(),
+                                status=hd_fields.TaskStatus.Complete,
+                                result=hd_fields.ActionResult.Failure,
+                                result_detail={'detail': 'Error accessing MaaS API', 'retry': True})
+                return
+
+            nodes = self.task.node_list
+
+            result_detail = {'detail': []}
+
+            worked = failed = False
+
+            # TODO Better way of representing the node statuses than static strings
+            for n in nodes:
+                try:
+                    self.logger.debug("Locating node %s for network configuration" % (n))
+
+                    node = site_design.get_baremetal_node(n)
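+
+                    # Identify the MaaS machine record for this design node;
+                    # only machines in 'Ready' state can have their interfaces
+                    # reconfigured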
+                    machine = machine_list.identify_baremetal_node(node, update_name=False)
+
+                    if machine is not None:
+                        if machine.status_name == 'Ready':
+                            self.logger.debug("Located node %s in MaaS, starting interface configuration" % (n))
+
+                            for i in node.interfaces:
+                                nl = site_design.get_network_link(i.network_link)
+
+                                fabric = fabrics.singleton({'name': nl.name})
+
+                                if fabric is None:
+                                    self.logger.error("No fabric found for NetworkLink %s" % (nl.name))
+                                    failed = True
+                                    continue
+
+                                # TODO HardwareProfile device alias integration
+                                iface = machine.get_network_interface(i.device_name)
+
+                                if iface is None:
+                                    self.logger.warning("Interface %s not found on node %s, skipping configuration" %
+                                                        (i.device_name, machine.resource_id))
+                                    continue
+
+                                if iface.fabric_id == fabric.resource_id:
+                                    self.logger.debug("Interface %s already attached to fabric_id %s" %
+                                                      (i.device_name, fabric.resource_id))
+                                else:
+                                    self.logger.debug("Attaching node %s interface %s to fabric_id %s" %
+                                                      (node.name, i.device_name, fabric.resource_id))
+                                    iface.attach_fabric(fabric_id=fabric.resource_id)
+
+                                for iface_net in getattr(i, 'networks', []):
+                                    dd_net = site_design.get_network(iface_net)
+
+                                    if dd_net is not None:
+                                        link_iface = None
+                                        if iface_net == getattr(nl, 'native_network', None):
+                                            # If a node interface is attached to the native network for a link
+                                            # then the interface itself should be linked to network, not a VLAN
+                                            # tagged interface
+                                            self.logger.debug("Attaching node %s interface %s to untagged VLAN on fabric %s" %
+                                                              (node.name, i.device_name, fabric.resource_id))
+                                            link_iface = iface
+                                        else:
+                                            # For non-native networks, we create VLAN tagged interfaces as children
+                                            # of this interface
+                                            vlan_options = { 'vlan_tag': dd_net.vlan_id,
+                                                             'parent_name': iface.name,
+                                                           }
+
+                                            if dd_net.mtu is not None:
+                                                vlan_options['mtu'] = dd_net.mtu
+
+                                            self.logger.debug("Creating tagged interface for VLAN %s on system %s interface %s" %
+                                                              (dd_net.vlan_id, node.name, i.device_name))
+
+                                            link_iface = machine.interfaces.create_vlan(**vlan_options)
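+
+                                        # Build the subnet link: 'primary' marks
+                                        # the design primary_network so MaaS sets
+                                        # the node default gateway from that
+                                        # subnet; a design address of 'dhcp'
+                                        # maps to ip_address=None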
+                                        link_options = {}
+                                        link_options['primary'] = True if iface_net == getattr(node, 'primary_network', None) else False
+                                        link_options['subnet_cidr'] = dd_net.cidr
+
+                                        found = False
+                                        for a in getattr(node, 'addressing', []):
+                                            if a.network == iface_net:
+                                                link_options['ip_address'] = None if a.address == 'dhcp' else a.address
+                                                found = True
+
+                                        if not found:
+                                            self.logger.error("No address assigned to network %s for node %s, cannot link." %
+                                                              (iface_net, node.name))
+                                            continue
+
+                                        self.logger.debug("Linking system %s interface %s to subnet %s" %
+                                                          (node.name, i.device_name, dd_net.cidr))
+
+                                        link_iface.link_subnet(**link_options)
+                                        worked = True
+                                    else:
+                                        failed = True
+                                        self.logger.error("Did not find a defined Network %s to attach to interface" % iface_net)
+
+                        elif machine.status_name == 'Broken':
+                            self.logger.info("Located node %s in MaaS, status broken. Run ConfigureHardware before configuring network" % (n))
+                            result_detail['detail'].append("Located node %s in MaaS, status 'Broken'. Skipping..." % (n))
+                            failed = True
+                        else:
+                            self.logger.warning("Located node %s in MaaS, unknown status %s. Skipping..." % (n, machine.status_name))
+                            result_detail['detail'].append("Located node %s in MaaS, unknown status %s. Skipping..." % (n, machine.status_name))
+                            failed = True
+                    else:
+                        self.logger.warning("Node %s not found in MaaS" % n)
+                        failed = True
+                        result_detail['detail'].append("Node %s not found in MaaS" % n)
+
+                except Exception as ex:
+                    failed = True
+                    self.logger.error("Error configuring network for node %s: %s" % (n, str(ex)))
+                    result_detail['detail'].append("Error configuring network for node %s: %s" % (n, str(ex)))
+
+            if failed:
+                final_result = hd_fields.ActionResult.Failure
+            else:
+                final_result = hd_fields.ActionResult.Success
+
+            self.orchestrator.task_field_update(self.task.get_id(),
+                                status=hd_fields.TaskStatus.Complete,
+                                result=final_result,
+                                result_detail=result_detail)
diff --git a/drydock_provisioner/drivers/node/maasdriver/models/base.py b/drydock_provisioner/drivers/node/maasdriver/models/base.py
index 2b3df5df..84090240 100644
--- a/drydock_provisioner/drivers/node/maasdriver/models/base.py
+++ b/drydock_provisioner/drivers/node/maasdriver/models/base.py
@@ -29,7 +29,7 @@ class ResourceBase(object):
 
     def __init__(self, api_client, **kwargs):
         self.api_client = api_client
-        self.logger = logging.getLogger('drydock.drivers.maasdriver')
+        self.logger = logging.getLogger('drydock.nodedriver.maasdriver')
 
         for f in self.fields:
             if f in kwargs.keys():
@@ -161,7 +161,7 @@ class ResourceCollectionBase(object):
     def __init__(self, api_client):
         self.api_client = api_client
         self.resources = {}
-        self.logger = logging.getLogger('drydock.drivers.maasdriver')
+        self.logger = logging.getLogger('drydock.nodedriver.maasdriver')
 
     def interpolate_url(self):
         """
diff --git a/drydock_provisioner/drivers/node/maasdriver/models/interface.py b/drydock_provisioner/drivers/node/maasdriver/models/interface.py
index 5c257279..db2a9d85 100644
--- a/drydock_provisioner/drivers/node/maasdriver/models/interface.py
+++ b/drydock_provisioner/drivers/node/maasdriver/models/interface.py
@@ -11,18 +11,170 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import logging
 
 import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
+import drydock_provisioner.drivers.node.maasdriver.models.fabric as maas_fabric
+import drydock_provisioner.drivers.node.maasdriver.models.subnet as maas_subnet
+import drydock_provisioner.drivers.node.maasdriver.models.vlan as maas_vlan
+
+import drydock_provisioner.error as errors
 
 class Interface(model_base.ResourceBase):
 
     resource_url = 'nodes/{system_id}/interfaces/{resource_id}/'
     fields = ['resource_id', 'system_id', 'name', 'type', 'mac_address', 'vlan',
-              'links', 'effective_mtu']
+              'links', 'effective_mtu', 'fabric_id']
     json_fields = ['name', 'type', 'mac_address', 'vlan', 'links', 'effective_mtu']
 
     def __init__(self, api_client, **kwargs):
         super(Interface, self).__init__(api_client, **kwargs)
+        self.logger = logging.getLogger('drydock.nodedriver.maasdriver')
+
+    def attach_fabric(self, fabric_id=None, fabric_name=None):
+        """
+        Attach this interface to a MaaS fabric. Only one of fabric_id
+        or fabric_name should be specified. If both are specified, fabric_id wins.
+
+        :param fabric_id: The MaaS resource ID of a network Fabric to connect to
+        :param fabric_name: The name of a MaaS fabric to connect to
+        """
+
+        fabric = None
+
+        fabrics = maas_fabric.Fabrics(self.api_client)
+        fabrics.refresh()
+
+        if fabric_id is not None:
+            fabric = fabrics.select(fabric_id)
+        elif fabric_name is not None:
+            fabric = fabrics.singleton({'name': fabric_name})
+        else:
+            self.logger.warning("Must specify fabric_id or fabric_name")
+            raise ValueError("Must specify fabric_id or fabric_name")
+
+        if fabric is None:
+            self.logger.warning("Fabric not found in MaaS for fabric_id %s, fabric_name %s" %
+                                (fabric_id, fabric_name))
+            raise errors.DriverError("Fabric not found in MaaS for fabric_id %s, fabric_name %s" %
+                                     (fabric_id, fabric_name))
+
+        # Locate the untagged VLAN for this fabric.
+        fabric_vlan = fabric.vlans.singleton({'vid': 0})
+
+        if fabric_vlan is None:
+            self.logger.warning("Cannot locate untagged VLAN on fabric %s" % (fabric_id))
+            raise errors.DriverError("Cannot locate untagged VLAN on fabric %s" % (fabric_id))
+
+        self.vlan = fabric_vlan.resource_id
+        self.logger.info("Attaching interface %s on system %s to VLAN %s on fabric %s" %
+                         (self.resource_id, self.system_id, fabric_vlan.resource_id, fabric.resource_id))
+        self.update()
+
+    def is_linked(self, subnet_id):
+        for l in self.links:
+            if l.get('subnet_id', None) == subnet_id:
+                return True
+
+        return False
+
+    def link_subnet(self, subnet_id=None, subnet_cidr=None, ip_address=None, primary=False):
+        """
+        Link this interface to a MaaS subnet. One of subnet_id or subnet_cidr
+        should be specified. If both are specified, subnet_id wins.
+
+        :param subnet_id: The MaaS resource ID of a network subnet to connect to
+        :param subnet_cidr: The CIDR of a MaaS subnet to connect to
+        :param ip_address: The IP address to assign this interface. Should be a string with
+                           a static IP or None. If None, DHCP will be used.
+        :param primary: Boolean of whether this interface is the primary interface of the node. This
+                        sets the node default gateway to the gateway of the subnet
+        """
+
+        subnet = None
+
+        subnets = maas_subnet.Subnets(self.api_client)
+        subnets.refresh()
+
+        if subnet_id is not None:
+            subnet = subnets.select(subnet_id)
+        elif subnet_cidr is not None:
+            subnet = subnets.singleton({'cidr': subnet_cidr})
+        else:
+            self.logger.warning("Must specify subnet_id or subnet_cidr")
+            raise ValueError("Must specify subnet_id or subnet_cidr")
+
+        if subnet is None:
+            self.logger.warning("Subnet not found in MaaS for subnet_id %s, subnet_cidr %s" %
+                                (subnet_id, subnet_cidr))
+            raise errors.DriverError("Subnet not found in MaaS for subnet_id %s, subnet_cidr %s" %
+                                     (subnet_id, subnet_cidr))
+
+        # TODO Possibly add logic to true up link attributes, may be overkill
+        if self.is_linked(subnet.resource_id):
+            self.logger.info("Interface %s already linked to subnet %s, skipping." %
+                             (self.resource_id, subnet.resource_id))
+            return
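+
+        # MaaS links an interface to a subnet via the 'link_subnet' op: mode
+        # 'static' pins the given address, 'dhcp' requests dynamic addressing,
+        # and default_gateway promotes the subnet gateway to the node default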
+        url = self.interpolate_url()
+
+        # TODO Probably need to enumerate link mode
+        options = { 'subnet': subnet.resource_id,
+                    'mode': 'dhcp' if ip_address is None else 'static',
+                    'default_gateway': primary,
+                  }
+
+        if ip_address is not None:
+            options['ip_address'] = ip_address
+
+        self.logger.debug("Linking interface %s to subnet: subnet=%s, mode=%s, address=%s, primary=%s" %
+                          (self.resource_id, subnet.resource_id, options['mode'], ip_address, primary))
+
+        resp = self.api_client.post(url, op='link_subnet', files=options)
+
+        if not resp.ok:
+            self.logger.error("Error linking interface %s to subnet %s - MaaS response %s: %s" %
+                              (self.resource_id, subnet.resource_id, resp.status_code, resp.text))
+            raise errors.DriverError("Error linking interface %s to subnet %s - MaaS response %s" %
+                                     (self.resource_id, subnet.resource_id, resp.status_code))
+
+        self.refresh()
+
+        return
+
+    @classmethod
+    def from_dict(cls, api_client, obj_dict):
+        """
+        Because MaaS decides to replace the resource ids with the
+        representation of the resource, we must reverse it for a true
+        representation of the Interface
+        """
+        refined_dict = {k: obj_dict.get(k, None) for k in cls.fields}
+        if 'id' in obj_dict.keys():
+            refined_dict['resource_id'] = obj_dict.get('id')
+
+        if isinstance(refined_dict.get('vlan', None), dict):
+            refined_dict['fabric_id'] = refined_dict['vlan']['fabric_id']
+            refined_dict['vlan'] = refined_dict['vlan']['id']
+
+        link_list = []
+        if isinstance(refined_dict.get('links', None), list):
+            for l in refined_dict['links']:
+                if isinstance(l, dict):
+                    link = { 'resource_id': l['id'],
+                             'mode': l['mode']
+                           }
+
+                    if l.get('subnet', None) is not None:
+                        link['subnet_id'] = l['subnet']['id']
+                        link['ip_address'] = l.get('ip_address', None)
+
+                    link_list.append(link)
+
+        refined_dict['links'] = link_list
+
+        i = cls(api_client, **refined_dict)
+        return i
 
 class Interfaces(model_base.ResourceCollectionBase):
@@ -31,4 +183,76 @@ class Interfaces(model_base.ResourceCollectionBase):
 
     def __init__(self, api_client, **kwargs):
         super(Interfaces, self).__init__(api_client)
-        self.system_id = kwargs.get('system_id', None)
\ No newline at end of file
+        self.system_id = kwargs.get('system_id', None)
+
+    def create_vlan(self, vlan_tag, parent_name, mtu=None, tags=[]):
+        """
+        Create a new VLAN interface on this node
+
+        :param vlan_tag: The VLAN ID (not MaaS resource id of a VLAN) to create interface for
+        :param parent_name: The name of a MaaS interface to build the VLAN interface on top of
+        :param mtu: Optional configuration of the interface MTU
+        :param tags: Optional list of string tags to apply to the VLAN interface
+        """
+
+        self.refresh()
+
+        parent_iface = self.singleton({'name': parent_name})
+
+        if parent_iface is None:
+            self.logger.error("Cannot locate parent interface %s" % (parent_name))
+            raise errors.DriverError("Cannot locate parent interface %s" % (parent_name))
+
+        if parent_iface.type != 'physical':
+            self.logger.error("Cannot create VLAN interface on parent of type %s" % (parent_iface.type))
+            raise errors.DriverError("Cannot create VLAN interface on parent of type %s" % (parent_iface.type))
+
+        if parent_iface.vlan is None:
+            self.logger.error("Cannot create VLAN interface on disconnected parent %s" % (parent_iface.resource_id))
+            raise errors.DriverError("Cannot create VLAN interface on disconnected parent %s" % (parent_iface.resource_id))
+
+        vlans = maas_vlan.Vlans(self.api_client, fabric_id=parent_iface.fabric_id)
+        vlans.refresh()
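+
+        # The VLAN resource must already exist on the parent interface's
+        # fabric (CreateNetworkTemplate defines it); we only attach to it here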
+        vlan = vlans.singleton({'vid': vlan_tag})
+
+        if vlan is None:
+            self.logger.error("Cannot locate VLAN %s on fabric %s to attach interface" %
+                              (vlan_tag, parent_iface.fabric_id))
+            raise errors.DriverError("Cannot locate VLAN %s on fabric %s to attach interface" %
+                                     (vlan_tag, parent_iface.fabric_id))
+
+        exists = self.singleton({'vlan': vlan.resource_id})
+
+        if exists is not None:
+            self.logger.info("Interface for VLAN %s already exists on node %s, skipping" %
+                             (vlan_tag, self.system_id))
+            return None
+
+        url = self.interpolate_url()
+
+        options = { 'tags': ','.join(tags),
+                    'vlan': vlan.resource_id,
+                    'parent': parent_iface.resource_id,
+                  }
+
+        if mtu is not None:
+            options['mtu'] = mtu
+
+        resp = self.api_client.post(url, op='create_vlan', files=options)
+
+        if resp.status_code == 200:
+            resp_json = resp.json()
+            vlan_iface = Interface.from_dict(self.api_client, resp_json)
+            self.logger.debug("Created VLAN interface %s for parent %s attached to VLAN %s" %
+                              (vlan_iface.resource_id, parent_iface.resource_id, vlan.resource_id))
+            return vlan_iface
+        else:
+            self.logger.error("Error creating VLAN interface to VLAN %s on system %s - MaaS response %s: %s" %
+                              (vlan.resource_id, self.system_id, resp.status_code, resp.text))
+            raise errors.DriverError("Error creating VLAN interface to VLAN %s on system %s - MaaS response %s" %
+                                     (vlan.resource_id, self.system_id, resp.status_code))
\ No newline at end of file
diff --git a/drydock_provisioner/drivers/node/maasdriver/models/iprange.py b/drydock_provisioner/drivers/node/maasdriver/models/iprange.py
new file mode 100644
index 00000000..3840fea4
--- /dev/null
+++ b/drydock_provisioner/drivers/node/maasdriver/models/iprange.py
@@ -0,0 +1,73 @@
+# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
+import drydock_provisioner.error as errors
+
+class IpRange(model_base.ResourceBase):
+
+    resource_url = 'iprange/{resource_id}/'
+    fields = ['resource_id', 'comment', 'subnet', 'type', 'start_ip', 'end_ip']
+    json_fields = ['comment','start_ip', 'end_ip']
+
+    def __init__(self, api_client, **kwargs):
+        super(IpRange, self).__init__(api_client, **kwargs)
+
+    @classmethod
+    def from_dict(cls, api_client, obj_dict):
+        refined_dict = {k: obj_dict.get(k, None) for k in cls.fields}
+        if 'id' in obj_dict.keys():
+            refined_dict['resource_id'] = obj_dict.get('id')
+
+        if isinstance(refined_dict.get('subnet', None), dict):
+            refined_dict['subnet'] = refined_dict['subnet']['id']
+
+        i = cls(api_client, **refined_dict)
+        return i
+
+class IpRanges(model_base.ResourceCollectionBase):
+
+    collection_url = 'ipranges/'
+    collection_resource = IpRange
+
+    def __init__(self, api_client, **kwargs):
+        super(IpRanges, self).__init__(api_client)
+
+    def add(self, res):
+        """
+        Custom add to include a subnet id and type which can't be
+        updated in a PUT
+        """
+        data_dict = res.to_dict()
+
+        subnet = getattr(res, 'subnet', None)
+
+        if subnet is not None:
+            data_dict['subnet'] = subnet
+
+        range_type = getattr(res, 'type', None)
+
+        if range_type is not None:
+            data_dict['type'] = range_type
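+
+        # subnet and type are create-only in MaaS, so they are sent on the
+        # POST here but never included in subsequent PUT updates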
+        url = self.interpolate_url()
+
+        resp = self.api_client.post(url, files=data_dict)
+
+        if resp.status_code == 200:
+            resp_json = resp.json()
+            res.set_resource_id(resp_json.get('id'))
+            return res
+
+        raise errors.DriverError("Failed updating MAAS url %s - return code %s"
+                                 % (url, resp.status_code))
\ No newline at end of file
diff --git a/drydock_provisioner/drivers/node/maasdriver/models/machine.py b/drydock_provisioner/drivers/node/maasdriver/models/machine.py
index e06263ab..088ca986 100644
--- a/drydock_provisioner/drivers/node/maasdriver/models/machine.py
+++ b/drydock_provisioner/drivers/node/maasdriver/models/machine.py
@@ -14,6 +14,7 @@
 
 import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
 import drydock_provisioner.drivers.node.maasdriver.models.interface as maas_interface
+
 import bson
 import yaml
 
diff --git a/drydock_provisioner/drivers/node/maasdriver/models/subnet.py b/drydock_provisioner/drivers/node/maasdriver/models/subnet.py
index 8aec521e..a9e6104a 100644
--- a/drydock_provisioner/drivers/node/maasdriver/models/subnet.py
+++ b/drydock_provisioner/drivers/node/maasdriver/models/subnet.py
@@ -11,30 +11,61 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
+import drydock_provisioner.drivers.node.maasdriver.models.iprange as maas_iprange
 
 class Subnet(model_base.ResourceBase):
 
     resource_url = 'subnets/{resource_id}/'
-    fields = ['resource_id', 'name', 'description', 'fabric', 'vlan', 'vid', 'dhcp_on',
-              'space', 'cidr', 'gateway_ip', 'rdns_mode', 'allow_proxy', 'dns_servers']
-    json_fields = ['name', 'description','vlan', 'space', 'cidr', 'gateway_ip', 'rdns_mode',
+    fields = ['resource_id', 'name', 'description', 'fabric', 'vlan', 'vid',
+              'cidr', 'gateway_ip', 'rdns_mode', 'allow_proxy', 'dns_servers']
+    json_fields = ['name', 'description','vlan', 'cidr', 'gateway_ip', 'rdns_mode',
                    'allow_proxy', 'dns_servers']
 
     def __init__(self, api_client, **kwargs):
         super(Subnet, self).__init__(api_client, **kwargs)
 
-        # For now all subnets will be part of the default space
-        self.space = 0
+    def add_address_range(self, addr_range):
+        """
+        Add a reserved or dynamic (DHCP) address range to this subnet
+
+        :param addr_range: Dict with keys 'type', 'start', 'end'
+        """
+
+        # TODO Do better overlap detection. For now we just check if the exact range exists
+        current_ranges = maas_iprange.IpRanges(self.api_client)
+        current_ranges.refresh()
+
+        exists = current_ranges.query({'start_ip': addr_range.get('start', None),
+                                       'end_ip': addr_range.get('end', None)})
+
+        if len(exists) > 0:
+            self.logger.info('Address range from %s to %s already exists, skipping.' %
+                             (addr_range.get('start', None), addr_range.get('end', None)))
+            return
+
+        # Static ranges are what is left after reserved (not assigned by MaaS)
+        # and DHCP ranges are removed from a subnet
+        if addr_range.get('type', None) in ['reserved','dhcp']:
+            range_type = addr_range.get('type', None)
+
+            if range_type == 'dhcp':
+                range_type = 'dynamic'
+
+            maas_range = maas_iprange.IpRange(self.api_client, comment="Configured by Drydock", subnet=self.resource_id,
+                                              type=range_type, start_ip=addr_range.get('start', None),
+                                              end_ip=addr_range.get('end', None))
+            maas_ranges = maas_iprange.IpRanges(self.api_client)
+            maas_ranges.add(maas_range)
 
-    """
-    Because MaaS decides to replace the VLAN id with the
-    representation of the VLAN, we must reverse it for a true
-    representation of the resource
-    """
     @classmethod
     def from_dict(cls, api_client, obj_dict):
+        """
+        Because MaaS decides to replace the VLAN id with the
+        representation of the VLAN, we must reverse it for a true
+        representation of the resource
+        """
         refined_dict = {k: obj_dict.get(k, None) for k in cls.fields}
         if 'id' in obj_dict.keys():
             refined_dict['resource_id'] = obj_dict.get('id')
diff --git a/drydock_provisioner/drivers/node/maasdriver/models/vlan.py b/drydock_provisioner/drivers/node/maasdriver/models/vlan.py
index 36acceac..ffe9711d 100644
--- a/drydock_provisioner/drivers/node/maasdriver/models/vlan.py
+++ b/drydock_provisioner/drivers/node/maasdriver/models/vlan.py
@@ -19,8 +19,9 @@ import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
 class Vlan(model_base.ResourceBase):
 
     resource_url = 'fabrics/{fabric_id}/vlans/{api_id}/'
-    fields = ['resource_id', 'name', 'description', 'vid', 'fabric_id', 'dhcp_on', 'mtu']
-    json_fields = ['name', 'description', 'vid', 'dhcp_on', 'mtu']
+    fields = ['resource_id', 'name', 'description', 'vid', 'fabric_id', 'dhcp_on', 'mtu',
+              'primary_rack', 'secondary_rack']
+    json_fields = ['name', 'description', 'vid', 'dhcp_on', 'mtu', 'primary_rack', 'secondary_rack']
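+    # primary_rack and secondary_rack name the rack controllers that serve
+    # DHCP for this VLAN when dhcp_on is set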
 
     def __init__(self, api_client, **kwargs):
         super(Vlan, self).__init__(api_client, **kwargs)
 
diff --git a/drydock_provisioner/ingester/plugins/yaml.py b/drydock_provisioner/ingester/plugins/yaml.py
index fce62c32..060ce38a 100644
--- a/drydock_provisioner/ingester/plugins/yaml.py
+++ b/drydock_provisioner/ingester/plugins/yaml.py
@@ -143,6 +143,8 @@ class YamlIngester(IngesterPlugin):
                         model.trunk_mode = trunking.get('mode', hd_fields.NetworkLinkTrunkingMode.Disabled)
                         model.native_network = trunking.get('default_network', None)
 
+                        model.allowed_networks = spec.get('allowed_networks', None)
+
                         models.append(model)
                     else:
                         raise ValueError('Unknown API version of object')
@@ -160,7 +162,7 @@ class YamlIngester(IngesterPlugin):
                         model.cidr = spec.get('cidr', None)
                         model.allocation_strategy = spec.get('allocation', 'static')
-                        model.vlan_id = spec.get('vlan_id', None)
+                        model.vlan_id = spec.get('vlan', None)
                         model.mtu = spec.get('mtu', None)
 
                         dns = spec.get('dns', {})
@@ -285,7 +287,6 @@ class YamlIngester(IngesterPlugin):
 
                             int_model.device_name = i.get('device_name', None)
                             int_model.network_link = i.get('device_link', None)
-                            int_model.primary_netowrk = i.get('primary', False)
 
                             int_model.hardware_slaves = []
                             slaves = i.get('slaves', [])
@@ -301,6 +302,8 @@ class YamlIngester(IngesterPlugin):
 
                             model.interfaces.append(int_model)
 
+                        model.primary_network = spec.get('primary_network', None)
+
                         node_metadata = spec.get('metadata', {})
                         metadata_tags = node_metadata.get('tags', [])
                         model.tags = []
diff --git a/drydock_provisioner/objects/hostprofile.py b/drydock_provisioner/objects/hostprofile.py
index 36aad003..083a0cfc 100644
--- a/drydock_provisioner/objects/hostprofile.py
+++ b/drydock_provisioner/objects/hostprofile.py
@@ -51,6 +51,7 @@ class HostProfile(base.DrydockPersistentObject, base.DrydockObject):
         'base_os': obj_fields.StringField(nullable=True),
         'kernel': obj_fields.StringField(nullable=True),
         'kernel_params': obj_fields.StringField(nullable=True),
+        'primary_network': obj_fields.StringField(nullable=False),
     }
 
     def __init__(self, **kwargs):
@@ -93,7 +94,7 @@ class HostProfile(base.DrydockPersistentObject, base.DrydockObject):
                           'hardware_profile', 'oob_type', 'oob_network',
                           'oob_credential', 'oob_account', 'storage_layout',
                           'bootdisk_device', 'bootdisk_root_size', 'bootdisk_boot_size',
-                          'rack', 'base_os', 'kernel', 'kernel_params']
+                          'rack', 'base_os', 'kernel', 'kernel_params', 'primary_network']
 
         # Create applied data from self design values and parent
         # applied values
@@ -134,7 +135,6 @@ class HostInterface(base.DrydockObject):
 
     fields = {
         'device_name': obj_fields.StringField(),
-        'primary_network': obj_fields.BooleanField(nullable=False, default=False),
         'source': hd_fields.ModelSourceField(),
         'network_link': obj_fields.StringField(nullable=True),
         'hardware_slaves': obj_fields.ListOfStringsField(nullable=True),
@@ -212,10 +212,6 @@ class HostInterface(base.DrydockObject):
                     elif j.get_name() == parent_name:
                         m = objects.HostInterface()
                         m.device_name = j.get_name()
-                        m.primary_network = \
-                            objects.Utils.apply_field_inheritance(
-                                getattr(j, 'primary_network', None),
-                                getattr(i, 'primary_network', None))
 
                         m.network_link = \
                             objects.Utils.apply_field_inheritance(
diff --git a/drydock_provisioner/objects/network.py b/drydock_provisioner/objects/network.py
index 028161da..65329e71 100644
--- a/drydock_provisioner/objects/network.py
+++ b/drydock_provisioner/objects/network.py
@@ -44,6 +44,7 @@ class NetworkLink(base.DrydockPersistentObject, base.DrydockObject):
         'trunk_mode': hd_fields.NetworkLinkTrunkingModeField(
                           default=hd_fields.NetworkLinkTrunkingMode.Disabled),
         'native_network': ovo_fields.StringField(nullable=True),
+        'allowed_networks': ovo_fields.ListOfStringsField(),
     }
 
     def __init__(self, **kwargs):
@@ -104,8 +105,6 @@ class Network(base.DrydockPersistentObject, base.DrydockObject):
 
         return None
 
-
-
 @base.DrydockObjectRegistry.register
 class NetworkList(base.DrydockObjectListBase, base.DrydockObject):
 
diff --git a/drydock_provisioner/orchestrator/__init__.py b/drydock_provisioner/orchestrator/__init__.py
index 3e55cbf2..d893fafe 100644
--- a/drydock_provisioner/orchestrator/__init__.py
+++ b/drydock_provisioner/orchestrator/__init__.py
@@ -214,6 +214,9 @@ class Orchestrator(object):
             self.task_field_update(task_id,
                                    status=hd_fields.TaskStatus.Running)
 
+            # NOTE Should we attempt to interrogate the node via Node Driver to see if
+            # it is in a deployed state before we start rebooting? Or do we just leverage
+            # Drydock internal state via site build data (when implemented)?
             oob_driver = self.enabled_drivers['oob']
 
             if oob_driver is None:
@@ -357,6 +360,61 @@ class Orchestrator(object):
                                    result=final_result)
 
             return
+        elif task.action == hd_fields.OrchestratorAction.DeployNode:
+            failed = worked = False
+
+            self.task_field_update(task_id,
+                                   status=hd_fields.TaskStatus.Running)
+
+            node_driver = self.enabled_drivers['node']
+
+            if node_driver is None:
+                self.task_field_update(task_id,
+                                       status=hd_fields.TaskStatus.Errored,
+                                       result=hd_fields.ActionResult.Failure,
+                                       result_detail={'detail': 'Error: No node driver configured', 'retry': False})
+                return
+
+            site_design = self.get_effective_site(design_id)
+
+            node_filter = task.node_filter
+
+            target_nodes = self.process_node_filter(node_filter, site_design)
+
+            target_names = [x.get_name() for x in target_nodes]
+
+            task_scope = {'site' : task_site,
+                          'node_names' : target_names}
+
+            node_networking_task = self.create_task(tasks.DriverTask,
+                                        parent_task_id=task.get_id(), design_id=design_id,
+                                        action=hd_fields.OrchestratorAction.ApplyNodeNetworking,
+                                        task_scope=task_scope)
+
+            self.logger.info("Starting node driver task %s to apply networking on nodes." % (node_networking_task.get_id()))
+            node_driver.execute_task(node_networking_task.get_id())
+
+            node_networking_task = self.state_manager.get_task(node_networking_task.get_id())
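+
+            # PartialSuccess from the driver counts as both worked and failed
+            # so the final result rolls up to PartialSuccess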
+            if node_networking_task.get_result() in [hd_fields.ActionResult.Success,
+                                                     hd_fields.ActionResult.PartialSuccess]:
+                worked = True
+
+            if node_networking_task.get_result() in [hd_fields.ActionResult.Failure,
+                                                     hd_fields.ActionResult.PartialSuccess]:
+                failed = True
+
+            final_result = None
+            if worked and failed:
+                final_result = hd_fields.ActionResult.PartialSuccess
+            elif worked:
+                final_result = hd_fields.ActionResult.Success
+            else:
+                final_result = hd_fields.ActionResult.Failure
+
+            self.task_field_update(task_id,
+                                   status=hd_fields.TaskStatus.Complete,
+                                   result=final_result)
+
         else:
             raise errors.OrchestratorError("Action %s not supported"
                                            % (task.action))
diff --git a/drydock_provisioner/orchestrator/readme.md b/drydock_provisioner/orchestrator/readme.md
index abb48068..4cdb852b 100644
--- a/drydock_provisioner/orchestrator/readme.md
+++ b/drydock_provisioner/orchestrator/readme.md
@@ -27,7 +27,8 @@ is compatible with the physical state of the site.
 * All baremetal nodes have an address, either static or DHCP, for all networks
   they are attached to.
 * No static IP assignments are duplicated
 * No static IP assignments are outside of the network they are targetted for
-* No network MTU mismatches due to a network riding different links on different nodes
+* All IP assignments are within declared ranges on the network
+* Networks assigned to each node's interface are within the set of the attached link's allowed_networks
 * Boot drive is above minimum size
 
 ### VerifySite ###
diff --git a/examples/designparts_v1.0.yaml b/examples/designparts_v1.0.yaml
index 1f669fab..0ca7011c 100644
--- a/examples/designparts_v1.0.yaml
+++ b/examples/designparts_v1.0.yaml
@@ -67,6 +67,9 @@ spec:
     mode: disabled
   # If disabled, what network is this port on. If '802.1q' what is the default network for the port. No default.
   default_network: oob
+  # List of Network names that are supported on this link. A Network can be listed on only one NetworkLink
+  allowed_networks:
+    - 'oob'
 ---
 apiVersion: 'v1.0'
 kind: Network
@@ -93,7 +96,7 @@ spec:
   # Defined IP address ranges. All node IP address assignments must fall into a defined range
   # of the correct type
   ranges:
-    # Type of range. Supports 'static' or 'dhcp'. No default
+    # Type of range. Supports 'reserved', 'static' or 'dhcp'. No default
     - type: 'dhcp'
       # Start of the address range, inclusive. No default
      start: '172.16.1.100'
@@ -202,14 +205,14 @@ spec:
       fs_uuid:
       # A filesystem label. Defaults to None
       fs_label:
+  # Network name of the primary network (default gateway, DNS, etc...)
+  primary_network: 'mgmt'
   # Physical and logical network interfaces
   interfaces:
     # What the interface should be named in the operating system. May not match a hardware device name
     device_name: bond0
     # The NetworkLink connected to this interface. Must be the name of a NetworkLink design part
     device_link: 'gp'
-    # Whether this interface is considered the primary interface on the server. Supports true and false. Defaults to false
-    primary: true
     # Hardware devices that support this interface. For configurating a physical device, this would be a list of one
     # For bonds, this would be a list of all the physical devices in the bond. These can refer to HardwareProfile device aliases
     # or explicit device names