NSX|V: add support for host groups for DRS HA

The code adds support for host groups. This allows the plugin
to place the edge VMs in host groups to provide HA.

In order to get the fire cell anti-affinity, we do the following:

1. Admin, out of band: create two host groups (the targets of the
    'Virtual Machines to Hosts' rules). These are listed in the
    edge_host_groups option:
    a. HOST-GROUP-A – all hosts in fire cell A
    b. HOST-GROUP-B – all hosts in fire cell B
2. The plugin does the following:
    a. Create 2 VM groups (each one is a placeholder for one of the
    edge VMs). The VM group name is of the following format:
    'neutron-group-%s-%s' % (edge_id, index)
    b. Create 2 VM/Host rules, each containing the corresponding VM
    group from above. The rule name is of the following format:
    'neutron-rule-%s-%s' % (edge_id, index)
    c. The plugin deletes the above when the edges are deleted.

An admin utility method has been added that configures the host groups:
nsxadmin -o nsx-update -r edges -p edge-id=edge-55 --property hostgroup=True|False
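For example, for edge-55 this yields (indices start at 1):

    neutron-group-edge-55-1 / neutron-rule-edge-55-1 -> first listed host group
    neutron-group-edge-55-2 / neutron-rule-edge-55-2 -> second listed host group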

Depends-On: I494a1d19341f30f22803a3fe6baf020a67ad6b08
Change-Id: I9bf3c280c37c02081c11ac8abacc424db6cac09f
commit 338d47e058
parent 45f87c8639
Author: Gary Kotton
Date:   2017-02-05 06:43:27 -08:00

11 changed files with 250 additions and 11 deletions


@@ -64,6 +64,10 @@ Edges

    nsxadmin -o nsx-update -r edges -p edge-id=edge-55 --property resource=<cpu|memory> --property limit=<limit> --property reservation=<reservation> --property shares=<shares>

- Update DRS hostgroups for an edge::

    nsxadmin -o nsx-update -r edges -p edge-id=edge-55 --property hostgroup=True|False

Orphaned Edges
~~~~~~~~~~~~~~


@@ -474,6 +474,12 @@ nsxv_opts = [
                       'edges will be placed in the primary datastore as '
                       'active and the other half will be placed in the '
                       'ha_datastore')),
    cfg.ListOpt('edge_host_groups',
                default=[],
                help=_('(Optional) If edge HA is used then this will ensure '
                       'that active/backup edges are placed in the listed '
                       'host groups. 2 predefined host groups need to be '
                       'configured.')),
    cfg.StrOpt('external_network',
               help=_('(Required) Network ID for physical network '
                      'connectivity')),
@@ -682,6 +688,12 @@ nsxv_az_opts = [
                       'active and the other half will be placed in the '
                       'ha_datastore. If this value is not set, the global '
                       'one will be used')),
    cfg.ListOpt('edge_host_groups',
                default=[],
                help=_('(Optional) If edge HA is used then this will ensure '
                       'that active/backup edges are placed in the listed '
                       'host groups. 2 predefined host groups need to be '
                       'configured.')),
    cfg.ListOpt('backup_edge_pool',
                help=_("(Optional) Defines edge pool's management range for "
                       "the availability zone. If not defined, the global one "

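As an illustration only, a hypothetical nsx.ini excerpt (the host group
names are placeholders and must match 'Virtual Machines to Hosts' host
groups pre-created in vCenter):

    [nsxv]
    edge_ha = True
    edge_host_groups = HOST-GROUP-A,HOST-GROUP-B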

@@ -487,3 +487,122 @@ class DvsManager(object):
                                 "config.uplinkPortPolicy")
        standby = list(set(uplinks.uplinkPortName) - set(ports))
        policy.uplinkPortOrder.standbyUplinkPort = standby

    def _reconfigure_cluster(self, session, cluster, config_spec):
        """Reconfigure a cluster in vcenter"""
        try:
            reconfig_task = session.invoke_api(
                session.vim, "ReconfigureComputeResource_Task",
                cluster, spec=config_spec, modify=True)
            session.wait_for_task(reconfig_task)
        except Exception as excep:
            LOG.exception(_LE('Failed to reconfigure cluster %s'), excep)

    def _create_vm_group_spec(self, client_factory, name, vm_refs):
        group = client_factory.create('ns0:ClusterVmGroup')
        group.name = name
        # On the vCenter UI it is not possible to create a VM group
        # without VMs attached to it, but via the API it is. Therefore,
        # check for the existence of the vm attribute in the group to
        # avoid exceptions
        if hasattr(group, 'vm'):
            group.vm += vm_refs
        else:
            group.vm = vm_refs
        group_spec = client_factory.create('ns0:ClusterGroupSpec')
        group_spec.operation = 'add'
        group_spec.info = group
        return [group_spec]

    def _delete_vm_group_spec(self, client_factory, name):
        group_spec = client_factory.create('ns0:ClusterGroupSpec')
        group = client_factory.create('ns0:ClusterVmGroup')
        group.name = name
        group_spec.operation = 'remove'
        group_spec.removeKey = name
        group_spec.info = group
        return [group_spec]

    def _create_cluster_rules_spec(self, client_factory, name, vm_group_name,
                                   host_group_name):
        rules_spec = client_factory.create('ns0:ClusterRuleSpec')
        rules_spec.operation = 'add'
        policy_class = 'ns0:ClusterVmHostRuleInfo'
        rules_info = client_factory.create(policy_class)
        rules_info.enabled = True
        rules_info.mandatory = True
        rules_info.name = name
        rules_info.vmGroupName = vm_group_name
        rules_info.affineHostGroupName = host_group_name
        rules_spec.info = rules_info
        return rules_spec

    def _delete_cluster_rules_spec(self, client_factory, rule):
        rules_spec = client_factory.create('ns0:ClusterRuleSpec')
        rules_spec.operation = 'remove'
        rules_spec.removeKey = int(rule.key)
        policy_class = 'ns0:ClusterVmHostRuleInfo'
        rules_info = client_factory.create(policy_class)
        rules_info.name = rule.name
        rules_info.vmGroupName = rule.vmGroupName
        rules_info.affineHostGroupName = rule.affineHostGroupName
        rules_spec.info = rules_info
        return rules_spec

    def update_cluster_edge_failover(self, resource_id, vm_moids,
                                     edge_id, host_group_names):
        """Updates cluster for vm placement using DRS"""
        session = self._session
        resource = vim_util.get_moref(resource_id, 'ResourcePool')
        # TODO(garyk): cache the cluster details
        cluster = session.invoke_api(
            vim_util, "get_object_property", self._session.vim, resource,
            "owner")
        vms = [vim_util.get_moref(vm_moid, 'VirtualMachine') for
               vm_moid in vm_moids]
        client_factory = session.vim.client.factory
        config_spec = client_factory.create('ns0:ClusterConfigSpecEx')
        # Create the VM groups
        config_spec.groupSpec = [
            self._create_vm_group_spec(
                client_factory,
                'neutron-group-%s-%s' % (edge_id, index),
                [vm])
            for index, vm in enumerate(vms, start=1)]
        # Create the VM/Host rules, one per configured host group
        config_spec.rulesSpec = [
            self._create_cluster_rules_spec(
                client_factory, 'neutron-rule-%s-%s' % (edge_id, index),
                'neutron-group-%s-%s' % (edge_id, index), host_group_name)
            for index, host_group_name in enumerate(host_group_names,
                                                    start=1)]
        self._reconfigure_cluster(session, cluster, config_spec)

    def cluster_edge_delete(self, resource_id, edge_id):
        session = self._session
        resource = vim_util.get_moref(resource_id, 'ResourcePool')
        # TODO(garyk): cache the cluster details
        cluster = session.invoke_api(
            vim_util, "get_object_property", self._session.vim, resource,
            "owner")
        client_factory = session.vim.client.factory
        config_spec = client_factory.create('ns0:ClusterConfigSpecEx')
        cluster_config = session.invoke_api(
            vim_util, "get_object_property", self._session.vim, cluster,
            "configurationEx")
        # Collect the VM group specs created for this edge
        groupSpec = []
        for group in cluster_config.group:
            if 'neutron-group-%s-' % (edge_id) in group.name:
                groupSpec.append(self._delete_vm_group_spec(
                    client_factory, group.name))
        if groupSpec:
            config_spec.groupSpec = groupSpec
        # Collect the VM/Host rule specs created for this edge
        ruleSpec = []
        for rule in cluster_config.rule:
            if 'neutron-rule-%s-' % (edge_id) in rule.name:
                ruleSpec.append(self._delete_cluster_rules_spec(
                    client_factory, rule))
        if ruleSpec:
            config_spec.rulesSpec = ruleSpec
        if groupSpec or ruleSpec:
            self._reconfigure_cluster(session, cluster, config_spec)
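A minimal usage sketch of the new methods (all ids below are hypothetical;
the real callers pass the availability zone's resource pool moref and the
edge appliance vmIds returned by the NSX-v backend):

    from vmware_nsx.dvs import dvs

    dvs_mng = dvs.DvsManager()
    # Pin the two edge VMs to the two pre-created host groups
    dvs_mng.update_cluster_edge_failover(
        'resgroup-9',                       # resource pool moref id
        ['vm-101', 'vm-102'],               # edge appliance VM morefs
        'edge-55',
        ['HOST-GROUP-A', 'HOST-GROUP-B'])   # pre-created host groups
    # ... and drop the generated groups/rules when the edge goes away
    dvs_mng.cluster_edge_delete('resgroup-9', 'edge-55')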


@@ -74,6 +74,7 @@ class ConfiguredAvailabilityZone(object):
            self.external_network = cfg.CONF.nsxv.external_network
            self.vdn_scope_id = cfg.CONF.nsxv.vdn_scope_id
            self.dvs_id = cfg.CONF.nsxv.dvs_id
            self.edge_host_groups = cfg.CONF.nsxv.edge_host_groups

            # No support for metadata per az
            self.az_metadata_support = False
@@ -136,6 +137,10 @@ class ConfiguredAvailabilityZone(object):
            if not self.dvs_id:
                self.dvs_id = cfg.CONF.nsxv.dvs_id

            self.edge_host_groups = az_info.get('edge_host_groups', [])
            if not self.edge_host_groups:
                self.edge_host_groups = cfg.CONF.nsxv.edge_host_groups

            # Support for metadata per az only if configured, and different
            # from the global one
            self.mgt_net_proxy_ips = az_info.get('mgt_net_proxy_ips')
@@ -193,6 +198,7 @@ class ConfiguredAvailabilityZone(object):
            self.external_network = cfg.CONF.nsxv.external_network
            self.vdn_scope_id = cfg.CONF.nsxv.vdn_scope_id
            self.dvs_id = cfg.CONF.nsxv.dvs_id
            self.edge_host_groups = cfg.CONF.nsxv.edge_host_groups

    def is_default(self):
        return self.name == DEFAULT_NAME
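The per-AZ value overrides the global one; a short sketch of the resulting
lookup (the zone name is hypothetical):

    # Falls back to cfg.CONF.nsxv.edge_host_groups when the AZ section
    # does not set edge_host_groups
    az = nsx_az.ConfiguredAvailabilityZones().get_availability_zone('zone1')
    host_groups = az.edge_host_groups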


@@ -212,6 +212,12 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
            # TODO(rkukura): Replace with new VIF security details
            pbin.CAP_PORT_FILTER:
                'security-group' in self.supported_extension_aliases}}
        # This needs to be set prior to binding callbacks
        self.dvs_id = cfg.CONF.nsxv.dvs_id
        if cfg.CONF.nsxv.use_dvs_features:
            self._dvs = dvs.DvsManager(dvs_id=self.dvs_id)
        else:
            self._dvs = None
        # Create the client to interface with the NSX-v
        _nsx_v_callbacks = edge_utils.NsxVCallbacks(self)
        self.nsx_v = vcns_driver.VcnsDriver(_nsx_v_callbacks)
@@ -223,7 +229,6 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
        self._configure_reservations()
        self.edge_manager = edge_utils.EdgeManager(self.nsx_v, self)
        self.vdn_scope_id = cfg.CONF.nsxv.vdn_scope_id
        self.dvs_id = cfg.CONF.nsxv.dvs_id
        self.nsx_sg_utils = securitygroup_utils.NsxSecurityGroupUtils(
            self.nsx_v)
        self._availability_zones_data = nsx_az.ConfiguredAvailabilityZones()
@@ -249,11 +254,6 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
        self._router_managers = managers.RouterTypeManager(self)

        if cfg.CONF.nsxv.use_dvs_features:
            self._dvs = dvs.DvsManager(dvs_id=self.dvs_id)
        else:
            self._dvs = None

        if self.edge_manager.is_dhcp_opt_enabled:
            # Only expose the extension if it is supported
            self.supported_extension_aliases.append("dhcp-mtu")


@@ -28,6 +28,7 @@ from vmware_nsx.common import exceptions as nsxv_exc
from vmware_nsx.common import nsxv_constants
from vmware_nsx.common import utils
from vmware_nsx.db import nsxv_db
from vmware_nsx.plugins.nsx_v import availability_zones as nsx_az
from vmware_nsx.plugins.nsx_v.vshield.common import constants
from vmware_nsx.plugins.nsx_v.vshield.common import exceptions
from vmware_nsx.plugins.nsx_v.vshield import edge_utils
@@ -443,7 +444,8 @@ class EdgeApplianceDriver(object):
                raise nsxv_exc.NsxPluginException(err_msg=error)

            self.callbacks.complete_edge_creation(
                context, edge_id, name, router_id, dist, True)
                context, edge_id, name, router_id, dist, True,
                availability_zone)
        except exceptions.VcnsApiException:
            self.callbacks.complete_edge_creation(
@@ -544,6 +546,19 @@ class EdgeApplianceDriver(object):
            LOG.error(_LE("Failed to resize edge: %s"), e.response)

    def delete_edge(self, context, router_id, edge_id, dist=False):
        binding = None
        try:
            binding = nsxv_db.get_nsxv_router_binding_by_edge(context.session,
                                                              edge_id)
        except Exception:
            LOG.debug('Unable to get the binding for edge %s', edge_id)
        if binding:
            az_name = binding['availability_zone']
        else:
            az_name = nsx_az.DEFAULT_NAME
        az = nsx_az.ConfiguredAvailabilityZones().get_availability_zone(
            az_name)
        self.callbacks.pre_edge_deletion(edge_id, az)
        try:
            nsxv_db.delete_nsxv_router_binding(context.session, router_id)
            if not dist:

@@ -37,7 +37,7 @@ from neutron.plugins.common import constants as plugin_const
from neutron_lib.api import validators
from neutron_lib import exceptions as n_exc

from vmware_nsx._i18n import _, _LE, _LW
from vmware_nsx._i18n import _, _LE, _LI, _LW
from vmware_nsx.common import config as conf
from vmware_nsx.common import exceptions as nsx_exc
from vmware_nsx.common import locking
@@ -2579,6 +2579,45 @@ def update_edge_loglevel(vcns, edge_id, module, level):
                                          level))


def update_edge_host_groups(vcns, edge_id, dvs, availability_zone):
    # Update edge DRS host groups
    h, appliances = vcns.get_edge_appliances(edge_id)
    vms = [appliance['vmId']
           for appliance in appliances['appliances']]
    # Ensure random distribution of the VMs
    if availability_zone.ha_placement_random:
        random.shuffle(vms)
    try:
        LOG.info(_LI('Create DRS groups for '
                     '%(vms)s on edge %(edge_id)s'),
                 {'vms': vms,
                  'edge_id': edge_id})
        dvs.update_cluster_edge_failover(
            availability_zone.resource_pool,
            vms, edge_id, availability_zone.edge_host_groups)
    except Exception as e:
        LOG.error(_LE('Unable to create DRS groups for '
                      '%(vms)s on edge %(edge_id)s. Error: %(e)s'),
                  {'vms': vms,
                   'edge_id': edge_id,
                   'e': e})


def delete_edge_host_groups(vcns, edge_id, dvs):
    h, apps = vcns.get_edge_appliances(edge_id)
    if apps['appliances']:
        resource_pool_id = apps['appliances'][0]['resourcePoolId']
        try:
            LOG.info(_LI('Removing DRS groups for edge %(edge_id)s'),
                     {'edge_id': edge_id})
            dvs.cluster_edge_delete(resource_pool_id, edge_id)
        except Exception as e:
            LOG.error(_LE('Unable to remove DRS groups for '
                          'edge %(edge_id)s. Error: %(e)s'),
                      {'edge_id': edge_id,
                       'e': e})
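A sketch of what ha_placement_random changes (vmIds are hypothetical): the
appliance list is shuffled before the groups are built, so placement does
not always follow the appliance order returned by the backend:

    import random

    vms = ['vm-101', 'vm-102']
    random.shuffle(vms)
    # vms[0] -> neutron-group-<edge_id>-1 -> first listed host group
    # vms[1] -> neutron-group-<edge_id>-2 -> second listed host group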
class NsxVCallbacks(object):
    """Edge callback implementation. Callback functions for
    asynchronous tasks.
@@ -2587,7 +2626,8 @@ class NsxVCallbacks(object):
        self.plugin = plugin

    def complete_edge_creation(
            self, context, edge_id, name, router_id, dist, deploy_successful):
            self, context, edge_id, name, router_id, dist, deploy_successful,
            availability_zone=None):
        router_db = None
        if uuidutils.is_uuid_like(router_id):
            try:
@@ -2606,6 +2646,11 @@ class NsxVCallbacks(object):
                nsxv_db.update_nsxv_router_binding(
                    context.session, router_id,
                    status=plugin_const.ACTIVE)
            if (self.plugin._dvs and availability_zone and
                availability_zone.edge_ha and
                availability_zone.edge_host_groups):
                update_edge_host_groups(self.plugin.nsx_v.vcns, edge_id,
                                        self.plugin._dvs, availability_zone)
        else:
            LOG.error(_LE("Failed to deploy Edge for router %s"), name)
            if router_db:
@@ -2617,6 +2662,12 @@ class NsxVCallbacks(object):
                nsxv_db.clean_edge_vnic_binding(
                    context.session, edge_id)

    def pre_edge_deletion(self, edge_id, availability_zone):
        if (self.plugin._dvs and availability_zone and
            availability_zone.edge_ha and availability_zone.edge_host_groups):
            delete_edge_host_groups(self.plugin.nsx_v.vcns, edge_id,
                                    self.plugin._dvs)

    def complete_edge_update(
            self, context, edge_id, router_id, successful, set_errors):
        if successful:


@@ -964,6 +964,10 @@ class Vcns(object):
        uri = "/api/4.0/edges/%s/appliances" % edge_id
        return self.do_request(HTTP_PUT, uri, request)

    def get_edge_appliances(self, edge_id):
        uri = "/api/4.0/edges/%s/appliances" % edge_id
        return self.do_request(HTTP_GET, uri)

    def upload_edge_certificate(self, edge_id, request):
        """Creates a certificate on the specified Edge appliance."""
        uri = '%s/%s/%s' % (TRUSTSTORE_PREFIX, CERTIFICATE, edge_id)

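update_edge_host_groups() and delete_edge_host_groups() only read a couple
of fields from this response; a hypothetical excerpt of its shape, inferred
from those call sites (not the complete NSX-v payload):

    appliances = {
        'appliances': [
            {'vmId': 'vm-101', 'resourcePoolId': 'resgroup-9'},
            {'vmId': 'vm-102', 'resourcePoolId': 'resgroup-9'},
        ]
    }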

@@ -17,6 +17,7 @@ import logging
import pprint
import textwrap

from vmware_nsx.dvs import dvs
from vmware_nsx.plugins.nsx_v.vshield import edge_utils
from vmware_nsx.shell.admin.plugins.common import constants
from vmware_nsx.shell.admin.plugins.common import formatters
@@ -359,6 +360,18 @@ def change_edge_appliance_reservations(properties):
        LOG.error(_LE("%s"), str(e))


def change_edge_hostgroup(properties):
    edge_id = properties.get('edge-id')
    dvs_mng = dvs.DvsManager()
    if properties.get('hostgroup').lower() == "true":
        az_name, size = _get_edge_az_and_size(edge_id)
        az = nsx_az.ConfiguredAvailabilityZones().get_availability_zone(
            az_name)
        edge_utils.update_edge_host_groups(nsxv, edge_id, dvs_mng, az)
    else:
        edge_utils.delete_edge_host_groups(nsxv, edge_id, dvs_mng)


@admin_utils.output_header
def nsx_update_edge(resource, event, trigger, **kwargs):
    """Update edge properties"""
@@ -377,7 +390,9 @@ def nsx_update_edge(resource, event, trigger, **kwargs):
                 "--property resource=cpu|memory and "
                 "(optional) --property limit=<limit> and/or "
                 "(optional) --property shares=<shares> and/or "
                 "(optional) --property reservation=<reservation>")
                 "(optional) --property reservation=<reservation> "
                 "\nFor hostgroup updates, add "
                 "--property hostgroup=True|False")
    if not kwargs.get('property'):
        LOG.error(usage_msg)
        return
@@ -403,6 +418,8 @@ def nsx_update_edge(resource, event, trigger, **kwargs):
        change_edge_syslog(properties)
    elif properties.get('resource'):
        change_edge_appliance_reservations(properties)
    elif properties.get('hostgroup'):
        change_edge_hostgroup(properties)
    elif change_edge_loglevel(properties):
        pass
    else:


@@ -1239,6 +1239,13 @@ class FakeVcns(object):
        response = {}
        return (header, response)

    def get_edge_appliances(self, edge_id):
        header = {
            'status': 204
        }
        response = {}
        return (header, response)

    def get_routes(self, edge_id):
        header = {
            'status': 204


@@ -347,7 +347,11 @@ class VcnsDriverTestCase(base.BaseTestCase):
        super(VcnsDriverTestCase, self).tearDown()

    def complete_edge_creation(
            self, context, edge_id, name, router_id, dist, deploy_successful):
            self, context, edge_id, name, router_id, dist, deploy_successful,
            availability_zone=None):
        pass

    def pre_edge_deletion(self, edge_id, az):
        pass

    def _deploy_edge(self):