NSX|V: prevent deadlock with subnet creation and deletion

Ensure that we treat the locks in the same order with the
delete and create operations.

The fix above result in a deadlcok due to a change in neutron
where ipam delete call port_update. This conflicted with
commit d89eba1a851d76179bd882cc8024b86f71e1153b.

To ensure locking for this we make use of a new lock for the
DHCP interfaces.

Change-Id: I6c3f25ab40247853024560c00d3faa106e5d90b8
This commit is contained in:
Gary Kotton 2017-05-06 03:12:22 +03:00 committed by garyk
parent e0102ba2c9
commit 881bc26b3a
3 changed files with 35 additions and 46 deletions

View File

@ -494,11 +494,7 @@ class RouterDistributedDriver(router_driver.RouterBaseDriver):
with locking.LockManager.get_lock(network_id):
dhcp_id = self.edge_manager.create_dhcp_edge_service(
context, network_id, subnet)
address_groups = self.plugin._create_network_dhcp_address_group(
context, network_id)
self.edge_manager.update_dhcp_edge_service(
context, network_id, address_groups=address_groups)
self.plugin._update_dhcp_adddress(context, network_id)
if dhcp_id:
edge_id, az_name = self.plugin._get_edge_id_and_az_by_rtr_id(
context, dhcp_id)

View File

@ -1893,6 +1893,13 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
return self._update_port(context, id, port, original_port,
is_compute_port, device_id)
def _update_dhcp_adddress(self, context, network_id):
with locking.LockManager.get_lock('dhcp-update-%s' % network_id):
address_groups = self._create_network_dhcp_address_group(
context, network_id)
self._update_dhcp_edge_service(context, network_id,
address_groups)
def _update_port(self, context, id, port, original_port, is_compute_port,
device_id):
attrs = port[port_def.RESOURCE_NAME]
@ -2039,11 +2046,8 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
self._create_dhcp_static_binding(context, ret_port)
elif owner == constants.DEVICE_OWNER_DHCP:
# Update the ip of the dhcp port
with locking.LockManager.get_lock(ret_port['network_id']):
address_groups = self._create_network_dhcp_address_group(
context, ret_port['network_id'])
self._update_dhcp_edge_service(
context, ret_port['network_id'], address_groups)
self._update_dhcp_adddress(context,
ret_port['network_id'])
elif (owner == constants.DEVICE_OWNER_ROUTER_GW or
owner == constants.DEVICE_OWNER_ROUTER_INTF):
# This is a router port - update the edge appliance
@ -2278,18 +2282,21 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
# and send update dhcp interface rest call before deleting subnet's
# corresponding dhcp interface rest call and lead to overlap response
# from backend.
network_id = subnet['network_id']
with locking.LockManager.get_lock(network_id):
with locking.LockManager.get_lock('nsx-edge-pool'):
with db_api.context_manager.writer.using(context):
super(NsxVPluginV2, self).delete_subnet(context, id)
if subnet['enable_dhcp']:
# There is only DHCP port available
if len(ports) == 1:
port = ports.pop()
# This is done out of the transaction as it invokes
# update_port which interfaces with the NSX
self.ipam.delete_port(context, port['id'])
if subnet['enable_dhcp']:
# Delete the DHCP edge service
network_id = subnet['network_id']
filters = {'network_id': [network_id]}
remaining_subnets = self.get_subnets(context,
filters=filters)
@ -2301,11 +2308,7 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
self._delete_dhcp_edge_service(context, network_id)
else:
# Update address group and delete the DHCP port only
with locking.LockManager.get_lock(network_id):
addr_groups = self._create_network_dhcp_address_group(
context, network_id)
self._update_dhcp_edge_service(context, network_id,
addr_groups)
self._update_dhcp_adddress(context, network_id)
def _is_overlapping_reserved_subnets(self, subnet):
"""Return True if the subnet overlaps with reserved subnets.
@ -2428,7 +2431,8 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
self._update_dhcp_service_with_subnet(context, s)
except Exception:
with excutils.save_and_reraise_exception():
self.delete_subnet(context, s['id'])
super(NsxVPluginV2, self).delete_subnet(context,
s['id'])
return s
def _process_subnet_ext_attr_create(self, session, subnet_db,
@ -2630,11 +2634,7 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
network_id)
self._delete_dhcp_edge_service(context, network_id)
return
with locking.LockManager.get_lock(network_id):
address_groups = self._create_network_dhcp_address_group(
context, network_id)
self._update_dhcp_edge_service(context, network_id,
address_groups)
self._update_dhcp_adddress(context, network_id)
def _get_conflict_network_ids_by_overlapping(self, context, subnets):
with locking.LockManager.get_lock('nsx-networking'):
@ -2732,10 +2732,7 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin,
self.edge_manager.create_dhcp_edge_service(context, network_id,
subnet)
# Create all dhcp ports within the network
address_groups = self._create_network_dhcp_address_group(
context, network_id)
self.edge_manager.update_dhcp_edge_service(
context, network_id, address_groups=address_groups)
self._update_dhcp_adddress(context, network_id)
except Exception:
with excutils.save_and_reraise_exception():

View File

@ -1541,11 +1541,7 @@ class EdgeManager(object):
LOG.error('Database conflict could not be recovered '
'for VDR %(vdr)s DHCP edge %(dhcp)s',
{'vdr': vdr_router_id, 'dhcp': dhcp_edge_id})
with locking.LockManager.get_lock(network_id):
address_groups = self.plugin._create_network_dhcp_address_group(
context, network_id)
self.update_dhcp_edge_service(
context, network_id, address_groups=address_groups)
self.plugin._update_dhcp_adddress(context, network_id)
self.set_sysctl_rp_filter_for_vdr_dhcp(
context, dhcp_edge_id, network_id)