From 881bc26b3ae1b369e261f9df4ae12110bf3063d9 Mon Sep 17 00:00:00 2001 From: Gary Kotton Date: Sat, 6 May 2017 03:12:22 +0300 Subject: [PATCH] NSX|V: prevent deadlock with subnet creation and deletion Ensure that we treat the locks in the same order with the delete and create operations. The fix above result in a deadlcok due to a change in neutron where ipam delete call port_update. This conflicted with commit d89eba1a851d76179bd882cc8024b86f71e1153b. To ensure locking for this we make use of a new lock for the DHCP interfaces. Change-Id: I6c3f25ab40247853024560c00d3faa106e5d90b8 --- .../drivers/distributed_router_driver.py | 6 +- vmware_nsx/plugins/nsx_v/plugin.py | 69 +++++++++---------- .../plugins/nsx_v/vshield/edge_utils.py | 6 +- 3 files changed, 35 insertions(+), 46 deletions(-) diff --git a/vmware_nsx/plugins/nsx_v/drivers/distributed_router_driver.py b/vmware_nsx/plugins/nsx_v/drivers/distributed_router_driver.py index bf4100ee5e..e60aac149f 100644 --- a/vmware_nsx/plugins/nsx_v/drivers/distributed_router_driver.py +++ b/vmware_nsx/plugins/nsx_v/drivers/distributed_router_driver.py @@ -494,11 +494,7 @@ class RouterDistributedDriver(router_driver.RouterBaseDriver): with locking.LockManager.get_lock(network_id): dhcp_id = self.edge_manager.create_dhcp_edge_service( context, network_id, subnet) - - address_groups = self.plugin._create_network_dhcp_address_group( - context, network_id) - self.edge_manager.update_dhcp_edge_service( - context, network_id, address_groups=address_groups) + self.plugin._update_dhcp_adddress(context, network_id) if dhcp_id: edge_id, az_name = self.plugin._get_edge_id_and_az_by_rtr_id( context, dhcp_id) diff --git a/vmware_nsx/plugins/nsx_v/plugin.py b/vmware_nsx/plugins/nsx_v/plugin.py index 5052b14048..3f39a6d0f2 100644 --- a/vmware_nsx/plugins/nsx_v/plugin.py +++ b/vmware_nsx/plugins/nsx_v/plugin.py @@ -1893,6 +1893,13 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin, return self._update_port(context, id, port, original_port, is_compute_port, device_id) + def _update_dhcp_adddress(self, context, network_id): + with locking.LockManager.get_lock('dhcp-update-%s' % network_id): + address_groups = self._create_network_dhcp_address_group( + context, network_id) + self._update_dhcp_edge_service(context, network_id, + address_groups) + def _update_port(self, context, id, port, original_port, is_compute_port, device_id): attrs = port[port_def.RESOURCE_NAME] @@ -2039,11 +2046,8 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin, self._create_dhcp_static_binding(context, ret_port) elif owner == constants.DEVICE_OWNER_DHCP: # Update the ip of the dhcp port - with locking.LockManager.get_lock(ret_port['network_id']): - address_groups = self._create_network_dhcp_address_group( - context, ret_port['network_id']) - self._update_dhcp_edge_service( - context, ret_port['network_id'], address_groups) + self._update_dhcp_adddress(context, + ret_port['network_id']) elif (owner == constants.DEVICE_OWNER_ROUTER_GW or owner == constants.DEVICE_OWNER_ROUTER_INTF): # This is a router port - update the edge appliance @@ -2278,34 +2282,33 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin, # and send update dhcp interface rest call before deleting subnet's # corresponding dhcp interface rest call and lead to overlap response # from backend. - with locking.LockManager.get_lock('nsx-edge-pool'): - with db_api.context_manager.writer.using(context): - super(NsxVPluginV2, self).delete_subnet(context, id) + network_id = subnet['network_id'] + with locking.LockManager.get_lock(network_id): + with locking.LockManager.get_lock('nsx-edge-pool'): + with db_api.context_manager.writer.using(context): + super(NsxVPluginV2, self).delete_subnet(context, id) + if subnet['enable_dhcp']: # There is only DHCP port available if len(ports) == 1: port = ports.pop() + # This is done out of the transaction as it invokes + # update_port which interfaces with the NSX self.ipam.delete_port(context, port['id']) - if subnet['enable_dhcp']: - # Delete the DHCP edge service - network_id = subnet['network_id'] - filters = {'network_id': [network_id]} - remaining_subnets = self.get_subnets(context, - filters=filters) - if len(remaining_subnets) == 0: - self._cleanup_dhcp_edge_before_deletion( - context, network_id) - LOG.debug("Delete the DHCP service for network %s", - network_id) - self._delete_dhcp_edge_service(context, network_id) - else: - # Update address group and delete the DHCP port only - with locking.LockManager.get_lock(network_id): - addr_groups = self._create_network_dhcp_address_group( + # Delete the DHCP edge service + filters = {'network_id': [network_id]} + remaining_subnets = self.get_subnets(context, + filters=filters) + if len(remaining_subnets) == 0: + self._cleanup_dhcp_edge_before_deletion( context, network_id) - self._update_dhcp_edge_service(context, network_id, - addr_groups) + LOG.debug("Delete the DHCP service for network %s", + network_id) + self._delete_dhcp_edge_service(context, network_id) + else: + # Update address group and delete the DHCP port only + self._update_dhcp_adddress(context, network_id) def _is_overlapping_reserved_subnets(self, subnet): """Return True if the subnet overlaps with reserved subnets. @@ -2428,7 +2431,8 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin, self._update_dhcp_service_with_subnet(context, s) except Exception: with excutils.save_and_reraise_exception(): - self.delete_subnet(context, s['id']) + super(NsxVPluginV2, self).delete_subnet(context, + s['id']) return s def _process_subnet_ext_attr_create(self, session, subnet_db, @@ -2630,11 +2634,7 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin, network_id) self._delete_dhcp_edge_service(context, network_id) return - with locking.LockManager.get_lock(network_id): - address_groups = self._create_network_dhcp_address_group( - context, network_id) - self._update_dhcp_edge_service(context, network_id, - address_groups) + self._update_dhcp_adddress(context, network_id) def _get_conflict_network_ids_by_overlapping(self, context, subnets): with locking.LockManager.get_lock('nsx-networking'): @@ -2732,10 +2732,7 @@ class NsxVPluginV2(addr_pair_db.AllowedAddressPairsMixin, self.edge_manager.create_dhcp_edge_service(context, network_id, subnet) # Create all dhcp ports within the network - address_groups = self._create_network_dhcp_address_group( - context, network_id) - self.edge_manager.update_dhcp_edge_service( - context, network_id, address_groups=address_groups) + self._update_dhcp_adddress(context, network_id) except Exception: with excutils.save_and_reraise_exception(): diff --git a/vmware_nsx/plugins/nsx_v/vshield/edge_utils.py b/vmware_nsx/plugins/nsx_v/vshield/edge_utils.py index e285bdac44..4ea5cf76d1 100644 --- a/vmware_nsx/plugins/nsx_v/vshield/edge_utils.py +++ b/vmware_nsx/plugins/nsx_v/vshield/edge_utils.py @@ -1541,11 +1541,7 @@ class EdgeManager(object): LOG.error('Database conflict could not be recovered ' 'for VDR %(vdr)s DHCP edge %(dhcp)s', {'vdr': vdr_router_id, 'dhcp': dhcp_edge_id}) - with locking.LockManager.get_lock(network_id): - address_groups = self.plugin._create_network_dhcp_address_group( - context, network_id) - self.update_dhcp_edge_service( - context, network_id, address_groups=address_groups) + self.plugin._update_dhcp_adddress(context, network_id) self.set_sysctl_rp_filter_for_vdr_dhcp( context, dhcp_edge_id, network_id)