From b02f705d3d0a953092ed81820f8118c0f637cbf2 Mon Sep 17 00:00:00 2001 From: Abhishek Raut Date: Tue, 29 Mar 2016 12:32:21 -0700 Subject: [PATCH] [NSX-v]: Validate edges while subnet create During subnet creation in NSX-v, we retrieve router bindings to determine the list of available edges that could serve DHCP for that subnet. We randomly select one of the available edges and use that edge. If the edge goes missing from the backend, subnet creation fails, and any subsequent subnet creation will try to retrieve the same edge ID from the backend and eventually fail again for the same reason. Hence we end up in a weird state where subnets can no longer be created. This patch adds a validation step that checks whether a randomly chosen available edge exists on the backend. If it does not exist, we choose another edge until we run out of valid edges, at which point we create a new edge to serve this subnet. Change-Id: I298f0aefdf76cad78283edf614cb6623ea249983 Closes-Bug: #1564679 --- .../plugins/nsx_v/vshield/edge_utils.py | 41 ++++++++++++++----- .../unit/nsx_v/vshield/test_edge_utils.py | 19 +++++++++ 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/vmware_nsx/plugins/nsx_v/vshield/edge_utils.py b/vmware_nsx/plugins/nsx_v/vshield/edge_utils.py index f895caed57..5b844d3cc7 100644 --- a/vmware_nsx/plugins/nsx_v/vshield/edge_utils.py +++ b/vmware_nsx/plugins/nsx_v/vshield/edge_utils.py @@ -831,6 +831,19 @@ class EdgeManager(object): edges = [binding['dhcp_edge_id'] for binding in bindings] return edges + def _get_random_available_edge(self, available_edge_ids): + while available_edge_ids: + # Randomly select an edge ID from the pool. + new_id = random.choice(available_edge_ids) + # Validate whether the edge exists on the backend. + if not self.check_edge_active_at_backend(new_id): + # Remove edge_id from available edges pool.
+ available_edge_ids.remove(new_id) + LOG.warning(_LW("Skipping edge: %s due to inactive status on " + "the backend."), new_id) + else: + return new_id + def _get_available_edges(self, context, network_id, conflicting_nets): if conflicting_nets is None: conflicting_nets = [] @@ -961,11 +974,15 @@ class EdgeManager(object): # one #4. Update the address groups to the vnic if available_edge_ids: - new_id = random.choice(available_edge_ids) - LOG.debug("Select edge %s to support dhcp for " - "network %s", new_id, network_id) - self.reuse_existing_dhcp_edge( - context, new_id, resource_id, network_id) + new_id = self._get_random_available_edge( + available_edge_ids) + if new_id: + LOG.debug("Select edge %s to support dhcp for " + "network %s", new_id, network_id) + self.reuse_existing_dhcp_edge( + context, new_id, resource_id, network_id) + else: + allocate_new_edge = True else: allocate_new_edge = True # case 2: attach the subnet to a new edge and update vnic @@ -977,11 +994,15 @@ class EdgeManager(object): available_edge_ids, conflict_edge_ids) # There is available one if available_edge_ids: - new_id = random.choice(available_edge_ids) - LOG.debug("Select edge %s to support dhcp for network %s", - new_id, network_id) - self.reuse_existing_dhcp_edge( - context, new_id, resource_id, network_id) + new_id = self._get_random_available_edge( + available_edge_ids) + if new_id: + LOG.debug("Select edge %s to support dhcp for network " + "%s", new_id, network_id) + self.reuse_existing_dhcp_edge( + context, new_id, resource_id, network_id) + else: + allocate_new_edge = True else: allocate_new_edge = True diff --git a/vmware_nsx/tests/unit/nsx_v/vshield/test_edge_utils.py b/vmware_nsx/tests/unit/nsx_v/vshield/test_edge_utils.py index e549d2302b..756b195a16 100644 --- a/vmware_nsx/tests/unit/nsx_v/vshield/test_edge_utils.py +++ b/vmware_nsx/tests/unit/nsx_v/vshield/test_edge_utils.py @@ -117,6 +117,25 @@ class EdgeDHCPManagerTestCase(EdgeUtilsTestCaseMixin): 
appliance_size=vcns_const.SERVICE_SIZE_MAPPING['dhcp'], dist=False) + def test_get_random_available_edge(self): + available_edge_ids = ['edge-1', 'edge-2'] + selected_edge_id = self.edge_manager._get_random_available_edge( + available_edge_ids) + self.assertIn(selected_edge_id, available_edge_ids) + + def test_get_random_available_edge_missing_edges_returns_none(self): + available_edge_ids = ['edge-1', 'edge-2'] + # Always return inactive(False) while checking whether the edge + # exists on the backend. + with mock.patch.object(self.edge_manager, + 'check_edge_active_at_backend', + return_value=False): + selected_edge_id = self.edge_manager._get_random_available_edge( + available_edge_ids) + # If no active edges are found on the backend, return None so that + # a new DHCP edge is created. + self.assertIsNone(selected_edge_id) + class EdgeUtilsTestCase(EdgeUtilsTestCaseMixin):