From 17ef415084c697e10dcd7d605816dc790072ba51 Mon Sep 17 00:00:00 2001 From: Adit Sarfaty Date: Thu, 27 Jun 2019 11:50:46 +0300 Subject: [PATCH] NSX|P: Rollback update GW info in case of backend issues Change-Id: Ie4b9e178eacf283c07ec2fa0ef7a4eb260298d91 --- vmware_nsx/plugins/nsx_p/plugin.py | 141 ++++++++++++--------- vmware_nsx/tests/unit/nsx_p/test_plugin.py | 79 +++++++++++- 2 files changed, 160 insertions(+), 60 deletions(-) diff --git a/vmware_nsx/plugins/nsx_p/plugin.py b/vmware_nsx/plugins/nsx_p/plugin.py index 5aba441d20..aeff3db0e0 100644 --- a/vmware_nsx/plugins/nsx_p/plugin.py +++ b/vmware_nsx/plugins/nsx_p/plugin.py @@ -1524,9 +1524,15 @@ class NsxPolicyPlugin(nsx_plugin_common.NsxPluginV3Base): # remove the edge cluster from the tier1 router self.nsxpolicy.tier1.remove_edge_cluster(router_id) - def _update_router_gw_info(self, context, router_id, info): + def _get_router_gw_info(self, context, router_id): + router = self.get_router(context, router_id) + return router.get(l3_apidef.EXTERNAL_GW_INFO, {}) + + def _update_router_gw_info(self, context, router_id, info, + called_from=None): # Get the original data of the router GW router = self._get_router(context, router_id) + orig_info = self._get_router_gw_info(context, router_id) org_tier0_uuid = self._get_tier0_uuid_by_router(context, router) org_enable_snat = router.enable_snat orgaddr, orgmask, _orgnexthop = ( @@ -1562,72 +1568,85 @@ class NsxPolicyPlugin(nsx_plugin_common.NsxPluginV3Base): lb_exist = self.service_router_has_loadbalancers( context, router_id) tier1_services_exist = fw_exist or vpn_exist or lb_exist + actions = self._get_update_router_gw_actions( org_tier0_uuid, orgaddr, org_enable_snat, new_tier0_uuid, newaddr, new_enable_snat, tier1_services_exist, sr_currently_exists) - if actions['add_service_router']: - self.create_service_router(context, router_id, router=router) + try: + if actions['add_service_router']: + self.create_service_router(context, router_id, router=router) - if actions['remove_snat_rules']: - for subnet in router_subnets: - self._del_subnet_snat_rule(router_id, subnet) - if actions['remove_no_dnat_rules']: - for subnet in router_subnets: - self._del_subnet_no_dnat_rule(router_id, subnet) + if actions['remove_snat_rules']: + for subnet in router_subnets: + self._del_subnet_snat_rule(router_id, subnet) + if actions['remove_no_dnat_rules']: + for subnet in router_subnets: + self._del_subnet_no_dnat_rule(router_id, subnet) - if (actions['remove_router_link_port'] or - actions['add_router_link_port']): - # GW was changed. update GW and route advertisement - # pylint: disable=unexpected-keyword-arg - self.nsxpolicy.tier1.update_route_advertisement( - router_id, - static_routes=not new_enable_snat, - nat=actions['advertise_route_nat_flag'], - subnets=actions['advertise_route_connected_flag'], - tier0=new_tier0_uuid) + if (actions['remove_router_link_port'] or + actions['add_router_link_port']): + # GW was changed. update GW and route advertisement + # pylint: disable=unexpected-keyword-arg + self.nsxpolicy.tier1.update_route_advertisement( + router_id, + static_routes=not new_enable_snat, + nat=actions['advertise_route_nat_flag'], + subnets=actions['advertise_route_connected_flag'], + tier0=new_tier0_uuid) - # Set/Unset the router TZ to allow vlan switches traffic - if cfg.CONF.nsx_p.allow_passthrough: - if new_tier0_uuid: - tz_uuid = self.nsxpolicy.tier0.get_overlay_transport_zone( - new_tier0_uuid) + # Set/Unset the router TZ to allow vlan switches traffic + if cfg.CONF.nsx_p.allow_passthrough: + if new_tier0_uuid: + tier0_client = self.nsxpolicy.tier0 + tz_uuid = tier0_client.get_overlay_transport_zone( + new_tier0_uuid) + else: + tz_uuid = None + self.nsxpolicy.tier1.update_transport_zone( + router_id, tz_uuid) else: - tz_uuid = None - self.nsxpolicy.tier1.update_transport_zone( - router_id, tz_uuid) + LOG.debug("Not adding transport-zone to tier1 router %s " + "as passthrough api is disabled", router_id) else: - LOG.debug("Not adding transport-zone to tier1 router %s as " - "passthrough api is disabled", router_id) - else: - # Only update route advertisement - self.nsxpolicy.tier1.update_route_advertisement( - router_id, - static_routes=not new_enable_snat, - nat=actions['advertise_route_nat_flag'], - subnets=actions['advertise_route_connected_flag']) + # Only update route advertisement + self.nsxpolicy.tier1.update_route_advertisement( + router_id, + static_routes=not new_enable_snat, + nat=actions['advertise_route_nat_flag'], + subnets=actions['advertise_route_connected_flag']) - if actions['add_snat_rules']: - # Add SNAT rules for all the subnets which are in different scope - # than the GW - gw_address_scope = self._get_network_address_scope( - context, router.gw_port.network_id) - for subnet in router_subnets: - self._add_subnet_snat_rule(context, router_id, - subnet, gw_address_scope, newaddr) + if actions['add_snat_rules']: + # Add SNAT rules for all the subnets which are in different + # scope than the GW + gw_address_scope = self._get_network_address_scope( + context, router.gw_port.network_id) + for subnet in router_subnets: + self._add_subnet_snat_rule(context, router_id, + subnet, gw_address_scope, + newaddr) - if actions['add_no_dnat_rules']: - for subnet in router_subnets: - self._add_subnet_no_dnat_rule(context, router_id, subnet) + if actions['add_no_dnat_rules']: + for subnet in router_subnets: + self._add_subnet_no_dnat_rule(context, router_id, subnet) - # always advertise ipv6 subnets if gateway is set - advertise_ipv6_subnets = True if info else False - self._update_router_advertisement_rules(router_id, - router_subnets, - advertise_ipv6_subnets) - if actions['remove_service_router']: - self.delete_service_router(router_id) + # always advertise ipv6 subnets if gateway is set + advertise_ipv6_subnets = True if info else False + self._update_router_advertisement_rules(router_id, + router_subnets, + advertise_ipv6_subnets) + if actions['remove_service_router']: + self.delete_service_router(router_id) + except nsx_lib_exc.NsxLibException as e: + # GW updates failed on the NSX. Rollback the change, + # unless it is during create or delete of a router + with excutils.save_and_reraise_exception(): + if not called_from: + LOG.error("Rolling back router %s GW info update because " + "of NSX failure %s", router_id, e) + super(NsxPolicyPlugin, self)._update_router_gw_info( + context, router_id, orig_info, router=router) def _update_router_advertisement_rules(self, router_id, subnets, advertise_ipv6): @@ -1683,8 +1702,9 @@ class NsxPolicyPlugin(nsx_plugin_common.NsxPluginV3Base): if gw_info and gw_info != const.ATTR_NOT_SPECIFIED: try: - self._update_router_gw_info(context, router['id'], gw_info) - except (db_exc.DBError, nsx_lib_exc.ManagerError): + self._update_router_gw_info(context, router['id'], gw_info, + called_from="create") + except (db_exc.DBError, nsx_lib_exc.NsxLibException): with excutils.save_and_reraise_exception(): LOG.error("Failed to set gateway info for router " "being created: %s - removing router", @@ -1700,7 +1720,14 @@ class NsxPolicyPlugin(nsx_plugin_common.NsxPluginV3Base): def delete_router(self, context, router_id): router = self.get_router(context, router_id) if router.get(l3_apidef.EXTERNAL_GW_INFO): - self._update_router_gw_info(context, router_id, {}) + try: + self._update_router_gw_info(context, router_id, {}, + called_from="delete") + except nsx_lib_exc.NsxLibException as e: + LOG.error("Failed to remove router %s gw info before " + "deletion, but going on with the deletion anyway: " + "%s", router_id, e) + ret_val = super(NsxPolicyPlugin, self).delete_router( context, router_id) diff --git a/vmware_nsx/tests/unit/nsx_p/test_plugin.py b/vmware_nsx/tests/unit/nsx_p/test_plugin.py index 04a87535db..5f5f4bff38 100644 --- a/vmware_nsx/tests/unit/nsx_p/test_plugin.py +++ b/vmware_nsx/tests/unit/nsx_p/test_plugin.py @@ -1502,9 +1502,6 @@ class NsxPTestL3NatTestCase(NsxPTestL3NatTest, def test_router_add_gateway_no_subnet(self): self.skipTest('No support for no subnet gateway set') - def test_create_router_gateway_fails(self): - self.skipTest('not supported') - def test_router_remove_ipv6_subnet_from_interface(self): self.skipTest('not supported') @@ -2211,3 +2208,79 @@ class NsxPTestL3NatTestCase(NsxPTestL3NatTest, oct_listener._unsubscribe_router_delete_callback() self.lb_mock1.start() self.lb_mock2.start() + + def test_router_gw_info_rollback(self): + """Fail the GW addition and verify rollback was performed""" + with self.router() as r,\ + self.external_network() as public_net,\ + self.subnet(network=public_net, cidr='12.0.0.0/24', + enable_dhcp=False) as s1,\ + mock.patch("vmware_nsxlib.v3.policy.core_resources." + "NsxPolicyTier1Api.update_route_advertisement", + side_effect=nsxlib_exc.NsxLibException): + # Make sure creation fails + self._add_external_gateway_to_router( + r['router']['id'], + s1['subnet']['network_id'], + expected_code=exc.HTTPInternalServerError.code) + # Make sure there is no GW configured + body = self._show('routers', r['router']['id']) + self.assertIsNone(body['router']['external_gateway_info']) + + def test_router_create_with_gw_info_failed(self): + """Fail the GW addition during router creation + and verify rollback was performed + """ + with self.router() as r,\ + self.external_network() as public_net,\ + self.subnet(network=public_net, cidr='12.0.0.0/24', + enable_dhcp=False) as s1,\ + mock.patch("vmware_nsxlib.v3.policy.core_resources." + "NsxPolicyTier1Api.update_route_advertisement", + side_effect=nsxlib_exc.NsxLibException): + # Make sure creation fails + self._add_external_gateway_to_router( + r['router']['id'], + s1['subnet']['network_id'], + expected_code=exc.HTTPInternalServerError.code) + # Make sure there is no GW configured + body = self._show('routers', r['router']['id']) + self.assertIsNone(body['router']['external_gateway_info']) + + def test_create_router_gateway_fails(self): + with self.external_network() as public_net,\ + self.subnet(network=public_net, cidr='12.0.0.0/24', + enable_dhcp=False),\ + mock.patch.object(self.plugin.nsxpolicy.tier1, + "get_edge_cluster_path", + return_value=False),\ + mock.patch.object(self.plugin.nsxpolicy.tier1, + "set_edge_cluster_path", + side_effect=nsxlib_exc.NsxLibException): + data = {'router': { + 'name': 'router1', 'admin_state_up': True, + 'tenant_id': self._tenant_id, + 'external_gateway_info': { + 'network_id': public_net['network']['id']}}} + + self.assertRaises(nsxlib_exc.NsxLibException, + self.plugin.create_router, self.ctx, data) + # Verify router doesn't persist on failure + routers = self.plugin.get_routers(self.ctx) + self.assertEqual(0, len(routers)) + + def test_delete_router_gateway_fails(self): + """Verify that router deletion continues even if gw update fails""" + with self.router() as r,\ + self.external_network() as public_net,\ + self.subnet(network=public_net, cidr='12.0.0.0/24', + enable_dhcp=False) as s1: + self._add_external_gateway_to_router( + r['router']['id'], + s1['subnet']['network_id']) + with mock.patch("vmware_nsxlib.v3.policy.core_resources." + "NsxPolicyTier1Api.update_route_advertisement", + side_effect=nsxlib_exc.NsxLibException): + self._delete('routers', r['router']['id']) + routers = self.plugin.get_routers(self.ctx) + self.assertEqual(0, len(routers))