From cbae86a958322c20cc0a48b5c0d41ce81afdddb5 Mon Sep 17 00:00:00 2001
From: Aaron Rosen
Date: Sat, 16 Feb 2013 16:26:14 -0800
Subject: [PATCH] Improve error handling when nvp and quantum are out of sync

Previously, when NVP and Quantum were out of sync, an exception was
raised to notify the user. This patch changes the behavior so that
elements found in the Quantum db but not in NVP are now put into the
error state. In addition, these error-state elements can now be
deleted even though they are no longer found in NVP. This patch also
removes outdated docstrings that seem to keep being added.

Fixes bug 1127643

Change-Id: I3e79ab7a284e61623cd63fefc3755746b8e4dfd7
---
 .../nicira/nicira_nvp_plugin/QuantumPlugin.py | 133 ++++++++++--------
 .../nicira_nvp_plugin/common/exceptions.py    |   4 -
 .../nicira/nicira_nvp_plugin/nvplib.py        |   8 +-
 .../tests/unit/nicira/fake_nvpapiclient.py    |   9 +-
 .../tests/unit/nicira/test_nicira_plugin.py   | 103 ++++++++++++++
 5 files changed, 191 insertions(+), 66 deletions(-)

diff --git a/quantum/plugins/nicira/nicira_nvp_plugin/QuantumPlugin.py b/quantum/plugins/nicira/nicira_nvp_plugin/QuantumPlugin.py
index a8e2de004c..93d2614934 100644
--- a/quantum/plugins/nicira/nicira_nvp_plugin/QuantumPlugin.py
+++ b/quantum/plugins/nicira/nicira_nvp_plugin/QuantumPlugin.py
@@ -397,13 +397,17 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
         # TODO(bgh): if this is a bridged network and the lswitch we just got
         # back will have zero ports after the delete we should garbage collect
         # the lswitch.
-        nvplib.delete_port(self.default_cluster,
-                           port_data['network_id'],
-                           port)
-        LOG.debug(_("_nvp_delete_port completed for port %(port_id)s "
-                    "on network %(net_id)s"),
-                  {'port_id': port_data['id'],
-                   'net_id': port_data['network_id']})
+        try:
+            nvplib.delete_port(self.default_cluster,
+                               port_data['network_id'],
+                               port)
+            LOG.debug(_("_nvp_delete_port completed for port %(port_id)s "
+                        "on network %(net_id)s"),
+                      {'port_id': port_data['id'],
+                       'net_id': port_data['network_id']})
+
+        except q_exc.NotFound:
+            LOG.warning(_("port %s not found in NVP"), port_data['id'])
 
     def _nvp_delete_router_port(self, context, port_data):
         # Delete logical router port
@@ -768,14 +772,6 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
         return new_net
 
     def delete_network(self, context, id):
-        """
-        Deletes the network with the specified network identifier
-        belonging to the specified tenant.
-
-        :returns: None
-        :raises: exception.NetworkInUse
-        :raises: exception.NetworkNotFound
-        """
         external = self._network_is_external(context, id)
         # Before deleting ports, ensure the peer of a NVP logical
         # port with a patch attachment is removed too
@@ -812,14 +808,17 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
 
         # Do not go to NVP for external networks
         if not external:
-            # FIXME(salvatore-orlando): Failures here might lead NVP
-            # and quantum state to diverge
-            pairs = self._get_lswitch_cluster_pairs(id, context.tenant_id)
-            for (cluster, switches) in pairs:
-                nvplib.delete_networks(cluster, id, switches)
+            try:
+                # FIXME(salvatore-orlando): Failures here might lead NVP
+                # and quantum state to diverge
+                pairs = self._get_lswitch_cluster_pairs(id, context.tenant_id)
+                for (cluster, switches) in pairs:
+                    nvplib.delete_networks(cluster, id, switches)
 
-            LOG.debug(_("delete_network completed for tenant: %s"),
-                      context.tenant_id)
+                LOG.debug(_("delete_network completed for tenant: %s"),
+                          context.tenant_id)
+            except q_exc.NotFound:
+                LOG.warning(_("Did not find lswitch %s in NVP"), id)
 
     def _get_lswitch_cluster_pairs(self, netw_id, tenant_id):
         """Figure out the set of lswitches on each cluster that maps to this
@@ -872,6 +871,8 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
             if nvp_net_status != network.status:
                 # update the network status
                 network.status = nvp_net_status
+        except q_exc.NotFound:
+            network.status = constants.NET_STATUS_ERROR
         except Exception:
             err_msg = _("Unable to get logical switches")
             LOG.exception(err_msg)
@@ -933,14 +934,17 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
             if quantum_lswitch[l3.EXTERNAL]:
                 continue
             elif quantum_lswitch['id'] not in nvp_lswitches:
-                raise nvp_exc.NvpOutOfSyncException()
-            # TODO(salvatore-orlando): be careful about "extended"
-            # logical switches
-            ls = nvp_lswitches.pop(quantum_lswitch['id'])
-            if (ls["_relations"]["LogicalSwitchStatus"]["fabric_status"]):
-                quantum_lswitch["status"] = constants.NET_STATUS_ACTIVE
+                LOG.warning(_("Logical Switch %s found in quantum database "
+                              "but not in NVP."), quantum_lswitch["id"])
+                quantum_lswitch["status"] = constants.NET_STATUS_ERROR
             else:
-                quantum_lswitch["status"] = constants.NET_STATUS_DOWN
+                # TODO(salvatore-orlando): be careful about "extended"
+                # logical switches
+                ls = nvp_lswitches.pop(quantum_lswitch['id'])
+                if (ls["_relations"]["LogicalSwitchStatus"]["fabric_status"]):
+                    quantum_lswitch["status"] = constants.NET_STATUS_ACTIVE
+                else:
+                    quantum_lswitch["status"] = constants.NET_STATUS_DOWN
 
         # do not make the case in which switches are found in NVP
         # but not in Quantum catastrophic.
@@ -1030,7 +1034,12 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
                                  "&relations=LogicalPortStatus"
                                  % (lswitch, lport_fields_str,
                                     vm_filter, tenant_filter))
-            ports = nvplib.get_all_query_pages(lport_query_path, c)
+            try:
+                ports = nvplib.get_all_query_pages(lport_query_path, c)
+            except q_exc.NotFound:
+                LOG.warn(_("Lswitch %s not found in NVP"), lswitch)
+                ports = None
+
             if ports:
                 for port in ports:
                     for tag in port["tags"]:
@@ -1063,12 +1072,12 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
                 quantum_lport["status"] = constants.PORT_STATUS_DOWN
 
                 del nvp_lports[quantum_lport["id"]]
-                lports.append(quantum_lport)
             except KeyError:
-
+                quantum_lport["status"] = constants.PORT_STATUS_ERROR
                 LOG.debug(_("Quantum logical port %s was not found on NVP"),
                           quantum_lport['id'])
 
+            lports.append(quantum_lport)
         # do not make the case in which ports are found in NVP
         # but not in Quantum catastrophic.
         if len(nvp_lports):
@@ -1282,13 +1291,18 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
 
         nvp_id = nicira_db.get_nvp_port_id(context.session, id)
         #TODO: pass the appropriate cluster here
-        port = nvplib.get_logical_port_status(
-            self.default_cluster, quantum_db_port['network_id'], nvp_id)
-        quantum_db_port["admin_state_up"] = port["admin_status_enabled"]
-        if port["fabric_status_up"]:
-            quantum_db_port["status"] = constants.PORT_STATUS_ACTIVE
-        else:
-            quantum_db_port["status"] = constants.PORT_STATUS_DOWN
+        try:
+            port = nvplib.get_logical_port_status(
+                self.default_cluster, quantum_db_port['network_id'],
+                nvp_id)
+            quantum_db_port["admin_state_up"] = (
+                port["admin_status_enabled"])
+            if port["fabric_status_up"]:
+                quantum_db_port["status"] = constants.PORT_STATUS_ACTIVE
+            else:
+                quantum_db_port["status"] = constants.PORT_STATUS_DOWN
+        except q_exc.NotFound:
+            quantum_db_port["status"] = constants.PORT_STATUS_ERROR
         return quantum_db_port
 
     def create_router(self, context, router):
@@ -1393,13 +1407,12 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
         # together with the resource
         try:
             nvplib.delete_lrouter(self.default_cluster, id)
-        except NvpApiClient.ResourceNotFound:
-            raise nvp_exc.NvpPluginException(
-                err_msg=(_("Logical router '%s' not found "
-                           "on NVP Platform") % id))
+        except q_exc.NotFound:
+            LOG.warning(_("Logical router '%s' not found "
+                          "on NVP Platform") % id)
         except NvpApiClient.NvpApiException:
             raise nvp_exc.NvpPluginException(
-                err_msg=(_("Unable to update logical router"
+                err_msg=(_("Unable to delete logical router "
                            "on NVP Platform")))
 
     def get_router(self, context, id, fields=None):
@@ -1408,23 +1421,26 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
             # FIXME(salvatore-orlando): We need to
             # find the appropriate cluster!
             cluster = self.default_cluster
-            lrouter = nvplib.get_lrouter(cluster, id)
-            router_op_status = constants.NET_STATUS_DOWN
+            try:
+                lrouter = nvplib.get_lrouter(cluster, id)
+            except q_exc.NotFound:
+                lrouter = {}
+                router_op_status = constants.NET_STATUS_ERROR
             relations = lrouter.get('_relations')
             if relations:
                 lrouter_status = relations.get('LogicalRouterStatus')
-            # FIXME(salvatore-orlando): Being unable to fetch the
-            # logical router status should be an exception.
-            if lrouter_status:
-                router_op_status = (lrouter_status.get('fabric_status')
-                                    and constants.NET_STATUS_ACTIVE or
-                                    constants.NET_STATUS_DOWN)
-            LOG.debug(_("Current router status:%(router_status)s;"
-                        "Status in Quantum DB:%(db_router_status)s"),
-                      {'router_status': router_op_status,
-                       'db_router_status': router.status})
+                # FIXME(salvatore-orlando): Being unable to fetch the
+                # logical router status should be an exception.
+                if lrouter_status:
+                    router_op_status = (lrouter_status.get('fabric_status')
+                                        and constants.NET_STATUS_ACTIVE or
+                                        constants.NET_STATUS_DOWN)
             if router_op_status != router.status:
-                # update the network status
+                LOG.debug(_("Current router status:%(router_status)s;"
+                            "Status in Quantum DB:%(db_router_status)s"),
+                          {'router_status': router_op_status,
+                           'db_router_status': router.status})
+                # update the router status
                 with context.session.begin(subtransactions=True):
                     router.status = router_op_status
         except NvpApiClient.NvpApiException:
@@ -1467,6 +1483,8 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
                 else:
                     router.status = constants.NET_STATUS_DOWN
                 nvp_lrouters.remove(nvp_lrouter)
+            else:
+                router.status = constants.NET_STATUS_ERROR
 
         # do not make the case in which routers are found in NVP
         # but not in Quantum catastrophic.
@@ -1655,7 +1673,6 @@ class NvpPluginV2(db_base_plugin_v2.QuantumDbPluginV2,
 
         Overrides method from base class. The method is augmented
         for creating NAT rules in the process.
-
         """
         if (('fixed_ip_address' in fip and fip['fixed_ip_address']) and
                 not ('port_id' in fip and fip['port_id'])):
diff --git a/quantum/plugins/nicira/nicira_nvp_plugin/common/exceptions.py b/quantum/plugins/nicira/nicira_nvp_plugin/common/exceptions.py
index caad357eb5..64d365fa4c 100644
--- a/quantum/plugins/nicira/nicira_nvp_plugin/common/exceptions.py
+++ b/quantum/plugins/nicira/nicira_nvp_plugin/common/exceptions.py
@@ -38,10 +38,6 @@ class NvpNoMorePortsException(NvpPluginException):
                 "Maximum number of ports reached")
 
 
-class NvpOutOfSyncException(NvpPluginException):
-    message = _("Quantum state has diverged from the networking backend!")
-
-
 class NvpNatRuleMismatch(NvpPluginException):
     message = _("While retrieving NAT rules, %(actual_rules)s were found "
                 "whereas rules in the (%(min_rules)s,%(max_rules)s) interval "
diff --git a/quantum/plugins/nicira/nicira_nvp_plugin/nvplib.py b/quantum/plugins/nicira/nicira_nvp_plugin/nvplib.py
index 2732d6b0bc..7198929d9e 100644
--- a/quantum/plugins/nicira/nicira_nvp_plugin/nvplib.py
+++ b/quantum/plugins/nicira/nicira_nvp_plugin/nvplib.py
@@ -187,7 +187,11 @@ def do_single_request(*args, **kwargs):
     """Issue a request to a specified cluster if specified via kwargs
        (cluster=)."""
     cluster = kwargs["cluster"]
-    return cluster.api_client.request(*args)
+    try:
+        req = cluster.api_client.request(*args)
+    except NvpApiClient.ResourceNotFound:
+        raise exception.NotFound()
+    return req
 
 
 def do_multi_request(*args, **kwargs):
@@ -254,6 +258,8 @@ def get_lswitches(cluster, quantum_net_id):
                                                 cluster)
             results.extend(extra_switches)
         return results
+    except NvpApiClient.ResourceNotFound:
+        raise exception.NetworkNotFound(net_id=quantum_net_id)
     except NvpApiClient.NvpApiException:
         # TODO(salvatore-olrando): Do a better exception handling
         # and re-raising
diff --git a/quantum/tests/unit/nicira/fake_nvpapiclient.py b/quantum/tests/unit/nicira/fake_nvpapiclient.py
index bbbae9f15e..86e5b10798 100644
--- a/quantum/tests/unit/nicira/fake_nvpapiclient.py
+++ b/quantum/tests/unit/nicira/fake_nvpapiclient.py
@@ -19,6 +19,7 @@ import urlparse
 
 from quantum.openstack.common import log as logging
 from quantum.openstack.common import uuidutils
+from quantum.plugins.nicira.nicira_nvp_plugin import NvpApiClient
 
 
 LOG = logging.getLogger(__name__)
@@ -374,8 +375,7 @@ class FakeClient:
                  for res_uuid in res_dict if res_uuid == target_uuid]
         if items:
             return json.dumps(items[0])
-        raise Exception("show: resource %s:%s not found" %
-                        (resource_type, target_uuid))
+        raise NvpApiClient.ResourceNotFound()
 
     def handle_get(self, url):
         #TODO(salvatore-orlando): handle field selection
@@ -483,7 +483,10 @@ class FakeClient:
         if not response_file:
             raise Exception("resource not found")
         res_dict = getattr(self, '_fake_%s_dict' % res_type)
-        del res_dict[uuids[-1]]
+        try:
+            del res_dict[uuids[-1]]
+        except KeyError:
+            raise NvpApiClient.ResourceNotFound()
         return ""
 
     def fake_request(self, *args, **kwargs):
diff --git a/quantum/tests/unit/nicira/test_nicira_plugin.py b/quantum/tests/unit/nicira/test_nicira_plugin.py
index 261120fc42..f96b4bfe1a 100644
--- a/quantum/tests/unit/nicira/test_nicira_plugin.py
+++ b/quantum/tests/unit/nicira/test_nicira_plugin.py
@@ -21,6 +21,7 @@ import mock
 from oslo.config import cfg
 import webob.exc
 
+from quantum.common import constants
 import quantum.common.test_lib as test_lib
 from quantum import context
 from quantum.extensions import providernet as pnet
@@ -490,3 +491,105 @@ class TestNiciraQoSQueue(NiciraPluginV2TestCase):
         res = req.get_response(self.ext_api)
         queue = self.deserialize('json', res)
         self.assertEqual(queue['qos_queue']['max'], 20)
+
+
+class NiciraQuantumNVPOutOfSync(test_l3_plugin.L3NatTestCaseBase,
+                                NiciraPluginV2TestCase):
+
+    def test_delete_network_not_in_nvp(self):
+        res = self._create_network('json', 'net1', True)
+        net1 = self.deserialize('json', res)
+        self.fc._fake_lswitch_dict.clear()
+        req = self.new_delete_request('networks', net1['network']['id'])
+        res = req.get_response(self.api)
+        self.assertEqual(res.status_int, 204)
+
+    def test_list_networks_not_in_nvp(self):
+        res = self._create_network('json', 'net1', True)
+        self.deserialize('json', res)
+        self.fc._fake_lswitch_dict.clear()
+        req = self.new_list_request('networks')
+        nets = self.deserialize('json', req.get_response(self.api))
+        self.assertEquals(nets['networks'][0]['status'],
+                          constants.NET_STATUS_ERROR)
+
+    def test_show_network_not_in_nvp(self):
+        res = self._create_network('json', 'net1', True)
+        net = self.deserialize('json', res)
+        self.fc._fake_lswitch_dict.clear()
+        req = self.new_show_request('networks', net['network']['id'])
+        net = self.deserialize('json', req.get_response(self.api))
+        self.assertEquals(net['network']['status'],
+                          constants.NET_STATUS_ERROR)
+
+    def test_delete_port_not_in_nvp(self):
+        res = self._create_network('json', 'net1', True)
+        net1 = self.deserialize('json', res)
+        res = self._create_port('json', net1['network']['id'])
+        port = self.deserialize('json', res)
+        self.fc._fake_lswitch_lport_dict.clear()
+        req = self.new_delete_request('ports', port['port']['id'])
+        res = req.get_response(self.api)
+        self.assertEqual(res.status_int, 204)
+
+    def test_list_port_not_in_nvp(self):
+        res = self._create_network('json', 'net1', True)
+        net1 = self.deserialize('json', res)
+        res = self._create_port('json', net1['network']['id'])
+        self.deserialize('json', res)
+        self.fc._fake_lswitch_lport_dict.clear()
+        req = self.new_list_request('ports')
+        nets = self.deserialize('json', req.get_response(self.api))
+        self.assertEquals(nets['ports'][0]['status'],
+                          constants.PORT_STATUS_ERROR)
+
+    def test_show_port_not_in_nvp(self):
+        res = self._create_network('json', 'net1', True)
+        net1 = self.deserialize('json', res)
+        res = self._create_port('json', net1['network']['id'])
+        port = self.deserialize('json', res)
+        self.fc._fake_lswitch_lport_dict.clear()
+        req = self.new_show_request('ports', port['port']['id'])
+        net = self.deserialize('json', req.get_response(self.api))
+        self.assertEquals(net['port']['status'],
+                          constants.PORT_STATUS_ERROR)
+
+    def test_delete_port_and_network_not_in_nvp(self):
+        res = self._create_network('json', 'net1', True)
+        net1 = self.deserialize('json', res)
+        res = self._create_port('json', net1['network']['id'])
+        port = self.deserialize('json', res)
+        self.fc._fake_lswitch_dict.clear()
+        self.fc._fake_lswitch_lport_dict.clear()
+        req = self.new_delete_request('ports', port['port']['id'])
+        res = req.get_response(self.api)
+        self.assertEqual(res.status_int, 204)
+        req = self.new_delete_request('networks', net1['network']['id'])
+        res = req.get_response(self.api)
+        self.assertEqual(res.status_int, 204)
+
+    def test_delete_router_not_in_nvp(self):
+        res = self._create_router('json', 'tenant')
+        router = self.deserialize('json', res)
+        self.fc._fake_lrouter_dict.clear()
+        req = self.new_delete_request('routers', router['router']['id'])
+        res = req.get_response(self.ext_api)
+        self.assertEqual(res.status_int, 204)
+
+    def test_list_routers_not_in_nvp(self):
+        res = self._create_router('json', 'tenant')
+        self.deserialize('json', res)
+        self.fc._fake_lrouter_dict.clear()
+        req = self.new_list_request('routers')
+        routers = self.deserialize('json', req.get_response(self.ext_api))
+        self.assertEquals(routers['routers'][0]['status'],
+                          constants.NET_STATUS_ERROR)
+
+    def test_show_router_not_in_nvp(self):
+        res = self._create_router('json', 'tenant')
+        router = self.deserialize('json', res)
+        self.fc._fake_lrouter_dict.clear()
+        req = self.new_show_request('routers', router['router']['id'])
+        router = self.deserialize('json', req.get_response(self.ext_api))
+        self.assertEquals(router['router']['status'],
+                          constants.NET_STATUS_ERROR)