Handle racing condition in OFC port deletion

Multiple delete_port operations can run in parallel.
For such case later operation(s) will receive 404 (NotFound)
error from OFC or NotResultFound sqlalchemy exception.
These are valid exceptions and they should be ignored
in delete_port operations.

Closes-Bug: #1281574

OFCConsistencyBroken is renamed to OFCMappingNotFound
because when multiple delete_port operations run in parallel
OFCConsistencyBroken can occur and it is a valid case
so the excepion name looks inappropriate.

Change-Id: I1511d55994c88b8828f0ff62610c18ddc6dfac8f
This commit is contained in:
Akihiro Motoki 2014-02-18 17:01:01 +09:00 committed by Gerrit Code Review
parent cdb1c7a760
commit 5c6808763d
8 changed files with 84 additions and 18 deletions

View File

@ -28,13 +28,18 @@ class OFCException(qexc.NeutronException):
self.err_code = kwargs.get('err_code') self.err_code = kwargs.get('err_code')
class OFCResourceNotFound(qexc.NotFound):
message = _("The specified OFC resource (%(resource)s) is not found.")
class NECDBException(qexc.NeutronException): class NECDBException(qexc.NeutronException):
message = _("An exception occurred in NECPluginV2 DB: %(reason)s") message = _("An exception occurred in NECPluginV2 DB: %(reason)s")
class OFCConsistencyBroken(qexc.NeutronException): class OFCMappingNotFound(qexc.NotFound):
message = _("Consistency of neutron-OFC resource map is broken: " message = _("Neutron-OFC resource mapping for "
"%(reason)s") "%(resource)s %(neutron_id)s is not found. "
"It may be deleted during processing.")
class PortInfoNotFound(qexc.NotFound): class PortInfoNotFound(qexc.NotFound):

View File

@ -95,6 +95,10 @@ class OFCClient(object):
httplib.ACCEPTED, httplib.ACCEPTED,
httplib.NO_CONTENT): httplib.NO_CONTENT):
return data return data
elif res.status == httplib.NOT_FOUND:
LOG.info(_("Specified resource %s does not exist on OFC "),
action)
raise nexc.OFCResourceNotFound(resource=action)
else: else:
LOG.warning(_("Operation on OFC failed: " LOG.warning(_("Operation on OFC failed: "
"status=%(status)s, detail=%(detail)s"), "status=%(status)s, detail=%(detail)s"),

View File

@ -144,9 +144,8 @@ def get_ofc_id_lookup_both(session, resource, neutron_id):
ofc_id = get_ofc_id(session, resource, neutron_id, ofc_id = get_ofc_id(session, resource, neutron_id,
old_style=True) old_style=True)
if not ofc_id: if not ofc_id:
reason = (_("NotFound %(resource)s for neutron_id=%(id)s.") raise nexc.OFCMappingNotFound(resource=resource,
% {'resource': resource, 'id': neutron_id}) neutron_id=neutron_id)
raise nexc.OFCConsistencyBroken(reason=reason)
return ofc_id return ofc_id

View File

@ -187,7 +187,7 @@ class NECPluginV2(db_base_plugin_v2.NeutronDbPluginV2,
else: else:
LOG.debug(_('_cleanup_ofc_tenant: No OFC tenant for %s'), LOG.debug(_('_cleanup_ofc_tenant: No OFC tenant for %s'),
tenant_id) tenant_id)
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
reason = _("delete_ofc_tenant() failed due to %s") % exc reason = _("delete_ofc_tenant() failed due to %s") % exc
LOG.warn(reason) LOG.warn(reason)
@ -223,7 +223,7 @@ class NECPluginV2(db_base_plugin_v2.NeutronDbPluginV2,
try: try:
self.ofc.create_ofc_port(context, port['id'], port) self.ofc.create_ofc_port(context, port['id'], port)
port_status = const.PORT_STATUS_ACTIVE port_status = const.PORT_STATUS_ACTIVE
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
LOG.error(_("create_ofc_port() failed due to %s"), exc) LOG.error(_("create_ofc_port() failed due to %s"), exc)
port_status = const.PORT_STATUS_ERROR port_status = const.PORT_STATUS_ERROR
@ -244,7 +244,23 @@ class NECPluginV2(db_base_plugin_v2.NeutronDbPluginV2,
try: try:
self.ofc.delete_ofc_port(context, port['id'], port) self.ofc.delete_ofc_port(context, port['id'], port)
port_status = const.PORT_STATUS_DOWN port_status = const.PORT_STATUS_DOWN
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCResourceNotFound, nexc.OFCMappingNotFound):
# There is a case where multiple delete_port operation are
# running concurrently. For example, delete_port from
# release_dhcp_port and deletion of network owned ports in
# delete_network. In such cases delete_ofc_port may receive
# 404 error from OFC.
# Also there is a case where neutron port is deleted
# between exists_ofc_port and get_ofc_id in delete_ofc_port.
# In this case OFCMappingNotFound is raised.
# These two cases are valid situations.
LOG.info(_("deactivate_port(): OFC port for port=%s is "
"already removed."), port['id'])
# The port is already removed, so there is no need
# to update status in the database.
port['status'] = const.PORT_STATUS_DOWN
return port
except nexc.OFCException as exc:
LOG.error(_("delete_ofc_port() failed due to %s"), exc) LOG.error(_("delete_ofc_port() failed due to %s"), exc)
port_status = const.PORT_STATUS_ERROR port_status = const.PORT_STATUS_ERROR
@ -282,7 +298,7 @@ class NECPluginV2(db_base_plugin_v2.NeutronDbPluginV2,
if not self.ofc.exists_ofc_tenant(context, tenant_id): if not self.ofc.exists_ofc_tenant(context, tenant_id):
self.ofc.create_ofc_tenant(context, tenant_id) self.ofc.create_ofc_tenant(context, tenant_id)
self.ofc.create_ofc_network(context, tenant_id, net_id, net_name) self.ofc.create_ofc_network(context, tenant_id, net_id, net_name)
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
LOG.error(_("Failed to create network id=%(id)s on " LOG.error(_("Failed to create network id=%(id)s on "
"OFC: %(exc)s"), {'id': net_id, 'exc': exc}) "OFC: %(exc)s"), {'id': net_id, 'exc': exc})
network['network']['status'] = const.NET_STATUS_ERROR network['network']['status'] = const.NET_STATUS_ERROR
@ -368,7 +384,7 @@ class NECPluginV2(db_base_plugin_v2.NeutronDbPluginV2,
try: try:
self.ofc.delete_ofc_network(context, id, net_db) self.ofc.delete_ofc_network(context, id, net_db)
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
reason = _("delete_network() failed due to %s") % exc reason = _("delete_network() failed due to %s") % exc
LOG.error(reason) LOG.error(reason)
self._update_resource_status(context, "network", net_db['id'], self._update_resource_status(context, "network", net_db['id'],

View File

@ -127,7 +127,7 @@ class PacketFilterMixin(pf_db.PacketFilterDbMixin):
self.ofc.create_ofc_packet_filter(context, pf_id, self.ofc.create_ofc_packet_filter(context, pf_id,
packet_filter) packet_filter)
pf_status = pf_db.PF_STATUS_ACTIVE pf_status = pf_db.PF_STATUS_ACTIVE
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
LOG.error(_("Failed to create packet_filter id=%(id)s on " LOG.error(_("Failed to create packet_filter id=%(id)s on "
"OFC: %(exc)s"), {'id': pf_id, 'exc': str(exc)}) "OFC: %(exc)s"), {'id': pf_id, 'exc': str(exc)})
pf_status = pf_db.PF_STATUS_ERROR pf_status = pf_db.PF_STATUS_ERROR
@ -153,7 +153,7 @@ class PacketFilterMixin(pf_db.PacketFilterDbMixin):
try: try:
self.ofc.delete_ofc_packet_filter(context, pf_id) self.ofc.delete_ofc_packet_filter(context, pf_id)
pf_status = pf_db.PF_STATUS_DOWN pf_status = pf_db.PF_STATUS_DOWN
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
LOG.error(_("Failed to delete packet_filter id=%(id)s from " LOG.error(_("Failed to delete packet_filter id=%(id)s from "
"OFC: %(exc)s"), {'id': pf_id, 'exc': str(exc)}) "OFC: %(exc)s"), {'id': pf_id, 'exc': str(exc)})
pf_status = pf_db.PF_STATUS_ERROR pf_status = pf_db.PF_STATUS_ERROR

View File

@ -119,7 +119,7 @@ class RouterOpenFlowDriver(RouterDriverBase):
new_status) new_status)
router['status'] = new_status router['status'] = new_status
return router return router
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
if (isinstance(exc, nexc.OFCException) and if (isinstance(exc, nexc.OFCException) and
exc.status == httplib.CONFLICT): exc.status == httplib.CONFLICT):
@ -151,7 +151,7 @@ class RouterOpenFlowDriver(RouterDriverBase):
self.plugin._update_resource_status( self.plugin._update_resource_status(
context, "router", router_id, new_status) context, "router", router_id, new_status)
new_router['status'] = new_status new_router['status'] = new_status
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
reason = _("_update_ofc_routes() failed due to %s") % exc reason = _("_update_ofc_routes() failed due to %s") % exc
LOG.error(reason) LOG.error(reason)
@ -164,7 +164,7 @@ class RouterOpenFlowDriver(RouterDriverBase):
def delete_router(self, context, router_id, router): def delete_router(self, context, router_id, router):
try: try:
self.ofc.delete_ofc_router(context, router_id, router) self.ofc.delete_ofc_router(context, router_id, router)
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
LOG.error(_("delete_router() failed due to %s"), exc) LOG.error(_("delete_router() failed due to %s"), exc)
self.plugin._update_resource_status( self.plugin._update_resource_status(
@ -195,7 +195,7 @@ class RouterOpenFlowDriver(RouterDriverBase):
self.plugin._update_resource_status( self.plugin._update_resource_status(
context, "port", port_id, new_status) context, "port", port_id, new_status)
return port return port
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
reason = _("add_router_interface() failed due to %s") % exc reason = _("add_router_interface() failed due to %s") % exc
LOG.error(reason) LOG.error(reason)
@ -213,7 +213,7 @@ class RouterOpenFlowDriver(RouterDriverBase):
new_status) new_status)
port['status'] = new_status port['status'] = new_status
return port return port
except (nexc.OFCException, nexc.OFCConsistencyBroken) as exc: except (nexc.OFCException, nexc.OFCMappingNotFound) as exc:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
reason = _("delete_router_interface() failed due to %s") % exc reason = _("delete_router_interface() failed due to %s") % exc
LOG.error(reason) LOG.error(reason)

View File

@ -843,3 +843,40 @@ class TestNecPluginOfcManager(NecPluginV2TestCase):
] ]
self.ofc.assert_has_calls(expected) self.ofc.assert_has_calls(expected)
self.assertEqual(self.ofc.delete_ofc_port.call_count, 2) self.assertEqual(self.ofc.delete_ofc_port.call_count, 2)
def _test_delete_port_for_disappeared_ofc_port(self, raised_exc):
self.ofc.set_raise_exc('delete_ofc_port', raised_exc)
with self.port(no_delete=True) as port:
port_id = port['port']['id']
portinfo = {'id': port_id, 'port_no': 123}
self.rpcapi_update_ports(added=[portinfo])
self._delete('ports', port_id)
# Check the port on neutron db is deleted. NotFound for
# neutron port itself should be handled by called. It is
# consistent with ML2 behavior, but it may need to be
# revisit.
self._show('ports', port_id,
expected_code=webob.exc.HTTPNotFound.code)
ctx = mock.ANY
port = mock.ANY
expected = [
mock.call.exists_ofc_port(ctx, port_id),
mock.call.create_ofc_port(ctx, port_id, port),
mock.call.exists_ofc_port(ctx, port_id),
mock.call.delete_ofc_port(ctx, port_id, port),
]
self.ofc.assert_has_calls(expected)
self.assertEqual(self.ofc.delete_ofc_port.call_count, 1)
def test_delete_port_for_nonexist_ofc_port(self):
self._test_delete_port_for_disappeared_ofc_port(
nexc.OFCResourceNotFound(resource='ofc_port'))
def test_delete_port_for_noofcmap_ofc_port(self):
self._test_delete_port_for_disappeared_ofc_port(
nexc.OFCMappingNotFound(resource='port', neutron_id='port1'))

View File

@ -72,6 +72,11 @@ class OFCClientTest(base.BaseTestCase):
for status in [201, 202, 204]: for status in [201, 202, 204]:
self._test_do_request(status, None, None) self._test_do_request(status, None, None)
def test_do_request_returns_404(self):
resbody = ''
errmsg = _("The specified OFC resource (/somewhere) is not found.")
self._test_do_request(404, resbody, errmsg, nexc.OFCResourceNotFound)
def test_do_request_error_no_body(self): def test_do_request_error_no_body(self):
errmsg = _("An OFC exception has occurred: Operation on OFC failed") errmsg = _("An OFC exception has occurred: Operation on OFC failed")
exc_checks = {'status': 400, 'err_code': None, 'err_msg': None} exc_checks = {'status': 400, 'err_code': None, 'err_msg': None}