From 1f1ee39693cd494ba223ee2eda48a88c48b20b37 Mon Sep 17 00:00:00 2001 From: Vivekanandan Narasimhan Date: Tue, 23 Sep 2014 02:25:16 -0700 Subject: [PATCH] Race for l2pop when ports go up/down on same host With l2pop enabled, race exists in delete_port_postcommit when both create/update_port and delete_port deal with different ports on the same host, where such ports are either the first (or) last on same network for that host. This race happens outside the DB locking zones in the respective methods of ML2 plugin. To fix this, we have moved determination of fdb_entries back to delete_port_postcommit and removed delete_port_precommit altogether from l2pop mechanism driver. In order to accomodate dvr interfaces, we are storing and re-using the mechanism-driver context which hold dvr-port-binding information while invoking delete_port_postcommit. We loop through dvr interface bindings invoking delete_port_postcommit similar to delete_port_precommit. Closes-Bug: #1372438 Change-Id: If0502f57382441fdb4510c81a89794f57a38e696 --- .../plugins/ml2/drivers/l2pop/mech_driver.py | 27 +++++-------------- neutron/plugins/ml2/plugin.py | 14 +++++----- .../unit/ml2/drivers/test_l2population.py | 16 +++++++++++ 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/neutron/plugins/ml2/drivers/l2pop/mech_driver.py b/neutron/plugins/ml2/drivers/l2pop/mech_driver.py index a98a27fd7d..c85acb1c73 100644 --- a/neutron/plugins/ml2/drivers/l2pop/mech_driver.py +++ b/neutron/plugins/ml2/drivers/l2pop/mech_driver.py @@ -38,32 +38,18 @@ class L2populationMechanismDriver(api.MechanismDriver, LOG.debug(_("Experimental L2 population driver")) self.rpc_ctx = n_context.get_admin_context_without_session() self.migrated_ports = {} - self.remove_fdb_entries = {} def _get_port_fdb_entries(self, port): return [[port['mac_address'], ip['ip_address']] for ip in port['fixed_ips']] - def delete_port_precommit(self, context): - port = context.current - agent_host = context.host - - if port['id'] not in self.remove_fdb_entries: - self.remove_fdb_entries[port['id']] = {} - - self.remove_fdb_entries[port['id']][agent_host] = ( - self._update_port_down(context, port, agent_host, 1)) - def delete_port_postcommit(self, context): port = context.current agent_host = context.host - if port['id'] in self.remove_fdb_entries: - for agent_host in list(self.remove_fdb_entries[port['id']]): - self.L2populationAgentNotify.remove_fdb_entries( - self.rpc_ctx, - self.remove_fdb_entries[port['id']][agent_host]) - self.remove_fdb_entries[port['id']].pop(agent_host, 0) - self.remove_fdb_entries.pop(port['id'], 0) + + fdb_entries = self._update_port_down(context, port, agent_host) + self.L2populationAgentNotify.remove_fdb_entries(self.rpc_ctx, + fdb_entries) def _get_diff_ips(self, orig, port): orig_ips = set([ip['ip_address'] for ip in orig['fixed_ips']]) @@ -260,8 +246,7 @@ class L2populationMechanismDriver(api.MechanismDriver, self.L2populationAgentNotify.add_fdb_entries(self.rpc_ctx, other_fdb_entries) - def _update_port_down(self, context, port, agent_host, - agent_active_ports_count_for_flooding=0): + def _update_port_down(self, context, port, agent_host): port_infos = self._get_port_infos(context, port, agent_host) if not port_infos: return @@ -277,7 +262,7 @@ class L2populationMechanismDriver(api.MechanismDriver, {'segment_id': segment['segmentation_id'], 'network_type': segment['network_type'], 'ports': {agent_ip: []}}} - if agent_active_ports == agent_active_ports_count_for_flooding: + if agent_active_ports == 0: # Agent is removing its last activated port in this network, # other agents needs to be notified to delete their flooding entry. other_fdb_entries[network_id]['ports'][agent_ip].append( diff --git a/neutron/plugins/ml2/plugin.py b/neutron/plugins/ml2/plugin.py index d744f19fe1..72cf151006 100644 --- a/neutron/plugins/ml2/plugin.py +++ b/neutron/plugins/ml2/plugin.py @@ -990,7 +990,7 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2, port = self._make_port_dict(port_db) network = self.get_network(context, port['network_id']) - mech_context = None + bound_mech_contexts = [] device_owner = port['device_owner'] if device_owner == const.DEVICE_OWNER_DVR_INTERFACE: bindings = db.get_dvr_port_bindings(context.session, id) @@ -998,6 +998,7 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2, mech_context = driver_context.DvrPortContext( self, context, port, network, bind) self.mechanism_manager.delete_port_precommit(mech_context) + bound_mech_contexts.append(mech_context) else: mech_context = driver_context.PortContext(self, context, port, network, binding) @@ -1005,6 +1006,7 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2, router_info = l3plugin.dvr_deletens_if_no_port(context, id) removed_routers += router_info self.mechanism_manager.delete_port_precommit(mech_context) + bound_mech_contexts.append(mech_context) self._delete_port_security_group_bindings(context, id) if l3plugin: router_ids = l3plugin.disassociate_floatingips( @@ -1029,12 +1031,10 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2, {'id': router['router_id'], 'agent': router['agent_id']}) try: - # for both normal and DVR Interface ports, only one invocation of - # delete_port_postcommit. We use gather/scatter technique for DVR - # interface ports, where the bindings are gathered in - # delete_port_precommit() call earlier and scattered as l2pop - # rules to cloud nodes in delete_port_postcommit() here - if mech_context: + # Note that DVR Interface ports will have bindings on + # multiple hosts, and so will have multiple mech_contexts, + # while other ports typically have just one. + for mech_context in bound_mech_contexts: self.mechanism_manager.delete_port_postcommit(mech_context) except ml2_exc.MechanismDriverError: # TODO(apech) - One or more mechanism driver failed to diff --git a/neutron/tests/unit/ml2/drivers/test_l2population.py b/neutron/tests/unit/ml2/drivers/test_l2population.py index cfe76d3a57..85d8b37a73 100644 --- a/neutron/tests/unit/ml2/drivers/test_l2population.py +++ b/neutron/tests/unit/ml2/drivers/test_l2population.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. +import contextlib import mock from neutron.common import constants @@ -24,6 +25,7 @@ from neutron.extensions import providernet as pnet from neutron import manager from neutron.openstack.common import timeutils from neutron.plugins.ml2 import config as config +from neutron.plugins.ml2.drivers.l2pop import mech_driver as l2pop_mech_driver from neutron.plugins.ml2 import managers from neutron.plugins.ml2 import rpc from neutron.tests.unit import test_db_plugin as test_plugin @@ -794,3 +796,17 @@ class TestL2PopulationRpcTestCase(test_plugin.NeutronDbPluginV2TestCase): self.mock_fanout.assert_called_with( mock.ANY, expected, topic=self.fanout_topic) + + def test_delete_port_invokes_update_device_down(self): + l2pop_mech = l2pop_mech_driver.L2populationMechanismDriver() + l2pop_mech.L2PopulationAgentNotify = mock.Mock() + l2pop_mech.rpc_ctx = mock.Mock() + with contextlib.nested( + mock.patch.object(l2pop_mech, + '_update_port_down', + return_value=None), + mock.patch.object(l2pop_mech.L2PopulationAgentNotify, + 'remove_fdb_entries')) as (upd_port_down, + rem_fdb_entries): + l2pop_mech.delete_port_postcommit(mock.Mock()) + self.assertTrue(upd_port_down.called) \ No newline at end of file