From 4f79cb3932f2518ab3f06b86ceea065cbb399e8c Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Tue, 16 Jan 2018 09:21:37 -0800 Subject: [PATCH] Don't try to lock for vif detach Historically, we did not have a prohibition upon removing a VIF entry stored in the extra field, however the VIF attachment/detachment feature resulted in a task being created which by default attempts to pull a reservation lock unless explicitly shared. This is problematic as part of the process of undeploying a node as exclusive locks are generated. Presently, if any of those locked tasks run long, such as a new image being required or for some crazy reason, the BMC power request hangs for a few minutes, the VIF record may be orphaned and never removed, as the expectation is that nova deletes the VIF record from ironic. This allows the VIF record to be removed when a node is no longer in active use and possibly subject to a lock being held for a long period of time, such as when setting up for CLEANING. Additionally, this patch moves the actual VIF record deletion until after the detachment action in the event that it fails. This allows for the state in ironic to be consistent instead of the record being removed before the detachment occurs. Change-Id: Ib7544e43a2b26441d4f562b584bbc7fee6a11fea Closes-Bug: #1743652 --- ironic/conductor/manager.py | 5 +++++ ironic/drivers/base.py | 4 ++++ ironic/drivers/modules/network/common.py | 4 ++-- ironic/tests/unit/conductor/test_manager.py | 17 +++++++++-------- .../drivers/modules/network/test_common.py | 18 +++++++++++++++++- ...if-detach-locking-fix-7be66f8150e19819.yaml | 8 ++++++++ 6 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 releasenotes/notes/vif-detach-locking-fix-7be66f8150e19819.yaml diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index fde7cabc09..b62ce6d623 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -2943,7 +2943,12 @@ class ConductorManager(base_manager.BaseConductorManager): """ LOG.debug("RPC vif_detach called for the node %(node_id)s with " "vif_id %(vif_id)s", {'node_id': node_id, 'vif_id': vif_id}) + # NOTE(TheJulia): This task is explicitly called without a lock as + # long lived locks occur during node tear-down as a node goes into + # cleaning. The lack of a lock allows nova to remove the VIF record. + # See: https://bugs.launchpad.net/ironic/+bug/1743652 with task_manager.acquire(context, node_id, + shared=True, purpose='detach vif') as task: task.driver.network.validate(task) task.driver.network.vif_detach(task, vif_id) diff --git a/ironic/drivers/base.py b/ironic/drivers/base.py index 7412f88c2b..33e77ebf21 100644 --- a/ironic/drivers/base.py +++ b/ironic/drivers/base.py @@ -1031,6 +1031,10 @@ class NetworkInterface(BaseInterface): def vif_detach(self, task, vif_id): """Detach a virtual network interface from a node + Warning: This method is called by the conductor as a shared + task, to ensure that a vif can be detached during a long-lived lock. + Such as those that occur when a node is being unprovisioned. + :param task: A TaskManager instance. :param vif_id: A VIF ID to detach :raises: NetworkError, VifNotAttached diff --git a/ironic/drivers/modules/network/common.py b/ironic/drivers/modules/network/common.py index dd99da1411..5236f45cd8 100644 --- a/ironic/drivers/modules/network/common.py +++ b/ironic/drivers/modules/network/common.py @@ -581,8 +581,8 @@ class NeutronVIFPortIDMixin(VIFPortIDMixin): # attached, and fail if not. port_like_obj = self._get_port_like_obj_by_vif_id(task, vif_id) - self._clear_vif_from_port_like_obj(port_like_obj) - # NOTE(vsaienko) allow to unplug VIFs from ACTIVE instance. if task.node.provision_state == states.ACTIVE: neutron.unbind_neutron_port(vif_id, context=task.context) + + self._clear_vif_from_port_like_obj(port_like_obj) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 59fc87c8c1..feaa32bdf0 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -35,6 +35,7 @@ from ironic.common import boot_devices from ironic.common import driver_factory from ironic.common import exception from ironic.common import images +from ironic.common import neutron from ironic.common import states from ironic.common import swift from ironic.conductor import manager @@ -4344,17 +4345,17 @@ class VifTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_detach.assert_called_once_with(mock.ANY, "interface") mock_valid.assert_called_once_with(mock.ANY, mock.ANY) - @mock.patch.object(n_flat.FlatNetwork, 'vif_detach', autpspec=True) - def test_vif_detach_node_locked(self, mock_detach, mock_valid): + @mock.patch.object(neutron, 'unbind_neutron_port', autpspec=True) + def test_vif_detach_node_is_locked(self, mock_detach, mock_valid): node = obj_utils.create_test_node(self.context, driver='fake', reservation='fake-reserv') - exc = self.assertRaises(messaging.rpc.ExpectedException, - self.service.vif_detach, - self.context, node.uuid, "interface") - # Compare true exception hidden by @messaging.expected_exceptions - self.assertEqual(exception.NodeLocked, exc.exc_info[0]) + obj_utils.create_test_port(self.context, + node_id=node.id, + internal_info={ + 'tenant_vif_port_id': 'fake-id'}) + self.service.vif_detach(self.context, node.uuid, 'fake-id') self.assertFalse(mock_detach.called) - self.assertFalse(mock_valid.called) + self.assertTrue(mock_valid.called) @mock.patch.object(n_flat.FlatNetwork, 'vif_detach', autpspec=True) def test_vif_detach_raises_network_error(self, mock_detach, diff --git a/ironic/tests/unit/drivers/modules/network/test_common.py b/ironic/tests/unit/drivers/modules/network/test_common.py index a64fc348fe..cccb2c0166 100644 --- a/ironic/tests/unit/drivers/modules/network/test_common.py +++ b/ironic/tests/unit/drivers/modules/network/test_common.py @@ -941,12 +941,28 @@ class TestNeutronVifPortIDMixin(db_base.DbTestCase): self.node.save() mock_get.return_value = self.port mock_unp.side_effect = exception.NetworkError - with task_manager.acquire(self.context, self.node.id) as task: + with task_manager.acquire(self.context, self.node.id, + shared=True) as task: self.assertRaises(exception.NetworkError, self.interface.vif_detach, task, 'fake_vif_id') mock_unp.assert_called_once_with('fake_vif_id', context=task.context) mock_get.assert_called_once_with(task, 'fake_vif_id') + mock_clear.assert_not_called() + + @mock.patch.object(common.VIFPortIDMixin, '_clear_vif_from_port_like_obj') + @mock.patch.object(neutron_common, 'unbind_neutron_port', autospec=True) + @mock.patch.object(common.VIFPortIDMixin, '_get_port_like_obj_by_vif_id') + def test_vif_detach_deleting_node_success(self, mock_get, mock_unp, + mock_clear): + self.node.provision_state = states.DELETING + self.node.save() + mock_get.return_value = self.port + with task_manager.acquire(self.context, self.node.id, + shared=True) as task: + self.interface.vif_detach(task, 'fake_vif_id') + self.assertFalse(mock_unp.called) + mock_get.assert_called_once_with(task, 'fake_vif_id') mock_clear.assert_called_once_with(self.port) @mock.patch.object(common_utils, 'warn_about_deprecated_extra_vif_port_id', diff --git a/releasenotes/notes/vif-detach-locking-fix-7be66f8150e19819.yaml b/releasenotes/notes/vif-detach-locking-fix-7be66f8150e19819.yaml new file mode 100644 index 0000000000..3a53383c39 --- /dev/null +++ b/releasenotes/notes/vif-detach-locking-fix-7be66f8150e19819.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Addresses a condition where the Compute Service may have been unable to + remove VIF attachment records while a baremetal node is being unprovisiond. This + condition resulted in VIF records being orphaned, blocking future + deployments without manual intervention. + See `bug 1743652 `_ for more details.