Fix linuxbridge agent crash caused by a tap device deletion timing issue

Fixes bug 1136264

Add try/except in the daemon loop to prevent the agent from being brought
down by an unhandled exception.

Change-Id: I99a4f2b485f134b240630a895378d0f61d6382d8
This commit is contained in:
He Jie Xu 2013-03-05 09:52:01 +08:00
parent 401504541e
commit 5376c174bd
2 changed files with 76 additions and 10 deletions

View File

@ -601,16 +601,24 @@ class LinuxBridgeQuantumAgentRPC(sg_rpc.SecurityGroupAgentRpcMixin):
LOG.info(_("Agent out of sync with plugin!")) LOG.info(_("Agent out of sync with plugin!"))
devices.clear() devices.clear()
sync = False sync = False
device_info = {}
device_info = self.br_mgr.update_devices(devices) try:
device_info = self.br_mgr.update_devices(devices)
# notify plugin about device deltas except Exception:
if device_info: LOG.exception(_("Update devices failed"))
LOG.debug(_("Agent loop has new devices!")) sync = True
# If treat devices fails - indicates must resync with plugin try:
sync = self.process_network_devices(device_info) # notify plugin about device deltas
devices = device_info['current'] if device_info:
LOG.debug(_("Agent loop has new devices!"))
# If treat devices fails - indicates must resync with
# plugin
sync = self.process_network_devices(device_info)
devices = device_info['current']
except Exception:
LOG.exception(_("Error in agent loop. Devices info: %s"),
device_info)
sync = True
# sleep till end of polling interval # sleep till end of polling interval
elapsed = (time.time() - start) elapsed = (time.time() - start)
if (elapsed < self.polling_interval): if (elapsed < self.polling_interval):

View File

@ -14,6 +14,8 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import contextlib
import mock import mock
from oslo.config import cfg from oslo.config import cfg
import testtools import testtools
@ -52,3 +54,59 @@ class TestLinuxBridge(testtools.TestCase):
result = self.linux_bridge.ensure_physical_in_bridge( result = self.linux_bridge.ensure_physical_in_bridge(
'network_id', 'physnet1', 7) 'network_id', 'physnet1', 7)
self.assertTrue(vlan_bridge_func.called) self.assertTrue(vlan_bridge_func.called)
class TestLinuxBridgeAgent(testtools.TestCase):
    """Tests that daemon_loop keeps running when device handling fails.

    LinuxBridgeManager is patched for every test, so the agent under test
    talks to a mock manager. Each test makes one step of the loop raise
    RuntimeError and then checks that the loop carried on far enough for
    LOG.info to be called three times.
    """

    def setUp(self):
        super(TestLinuxBridgeAgent, self).setUp()
        # Patch the manager class where the agent module looks it up, so the
        # agent built in each test gets ``self.lbmgr_mock.return_value``.
        self.lbmgr_patcher = mock.patch('quantum.plugins.linuxbridge.agent.'
                                        'linuxbridge_quantum_agent.'
                                        'LinuxBridgeManager')
        self.lbmgr_mock = self.lbmgr_patcher.start()
        self.addCleanup(self.lbmgr_patcher.stop)

    @staticmethod
    def _raise_on_third_call():
        """Return a side effect that raises RuntimeError on its third call.

        The first two calls return None. Any arguments are accepted and
        ignored, so the side effect works however LOG.info is invoked.
        """
        # Kept in a list so the nested function can update it in place.
        calls = [0]

        def info_mock(*args, **kwargs):
            if calls[0] < 2:
                calls[0] += 1
            else:
                raise RuntimeError()

        return info_mock

    def _make_agent(self):
        """Build the agent under test with the arguments both tests use."""
        return linuxbridge_quantum_agent.LinuxBridgeQuantumAgentRPC({},
                                                                     0,
                                                                     None)

    def _assert_loop_survives_until_third_info(self, agent, log):
        """Run daemon_loop and check it stops only on the third LOG.info.

        ``log`` is the mock standing in for LOG.info. Its third call raises
        RuntimeError, which is the only way these tests leave the loop
        (presumably daemon_loop never returns on its own -- TODO confirm).
        Reaching a third call shows the injected failure did not escape.
        """
        log.side_effect = self._raise_on_third_call()
        with testtools.ExpectedException(RuntimeError):
            agent.daemon_loop()
        self.assertEqual(3, log.call_count)

    def test_update_devices_failed(self):
        """A failure in br_mgr.update_devices must not stop the loop."""
        lbmgr_instance = self.lbmgr_mock.return_value
        lbmgr_instance.update_devices.side_effect = RuntimeError
        agent = self._make_agent()

        with mock.patch.object(linuxbridge_quantum_agent.LOG, 'info') as log:
            self._assert_loop_survives_until_third_info(agent, log)

    def test_process_network_devices_failed(self):
        """A failure in process_network_devices must not stop the loop."""
        device_info = {'current': [1, 2, 3]}
        lbmgr_instance = self.lbmgr_mock.return_value
        lbmgr_instance.update_devices.return_value = device_info
        agent = self._make_agent()

        # Plain nested ``with`` blocks replace the deprecated
        # contextlib.nested and behave the same on every Python version.
        with mock.patch.object(linuxbridge_quantum_agent.LOG, 'info') as log:
            with mock.patch.object(
                    agent, 'process_network_devices') as process_mock:
                process_mock.side_effect = RuntimeError
                self._assert_loop_survives_until_third_info(agent, log)