Merge "L3 Agent restart causes network outage"
This commit is contained in:
commit
f52f70334a
@ -217,8 +217,9 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
|
||||
self.updated_routers = set()
|
||||
self.removed_routers = set()
|
||||
self.sync_progress = False
|
||||
if self.conf.use_namespaces:
|
||||
self._destroy_router_namespaces(self.conf.router_id)
|
||||
|
||||
self._delete_stale_namespaces = (self.conf.use_namespaces and
|
||||
self.conf.router_delete_namespaces)
|
||||
|
||||
self.rpc_loop = loopingcall.FixedIntervalLoopingCall(
|
||||
self._rpc_loop)
|
||||
@ -241,28 +242,46 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
|
||||
LOG.error(msg)
|
||||
raise SystemExit(msg)
|
||||
|
||||
def _destroy_router_namespaces(self, only_router_id=None):
|
||||
"""Destroy router namespaces on the host to eliminate all stale
|
||||
linux devices, iptables rules, and namespaces.
|
||||
def _cleanup_namespaces(self, routers):
|
||||
"""Destroy stale router namespaces on host when L3 agent restarts
|
||||
|
||||
If only_router_id is passed, only destroy single namespace, to allow
|
||||
for multiple l3 agents on the same host, without stepping on each
|
||||
other's toes on init. This only makes sense if only_router_id is set.
|
||||
This routine is called when self._delete_stale_namespaces is True.
|
||||
|
||||
The argument routers is the list of routers that are recorded in
|
||||
the database as being hosted on this node.
|
||||
"""
|
||||
root_ip = ip_lib.IPWrapper(self.root_helper)
|
||||
for ns in root_ip.get_namespaces(self.root_helper):
|
||||
if ns.startswith(NS_PREFIX):
|
||||
router_id = ns[len(NS_PREFIX):]
|
||||
if only_router_id and not only_router_id == router_id:
|
||||
continue
|
||||
try:
|
||||
root_ip = ip_lib.IPWrapper(self.root_helper)
|
||||
|
||||
if self.conf.enable_metadata_proxy:
|
||||
self._destroy_metadata_proxy(router_id, ns)
|
||||
host_namespaces = root_ip.get_namespaces(self.root_helper)
|
||||
router_namespaces = set(ns for ns in host_namespaces
|
||||
if ns.startswith(NS_PREFIX))
|
||||
ns_to_ignore = set(NS_PREFIX + r['id'] for r in routers)
|
||||
ns_to_destroy = router_namespaces - ns_to_ignore
|
||||
except RuntimeError:
|
||||
LOG.exception(_('RuntimeError in obtaining router list '
|
||||
'for namespace cleanup.'))
|
||||
else:
|
||||
self._destroy_stale_router_namespaces(ns_to_destroy)
|
||||
|
||||
try:
|
||||
self._destroy_router_namespace(ns)
|
||||
except Exception:
|
||||
LOG.exception(_("Failed deleting namespace '%s'"), ns)
|
||||
def _destroy_stale_router_namespaces(self, router_namespaces):
|
||||
"""Destroys the stale router namespaces
|
||||
|
||||
The argument router_namespaces is a list of stale router namespaces.
|
||||
|
||||
As some stale router namespaces may not be able to be deleted, only
|
||||
one attempt will be made to delete them.
|
||||
"""
|
||||
for ns in router_namespaces:
|
||||
if self.conf.enable_metadata_proxy:
|
||||
self._destroy_metadata_proxy(ns[len(NS_PREFIX):], ns)
|
||||
|
||||
try:
|
||||
self._destroy_router_namespace(ns)
|
||||
except RuntimeError:
|
||||
LOG.exception(_('Failed to destroy stale router namespace '
|
||||
'%s'), ns)
|
||||
self._delete_stale_namespaces = False
|
||||
|
||||
def _destroy_router_namespace(self, namespace):
|
||||
ns_ip = ip_lib.IPWrapper(self.root_helper, namespace=namespace)
|
||||
@ -769,10 +788,19 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, manager.Manager):
|
||||
self._process_routers(routers, all_routers=True)
|
||||
self.fullsync = False
|
||||
LOG.debug(_("_sync_routers_task successfully completed"))
|
||||
except rpc_common.RPCException:
|
||||
LOG.exception(_("Failed synchronizing routers due to RPC error"))
|
||||
self.fullsync = True
|
||||
return
|
||||
except Exception:
|
||||
LOG.exception(_("Failed synchronizing routers"))
|
||||
self.fullsync = True
|
||||
|
||||
# Resync is not necessary for the cleanup of stale
|
||||
# namespaces.
|
||||
if self._delete_stale_namespaces:
|
||||
self._cleanup_namespaces(routers)
|
||||
|
||||
def after_start(self):
|
||||
LOG.info(_("L3 agent started"))
|
||||
|
||||
|
@ -780,55 +780,6 @@ class TestBasicRouterOperations(base.BaseTestCase):
|
||||
agent._process_router_delete()
|
||||
self.assertFalse(list(agent.removed_routers))
|
||||
|
||||
def test_destroy_namespace(self):
|
||||
|
||||
class FakeDev(object):
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
|
||||
self.mock_ip.get_namespaces.return_value = ['qrouter-foo',
|
||||
'qrouter-bar']
|
||||
self.mock_ip.get_devices.return_value = [FakeDev('qr-aaaa'),
|
||||
FakeDev('qgw-aaaa')]
|
||||
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
|
||||
pm = self.external_process.return_value
|
||||
pm.reset_mock()
|
||||
|
||||
agent._destroy_router_namespace = mock.MagicMock()
|
||||
agent._destroy_router_namespaces()
|
||||
|
||||
self.assertEqual(pm.disable.call_count, 2)
|
||||
|
||||
self.assertEqual(agent._destroy_router_namespace.call_count, 2)
|
||||
|
||||
def test_destroy_namespace_with_router_id(self):
|
||||
|
||||
class FakeDev(object):
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
|
||||
self.conf.router_id = _uuid()
|
||||
|
||||
namespaces = ['qrouter-foo', 'qrouter-' + self.conf.router_id]
|
||||
|
||||
self.mock_ip.get_namespaces.return_value = namespaces
|
||||
self.mock_ip.get_devices.return_value = [FakeDev('qr-aaaa'),
|
||||
FakeDev('qgw-aaaa')]
|
||||
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
|
||||
pm = self.external_process.return_value
|
||||
pm.reset_mock()
|
||||
|
||||
agent._destroy_router_namespace = mock.MagicMock()
|
||||
agent._destroy_router_namespaces(self.conf.router_id)
|
||||
|
||||
self.assertEqual(pm.disable.call_count, 1)
|
||||
|
||||
self.assertEqual(agent._destroy_router_namespace.call_count, 1)
|
||||
|
||||
def test_destroy_router_namespace_skips_ns_removal(self):
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
agent._destroy_router_namespace("fakens")
|
||||
@ -890,6 +841,7 @@ class TestBasicRouterOperations(base.BaseTestCase):
|
||||
self.conf.set_override('router_id', '1234')
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
self.assertEqual(['1234'], agent._router_ids())
|
||||
self.assertFalse(agent._delete_stale_namespaces)
|
||||
|
||||
def test_process_routers_with_no_ext_net_in_conf(self):
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
@ -988,6 +940,69 @@ class TestBasicRouterOperations(base.BaseTestCase):
|
||||
'-p tcp -m tcp --dport 8775 -j ACCEPT')
|
||||
self.assertEqual([rules], agent.metadata_filter_rules())
|
||||
|
||||
def _cleanup_namespace_test(self,
|
||||
stale_namespace_list,
|
||||
router_list,
|
||||
other_namespaces):
|
||||
self.conf.set_override('router_delete_namespaces', True)
|
||||
|
||||
good_namespace_list = [l3_agent.NS_PREFIX + r['id']
|
||||
for r in router_list]
|
||||
self.mock_ip.get_namespaces.return_value = (stale_namespace_list +
|
||||
good_namespace_list +
|
||||
other_namespaces)
|
||||
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
|
||||
self.assertTrue(agent._delete_stale_namespaces)
|
||||
|
||||
pm = self.external_process.return_value
|
||||
pm.reset_mock()
|
||||
|
||||
agent._destroy_router_namespace = mock.MagicMock()
|
||||
agent._cleanup_namespaces(router_list)
|
||||
|
||||
self.assertEqual(pm.disable.call_count, len(stale_namespace_list))
|
||||
self.assertEqual(agent._destroy_router_namespace.call_count,
|
||||
len(stale_namespace_list))
|
||||
expected_args = [mock.call(ns) for ns in stale_namespace_list]
|
||||
agent._destroy_router_namespace.assert_has_calls(expected_args,
|
||||
any_order=True)
|
||||
self.assertFalse(agent._delete_stale_namespaces)
|
||||
|
||||
def test_cleanup_namespace(self):
|
||||
self.conf.set_override('router_id', None)
|
||||
stale_namespaces = [l3_agent.NS_PREFIX + 'foo',
|
||||
l3_agent.NS_PREFIX + 'bar']
|
||||
other_namespaces = ['unknown']
|
||||
|
||||
self._cleanup_namespace_test(stale_namespaces,
|
||||
[],
|
||||
other_namespaces)
|
||||
|
||||
def test_cleanup_namespace_with_registered_router_ids(self):
|
||||
self.conf.set_override('router_id', None)
|
||||
stale_namespaces = [l3_agent.NS_PREFIX + 'cccc',
|
||||
l3_agent.NS_PREFIX + 'eeeee']
|
||||
router_list = [{'id': 'foo'}, {'id': 'aaaa'}]
|
||||
other_namespaces = ['qdhcp-aabbcc', 'unknown']
|
||||
|
||||
self._cleanup_namespace_test(stale_namespaces,
|
||||
router_list,
|
||||
other_namespaces)
|
||||
|
||||
def test_cleanup_namespace_with_conf_router_id(self):
|
||||
self.conf.set_override('router_id', 'bbbbb')
|
||||
stale_namespaces = [l3_agent.NS_PREFIX + 'cccc',
|
||||
l3_agent.NS_PREFIX + 'eeeee',
|
||||
l3_agent.NS_PREFIX + self.conf.router_id]
|
||||
router_list = [{'id': 'foo'}, {'id': 'aaaa'}]
|
||||
other_namespaces = ['qdhcp-aabbcc', 'unknown']
|
||||
|
||||
self._cleanup_namespace_test(stale_namespaces,
|
||||
router_list,
|
||||
other_namespaces)
|
||||
|
||||
|
||||
class TestL3AgentEventHandler(base.BaseTestCase):
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user