Add periodic check resync check to DHCP agent
fixes bug 1047605 This patch adds a periodic resync check to the DHCP agent that will resync state with the Quantum server if any notification or rpc errors have occurred since the last check. Change-Id: I879662ff44f2991cd2ff07062bb4e770a2981528
This commit is contained in:
parent
a79e8c465d
commit
fb100db4fc
@ -7,6 +7,10 @@
|
||||
# devstack installation.
|
||||
state_path = /opt/stack/data
|
||||
|
||||
# The DHCP agent will resync its state with Quantum to recover from any
|
||||
# transient notification or rpc errors. The interval is number of
|
||||
# seconds between attempts.
|
||||
# resync_interval = 30
|
||||
|
||||
# The DHCP requires that an inteface driver be set. Choose the one that best
|
||||
# matches you plugin.
|
||||
|
@ -46,6 +46,7 @@ NS_PREFIX = 'qdhcp-'
|
||||
class DhcpAgent(object):
|
||||
OPTS = [
|
||||
cfg.StrOpt('root_helper', default='sudo'),
|
||||
cfg.IntOpt('resync_interval', default=30),
|
||||
cfg.StrOpt('dhcp_driver',
|
||||
default='quantum.agent.linux.dhcp.Dnsmasq',
|
||||
help="The driver used to manage the DHCP server."),
|
||||
@ -54,6 +55,7 @@ class DhcpAgent(object):
|
||||
]
|
||||
|
||||
def __init__(self, conf):
|
||||
self.needs_resync = False
|
||||
self.conf = conf
|
||||
self.cache = NetworkCache()
|
||||
|
||||
@ -67,10 +69,8 @@ class DhcpAgent(object):
|
||||
|
||||
def run(self):
|
||||
"""Activate the DHCP agent."""
|
||||
# enable DHCP for current networks
|
||||
for network_id in self.plugin_rpc.get_active_networks():
|
||||
self.enable_dhcp_helper(network_id)
|
||||
|
||||
self.sync_state()
|
||||
self.periodic_resync()
|
||||
self.lease_relay.start()
|
||||
self.notifications.run_dispatch(self)
|
||||
|
||||
@ -92,17 +92,51 @@ class DhcpAgent(object):
|
||||
return True
|
||||
|
||||
except Exception, e:
|
||||
self.needs_resync = True
|
||||
LOG.exception('Unable to %s dhcp.' % action)
|
||||
|
||||
def update_lease(self, network_id, ip_address, time_remaining):
|
||||
self.plugin_rpc.update_lease_expiration(network_id, ip_address,
|
||||
time_remaining)
|
||||
try:
|
||||
self.plugin_rpc.update_lease_expiration(network_id, ip_address,
|
||||
time_remaining)
|
||||
except:
|
||||
self.needs_resync = True
|
||||
LOG.exception(_('Unable to update lease'))
|
||||
|
||||
def sync_state(self):
|
||||
"""Sync the local DHCP state with Quantum."""
|
||||
LOG.info(_('Synchronizing state'))
|
||||
known_networks = set(self.cache.get_network_ids())
|
||||
|
||||
try:
|
||||
active_networks = set(self.plugin_rpc.get_active_networks())
|
||||
for deleted_id in known_networks - active_networks:
|
||||
self.disable_dhcp_helper(deleted_id)
|
||||
|
||||
for network_id in active_networks:
|
||||
self.refresh_dhcp_helper(network_id)
|
||||
except:
|
||||
self.needs_resync = True
|
||||
LOG.exception(_('Unable to sync network state.'))
|
||||
|
||||
def _periodic_resync_helper(self):
|
||||
"""Resync the dhcp state at the configured interval."""
|
||||
while True:
|
||||
eventlet.sleep(self.conf.resync_interval)
|
||||
if self.needs_resync:
|
||||
self.needs_resync = False
|
||||
self.sync_state()
|
||||
|
||||
def periodic_resync(self):
|
||||
"""Spawn a thread to periodically resync the dhcp state."""
|
||||
eventlet.spawn(self._periodic_resync_helper)
|
||||
|
||||
def enable_dhcp_helper(self, network_id):
|
||||
"""Enable DHCP for a network that meets enabling criteria."""
|
||||
try:
|
||||
network = self.plugin_rpc.get_network_info(network_id)
|
||||
except:
|
||||
self.needs_resync = True
|
||||
LOG.exception(_('Network %s RPC info call failed.') % network_id)
|
||||
return
|
||||
|
||||
@ -119,8 +153,8 @@ class DhcpAgent(object):
|
||||
"""Disable DHCP for a network known to the agent."""
|
||||
network = self.cache.get_network_by_id(network_id)
|
||||
if network:
|
||||
self.call_driver('disable', network)
|
||||
self.cache.remove(network)
|
||||
if self.call_driver('disable', network):
|
||||
self.cache.remove(network)
|
||||
|
||||
def refresh_dhcp_helper(self, network_id):
|
||||
"""Refresh or disable DHCP for a network depending on the current state
|
||||
@ -132,7 +166,12 @@ class DhcpAgent(object):
|
||||
# DHCP current not running for network.
|
||||
return self.enable_dhcp_helper(network_id)
|
||||
|
||||
network = self.plugin_rpc.get_network_info(network_id)
|
||||
try:
|
||||
network = self.plugin_rpc.get_network_info(network_id)
|
||||
except:
|
||||
self.needs_resync = True
|
||||
LOG.exception(_('Network %s RPC info call failed.') % network_id)
|
||||
return
|
||||
|
||||
old_cidrs = set(s.cidr for s in old_network.subnets if s.enable_dhcp)
|
||||
new_cidrs = set(s.cidr for s in network.subnets if s.enable_dhcp)
|
||||
@ -141,8 +180,8 @@ class DhcpAgent(object):
|
||||
self.call_driver('reload_allocations', network)
|
||||
self.cache.put(network)
|
||||
elif new_cidrs:
|
||||
self.call_driver('restart', network)
|
||||
self.cache.put(network)
|
||||
if self.call_driver('restart', network):
|
||||
self.cache.put(network)
|
||||
else:
|
||||
self.disable_dhcp_helper(network.id)
|
||||
|
||||
@ -274,6 +313,9 @@ class NetworkCache(object):
|
||||
self.subnet_lookup = {}
|
||||
self.port_lookup = {}
|
||||
|
||||
def get_network_ids(self):
|
||||
return self.cache.keys()
|
||||
|
||||
def get_network_by_id(self, network_id):
|
||||
return self.cache.get(network_id)
|
||||
|
||||
|
@ -104,22 +104,17 @@ class TestDhcpAgent(unittest.TestCase):
|
||||
|
||||
def test_run_completes_single_pass(self):
|
||||
with mock.patch('quantum.agent.dhcp_agent.DeviceManager') as dev_mgr:
|
||||
with mock.patch('quantum.agent.dhcp_agent.DhcpPluginApi') as plug:
|
||||
mock_plugin = mock.Mock()
|
||||
mock_plugin.get_active_networks.return_value = ['a']
|
||||
plug.return_value = mock_plugin
|
||||
|
||||
dhcp = dhcp_agent.DhcpAgent(cfg.CONF)
|
||||
with mock.patch.object(dhcp, 'enable_dhcp_helper') as enable:
|
||||
with mock.patch.object(dhcp, 'lease_relay') as relay:
|
||||
dhcp.run()
|
||||
enable.assert_called_once_with('a')
|
||||
plug.assert_called_once_with('q-plugin', mock.ANY)
|
||||
mock_plugin.assert_has_calls(
|
||||
[mock.call.get_active_networks()])
|
||||
relay.assert_has_mock_calls([mock.call.run()])
|
||||
|
||||
self.notification.assert_has_calls([mock.call.run_dispatch()])
|
||||
dhcp = dhcp_agent.DhcpAgent(cfg.CONF)
|
||||
attrs_to_mock = dict(
|
||||
[(a, mock.DEFAULT) for a in
|
||||
['sync_state', 'lease_relay', 'periodic_resync']])
|
||||
with mock.patch.multiple(dhcp, **attrs_to_mock) as mocks:
|
||||
dhcp.run()
|
||||
mocks['sync_state'].assert_called_once_with()
|
||||
mocks['periodic_resync'].assert_called_once_with()
|
||||
mocks['lease_relay'].assert_has_mock_calls(
|
||||
[mock.call.start()])
|
||||
self.notification.assert_has_calls([mock.call.run_dispatch()])
|
||||
|
||||
def test_call_driver(self):
|
||||
network = mock.Mock()
|
||||
@ -149,6 +144,95 @@ class TestDhcpAgent(unittest.TestCase):
|
||||
mock.ANY,
|
||||
'qdhcp-1')
|
||||
self.assertEqual(log.call_count, 1)
|
||||
self.assertTrue(dhcp.needs_resync)
|
||||
|
||||
def test_update_lease(self):
|
||||
with mock.patch('quantum.agent.dhcp_agent.DhcpPluginApi') as plug:
|
||||
dhcp = dhcp_agent.DhcpAgent(cfg.CONF)
|
||||
dhcp.update_lease('net_id', '192.168.1.1', 120)
|
||||
plug.assert_has_calls(
|
||||
[mock.call().update_lease_expiration(
|
||||
'net_id', '192.168.1.1', 120)])
|
||||
|
||||
def test_update_lease_failure(self):
|
||||
with mock.patch('quantum.agent.dhcp_agent.DhcpPluginApi') as plug:
|
||||
plug.return_value.update_lease_expiration.side_effect = Exception
|
||||
|
||||
with mock.patch.object(dhcp_agent.LOG, 'exception') as log:
|
||||
dhcp = dhcp_agent.DhcpAgent(cfg.CONF)
|
||||
dhcp.update_lease('net_id', '192.168.1.1', 120)
|
||||
plug.assert_has_calls(
|
||||
[mock.call().update_lease_expiration(
|
||||
'net_id', '192.168.1.1', 120)])
|
||||
|
||||
self.assertTrue(log.called)
|
||||
self.assertTrue(dhcp.needs_resync)
|
||||
|
||||
def _test_sync_state_helper(self, known_networks, active_networks):
|
||||
with mock.patch('quantum.agent.dhcp_agent.DhcpPluginApi') as plug:
|
||||
mock_plugin = mock.Mock()
|
||||
mock_plugin.get_active_networks.return_value = active_networks
|
||||
plug.return_value = mock_plugin
|
||||
|
||||
dhcp = dhcp_agent.DhcpAgent(cfg.CONF)
|
||||
|
||||
attrs_to_mock = dict(
|
||||
[(a, mock.DEFAULT) for a in
|
||||
['refresh_dhcp_helper', 'disable_dhcp_helper', 'cache']])
|
||||
|
||||
with mock.patch.multiple(dhcp, **attrs_to_mock) as mocks:
|
||||
mocks['cache'].get_network_ids.return_value = known_networks
|
||||
dhcp.sync_state()
|
||||
|
||||
exp_refresh = [
|
||||
mock.call(net_id) for net_id in active_networks]
|
||||
|
||||
diff = set(known_networks) - set(active_networks)
|
||||
exp_disable = [mock.call(net_id) for net_id in diff]
|
||||
|
||||
mocks['cache'].assert_has_calls([mock.call.get_network_ids()])
|
||||
mocks['refresh_dhcp_helper'].assert_has_called(exp_refresh)
|
||||
mocks['disable_dhcp_helper'].assert_has_called(exp_disable)
|
||||
|
||||
def test_sync_state_initial(self):
|
||||
self._test_sync_state_helper([], ['a'])
|
||||
|
||||
def test_sync_state_same(self):
|
||||
self._test_sync_state_helper(['a'], ['a'])
|
||||
|
||||
def test_sync_state_disabled_net(self):
|
||||
self._test_sync_state_helper(['b'], ['a'])
|
||||
|
||||
def test_sync_state_plugin_error(self):
|
||||
with mock.patch('quantum.agent.dhcp_agent.DhcpPluginApi') as plug:
|
||||
mock_plugin = mock.Mock()
|
||||
mock_plugin.get_active_networks.side_effect = Exception
|
||||
plug.return_value = mock_plugin
|
||||
|
||||
with mock.patch.object(dhcp_agent.LOG, 'exception') as log:
|
||||
dhcp = dhcp_agent.DhcpAgent(cfg.CONF)
|
||||
dhcp.sync_state()
|
||||
|
||||
self.assertTrue(log.called)
|
||||
self.assertTrue(dhcp.needs_resync)
|
||||
|
||||
def test_periodic_resync(self):
|
||||
dhcp = dhcp_agent.DhcpAgent(cfg.CONF)
|
||||
with mock.patch.object(dhcp_agent.eventlet, 'spawn') as spawn:
|
||||
dhcp.periodic_resync()
|
||||
spawn.assert_called_once_with(dhcp._periodic_resync_helper)
|
||||
|
||||
def test_periodoc_resync_helper(self):
|
||||
with mock.patch.object(dhcp_agent.eventlet, 'sleep') as sleep:
|
||||
dhcp = dhcp_agent.DhcpAgent(cfg.CONF)
|
||||
dhcp.needs_resync = True
|
||||
with mock.patch.object(dhcp, 'sync_state') as sync_state:
|
||||
sync_state.side_effect = RuntimeError
|
||||
with self.assertRaises(RuntimeError):
|
||||
dhcp._periodic_resync_helper()
|
||||
sync_state.assert_called_once_with()
|
||||
sleep.assert_called_once_with(dhcp.conf.resync_interval)
|
||||
self.assertFalse(dhcp.needs_resync)
|
||||
|
||||
|
||||
class TestDhcpAgentEventHandler(unittest.TestCase):
|
||||
@ -207,6 +291,7 @@ class TestDhcpAgentEventHandler(unittest.TestCase):
|
||||
[mock.call.get_network_info(fake_network.id)])
|
||||
self.assertFalse(self.call_driver.called)
|
||||
self.assertTrue(log.called)
|
||||
self.assertTrue(self.dhcp.needs_resync)
|
||||
self.assertFalse(self.cache.called)
|
||||
|
||||
def test_enable_dhcp_helper_driver_failure(self):
|
||||
@ -232,6 +317,16 @@ class TestDhcpAgentEventHandler(unittest.TestCase):
|
||||
[mock.call.get_network_by_id('abcdef')])
|
||||
self.assertEqual(self.call_driver.call_count, 0)
|
||||
|
||||
def test_disable_dhcp_helper_driver_failure(self):
|
||||
self.cache.get_network_by_id.return_value = fake_network
|
||||
self.dhcp.disable_dhcp_helper(fake_network.id)
|
||||
self.call_driver.disable.return_value = False
|
||||
self.cache.assert_has_calls(
|
||||
[mock.call.get_network_by_id(fake_network.id)])
|
||||
self.call_driver.assert_called_once_with('disable', fake_network)
|
||||
self.cache.assert_has_calls(
|
||||
[mock.call.get_network_by_id(fake_network.id)])
|
||||
|
||||
def test_network_create_end(self):
|
||||
payload = dict(network=dict(id=fake_network.id))
|
||||
|
||||
@ -275,6 +370,23 @@ class TestDhcpAgentEventHandler(unittest.TestCase):
|
||||
self.cache.assert_has_calls(
|
||||
[mock.call.get_network_by_id('net-id')])
|
||||
|
||||
def test_refresh_dhcp_helper_exception_during_rpc(self):
|
||||
network = FakeModel('net-id',
|
||||
tenant_id='aaaaaaaa-aaaa-aaaa-aaaaaaaaaaaa',
|
||||
admin_state_up=True,
|
||||
subnets=[],
|
||||
ports=[])
|
||||
|
||||
self.cache.get_network_by_id.return_value = network
|
||||
self.plugin.get_network_info.side_effect = Exception
|
||||
with mock.patch.object(dhcp_agent.LOG, 'exception') as log:
|
||||
self.dhcp.refresh_dhcp_helper(network.id)
|
||||
self.assertFalse(self.call_driver.called)
|
||||
self.cache.assert_has_calls(
|
||||
[mock.call.get_network_by_id('net-id')])
|
||||
self.assertTrue(log.called)
|
||||
self.assertTrue(self.dhcp.needs_resync)
|
||||
|
||||
def test_subnet_update_end(self):
|
||||
payload = dict(subnet=dict(network_id=fake_network.id))
|
||||
self.cache.get_network_by_id.return_value = fake_network
|
||||
@ -472,6 +584,12 @@ class TestNetworkCache(unittest.TestCase):
|
||||
|
||||
self.assertEqual(nc.get_network_by_id(fake_network.id), fake_network)
|
||||
|
||||
def test_get_network_ids(self):
|
||||
nc = dhcp_agent.NetworkCache()
|
||||
nc.put(fake_network)
|
||||
|
||||
self.assertEqual(nc.get_network_ids(), [fake_network.id])
|
||||
|
||||
def test_get_network_by_subnet_id(self):
|
||||
nc = dhcp_agent.NetworkCache()
|
||||
nc.put(fake_network)
|
||||
|
Loading…
Reference in New Issue
Block a user