From c95e58b70796eae6bbc8bac283f81df9aa1d1894 Mon Sep 17 00:00:00 2001 From: Gary Kotton Date: Wed, 1 May 2013 12:54:11 +0000 Subject: [PATCH] Enable network to be scheduled to N DHCP agents Fixes bug 1174132 Change-Id: Iabe96fd8a98f3539eb21268fb5ef58df8dbd8782 --- etc/quantum.conf | 5 + .../rpc/agentnotifiers/dhcp_rpc_agent_api.py | 13 +- quantum/manager.py | 33 +++- quantum/scheduler/__init__.py | 2 + quantum/scheduler/dhcp_agent_scheduler.py | 100 ++++++------ .../unit/openvswitch/test_agent_scheduler.py | 150 ++++++++++++++++-- quantum/tests/unit/test_agent_ext_plugin.py | 17 ++ quantum/tests/unit/test_quantum_manager.py | 16 ++ 8 files changed, 274 insertions(+), 62 deletions(-) diff --git a/etc/quantum.conf b/etc/quantum.conf index 9f2fccaaa3..206fa85832 100644 --- a/etc/quantum.conf +++ b/etc/quantum.conf @@ -229,6 +229,11 @@ notification_topics = notifications # Allow auto scheduling routers to L3 agent. It will schedule non-hosted # routers to first L3 agent which sends sync_routers message to quantum server # router_auto_schedule = True + +# Number of DHCP agents scheduled to host a network. This enables redundant +# DHCP agents for configured networks. +# dhcp_agents_per_network = 1 + # =========== end of items for agent scheduler extension ===== # =========== WSGI parameters related to the API server ============== diff --git a/quantum/api/rpc/agentnotifiers/dhcp_rpc_agent_api.py b/quantum/api/rpc/agentnotifiers/dhcp_rpc_agent_api.py index fbb1bbb2f5..959e2b1bfd 100644 --- a/quantum/api/rpc/agentnotifiers/dhcp_rpc_agent_api.py +++ b/quantum/api/rpc/agentnotifiers/dhcp_rpc_agent_api.py @@ -69,12 +69,13 @@ class DhcpAgentNotifyAPI(proxy.RpcProxy): adminContext = (context if context.is_admin else context.elevated()) network = plugin.get_network(adminContext, network_id) - chosen_agent = plugin.schedule_network(adminContext, network) - if chosen_agent: - self._notification_host( - context, 'network_create_end', - {'network': {'id': network_id}}, - chosen_agent['host']) + chosen_agents = plugin.schedule_network(adminContext, network) + if chosen_agents: + for agent in chosen_agents: + self._notification_host( + context, 'network_create_end', + {'network': {'id': network_id}}, + agent['host']) for (host, topic) in self._get_dhcp_agents(context, network_id): self.cast( context, self.make_msg(method, diff --git a/quantum/manager.py b/quantum/manager.py index 5573ab19e8..4c6a9f76c7 100644 --- a/quantum/manager.py +++ b/quantum/manager.py @@ -59,6 +59,30 @@ class Manager(periodic_task.PeriodicTasks): pass +def validate_post_plugin_load(): + """Checks if the configuration variables are valid. + + If the configuration is invalid then the method will return an error + message. If all is OK then it will return None. + """ + if ('dhcp_agents_per_network' in cfg.CONF and + cfg.CONF.dhcp_agents_per_network <= 0): + msg = _("dhcp_agents_per_network must be >= 1. '%s' " + "is invalid.") % cfg.CONF.dhcp_agents_per_network + return msg + + +def validate_pre_plugin_load(): + """Checks if the configuration variables are valid. + + If the configuration is invalid then the method will return an error + message. If all is OK then it will return None. + """ + if cfg.CONF.core_plugin is None: + msg = _('Quantum core_plugin not configured!') + return msg + + class QuantumManager(object): """Quantum's Manager class. @@ -74,8 +98,8 @@ class QuantumManager(object): if not options: options = {} - if cfg.CONF.core_plugin is None: - msg = _('Quantum core_plugin not configured!') + msg = validate_pre_plugin_load() + if msg: LOG.critical(msg) raise Exception(msg) @@ -96,6 +120,11 @@ class QuantumManager(object): "Example: pip install quantum-sample-plugin")) self.plugin = plugin_klass() + msg = validate_post_plugin_load() + if msg: + LOG.critical(msg) + raise Exception(msg) + # core plugin as a part of plugin collection simplifies # checking extensions # TODO(enikanorov): make core plugin the same as diff --git a/quantum/scheduler/__init__.py b/quantum/scheduler/__init__.py index 9dc2a46c38..7fc68d0669 100644 --- a/quantum/scheduler/__init__.py +++ b/quantum/scheduler/__init__.py @@ -31,4 +31,6 @@ AGENTS_SCHEDULER_OPTS = [ help=_('Allow auto scheduling networks to DHCP agent.')), cfg.BoolOpt('router_auto_schedule', default=True, help=_('Allow auto scheduling routers to L3 agent.')), + cfg.IntOpt('dhcp_agents_per_network', default=1, + help=_('Number of DHCP agents scheduled to host a network.')), ] diff --git a/quantum/scheduler/dhcp_agent_scheduler.py b/quantum/scheduler/dhcp_agent_scheduler.py index 3133eb23f6..9bcadd7ede 100644 --- a/quantum/scheduler/dhcp_agent_scheduler.py +++ b/quantum/scheduler/dhcp_agent_scheduler.py @@ -17,13 +17,11 @@ import random -from sqlalchemy.orm import exc -from sqlalchemy.sql import exists +from oslo.config import cfg from quantum.common import constants from quantum.db import agents_db from quantum.db import agentschedulers_db -from quantum.db import models_v2 from quantum.openstack.common import log as logging @@ -36,75 +34,87 @@ class ChanceScheduler(object): can be introduced later. """ + def _schedule_bind_network(self, context, agent, network_id): + binding = agentschedulers_db.NetworkDhcpAgentBinding() + binding.dhcp_agent = agent + binding.network_id = network_id + context.session.add(binding) + LOG.debug(_('Network %(network_id)s is scheduled to be hosted by ' + 'DHCP agent %(agent_id)s'), + {'network_id': network_id, + 'agent_id': agent}) + def schedule(self, plugin, context, network): - """Schedule the network to an active DHCP agent if there - is no active DHCP agent hosting it. + """Schedule the network to active DHCP agent(s). + + A list of scheduled agents is returned. """ + agents_per_network = cfg.CONF.dhcp_agents_per_network + #TODO(gongysh) don't schedule the networks with only # subnets whose enable_dhcp is false with context.session.begin(subtransactions=True): dhcp_agents = plugin.get_dhcp_agents_hosting_networks( context, [network['id']], active=True) - if dhcp_agents: + if len(dhcp_agents) >= agents_per_network: LOG.debug(_('Network %s is hosted already'), network['id']) return + n_agents = agents_per_network - len(dhcp_agents) enabled_dhcp_agents = plugin.get_agents_db( context, filters={ 'agent_type': [constants.AGENT_TYPE_DHCP], 'admin_state_up': [True]}) if not enabled_dhcp_agents: - LOG.warn(_('No enabled DHCP agents')) + LOG.warn(_('No more DHCP agents')) return - active_dhcp_agents = [enabled_dhcp_agent for enabled_dhcp_agent in - enabled_dhcp_agents if not - agents_db.AgentDbMixin.is_agent_down( - enabled_dhcp_agent['heartbeat_timestamp'])] + active_dhcp_agents = [ + agent for agent in set(enabled_dhcp_agents) + if not agents_db.AgentDbMixin.is_agent_down( + agent['heartbeat_timestamp']) + and agent not in dhcp_agents + ] if not active_dhcp_agents: - LOG.warn(_('No active DHCP agents')) + LOG.warn(_('No more DHCP agents')) return - chosen_agent = random.choice(active_dhcp_agents) - binding = agentschedulers_db.NetworkDhcpAgentBinding() - binding.dhcp_agent = chosen_agent - binding.network_id = network['id'] - context.session.add(binding) - LOG.debug(_('Network %(network_id)s is scheduled to be hosted by ' - 'DHCP agent %(agent_id)s'), - {'network_id': network['id'], - 'agent_id': chosen_agent['id']}) - return chosen_agent + n_agents = min(len(active_dhcp_agents), n_agents) + chosen_agents = random.sample(active_dhcp_agents, n_agents) + for agent in chosen_agents: + self._schedule_bind_network(context, agent, network['id']) + return chosen_agents def auto_schedule_networks(self, plugin, context, host): """Schedule non-hosted networks to the DHCP agent on the specified host. """ + agents_per_network = cfg.CONF.dhcp_agents_per_network with context.session.begin(subtransactions=True): query = context.session.query(agents_db.Agent) query = query.filter(agents_db.Agent.agent_type == constants.AGENT_TYPE_DHCP, agents_db.Agent.host == host, agents_db.Agent.admin_state_up == True) - try: - dhcp_agent = query.one() - except (exc.MultipleResultsFound, exc.NoResultFound): - LOG.warn(_('No enabled DHCP agent on host %s'), - host) - return False - if agents_db.AgentDbMixin.is_agent_down( - dhcp_agent.heartbeat_timestamp): - LOG.warn(_('DHCP agent %s is not active'), dhcp_agent.id) - #TODO(gongysh) consider the disabled agent's network - net_stmt = ~exists().where( - models_v2.Network.id == - agentschedulers_db.NetworkDhcpAgentBinding.network_id) - net_ids = context.session.query( - models_v2.Network.id).filter(net_stmt).all() - if not net_ids: - LOG.debug(_('No non-hosted networks')) - return False - for net_id in net_ids: - binding = agentschedulers_db.NetworkDhcpAgentBinding() - binding.dhcp_agent = dhcp_agent - binding.network_id = net_id[0] - context.session.add(binding) + dhcp_agents = query.all() + for dhcp_agent in dhcp_agents: + if agents_db.AgentDbMixin.is_agent_down( + dhcp_agent.heartbeat_timestamp): + LOG.warn(_('DHCP agent %s is not active'), dhcp_agent.id) + continue + #TODO(gongysh) consider the disabled agent's network + fields = ['network_id', 'enable_dhcp'] + subnets = plugin.get_subnets(context, fields=fields) + net_ids = set(s['network_id'] for s in subnets + if s['enable_dhcp']) + if not net_ids: + LOG.debug(_('No non-hosted networks')) + return False + for net_id in net_ids: + agents = plugin.get_dhcp_agents_hosting_networks( + context, [net_id], active=True) + if len(agents) >= agents_per_network: + continue + binding = agentschedulers_db.NetworkDhcpAgentBinding() + binding.dhcp_agent = dhcp_agent + binding.network_id = net_id + context.session.add(binding) return True diff --git a/quantum/tests/unit/openvswitch/test_agent_scheduler.py b/quantum/tests/unit/openvswitch/test_agent_scheduler.py index 395ca77d1a..c28e09d7fc 100644 --- a/quantum/tests/unit/openvswitch/test_agent_scheduler.py +++ b/quantum/tests/unit/openvswitch/test_agent_scheduler.py @@ -17,6 +17,7 @@ import contextlib import copy import mock +from oslo.config import cfg from webob import exc from quantum.api import extensions @@ -230,8 +231,9 @@ class OvsAgentSchedulerTestCase(test_l3_plugin.L3NatTestCaseMixin, self.assertEqual(0, len(dhcp_agents['agents'])) def test_network_auto_schedule_with_disabled(self): - with contextlib.nested(self.network(), - self.network()): + cfg.CONF.set_override('allow_overlapping_ips', True) + with contextlib.nested(self.subnet(), + self.subnet()): dhcp_rpc = dhcp_rpc_base.DhcpRpcCallbackMixin() self._register_agent_states() hosta_id = self._get_agent_id(constants.AGENT_TYPE_DHCP, @@ -249,17 +251,58 @@ class OvsAgentSchedulerTestCase(test_l3_plugin.L3NatTestCaseMixin, self.assertEqual(0, num_hosta_nets) self.assertEqual(2, num_hostc_nets) + def test_network_auto_schedule_with_no_dhcp(self): + cfg.CONF.set_override('allow_overlapping_ips', True) + with contextlib.nested(self.subnet(enable_dhcp=False), + self.subnet(enable_dhcp=False)): + dhcp_rpc = dhcp_rpc_base.DhcpRpcCallbackMixin() + self._register_agent_states() + hosta_id = self._get_agent_id(constants.AGENT_TYPE_DHCP, + DHCP_HOSTA) + hostc_id = self._get_agent_id(constants.AGENT_TYPE_DHCP, + DHCP_HOSTC) + self._disable_agent(hosta_id) + dhcp_rpc.get_active_networks(self.adminContext, host=DHCP_HOSTA) + dhcp_rpc.get_active_networks(self.adminContext, host=DHCP_HOSTC) + networks = self._list_networks_hosted_by_dhcp_agent(hostc_id) + num_hostc_nets = len(networks['networks']) + networks = self._list_networks_hosted_by_dhcp_agent(hosta_id) + num_hosta_nets = len(networks['networks']) + self.assertEqual(0, num_hosta_nets) + self.assertEqual(0, num_hostc_nets) + + def test_network_auto_schedule_with_multiple_agents(self): + cfg.CONF.set_override('dhcp_agents_per_network', 2) + cfg.CONF.set_override('allow_overlapping_ips', True) + with contextlib.nested(self.subnet(), + self.subnet()): + dhcp_rpc = dhcp_rpc_base.DhcpRpcCallbackMixin() + self._register_agent_states() + hosta_id = self._get_agent_id(constants.AGENT_TYPE_DHCP, + DHCP_HOSTA) + hostc_id = self._get_agent_id(constants.AGENT_TYPE_DHCP, + DHCP_HOSTC) + dhcp_rpc.get_active_networks(self.adminContext, host=DHCP_HOSTA) + dhcp_rpc.get_active_networks(self.adminContext, host=DHCP_HOSTC) + networks = self._list_networks_hosted_by_dhcp_agent(hostc_id) + num_hostc_nets = len(networks['networks']) + networks = self._list_networks_hosted_by_dhcp_agent(hosta_id) + num_hosta_nets = len(networks['networks']) + self.assertEqual(2, num_hosta_nets) + self.assertEqual(2, num_hostc_nets) + def test_network_auto_schedule_with_hosted(self): # one agent hosts all the networks, other hosts none - with contextlib.nested(self.network(), - self.network()) as (net1, net2): + cfg.CONF.set_override('allow_overlapping_ips', True) + with contextlib.nested(self.subnet(), + self.subnet()) as (sub1, sub2): dhcp_rpc = dhcp_rpc_base.DhcpRpcCallbackMixin() self._register_agent_states() dhcp_rpc.get_active_networks(self.adminContext, host=DHCP_HOSTA) # second agent will not host the network since first has got it. dhcp_rpc.get_active_networks(self.adminContext, host=DHCP_HOSTC) dhcp_agents = self._list_dhcp_agents_hosting_network( - net1['network']['id']) + sub1['subnet']['network_id']) hosta_id = self._get_agent_id(constants.AGENT_TYPE_DHCP, DHCP_HOSTA) hostc_id = self._get_agent_id(constants.AGENT_TYPE_DHCP, @@ -287,20 +330,21 @@ class OvsAgentSchedulerTestCase(test_l3_plugin.L3NatTestCaseMixin, 'agent_type': constants.AGENT_TYPE_DHCP} dhcp_hostc = copy.deepcopy(dhcp_hosta) dhcp_hostc['host'] = DHCP_HOSTC - with self.network() as net1: + cfg.CONF.set_override('allow_overlapping_ips', True) + with self.subnet() as sub1: self._register_one_agent_state(dhcp_hosta) dhcp_rpc.get_active_networks(self.adminContext, host=DHCP_HOSTA) hosta_id = self._get_agent_id(constants.AGENT_TYPE_DHCP, DHCP_HOSTA) self._disable_agent(hosta_id, admin_state_up=False) - with self.network() as net2: + with self.subnet() as sub2: self._register_one_agent_state(dhcp_hostc) dhcp_rpc.get_active_networks(self.adminContext, host=DHCP_HOSTC) dhcp_agents_1 = self._list_dhcp_agents_hosting_network( - net1['network']['id']) + sub1['subnet']['network_id']) dhcp_agents_2 = self._list_dhcp_agents_hosting_network( - net2['network']['id']) + sub2['subnet']['network_id']) hosta_nets = self._list_networks_hosted_by_dhcp_agent(hosta_id) num_hosta_nets = len(hosta_nets['networks']) hostc_id = self._get_agent_id( @@ -330,6 +374,43 @@ class OvsAgentSchedulerTestCase(test_l3_plugin.L3NatTestCaseMixin, self.assertEqual(0, result0) self.assertEqual(1, result1) + def test_network_ha_scheduling_on_port_creation(self): + cfg.CONF.set_override('dhcp_agents_per_network', 2) + with self.subnet() as subnet: + dhcp_agents = self._list_dhcp_agents_hosting_network( + subnet['subnet']['network_id']) + result0 = len(dhcp_agents['agents']) + self._register_agent_states() + with self.port(subnet=subnet, + device_owner="compute:test:" + DHCP_HOSTA) as port: + dhcp_agents = self._list_dhcp_agents_hosting_network( + port['port']['network_id']) + result1 = len(dhcp_agents['agents']) + self.assertEqual(0, result0) + self.assertEqual(2, result1) + + def test_network_ha_scheduling_on_port_creation_with_new_agent(self): + cfg.CONF.set_override('dhcp_agents_per_network', 3) + with self.subnet() as subnet: + dhcp_agents = self._list_dhcp_agents_hosting_network( + subnet['subnet']['network_id']) + result0 = len(dhcp_agents['agents']) + self._register_agent_states() + with self.port(subnet=subnet, + device_owner="compute:test:" + DHCP_HOSTA) as port: + dhcp_agents = self._list_dhcp_agents_hosting_network( + port['port']['network_id']) + result1 = len(dhcp_agents['agents']) + self._register_one_dhcp_agent() + with self.port(subnet=subnet, + device_owner="compute:test:" + DHCP_HOSTA) as port: + dhcp_agents = self._list_dhcp_agents_hosting_network( + port['port']['network_id']) + result2 = len(dhcp_agents['agents']) + self.assertEqual(0, result0) + self.assertEqual(2, result1) + self.assertEqual(3, result2) + def test_network_scheduler_with_disabled_agent(self): dhcp_hosta = { 'binary': 'quantum-dhcp-agent', @@ -873,6 +954,57 @@ class OvsDhcpAgentNotifierTestCase(test_l3_plugin.L3NatTestCaseMixin, topic='dhcp_agent.' + DHCP_HOSTA)] self.assertEqual(mock_dhcp.call_args_list, expected_calls) + def test_network_ha_port_create_notification(self): + cfg.CONF.set_override('dhcp_agents_per_network', 2) + dhcp_hosta = { + 'binary': 'quantum-dhcp-agent', + 'host': DHCP_HOSTA, + 'topic': 'dhcp_agent', + 'configurations': {'dhcp_driver': 'dhcp_driver', + 'use_namespaces': True, + }, + 'agent_type': constants.AGENT_TYPE_DHCP} + self._register_one_agent_state(dhcp_hosta) + dhcp_hostc = copy.deepcopy(dhcp_hosta) + dhcp_hostc['host'] = DHCP_HOSTC + self._register_one_agent_state(dhcp_hostc) + with mock.patch.object(self.dhcp_notifier, 'cast') as mock_dhcp: + with self.network(do_delete=False) as net1: + with self.subnet(network=net1, + do_delete=False) as subnet1: + with self.port(subnet=subnet1, no_delete=True) as port: + network_id = port['port']['network_id'] + expected_calls_a = [ + mock.call( + mock.ANY, + self.dhcp_notifier.make_msg( + 'network_create_end', + payload={'network': {'id': network_id}}), + topic='dhcp_agent.' + DHCP_HOSTA), + mock.call( + mock.ANY, + self.dhcp_notifier.make_msg( + 'port_create_end', + payload={'port': port['port']}), + topic='dhcp_agent.' + DHCP_HOSTA)] + expected_calls_c = [ + mock.call( + mock.ANY, + self.dhcp_notifier.make_msg( + 'network_create_end', + payload={'network': {'id': network_id}}), + topic='dhcp_agent.' + DHCP_HOSTC), + mock.call( + mock.ANY, + self.dhcp_notifier.make_msg( + 'port_create_end', + payload={'port': port['port']}), + topic='dhcp_agent.' + DHCP_HOSTC)] + for expected in expected_calls_a: + self.assertIn(expected, mock_dhcp.call_args_list) + for expected in expected_calls_c: + self.assertIn(expected, mock_dhcp.call_args_list) + class OvsL3AgentNotifierTestCase(test_l3_plugin.L3NatTestCaseMixin, test_agent_ext_plugin.AgentDBTestMixIn, diff --git a/quantum/tests/unit/test_agent_ext_plugin.py b/quantum/tests/unit/test_agent_ext_plugin.py index ae11be5feb..f58d98ab59 100644 --- a/quantum/tests/unit/test_agent_ext_plugin.py +++ b/quantum/tests/unit/test_agent_ext_plugin.py @@ -44,6 +44,7 @@ L3_HOSTA = 'hosta' DHCP_HOSTA = 'hosta' L3_HOSTB = 'hostb' DHCP_HOSTC = 'hostc' +DHCP_HOST1 = 'host1' class AgentTestExtensionManager(object): @@ -124,6 +125,22 @@ class AgentDBTestMixIn(object): time=timeutils.strtime()) return [l3_hosta, l3_hostb, dhcp_hosta, dhcp_hostc] + def _register_one_dhcp_agent(self): + """Register one DHCP agent.""" + dhcp_host = { + 'binary': 'quantum-dhcp-agent', + 'host': DHCP_HOST1, + 'topic': 'DHCP_AGENT', + 'configurations': {'dhcp_driver': 'dhcp_driver', + 'use_namespaces': True, + }, + 'agent_type': constants.AGENT_TYPE_DHCP} + callback = agents_db.AgentExtRpcCallback() + callback.report_state(self.adminContext, + agent_state={'agent_state': dhcp_host}, + time=timeutils.strtime()) + return [dhcp_host] + class AgentDBTestCase(AgentDBTestMixIn, test_db_plugin.QuantumDbPluginV2TestCase): diff --git a/quantum/tests/unit/test_quantum_manager.py b/quantum/tests/unit/test_quantum_manager.py index 705f3a1967..e5458a040f 100644 --- a/quantum/tests/unit/test_quantum_manager.py +++ b/quantum/tests/unit/test_quantum_manager.py @@ -25,6 +25,8 @@ from oslo.config import cfg from quantum.common import config from quantum.common.test_lib import test_config from quantum.manager import QuantumManager +from quantum.manager import validate_post_plugin_load +from quantum.manager import validate_pre_plugin_load from quantum.openstack.common import log as logging from quantum.plugins.common import constants from quantum.tests import base @@ -102,3 +104,17 @@ class QuantumManagerTestCase(base.BaseTestCase): self.assertIn(constants.CORE, svc_plugins.keys()) self.assertIn(constants.LOADBALANCER, svc_plugins.keys()) self.assertIn(constants.DUMMY, svc_plugins.keys()) + + def test_post_plugin_validation(self): + self.assertIsNone(validate_post_plugin_load()) + cfg.CONF.set_override('dhcp_agents_per_network', 2) + self.assertIsNone(validate_post_plugin_load()) + cfg.CONF.set_override('dhcp_agents_per_network', 0) + self.assertIsNotNone(validate_post_plugin_load()) + cfg.CONF.set_override('dhcp_agents_per_network', -1) + self.assertIsNotNone(validate_post_plugin_load()) + + def test_pre_plugin_validation(self): + self.assertIsNotNone(validate_pre_plugin_load()) + cfg.CONF.set_override('core_plugin', 'dummy.plugin') + self.assertIsNone(validate_pre_plugin_load())