From 107b28952010ca45c69f32762dde43466b788a54 Mon Sep 17 00:00:00 2001 From: Assaf Muller Date: Mon, 15 Sep 2014 18:11:17 +0300 Subject: [PATCH] HA routers master state now distributed amongst agents We're currently running with no pre-emption, meaning that the first router in a cluster to go up will be the master, regardless of priority. Since the order in which we sent notifications was constant, the same agent hosted the master instances of all HA routers, defeating the idea of load sharing. Closes-Bug: #1365429 Change-Id: Ia6fe2bd0317c241bf7eb55915df7650dfdc68210 --- .../api/rpc/agentnotifiers/l3_rpc_agent_api.py | 17 ++++++++++++----- neutron/db/l3_hamode_db.py | 3 ++- neutron/plugins/nec/nec_router.py | 5 +++-- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py b/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py index d06074e78e..0b8f7e5efa 100644 --- a/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py +++ b/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random + from neutron.common import constants from neutron.common import rpc as n_rpc from neutron.common import topics @@ -43,7 +45,8 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): payload=payload), topic='%s.%s' % (topics.L3_AGENT, host)) - def _agent_notification(self, context, method, router_ids, operation): + def _agent_notification(self, context, method, router_ids, operation, + shuffle_agents): """Notify changed routers to hosting l3 agents.""" adminContext = context.is_admin and context or context.elevated() plugin = manager.NeutronManager.get_service_plugins().get( @@ -53,6 +56,8 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): adminContext, [router_id], admin_state_up=True, active=True) + if shuffle_agents: + random.shuffle(l3_agents) for l3_agent in l3_agents: LOG.debug(_('Notify agent at %(topic)s.%(host)s the message ' '%(method)s'), @@ -91,7 +96,8 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): self.make_msg(method, payload=dvr_arptable), topic=topic, version='1.2') - def _notification(self, context, method, router_ids, operation): + def _notification(self, context, method, router_ids, operation, + shuffle_agents): """Notify all the agents that are hosting the routers.""" plugin = manager.NeutronManager.get_service_plugins().get( service_constants.L3_ROUTER_NAT) @@ -105,7 +111,7 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): context or context.elevated()) plugin.schedule_routers(adminContext, router_ids) self._agent_notification( - context, method, router_ids, operation) + context, method, router_ids, operation, shuffle_agents) else: self.fanout_cast( context, self.make_msg(method, @@ -132,10 +138,11 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): def router_deleted(self, context, router_id): self._notification_fanout(context, 'router_deleted', router_id) - def routers_updated(self, context, router_ids, operation=None, data=None): + def routers_updated(self, context, router_ids, operation=None, data=None, + shuffle_agents=False): if router_ids: self._notification(context, 'routers_updated', router_ids, - operation) + operation, shuffle_agents) def add_arp_entry(self, context, router_id, arp_table, operation=None): self._agent_notification_arp(context, 'add_arp_entry', router_id, diff --git a/neutron/db/l3_hamode_db.py b/neutron/db/l3_hamode_db.py index 19ecf3cc9b..a0ed580850 100644 --- a/neutron/db/l3_hamode_db.py +++ b/neutron/db/l3_hamode_db.py @@ -323,7 +323,8 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin): l3_port_check=False) def _notify_ha_interfaces_updated(self, context, router_id): - self.l3_rpc_notifier.routers_updated(context, [router_id]) + self.l3_rpc_notifier.routers_updated( + context, [router_id], shuffle_agents=True) @classmethod def _is_ha(cls, router): diff --git a/neutron/plugins/nec/nec_router.py b/neutron/plugins/nec/nec_router.py index 9af75cbeda..f47bcd6ad5 100644 --- a/neutron/plugins/nec/nec_router.py +++ b/neutron/plugins/nec/nec_router.py @@ -289,7 +289,8 @@ class L3AgentSchedulerDbMixin(l3_agentschedulers_db.L3AgentSchedulerDbMixin): class L3AgentNotifyAPI(l3_rpc_agent_api.L3AgentNotifyAPI): - def _notification(self, context, method, router_ids, operation): + def _notification(self, context, method, router_ids, operation, + shuffle_agents): """Notify all the agents that are hosting the routers. _notification() is called in L3 db plugin for all routers regardless @@ -300,7 +301,7 @@ class L3AgentNotifyAPI(l3_rpc_agent_api.L3AgentNotifyAPI): router_ids = rdb.get_routers_by_provider( context.session, nconst.ROUTER_PROVIDER_L3AGENT, router_ids) super(L3AgentNotifyAPI, self)._notification( - context, method, router_ids, operation) + context, method, router_ids, operation, shuffle_agents) def load_driver(plugin, ofc_manager):