HA routers master state now distributed amongst agents

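We're currently running with no pre-emption, meaning that
the first router in a cluster to come up will be the master,
regardless of priority. Because the order in which we sent
notifications to agents was constant, the same agent hosted the
master instances of all HA routers, defeating the idea
of load sharing.

To fix this, the list of hosting L3 agents is now shuffled
before notifications are sent when HA interfaces are updated
(routers_updated is called with shuffle_agents=True), so the
master role is spread across the agents.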

Closes-Bug: #1365429
Change-Id: Ia6fe2bd0317c241bf7eb55915df7650dfdc68210
This commit is contained in:
Assaf Muller 2014-09-15 18:11:17 +03:00
parent 8c6f098e9a
commit 107b289520
3 changed files with 17 additions and 8 deletions

View File

@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import random
from neutron.common import constants from neutron.common import constants
from neutron.common import rpc as n_rpc from neutron.common import rpc as n_rpc
from neutron.common import topics from neutron.common import topics
@ -43,7 +45,8 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy):
payload=payload), payload=payload),
topic='%s.%s' % (topics.L3_AGENT, host)) topic='%s.%s' % (topics.L3_AGENT, host))
def _agent_notification(self, context, method, router_ids, operation): def _agent_notification(self, context, method, router_ids, operation,
shuffle_agents):
"""Notify changed routers to hosting l3 agents.""" """Notify changed routers to hosting l3 agents."""
adminContext = context.is_admin and context or context.elevated() adminContext = context.is_admin and context or context.elevated()
plugin = manager.NeutronManager.get_service_plugins().get( plugin = manager.NeutronManager.get_service_plugins().get(
@ -53,6 +56,8 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy):
adminContext, [router_id], adminContext, [router_id],
admin_state_up=True, admin_state_up=True,
active=True) active=True)
if shuffle_agents:
random.shuffle(l3_agents)
for l3_agent in l3_agents: for l3_agent in l3_agents:
LOG.debug(_('Notify agent at %(topic)s.%(host)s the message ' LOG.debug(_('Notify agent at %(topic)s.%(host)s the message '
'%(method)s'), '%(method)s'),
@ -91,7 +96,8 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy):
self.make_msg(method, payload=dvr_arptable), self.make_msg(method, payload=dvr_arptable),
topic=topic, version='1.2') topic=topic, version='1.2')
def _notification(self, context, method, router_ids, operation): def _notification(self, context, method, router_ids, operation,
shuffle_agents):
"""Notify all the agents that are hosting the routers.""" """Notify all the agents that are hosting the routers."""
plugin = manager.NeutronManager.get_service_plugins().get( plugin = manager.NeutronManager.get_service_plugins().get(
service_constants.L3_ROUTER_NAT) service_constants.L3_ROUTER_NAT)
@ -105,7 +111,7 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy):
context or context.elevated()) context or context.elevated())
plugin.schedule_routers(adminContext, router_ids) plugin.schedule_routers(adminContext, router_ids)
self._agent_notification( self._agent_notification(
context, method, router_ids, operation) context, method, router_ids, operation, shuffle_agents)
else: else:
self.fanout_cast( self.fanout_cast(
context, self.make_msg(method, context, self.make_msg(method,
@ -132,10 +138,11 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy):
def router_deleted(self, context, router_id): def router_deleted(self, context, router_id):
self._notification_fanout(context, 'router_deleted', router_id) self._notification_fanout(context, 'router_deleted', router_id)
def routers_updated(self, context, router_ids, operation=None, data=None): def routers_updated(self, context, router_ids, operation=None, data=None,
shuffle_agents=False):
if router_ids: if router_ids:
self._notification(context, 'routers_updated', router_ids, self._notification(context, 'routers_updated', router_ids,
operation) operation, shuffle_agents)
def add_arp_entry(self, context, router_id, arp_table, operation=None): def add_arp_entry(self, context, router_id, arp_table, operation=None):
self._agent_notification_arp(context, 'add_arp_entry', router_id, self._agent_notification_arp(context, 'add_arp_entry', router_id,

View File

@ -323,7 +323,8 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin):
l3_port_check=False) l3_port_check=False)
def _notify_ha_interfaces_updated(self, context, router_id): def _notify_ha_interfaces_updated(self, context, router_id):
self.l3_rpc_notifier.routers_updated(context, [router_id]) self.l3_rpc_notifier.routers_updated(
context, [router_id], shuffle_agents=True)
@classmethod @classmethod
def _is_ha(cls, router): def _is_ha(cls, router):

View File

@ -289,7 +289,8 @@ class L3AgentSchedulerDbMixin(l3_agentschedulers_db.L3AgentSchedulerDbMixin):
class L3AgentNotifyAPI(l3_rpc_agent_api.L3AgentNotifyAPI): class L3AgentNotifyAPI(l3_rpc_agent_api.L3AgentNotifyAPI):
def _notification(self, context, method, router_ids, operation): def _notification(self, context, method, router_ids, operation,
shuffle_agents):
"""Notify all the agents that are hosting the routers. """Notify all the agents that are hosting the routers.
_notification() is called in L3 db plugin for all routers regardless _notification() is called in L3 db plugin for all routers regardless
@ -300,7 +301,7 @@ class L3AgentNotifyAPI(l3_rpc_agent_api.L3AgentNotifyAPI):
router_ids = rdb.get_routers_by_provider( router_ids = rdb.get_routers_by_provider(
context.session, nconst.ROUTER_PROVIDER_L3AGENT, router_ids) context.session, nconst.ROUTER_PROVIDER_L3AGENT, router_ids)
super(L3AgentNotifyAPI, self)._notification( super(L3AgentNotifyAPI, self)._notification(
context, method, router_ids, operation) context, method, router_ids, operation, shuffle_agents)
def load_driver(plugin, ofc_manager): def load_driver(plugin, ofc_manager):