vmware-nsx/neutron/db/l3_agentschedulers_db.py

468 lines
20 KiB
Python

# Copyright (c) 2013 OpenStack Foundation.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import datetime
import random
import time
from oslo.config import cfg
from oslo.db import exception as db_exc
import sqlalchemy as sa
from sqlalchemy import func
from sqlalchemy import orm
from sqlalchemy.orm import exc
from sqlalchemy.orm import joinedload
from neutron.common import constants
from neutron.common import utils as n_utils
from neutron import context as n_ctx
from neutron.db import agents_db
from neutron.db import agentschedulers_db
from neutron.db import model_base
from neutron.extensions import l3agentscheduler
from neutron import manager
from neutron.openstack.common.gettextutils import _LI, _LW
from neutron.openstack.common import log as logging
from neutron.openstack.common import loopingcall
from neutron.openstack.common import timeutils
LOG = logging.getLogger(__name__)
L3_AGENTS_SCHEDULER_OPTS = [
cfg.StrOpt('router_scheduler_driver',
default='neutron.scheduler.l3_agent_scheduler.ChanceScheduler',
help=_('Driver to use for scheduling '
'router to a default L3 agent')),
cfg.BoolOpt('router_auto_schedule', default=True,
help=_('Allow auto scheduling of routers to L3 agent.')),
cfg.BoolOpt('allow_automatic_l3agent_failover', default=False,
help=_('Automatically reschedule routers from offline L3 '
'agents to online L3 agents.')),
]
cfg.CONF.register_opts(L3_AGENTS_SCHEDULER_OPTS)
class RouterL3AgentBinding(model_base.BASEV2):
"""Represents binding between neutron routers and L3 agents."""
router_id = sa.Column(sa.String(36),
sa.ForeignKey("routers.id", ondelete='CASCADE'),
primary_key=True)
l3_agent = orm.relation(agents_db.Agent)
l3_agent_id = sa.Column(sa.String(36),
sa.ForeignKey("agents.id", ondelete='CASCADE'),
primary_key=True)
class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
agentschedulers_db.AgentSchedulerDbMixin):
"""Mixin class to add l3 agent scheduler extension to plugins
using the l3 agent for routing.
"""
router_scheduler = None
def start_periodic_agent_status_check(self):
if not cfg.CONF.allow_automatic_l3agent_failover:
LOG.info(_LI("Skipping period L3 agent status check because "
"automatic router rescheduling is disabled."))
return
self.periodic_agent_loop = loopingcall.FixedIntervalLoopingCall(
self.reschedule_routers_from_down_agents)
interval = max(cfg.CONF.agent_down_time / 2, 1)
# add random initial delay to allow agents to check in after the
# neutron server first starts. random to offset multiple servers
self.periodic_agent_loop.start(interval=interval,
initial_delay=random.randint(interval, interval * 2))
def reschedule_routers_from_down_agents(self):
"""Reschedule routers from down l3 agents if admin state is up."""
# give agents extra time to handle transient failures
agent_dead_limit = cfg.CONF.agent_down_time * 2
# check for an abrupt clock change since last check. if a change is
# detected, sleep for a while to let the agents check in.
tdelta = timeutils.utcnow() - getattr(self, '_clock_jump_canary',
timeutils.utcnow())
if timeutils.total_seconds(tdelta) > cfg.CONF.agent_down_time:
LOG.warn(_LW("Time since last L3 agent reschedule check has "
"exceeded the interval between checks. Waiting "
"before check to allow agents to send a heartbeat "
"in case there was a clock adjustment."))
time.sleep(agent_dead_limit)
self._clock_jump_canary = timeutils.utcnow()
context = n_ctx.get_admin_context()
cutoff = timeutils.utcnow() - datetime.timedelta(
seconds=agent_dead_limit)
down_bindings = (
context.session.query(RouterL3AgentBinding).
join(agents_db.Agent).
filter(agents_db.Agent.heartbeat_timestamp < cutoff,
agents_db.Agent.admin_state_up))
for binding in down_bindings:
LOG.warn(_LW("Rescheduling router %(router)s from agent %(agent)s "
"because the agent did not report to the server in "
"the last %(dead_time)s seconds."),
{'router': binding.router_id,
'agent': binding.l3_agent_id,
'dead_time': agent_dead_limit})
self.reschedule_router(context, binding.router_id)
def validate_agent_router_combination(self, context, agent, router):
"""Validate if the router can be correctly assigned to the agent.
:raises: RouterL3AgentMismatch if attempting to assign DVR router
to legacy agent, or centralized router to compute's L3 agents.
:raises: InvalidL3Agent if attempting to assign router to an
unsuitable agent (disabled, type != L3, incompatible configuration)
"""
is_distributed = router.get('distributed')
agent_conf = self.get_configuration_dict(agent)
agent_mode = agent_conf.get('agent_mode', 'legacy')
is_agent_router_types_incompatible = (
agent_mode == 'dvr' and not is_distributed
or agent_mode == 'legacy' and is_distributed
)
if is_agent_router_types_incompatible:
router_type = ('distributed' if is_distributed else 'centralized')
raise l3agentscheduler.RouterL3AgentMismatch(
router_type=router_type, router_id=router['id'],
agent_mode=agent_mode, agent_id=agent['id'])
is_wrong_type_or_unsuitable_agent = (
agent['agent_type'] != constants.AGENT_TYPE_L3 or
not agent['admin_state_up'] or
not self.get_l3_agent_candidates(context, router, [agent])
)
if is_wrong_type_or_unsuitable_agent:
raise l3agentscheduler.InvalidL3Agent(id=agent['id'])
def check_agent_router_scheduling_needed(self, context, agent, router):
"""Check if the router scheduling is needed.
:raises: RouterHostedByL3Agent if router is already assigned
to a different agent.
:returns: True if scheduling is needed, otherwise False
"""
router_id = router['id']
agent_id = agent['id']
query = context.session.query(RouterL3AgentBinding)
bindings = query.filter_by(router_id=router_id).all()
if not bindings:
return True
for binding in bindings:
if binding.l3_agent_id == agent_id:
# router already bound to the agent we need
return False
if router.get('distributed'):
return False
# non-dvr case: centralized router is already bound to some agent
raise l3agentscheduler.RouterHostedByL3Agent(
router_id=router_id,
agent_id=bindings[0].l3_agent_id)
def create_router_to_agent_binding(self, context, agent, router):
"""Create router to agent binding."""
router_id = router['id']
agent_id = agent['id']
if self.router_scheduler:
try:
self.router_scheduler.bind_router(context, router_id, agent)
except db_exc.DBError:
raise l3agentscheduler.RouterSchedulingFailed(
router_id=router_id, agent_id=agent_id)
def add_router_to_l3_agent(self, context, agent_id, router_id):
"""Add a l3 agent to host a router."""
with context.session.begin(subtransactions=True):
router = self.get_router(context, router_id)
agent = self._get_agent(context, agent_id)
self.validate_agent_router_combination(context, agent, router)
if self.check_agent_router_scheduling_needed(
context, agent, router):
self.create_router_to_agent_binding(context, agent, router)
else:
return
l3_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_L3)
if l3_notifier:
l3_notifier.router_added_to_agent(
context, [router_id], agent.host)
def remove_router_from_l3_agent(self, context, agent_id, router_id):
"""Remove the router from l3 agent.
After removal, the router will be non-hosted until there is update
which leads to re-schedule or be added to another agent manually.
"""
agent = self._get_agent(context, agent_id)
self._unbind_router(context, router_id, agent_id)
l3_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_L3)
if l3_notifier:
l3_notifier.router_removed_from_agent(
context, router_id, agent.host)
def _unbind_router(self, context, router_id, agent_id):
with context.session.begin(subtransactions=True):
query = context.session.query(RouterL3AgentBinding)
query = query.filter(
RouterL3AgentBinding.router_id == router_id,
RouterL3AgentBinding.l3_agent_id == agent_id)
try:
binding = query.one()
except exc.NoResultFound:
raise l3agentscheduler.RouterNotHostedByL3Agent(
router_id=router_id, agent_id=agent_id)
context.session.delete(binding)
def reschedule_router(self, context, router_id, candidates=None):
"""Reschedule router to a new l3 agent
Remove the router from the agent(s) currently hosting it and
schedule it again
"""
cur_agents = self.list_l3_agents_hosting_router(
context, router_id)['agents']
with context.session.begin(subtransactions=True):
for agent in cur_agents:
self._unbind_router(context, router_id, agent['id'])
new_agent = self.schedule_router(context, router_id,
candidates=candidates)
if not new_agent:
raise l3agentscheduler.RouterReschedulingFailed(
router_id=router_id)
l3_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_L3)
if l3_notifier:
for agent in cur_agents:
l3_notifier.router_removed_from_agent(
context, router_id, agent['host'])
l3_notifier.router_added_to_agent(
context, [router_id], new_agent.host)
def list_routers_on_l3_agent(self, context, agent_id):
query = context.session.query(RouterL3AgentBinding.router_id)
query = query.filter(RouterL3AgentBinding.l3_agent_id == agent_id)
router_ids = [item[0] for item in query]
if router_ids:
return {'routers':
self.get_routers(context, filters={'id': router_ids})}
else:
return {'routers': []}
def list_active_sync_routers_on_active_l3_agent(
self, context, host, router_ids):
agent = self._get_agent_by_type_and_host(
context, constants.AGENT_TYPE_L3, host)
if not agent.admin_state_up:
return []
query = context.session.query(RouterL3AgentBinding.router_id)
query = query.filter(
RouterL3AgentBinding.l3_agent_id == agent.id)
if router_ids:
query = query.filter(
RouterL3AgentBinding.router_id.in_(router_ids))
router_ids = [item[0] for item in query]
if router_ids:
return self.get_sync_data(context, router_ids=router_ids,
active=True)
else:
return []
def get_l3_agents_hosting_routers(self, context, router_ids,
admin_state_up=None,
active=None):
if not router_ids:
return []
query = context.session.query(RouterL3AgentBinding)
if len(router_ids) > 1:
query = query.options(joinedload('l3_agent')).filter(
RouterL3AgentBinding.router_id.in_(router_ids))
else:
query = query.options(joinedload('l3_agent')).filter(
RouterL3AgentBinding.router_id == router_ids[0])
if admin_state_up is not None:
query = (query.filter(agents_db.Agent.admin_state_up ==
admin_state_up))
l3_agents = [binding.l3_agent for binding in query]
if active is not None:
l3_agents = [l3_agent for l3_agent in
l3_agents if not
agents_db.AgentDbMixin.is_agent_down(
l3_agent['heartbeat_timestamp'])]
return l3_agents
def _get_l3_bindings_hosting_routers(self, context, router_ids):
if not router_ids:
return []
query = context.session.query(RouterL3AgentBinding)
if len(router_ids) > 1:
query = query.options(joinedload('l3_agent')).filter(
RouterL3AgentBinding.router_id.in_(router_ids))
else:
query = query.options(joinedload('l3_agent')).filter(
RouterL3AgentBinding.router_id == router_ids[0])
return query.all()
def list_l3_agents_hosting_router(self, context, router_id):
with context.session.begin(subtransactions=True):
bindings = self._get_l3_bindings_hosting_routers(
context, [router_id])
results = []
for binding in bindings:
l3_agent_dict = self._make_agent_dict(binding.l3_agent)
results.append(l3_agent_dict)
if results:
return {'agents': results}
else:
return {'agents': []}
def get_l3_agents(self, context, active=None, filters=None):
query = context.session.query(agents_db.Agent)
query = query.filter(
agents_db.Agent.agent_type == constants.AGENT_TYPE_L3)
if active is not None:
query = (query.filter(agents_db.Agent.admin_state_up == active))
if filters:
for key, value in filters.iteritems():
column = getattr(agents_db.Agent, key, None)
if column:
query = query.filter(column.in_(value))
return [l3_agent
for l3_agent in query
if agentschedulers_db.AgentSchedulerDbMixin.is_eligible_agent(
active, l3_agent)]
def check_ports_exist_on_l3agent(self, context, l3_agent, router_id,
subnet_id):
"""
This function checks for existence of dvr serviceable
ports on the host, running the input l3agent.
"""
if not subnet_id:
return True
core_plugin = manager.NeutronManager.get_plugin()
filter = {'fixed_ips': {'subnet_id': [subnet_id]}}
ports = core_plugin.get_ports(context, filters=filter)
for port in ports:
if (n_utils.is_dvr_serviced(port['device_owner']) and
l3_agent['host'] == port['binding:host_id']):
return True
return False
def get_snat_candidates(self, sync_router, l3_agents):
"""Get the valid snat enabled l3 agents for the distributed router."""
candidates = []
is_router_distributed = sync_router.get('distributed', False)
if not is_router_distributed:
return candidates
for l3_agent in l3_agents:
if not l3_agent.admin_state_up:
continue
agent_conf = self.get_configuration_dict(l3_agent)
agent_mode = agent_conf.get('agent_mode', 'legacy')
if agent_mode != 'dvr_snat':
continue
router_id = agent_conf.get('router_id', None)
use_namespaces = agent_conf.get('use_namespaces', True)
if not use_namespaces and router_id != sync_router['id']:
continue
handle_internal_only_routers = agent_conf.get(
'handle_internal_only_routers', True)
gateway_external_network_id = agent_conf.get(
'gateway_external_network_id', None)
ex_net_id = (sync_router['external_gateway_info'] or {}).get(
'network_id')
if ((not ex_net_id and not handle_internal_only_routers) or
(ex_net_id and gateway_external_network_id and
ex_net_id != gateway_external_network_id)):
continue
candidates.append(l3_agent)
return candidates
def get_l3_agent_candidates(self, context, sync_router, l3_agents,
subnet_id=None):
"""Get the valid l3 agents for the router from a list of l3_agents."""
candidates = []
for l3_agent in l3_agents:
if not l3_agent.admin_state_up:
continue
agent_conf = self.get_configuration_dict(l3_agent)
router_id = agent_conf.get('router_id', None)
use_namespaces = agent_conf.get('use_namespaces', True)
handle_internal_only_routers = agent_conf.get(
'handle_internal_only_routers', True)
gateway_external_network_id = agent_conf.get(
'gateway_external_network_id', None)
agent_mode = agent_conf.get('agent_mode', 'legacy')
if not use_namespaces and router_id != sync_router['id']:
continue
ex_net_id = (sync_router['external_gateway_info'] or {}).get(
'network_id')
if ((not ex_net_id and not handle_internal_only_routers) or
(ex_net_id and gateway_external_network_id and
ex_net_id != gateway_external_network_id)):
continue
is_router_distributed = sync_router.get('distributed', False)
if agent_mode in ('legacy', 'dvr_snat') and (
not is_router_distributed):
candidates.append(l3_agent)
elif is_router_distributed and agent_mode.startswith('dvr') and (
self.check_ports_exist_on_l3agent(
context, l3_agent, sync_router['id'], subnet_id)):
candidates.append(l3_agent)
return candidates
def auto_schedule_routers(self, context, host, router_ids):
if self.router_scheduler:
return self.router_scheduler.auto_schedule_routers(
self, context, host, router_ids)
def schedule_router(self, context, router, candidates=None, hints=None):
if self.router_scheduler:
return self.router_scheduler.schedule(
self, context, router, candidates=candidates, hints=hints)
def schedule_routers(self, context, routers, hints=None):
"""Schedule the routers to l3 agents."""
for router in routers:
self.schedule_router(context, router, candidates=None, hints=hints)
def get_l3_agent_with_min_routers(self, context, agent_ids):
"""Return l3 agent with the least number of routers."""
query = context.session.query(
agents_db.Agent,
func.count(
RouterL3AgentBinding.router_id
).label('count')).outerjoin(RouterL3AgentBinding).group_by(
RouterL3AgentBinding.l3_agent_id).order_by('count')
res = query.filter(agents_db.Agent.id.in_(agent_ids)).first()
return res[0]