8548f110ac
Commit I5b804e09e630d88d551271d9731cc1f65c065259 changed/removed some of the methods in CommonDbMixin. As a result the way the plugins register the different extend-dict methods has changed, and now uses a decorator. Also those extend-dict callbacks are static methods which do not receive 'self', and this caused some additional changes. Change-Id: If99da0ea1e37792bd531ef92b0bbb880d2b05b8a Depends-on: I5b804e09e630d88d551271d9731cc1f65c065259
689 lines
30 KiB
Python
689 lines
30 KiB
Python
# Copyright 2013 VMware, Inc.
|
|
# All Rights Reserved
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import copy
|
|
import random
|
|
|
|
from neutron_lib import constants
|
|
from neutron_lib import context as n_context
|
|
from neutron_lib import exceptions
|
|
from oslo_log import log
|
|
from oslo_serialization import jsonutils
|
|
from oslo_service import loopingcall
|
|
from oslo_utils import timeutils
|
|
import six
|
|
|
|
from neutron.db import _model_query as model_query
|
|
from neutron.db import api as db_api
|
|
from neutron.db.models import external_net as external_net_db
|
|
from neutron.db.models import l3 as l3_db
|
|
from neutron.db import models_v2
|
|
from neutron.extensions import l3
|
|
|
|
from vmware_nsx._i18n import _
|
|
from vmware_nsx.api_client import exception as api_exc
|
|
from vmware_nsx.common import exceptions as nsx_exc
|
|
from vmware_nsx.common import nsx_utils
|
|
from vmware_nsx.nsxlib import mh as nsxlib
|
|
from vmware_nsx.nsxlib.mh import router as routerlib
|
|
from vmware_nsx.nsxlib.mh import switch as switchlib
|
|
|
|
# Maximum page size for a single request
|
|
# NOTE(salv-orlando): This might become a version-dependent map should the
|
|
# limit be raised in future versions
|
|
MAX_PAGE_SIZE = 5000
|
|
|
|
LOG = log.getLogger(__name__)
|
|
|
|
|
|
class NsxCache(object):
|
|
"""A simple Cache for NSX resources.
|
|
|
|
Associates resource id with resource hash to rapidly identify
|
|
updated resources.
|
|
Each entry in the cache also stores the following information:
|
|
- changed: the resource in the cache has been altered following
|
|
an update or a delete
|
|
- hit: the resource has been visited during an update (and possibly
|
|
left unchanged)
|
|
- data: current resource data
|
|
- data_bk: backup of resource data prior to its removal
|
|
"""
|
|
|
|
def __init__(self):
|
|
# Maps an uuid to the dict containing it
|
|
self._uuid_dict_mappings = {}
|
|
# Dicts for NSX cached resources
|
|
self._lswitches = {}
|
|
self._lswitchports = {}
|
|
self._lrouters = {}
|
|
|
|
def __getitem__(self, key):
|
|
# uuids are unique across the various types of resources
|
|
# TODO(salv-orlando): Avoid lookups over all dictionaries
|
|
# when retrieving items
|
|
# Fetch lswitches, lports, or lrouters
|
|
resources = self._uuid_dict_mappings[key]
|
|
return resources[key]
|
|
|
|
def _clear_changed_flag_and_remove_from_cache(self, resources):
|
|
# Clear the 'changed' attribute for all items
|
|
# NOTE(arosen): the copy.copy is to avoid: 'RuntimeError:
|
|
# dictionary changed size during iteration' for py3
|
|
|
|
for uuid, item in copy.copy(resources).items():
|
|
if item.pop('changed', None) and not item.get('data'):
|
|
# The item is not anymore in NSX, so delete it
|
|
del resources[uuid]
|
|
del self._uuid_dict_mappings[uuid]
|
|
LOG.debug("Removed item %s from NSX object cache", uuid)
|
|
|
|
def _update_resources(self, resources, new_resources, clear_changed=True):
|
|
if clear_changed:
|
|
self._clear_changed_flag_and_remove_from_cache(resources)
|
|
|
|
def do_hash(item):
|
|
return hash(jsonutils.dumps(item))
|
|
|
|
# Parse new data and identify new, deleted, and updated resources
|
|
for item in new_resources:
|
|
item_id = item['uuid']
|
|
if resources.get(item_id):
|
|
new_hash = do_hash(item)
|
|
if new_hash != resources[item_id]['hash']:
|
|
resources[item_id]['hash'] = new_hash
|
|
resources[item_id]['changed'] = True
|
|
resources[item_id]['data_bk'] = (
|
|
resources[item_id]['data'])
|
|
resources[item_id]['data'] = item
|
|
# Mark the item as hit in any case
|
|
resources[item_id]['hit'] = True
|
|
LOG.debug("Updating item %s in NSX object cache", item_id)
|
|
else:
|
|
resources[item_id] = {'hash': do_hash(item)}
|
|
resources[item_id]['hit'] = True
|
|
resources[item_id]['changed'] = True
|
|
resources[item_id]['data'] = item
|
|
# add an uuid to dict mapping for easy retrieval
|
|
# with __getitem__
|
|
self._uuid_dict_mappings[item_id] = resources
|
|
LOG.debug("Added item %s to NSX object cache", item_id)
|
|
|
|
def _delete_resources(self, resources):
|
|
# Mark for removal all the elements which have not been visited.
|
|
# And clear the 'hit' attribute.
|
|
for to_delete in [k for (k, v) in six.iteritems(resources)
|
|
if not v.pop('hit', False)]:
|
|
resources[to_delete]['changed'] = True
|
|
resources[to_delete]['data_bk'] = (
|
|
resources[to_delete].pop('data', None))
|
|
|
|
def _get_resource_ids(self, resources, changed_only):
|
|
if changed_only:
|
|
return [k for (k, v) in six.iteritems(resources)
|
|
if v.get('changed')]
|
|
return resources.keys()
|
|
|
|
def get_lswitches(self, changed_only=False):
|
|
return self._get_resource_ids(self._lswitches, changed_only)
|
|
|
|
def get_lrouters(self, changed_only=False):
|
|
return self._get_resource_ids(self._lrouters, changed_only)
|
|
|
|
def get_lswitchports(self, changed_only=False):
|
|
return self._get_resource_ids(self._lswitchports, changed_only)
|
|
|
|
def update_lswitch(self, lswitch):
|
|
self._update_resources(self._lswitches, [lswitch], clear_changed=False)
|
|
|
|
def update_lrouter(self, lrouter):
|
|
self._update_resources(self._lrouters, [lrouter], clear_changed=False)
|
|
|
|
def update_lswitchport(self, lswitchport):
|
|
self._update_resources(self._lswitchports, [lswitchport],
|
|
clear_changed=False)
|
|
|
|
def process_updates(self, lswitches=None,
|
|
lrouters=None, lswitchports=None):
|
|
self._update_resources(self._lswitches, lswitches)
|
|
self._update_resources(self._lrouters, lrouters)
|
|
self._update_resources(self._lswitchports, lswitchports)
|
|
return (self._get_resource_ids(self._lswitches, changed_only=True),
|
|
self._get_resource_ids(self._lrouters, changed_only=True),
|
|
self._get_resource_ids(self._lswitchports, changed_only=True))
|
|
|
|
def process_deletes(self):
|
|
self._delete_resources(self._lswitches)
|
|
self._delete_resources(self._lrouters)
|
|
self._delete_resources(self._lswitchports)
|
|
return (self._get_resource_ids(self._lswitches, changed_only=True),
|
|
self._get_resource_ids(self._lrouters, changed_only=True),
|
|
self._get_resource_ids(self._lswitchports, changed_only=True))
|
|
|
|
|
|
class SyncParameters(object):
|
|
"""Defines attributes used by the synchronization procedure.
|
|
|
|
chunk_size: Actual chunk size
|
|
extra_chunk_size: Additional data to fetch because of chunk size
|
|
adjustment
|
|
current_chunk: Counter of the current data chunk being synchronized
|
|
Page cursors: markers for the next resource to fetch.
|
|
'start' means page cursor unset for fetching 1st page
|
|
init_sync_performed: True if the initial synchronization concluded
|
|
"""
|
|
|
|
def __init__(self, min_chunk_size):
|
|
self.chunk_size = min_chunk_size
|
|
self.extra_chunk_size = 0
|
|
self.current_chunk = 0
|
|
self.ls_cursor = 'start'
|
|
self.lr_cursor = 'start'
|
|
self.lp_cursor = 'start'
|
|
self.init_sync_performed = False
|
|
self.total_size = 0
|
|
|
|
|
|
def _start_loopingcall(min_chunk_size, state_sync_interval, func,
|
|
initial_delay=5):
|
|
"""Start a loopingcall for the synchronization task."""
|
|
# Start a looping call to synchronize operational status
|
|
# for neutron resources
|
|
if not state_sync_interval:
|
|
# do not start the looping call if specified
|
|
# sync interval is 0
|
|
return
|
|
state_synchronizer = loopingcall.DynamicLoopingCall(
|
|
func, sp=SyncParameters(min_chunk_size))
|
|
state_synchronizer.start(
|
|
initial_delay=initial_delay,
|
|
periodic_interval_max=state_sync_interval)
|
|
return state_synchronizer
|
|
|
|
|
|
class NsxSynchronizer(object):
|
|
|
|
LS_URI = nsxlib._build_uri_path(
|
|
switchlib.LSWITCH_RESOURCE, fields='uuid,tags,fabric_status',
|
|
relations='LogicalSwitchStatus')
|
|
LR_URI = nsxlib._build_uri_path(
|
|
routerlib.LROUTER_RESOURCE, fields='uuid,tags,fabric_status',
|
|
relations='LogicalRouterStatus')
|
|
LP_URI = nsxlib._build_uri_path(
|
|
switchlib.LSWITCHPORT_RESOURCE,
|
|
parent_resource_id='*',
|
|
fields='uuid,tags,fabric_status_up',
|
|
relations='LogicalPortStatus')
|
|
|
|
def __init__(self, plugin, cluster, state_sync_interval,
|
|
req_delay, min_chunk_size, max_rand_delay=0,
|
|
initial_delay=5):
|
|
random.seed()
|
|
self._nsx_cache = NsxCache()
|
|
# Store parameters as instance members
|
|
# NOTE(salv-orlando): apologies if it looks java-ish
|
|
self._plugin = plugin
|
|
self._cluster = cluster
|
|
self._req_delay = req_delay
|
|
self._sync_interval = state_sync_interval
|
|
self._max_rand_delay = max_rand_delay
|
|
# Validate parameters
|
|
if self._sync_interval < self._req_delay:
|
|
err_msg = (_("Minimum request delay:%(req_delay)s must not "
|
|
"exceed synchronization interval:%(sync_interval)s") %
|
|
{'req_delay': self._req_delay,
|
|
'sync_interval': self._sync_interval})
|
|
LOG.error(err_msg)
|
|
raise nsx_exc.NsxPluginException(err_msg=err_msg)
|
|
# Backoff time in case of failures while fetching sync data
|
|
self._sync_backoff = 1
|
|
# Store the looping call in an instance variable to allow unit tests
|
|
# for controlling its lifecycle
|
|
self._sync_looping_call = _start_loopingcall(
|
|
min_chunk_size, state_sync_interval,
|
|
self._synchronize_state, initial_delay=initial_delay)
|
|
|
|
def _get_tag_dict(self, tags):
|
|
return dict((tag.get('scope'), tag['tag']) for tag in tags)
|
|
|
|
def synchronize_network(self, context, neutron_network_data,
|
|
lswitches=None):
|
|
"""Synchronize a Neutron network with its NSX counterpart.
|
|
|
|
This routine synchronizes a set of switches when a Neutron
|
|
network is mapped to multiple lswitches.
|
|
"""
|
|
if not lswitches:
|
|
# Try to get logical switches from nsx
|
|
try:
|
|
lswitches = nsx_utils.fetch_nsx_switches(
|
|
context.session, self._cluster,
|
|
neutron_network_data['id'])
|
|
except exceptions.NetworkNotFound:
|
|
# TODO(salv-orlando): We should be catching
|
|
# api_exc.ResourceNotFound here
|
|
# The logical switch was not found
|
|
LOG.warning("Logical switch for neutron network %s not "
|
|
"found on NSX.", neutron_network_data['id'])
|
|
lswitches = []
|
|
else:
|
|
for lswitch in lswitches:
|
|
self._nsx_cache.update_lswitch(lswitch)
|
|
# By default assume things go wrong
|
|
status = constants.NET_STATUS_ERROR
|
|
# In most cases lswitches will contain a single element
|
|
for ls in lswitches:
|
|
if not ls:
|
|
# Logical switch was deleted
|
|
break
|
|
ls_status = ls['_relations']['LogicalSwitchStatus']
|
|
if not ls_status['fabric_status']:
|
|
status = constants.NET_STATUS_DOWN
|
|
break
|
|
else:
|
|
# No switch was down or missing. Set status to ACTIVE unless
|
|
# there were no switches in the first place!
|
|
if lswitches:
|
|
status = constants.NET_STATUS_ACTIVE
|
|
# Update db object
|
|
if status == neutron_network_data['status']:
|
|
# do nothing
|
|
return
|
|
|
|
with db_api.context_manager.writer.using(context):
|
|
try:
|
|
network = self._plugin._get_network(context,
|
|
neutron_network_data['id'])
|
|
except exceptions.NetworkNotFound:
|
|
pass
|
|
else:
|
|
network.status = status
|
|
LOG.debug("Updating status for neutron resource %(q_id)s to:"
|
|
" %(status)s",
|
|
{'q_id': neutron_network_data['id'],
|
|
'status': status})
|
|
|
|
def _synchronize_lswitches(self, ctx, ls_uuids, scan_missing=False):
|
|
if not ls_uuids and not scan_missing:
|
|
return
|
|
neutron_net_ids = set()
|
|
neutron_nsx_mappings = {}
|
|
# TODO(salvatore-orlando): Deal with the case the tag
|
|
# has been tampered with
|
|
for ls_uuid in ls_uuids:
|
|
# If the lswitch has been deleted, get backup copy of data
|
|
lswitch = (self._nsx_cache[ls_uuid].get('data') or
|
|
self._nsx_cache[ls_uuid].get('data_bk'))
|
|
tags = self._get_tag_dict(lswitch['tags'])
|
|
neutron_id = tags.get('quantum_net_id')
|
|
neutron_net_ids.add(neutron_id)
|
|
neutron_nsx_mappings[neutron_id] = (
|
|
neutron_nsx_mappings.get(neutron_id, []) +
|
|
[self._nsx_cache[ls_uuid]])
|
|
# Fetch neutron networks from database
|
|
filters = {'router:external': [False]}
|
|
if not scan_missing:
|
|
filters['id'] = neutron_net_ids
|
|
|
|
networks = model_query.get_collection(
|
|
ctx, models_v2.Network, self._plugin._make_network_dict,
|
|
filters=filters)
|
|
|
|
for network in networks:
|
|
lswitches = neutron_nsx_mappings.get(network['id'], [])
|
|
lswitches = [lsw.get('data') for lsw in lswitches]
|
|
self.synchronize_network(ctx, network, lswitches)
|
|
|
|
def synchronize_router(self, context, neutron_router_data,
|
|
lrouter=None):
|
|
"""Synchronize a neutron router with its NSX counterpart."""
|
|
if not lrouter:
|
|
# Try to get router from nsx
|
|
try:
|
|
# This query will return the logical router status too
|
|
nsx_router_id = nsx_utils.get_nsx_router_id(
|
|
context.session, self._cluster, neutron_router_data['id'])
|
|
if nsx_router_id:
|
|
lrouter = routerlib.get_lrouter(
|
|
self._cluster, nsx_router_id)
|
|
except exceptions.NotFound:
|
|
# NOTE(salv-orlando): We should be catching
|
|
# api_exc.ResourceNotFound here
|
|
# The logical router was not found
|
|
LOG.warning("Logical router for neutron router %s not "
|
|
"found on NSX.", neutron_router_data['id'])
|
|
if lrouter:
|
|
# Update the cache
|
|
self._nsx_cache.update_lrouter(lrouter)
|
|
|
|
# Note(salv-orlando): It might worth adding a check to verify neutron
|
|
# resource tag in nsx entity matches a Neutron id.
|
|
# By default assume things go wrong
|
|
status = constants.NET_STATUS_ERROR
|
|
if lrouter:
|
|
lr_status = (lrouter['_relations']
|
|
['LogicalRouterStatus']
|
|
['fabric_status'])
|
|
status = (lr_status and
|
|
constants.NET_STATUS_ACTIVE
|
|
or constants.NET_STATUS_DOWN)
|
|
# Update db object
|
|
if status == neutron_router_data['status']:
|
|
# do nothing
|
|
return
|
|
|
|
with db_api.context_manager.writer.using(context):
|
|
try:
|
|
router = self._plugin._get_router(context,
|
|
neutron_router_data['id'])
|
|
except l3.RouterNotFound:
|
|
pass
|
|
else:
|
|
router.status = status
|
|
LOG.debug("Updating status for neutron resource %(q_id)s to:"
|
|
" %(status)s",
|
|
{'q_id': neutron_router_data['id'],
|
|
'status': status})
|
|
|
|
def _synchronize_lrouters(self, ctx, lr_uuids, scan_missing=False):
|
|
if not lr_uuids and not scan_missing:
|
|
return
|
|
# TODO(salvatore-orlando): Deal with the case the tag
|
|
# has been tampered with
|
|
neutron_router_mappings = {}
|
|
for lr_uuid in lr_uuids:
|
|
lrouter = (self._nsx_cache[lr_uuid].get('data') or
|
|
self._nsx_cache[lr_uuid].get('data_bk'))
|
|
tags = self._get_tag_dict(lrouter['tags'])
|
|
neutron_router_id = tags.get('q_router_id')
|
|
if neutron_router_id:
|
|
neutron_router_mappings[neutron_router_id] = (
|
|
self._nsx_cache[lr_uuid])
|
|
else:
|
|
LOG.warning("Unable to find Neutron router id for "
|
|
"NSX logical router: %s", lr_uuid)
|
|
# Fetch neutron routers from database
|
|
filters = ({} if scan_missing else
|
|
{'id': neutron_router_mappings.keys()})
|
|
routers = model_query.get_collection(
|
|
ctx, l3_db.Router, self._plugin._make_router_dict,
|
|
filters=filters)
|
|
for router in routers:
|
|
lrouter = neutron_router_mappings.get(router['id'])
|
|
self.synchronize_router(
|
|
ctx, router, lrouter and lrouter.get('data'))
|
|
|
|
def synchronize_port(self, context, neutron_port_data,
|
|
lswitchport=None, ext_networks=None):
|
|
"""Synchronize a Neutron port with its NSX counterpart."""
|
|
# Skip synchronization for ports on external networks
|
|
if not ext_networks:
|
|
ext_networks = [net['id'] for net in context.session.query(
|
|
models_v2.Network).join(
|
|
external_net_db.ExternalNetwork,
|
|
(models_v2.Network.id ==
|
|
external_net_db.ExternalNetwork.network_id))]
|
|
if neutron_port_data['network_id'] in ext_networks:
|
|
with db_api.context_manager.writer.using(context):
|
|
neutron_port_data['status'] = constants.PORT_STATUS_ACTIVE
|
|
return
|
|
|
|
if not lswitchport:
|
|
# Try to get port from nsx
|
|
try:
|
|
ls_uuid, lp_uuid = nsx_utils.get_nsx_switch_and_port_id(
|
|
context.session, self._cluster, neutron_port_data['id'])
|
|
if lp_uuid:
|
|
lswitchport = switchlib.get_port(
|
|
self._cluster, ls_uuid, lp_uuid,
|
|
relations='LogicalPortStatus')
|
|
except (exceptions.PortNotFoundOnNetwork):
|
|
# NOTE(salv-orlando): We should be catching
|
|
# api_exc.ResourceNotFound here instead
|
|
# of PortNotFoundOnNetwork when the id exists but
|
|
# the logical switch port was not found
|
|
LOG.warning("Logical switch port for neutron port %s "
|
|
"not found on NSX.", neutron_port_data['id'])
|
|
lswitchport = None
|
|
else:
|
|
# If lswitchport is not None, update the cache.
|
|
# It could be none if the port was deleted from the backend
|
|
if lswitchport:
|
|
self._nsx_cache.update_lswitchport(lswitchport)
|
|
# Note(salv-orlando): It might worth adding a check to verify neutron
|
|
# resource tag in nsx entity matches Neutron id.
|
|
# By default assume things go wrong
|
|
status = constants.PORT_STATUS_ERROR
|
|
if lswitchport:
|
|
lp_status = (lswitchport['_relations']
|
|
['LogicalPortStatus']
|
|
['fabric_status_up'])
|
|
status = (lp_status and
|
|
constants.PORT_STATUS_ACTIVE
|
|
or constants.PORT_STATUS_DOWN)
|
|
|
|
# Update db object
|
|
if status == neutron_port_data['status']:
|
|
# do nothing
|
|
return
|
|
|
|
with db_api.context_manager.writer.using(context):
|
|
try:
|
|
port = self._plugin._get_port(context,
|
|
neutron_port_data['id'])
|
|
except exceptions.PortNotFound:
|
|
pass
|
|
else:
|
|
port.status = status
|
|
LOG.debug("Updating status for neutron resource %(q_id)s to:"
|
|
" %(status)s",
|
|
{'q_id': neutron_port_data['id'],
|
|
'status': status})
|
|
|
|
def _synchronize_lswitchports(self, ctx, lp_uuids, scan_missing=False):
|
|
if not lp_uuids and not scan_missing:
|
|
return
|
|
# Find Neutron port id by tag - the tag is already
|
|
# loaded in memory, no reason for doing a db query
|
|
# TODO(salvatore-orlando): Deal with the case the tag
|
|
# has been tampered with
|
|
neutron_port_mappings = {}
|
|
for lp_uuid in lp_uuids:
|
|
lport = (self._nsx_cache[lp_uuid].get('data') or
|
|
self._nsx_cache[lp_uuid].get('data_bk'))
|
|
tags = self._get_tag_dict(lport['tags'])
|
|
neutron_port_id = tags.get('q_port_id')
|
|
if neutron_port_id:
|
|
neutron_port_mappings[neutron_port_id] = (
|
|
self._nsx_cache[lp_uuid])
|
|
# Fetch neutron ports from database
|
|
# At the first sync we need to fetch all ports
|
|
filters = ({} if scan_missing else
|
|
{'id': neutron_port_mappings.keys()})
|
|
# TODO(salv-orlando): Work out a solution for avoiding
|
|
# this query
|
|
ext_nets = [net['id'] for net in ctx.session.query(
|
|
models_v2.Network).join(
|
|
external_net_db.ExternalNetwork,
|
|
(models_v2.Network.id ==
|
|
external_net_db.ExternalNetwork.network_id))]
|
|
ports = model_query.get_collection(
|
|
ctx, models_v2.Port, self._plugin._make_port_dict,
|
|
filters=filters)
|
|
for port in ports:
|
|
lswitchport = neutron_port_mappings.get(port['id'])
|
|
self.synchronize_port(
|
|
ctx, port, lswitchport and lswitchport.get('data'),
|
|
ext_networks=ext_nets)
|
|
|
|
def _get_chunk_size(self, sp):
|
|
# NOTE(salv-orlando): Try to use __future__ for this routine only?
|
|
ratio = ((float(sp.total_size) / float(sp.chunk_size)) /
|
|
(float(self._sync_interval) / float(self._req_delay)))
|
|
new_size = max(1.0, ratio) * float(sp.chunk_size)
|
|
return int(new_size) + (new_size - int(new_size) > 0)
|
|
|
|
def _fetch_data(self, uri, cursor, page_size):
|
|
# If not cursor there is nothing to retrieve
|
|
if cursor:
|
|
if cursor == 'start':
|
|
cursor = None
|
|
# Chunk size tuning might, in some conditions, make it larger
|
|
# than 5,000, which is the maximum page size allowed by the NSX
|
|
# API. In this case the request should be split in multiple
|
|
# requests. This is not ideal, and therefore a log warning will
|
|
# be emitted.
|
|
num_requests = page_size // (MAX_PAGE_SIZE + 1) + 1
|
|
if num_requests > 1:
|
|
LOG.warning("Requested page size is %(cur_chunk_size)d. "
|
|
"It might be necessary to do %(num_requests)d "
|
|
"round-trips to NSX for fetching data. Please "
|
|
"tune sync parameters to ensure chunk size "
|
|
"is less than %(max_page_size)d",
|
|
{'cur_chunk_size': page_size,
|
|
'num_requests': num_requests,
|
|
'max_page_size': MAX_PAGE_SIZE})
|
|
# Only the first request might return the total size,
|
|
# subsequent requests will definitely not
|
|
results, cursor, total_size = nsxlib.get_single_query_page(
|
|
uri, self._cluster, cursor,
|
|
min(page_size, MAX_PAGE_SIZE))
|
|
for _req in range(num_requests - 1):
|
|
# If no cursor is returned break the cycle as there is no
|
|
# actual need to perform multiple requests (all fetched)
|
|
# This happens when the overall size of resources exceeds
|
|
# the maximum page size, but the number for each single
|
|
# resource type is below this threshold
|
|
if not cursor:
|
|
break
|
|
req_results, cursor = nsxlib.get_single_query_page(
|
|
uri, self._cluster, cursor,
|
|
min(page_size, MAX_PAGE_SIZE))[:2]
|
|
results.extend(req_results)
|
|
# reset cursor before returning if we queried just to
|
|
# know the number of entities
|
|
return results, cursor if page_size else 'start', total_size
|
|
return [], cursor, None
|
|
|
|
def _fetch_nsx_data_chunk(self, sp):
|
|
base_chunk_size = sp.chunk_size
|
|
chunk_size = base_chunk_size + sp.extra_chunk_size
|
|
LOG.info("Fetching up to %s resources "
|
|
"from NSX backend", chunk_size)
|
|
fetched = ls_count = lr_count = lp_count = 0
|
|
lswitches = lrouters = lswitchports = []
|
|
if sp.ls_cursor or sp.ls_cursor == 'start':
|
|
(lswitches, sp.ls_cursor, ls_count) = self._fetch_data(
|
|
self.LS_URI, sp.ls_cursor, chunk_size)
|
|
fetched = len(lswitches)
|
|
if fetched < chunk_size and sp.lr_cursor or sp.lr_cursor == 'start':
|
|
(lrouters, sp.lr_cursor, lr_count) = self._fetch_data(
|
|
self.LR_URI, sp.lr_cursor, max(chunk_size - fetched, 0))
|
|
fetched += len(lrouters)
|
|
if fetched < chunk_size and sp.lp_cursor or sp.lp_cursor == 'start':
|
|
(lswitchports, sp.lp_cursor, lp_count) = self._fetch_data(
|
|
self.LP_URI, sp.lp_cursor, max(chunk_size - fetched, 0))
|
|
fetched += len(lswitchports)
|
|
if sp.current_chunk == 0:
|
|
# No cursors were provided. Then it must be possible to
|
|
# calculate the total amount of data to fetch
|
|
sp.total_size = ls_count + lr_count + lp_count
|
|
LOG.debug("Total data size: %d", sp.total_size)
|
|
sp.chunk_size = self._get_chunk_size(sp)
|
|
# Calculate chunk size adjustment
|
|
sp.extra_chunk_size = sp.chunk_size - base_chunk_size
|
|
LOG.debug("Fetched %(num_lswitches)d logical switches, "
|
|
"%(num_lswitchports)d logical switch ports,"
|
|
"%(num_lrouters)d logical routers",
|
|
{'num_lswitches': len(lswitches),
|
|
'num_lswitchports': len(lswitchports),
|
|
'num_lrouters': len(lrouters)})
|
|
return (lswitches, lrouters, lswitchports)
|
|
|
|
def _synchronize_state(self, sp):
|
|
# If the plugin has been destroyed, stop the LoopingCall
|
|
if not self._plugin:
|
|
raise loopingcall.LoopingCallDone()
|
|
start = timeutils.utcnow()
|
|
# Reset page cursor variables if necessary
|
|
if sp.current_chunk == 0:
|
|
sp.ls_cursor = sp.lr_cursor = sp.lp_cursor = 'start'
|
|
LOG.info("Running state synchronization task. Chunk: %s",
|
|
sp.current_chunk)
|
|
# Fetch chunk_size data from NSX
|
|
try:
|
|
(lswitches, lrouters, lswitchports) = (
|
|
self._fetch_nsx_data_chunk(sp))
|
|
except (api_exc.RequestTimeout, api_exc.NsxApiException):
|
|
sleep_interval = self._sync_backoff
|
|
# Cap max back off to 64 seconds
|
|
self._sync_backoff = min(self._sync_backoff * 2, 64)
|
|
LOG.exception("An error occurred while communicating with "
|
|
"NSX backend. Will retry synchronization "
|
|
"in %d seconds", sleep_interval)
|
|
return sleep_interval
|
|
LOG.debug("Time elapsed querying NSX: %s",
|
|
timeutils.utcnow() - start)
|
|
if sp.total_size:
|
|
num_chunks = ((sp.total_size / sp.chunk_size) +
|
|
(sp.total_size % sp.chunk_size != 0))
|
|
else:
|
|
num_chunks = 1
|
|
LOG.debug("Number of chunks: %d", num_chunks)
|
|
# Find objects which have changed on NSX side and need
|
|
# to be synchronized
|
|
LOG.debug("Processing NSX cache for updated objects")
|
|
(ls_uuids, lr_uuids, lp_uuids) = self._nsx_cache.process_updates(
|
|
lswitches, lrouters, lswitchports)
|
|
# Process removed objects only at the last chunk
|
|
scan_missing = (sp.current_chunk == num_chunks - 1 and
|
|
not sp.init_sync_performed)
|
|
if sp.current_chunk == num_chunks - 1:
|
|
LOG.debug("Processing NSX cache for deleted objects")
|
|
self._nsx_cache.process_deletes()
|
|
ls_uuids = self._nsx_cache.get_lswitches(
|
|
changed_only=not scan_missing)
|
|
lr_uuids = self._nsx_cache.get_lrouters(
|
|
changed_only=not scan_missing)
|
|
lp_uuids = self._nsx_cache.get_lswitchports(
|
|
changed_only=not scan_missing)
|
|
LOG.debug("Time elapsed hashing data: %s",
|
|
timeutils.utcnow() - start)
|
|
# Get an admin context
|
|
ctx = n_context.get_admin_context()
|
|
# Synchronize with database
|
|
self._synchronize_lswitches(ctx, ls_uuids,
|
|
scan_missing=scan_missing)
|
|
self._synchronize_lrouters(ctx, lr_uuids,
|
|
scan_missing=scan_missing)
|
|
self._synchronize_lswitchports(ctx, lp_uuids,
|
|
scan_missing=scan_missing)
|
|
# Increase chunk counter
|
|
LOG.info("Synchronization for chunk %(chunk_num)d of "
|
|
"%(total_chunks)d performed",
|
|
{'chunk_num': sp.current_chunk + 1,
|
|
'total_chunks': num_chunks})
|
|
sp.current_chunk = (sp.current_chunk + 1) % num_chunks
|
|
added_delay = 0
|
|
if sp.current_chunk == 0:
|
|
# Ensure init_sync_performed is True
|
|
if not sp.init_sync_performed:
|
|
sp.init_sync_performed = True
|
|
# Add additional random delay
|
|
added_delay = random.randint(0, self._max_rand_delay)
|
|
LOG.debug("Time elapsed at end of sync: %s",
|
|
timeutils.utcnow() - start)
|
|
return self._sync_interval / num_chunks + added_delay
|