NSXv: admin util metadata breakage recovery
Due to a Neutron bug, some metadata components are missing from various backend Edge appliances. This patch adds an admin utility command that restores them. The command can run per Edge, per AZ or for the whole cloud. Cases handled by the utility: - The internal IPs of the existing metadata proxies differ from the IPs defined in the Edge's loadbalancer object. This can happen when the metadata proxies are recreated for some reason. - The Edge appliance lacks the metadata network connectivity and the loadbalancer objects. This can happen when a router or DHCP edge was created by the Neutron parent process, which failed to initialize metadata due to a bug. - The Edge is missing the metadata firewall rules. This can happen when the first interface attachment to the router was done in the Neutron parent process context, due to the bug described above. Command syntax: Update AZ: nsxadmin -r metadata -o nsx-update --property az-name=az123 Update single Edge appliance: nsxadmin -r metadata -o nsx-update --property edge-id=edge-15 Update entire cloud: nsxadmin -r metadata -o nsx-update Change-Id: I77de9e0a0c627e43d3b1c95573d151e0414a34a9
This commit is contained in:
parent
586f4d0f1d
commit
0e97278c8a
@ -287,10 +287,18 @@ Security Groups, Firewall and Spoofguard
|
||||
Metadata
|
||||
~~~~~~~~
|
||||
|
||||
- Update loadbalancer members on router and DHCP edges::
|
||||
- Update metadata infrastructure on all router and DHCP edges::
|
||||
|
||||
nsxadmin -r metadata -o nsx-update
|
||||
|
||||
- Update metadata infrastructure on availability zone's router and DHCP edges::
|
||||
|
||||
nsxadmin -r metadata -o nsx-update --property az-name=az123
|
||||
|
||||
- Update metadata infrastructure on specific router or DHCP edge::
|
||||
|
||||
nsxadmin -r metadata -o nsx-update --property edge-id=edge-15
|
||||
|
||||
- Update shared secret on router and DHCP edges::
|
||||
|
||||
nsxadmin -r metadata -o nsx-update-secret
|
||||
|
@ -29,6 +29,7 @@ from vmware_nsx.plugins.nsx_v import availability_zones as nsx_az
|
||||
from vmware_nsx.plugins.nsx_v import md_proxy
|
||||
from vmware_nsx.plugins.nsx_v.vshield.common import constants as vcns_constants
|
||||
from vmware_nsx.plugins.nsx_v.vshield import nsxv_loadbalancer as nsxv_lb
|
||||
from vmware_nsx.services.lbaas.nsx_v import lbaas_common as lb_common
|
||||
from vmware_nsx.shell.admin.plugins.common import constants
|
||||
from vmware_nsx.shell.admin.plugins.common import formatters
|
||||
from vmware_nsx.shell.admin.plugins.common import utils as admin_utils
|
||||
@ -36,100 +37,273 @@ from vmware_nsx.shell.admin.plugins.nsxv.resources import utils as utils
|
||||
from vmware_nsx.shell import resources as shell
|
||||
|
||||
|
||||
# Firewall rules which _handle_edge_firewall_rules adds to an edge when none
# of them is present. 'ruleTag' is only a placeholder here: the actual tag is
# assigned by _append_md_fw_rules when the rules are appended to an edge's
# firewall rule list.
NSXV_MD_RULES = [
    # Accept TCP traffic to 169.254.169.254 on ports 80, 443 and 8775
    {'name': 'MDServiceIP',
     'destination': {'ipAddress': ['169.254.169.254']},
     'enabled': True,
     'application': {'service': [{'protocol': 'tcp',
                                  'port': [80, 443, 8775]}]},
     'action': 'accept',
     'ruleTag': None},
    # Deny traffic destined to the 169.254.128.0/17 range
    {'name': 'MDInterEdgeNet',
     'destination': {'ipAddress': ['169.254.128.0/17']},
     'enabled': True,
     'action': 'deny',
     'ruleTag': None}]

LOG = logging.getLogger(__name__)
# Backend client created once at import time and used by the helpers below
nsxv = utils.get_nsxv_client()
|
||||
|
||||
|
||||
def _append_md_fw_rules(fw_rules):
    """Append the metadata firewall rules to an edge's rule list.

    Every appended rule is a private copy of the matching NSXV_MD_RULES
    template, so the module-level templates are never modified and the rule
    lists built for different edges do not share dict objects.

    :param fw_rules: list of existing firewall rule dicts, extended in place
    :returns: the same ``fw_rules`` list with the metadata rules appended
    """
    import copy

    # Tags continue the numbering after the rules which already exist
    first_tag = len(fw_rules) + 1
    md_rules = []
    for offset, template in enumerate(NSXV_MD_RULES):
        rule = copy.deepcopy(template)
        rule['ruleTag'] = first_tag + offset
        md_rules.append(rule)

    fw_rules.extend(md_rules)
    return fw_rules
|
||||
|
||||
|
||||
def _handle_edge_firewall_rules(edge_id):
    """Add the metadata firewall rules to an edge when they are missing.

    The edge's firewall configuration is read from the backend. When none of
    its rules is named 'MDInterEdgeNet' or 'MDServiceIP', the metadata rules
    are appended and the configuration is written back. Backend failures are
    logged and not raised.

    :param edge_id: backend id of the edge to check
    """
    try:
        _header, fw_cfg = nsxv.get_firewall(edge_id)
    except Exception as e:
        LOG.error("Failed to retrieve firewall config for edge %(edge)s "
                  "with exception %(e)s", {'edge': edge_id, 'e': e})
        # Without the current configuration the metadata rules cannot be
        # merged into it, so give up on this edge
        return

    # Tolerate a configuration which has no rules section yet
    rules_section = fw_cfg.setdefault('firewallRules', {})
    fw_rules = rules_section.get('firewallRules', [])

    md_rule_names = ('MDInterEdgeNet', 'MDServiceIP')
    if any(rule['name'] in md_rule_names for rule in fw_rules):
        # At least one metadata rule exists - nothing to add
        return

    rules_section['firewallRules'] = _append_md_fw_rules(fw_rules)
    try:
        nsxv.update_firewall(edge_id, fw_cfg)
        LOG.info('Added missing firewall rules for edge %s', edge_id)
    except Exception as e:
        LOG.warning("Failed to update firewall config for edge "
                    "%(edge)s with exception %(e)s",
                    {'edge': edge_id, 'e': e})
|
||||
|
||||
|
||||
def _recreate_rtr_metadata_cfg(context, plugin, az_name, edge_id):
    """Re-run the metadata configuration for the router bound to an edge.

    Looks up the router binding of ``edge_id`` and asks the metadata proxy
    handler of ``az_name`` to configure that router's edge. Nothing is done
    when the plugin holds no handler for the availability zone; failures of
    the configuration call are logged and not raised.

    :param context: context whose ``session`` is used for the DB lookup
    :param plugin: plugin object exposing ``metadata_proxy_handler``
    :param az_name: availability zone name used to select the handler
    :param edge_id: backend id of the edge to fix
    """
    binding = nsxv_db.get_nsxv_router_binding_by_edge(context.session,
                                                      edge_id)
    handler = plugin.metadata_proxy_handler[az_name]
    if not handler:
        return

    try:
        handler.configure_router_edge(context, binding['router_id'])
        LOG.info('Added metadata components for edge %s', edge_id)
    except Exception as err:
        LOG.error('Recreation of metadata components for edge '
                  '%(edge)s failed with error %(e)s',
                  {'edge': edge_id, 'e': err})
|
||||
|
||||
|
||||
def _update_md_lb_members(edge_id, edge_internal_ips, lb, pool):
    """Replace the members of a metadata LB pool and push the LB to the edge.

    The pool is emptied and repopulated with one member per address in
    ``edge_internal_ips``, named 'Member-1', 'Member-2', ... and using the
    configured ``nova_metadata_port`` as both port and monitor port. A failed
    backend submission is logged and not raised.

    :param edge_id: backend id of the edge owning the loadbalancer
    :param edge_internal_ips: iterable of member IP addresses
    :param lb: loadbalancer object which is submitted to the backend
    :param pool: pool object whose members are replaced
    """
    LOG.info('Updating metadata members for edge %s', edge_id)
    pool.members = {}

    md_port = cfg.CONF.nsxv.nova_metadata_port
    for index, ip_addr in enumerate(edge_internal_ips, start=1):
        pool.add_member(nsxv_lb.NsxvLBPoolMember(
            name='Member-%d' % index,
            ip_address=ip_addr,
            port=md_port,
            monitor_port=md_port))

    try:
        lb.submit_to_backend(nsxv, edge_id)
        LOG.info('Updated members for %s', edge_id)
    except Exception as err:
        LOG.error('Updating members for %(edge)s failed with '
                  'error %(e)s', {'edge': edge_id, 'e': err})
|
||||
|
||||
|
||||
def _get_internal_edge_ips(context, az_name):
    """Collect the internal IPs of an availability zone's internal edges.

    :param context: context whose ``session`` is used for the DB queries
    :param az_name: name of the availability zone
    :returns: ``(edge_internal_ips, md_rtr_ids)`` - the IP addresses which
              the AZ's inter-edge purpose edges hold on the internal network,
              and the router ids of those edges - or ``(None, None)`` when
              the internal network, its subnet or the IPs cannot be found
    """
    session = context.session
    purpose = vcns_constants.InternalEdgePurposes.INTER_EDGE_PURPOSE

    # Get the internal network for this AZ
    db_net = nsxv_db.get_nsxv_internal_network_for_az(
        session, purpose, az_name)
    internal_net = db_net['network_id'] if db_net else None
    if not internal_net:
        return None, None

    # .first() returns None when the network has no subnet
    subnet = session.query(models_v2.Subnet).filter_by(
        network_id=internal_net).first()
    if not subnet or not subnet.get('id'):
        return None, None

    # Get the list of internal edges for this AZ
    edge_list = nsxv_db.get_nsxv_internal_edges_by_purpose(session, purpose)
    edge_az_list = [edge for edge in edge_list if
                    nsxv_db.get_router_availability_zone(
                        session, edge['router_id']) == az_name]
    md_rtr_ids = [edge['router_id'] for edge in edge_az_list]

    edge_internal_ips = []
    for rtr_id in md_rtr_ids:
        port = session.query(models_v2.Port).filter_by(
            network_id=internal_net, device_id=rtr_id).first()
        if not port:
            continue
        # A port without an IP allocation contributes no address
        ip_alloc = session.query(models_v2.IPAllocation).filter_by(
            port_id=port['id']).first()
        if ip_alloc:
            edge_internal_ips.append(ip_alloc['ip_address'])

    if not edge_internal_ips:
        return None, None

    LOG.info('Metadata proxy internal IPs are %s', edge_internal_ips)
    return edge_internal_ips, md_rtr_ids
|
||||
|
||||
|
||||
def _handle_edge(context, plugin, az_name, edge_id, edge_internal_ips):
    """Repair the metadata loadbalancer and firewall rules of one edge.

    While holding the edge's lock: when the metadata virtual server exists,
    its pool members are updated if their IPs differ from
    ``edge_internal_ips``; when it does not exist, the router metadata
    configuration is recreated. The firewall rules are then checked.

    :param context: context passed on to the recreation helper
    :param plugin: plugin object passed on to the recreation helper
    :param az_name: availability zone name of the edge
    :param edge_id: backend id of the edge to handle
    :param edge_internal_ips: expected member IPs of the metadata LB pool
    """
    with locking.LockManager.get_lock(edge_id):
        lb = nsxv_lb.NsxvLoadbalancer.get_loadbalancer(nsxv, edge_id)
        md_vse = lb.virtual_servers.get(md_proxy.METADATA_VSE_NAME)
        if not md_vse:
            # Interface connectivity and LB definition are done at the same
            # operation. If LB is missing then interface should be missing
            # as well
            LOG.info('Metadata LB components for edge %s are missing',
                     edge_id)
            _recreate_rtr_metadata_cfg(context, plugin, az_name, edge_id)
        else:
            pool = md_vse.default_pool
            current_ips = {member.payload['ipAddress']
                           for member in pool.members.values()}
            if current_ips != set(edge_internal_ips):
                _update_md_lb_members(edge_id, edge_internal_ips, lb, pool)
        # NOTE(review): the nesting of this call was reconstructed - it is
        # assumed to run for every edge, under the edge lock. Confirm.
        _handle_edge_firewall_rules(edge_id)
|
||||
|
||||
|
||||
@admin_utils.output_header
|
||||
def nsx_redo_metadata_cfg(resource, event, trigger, **kwargs):
    """Dispatch the metadata update according to the given properties.

    The 'property' keyword argument is parsed into key/value pairs:
    'edge-id' selects a single edge, otherwise 'az-name' selects an
    availability zone. With no properties at all, every edge is handled.
    Properties holding neither key are reported as an error.
    """
    properties = admin_utils.parse_multi_keyval_opt(kwargs.get('property'))
    edgeapi = utils.NeutronDbClient()
    plugin = utils.NsxVPluginWrapper()

    edge_id = properties.get('edge-id')
    if not properties:
        # No scope was requested - handle the whole cloud
        nsx_redo_metadata_cfg_all(edgeapi.context, plugin)
        return

    if edge_id:
        nsx_redo_metadata_cfg_for_edge(edgeapi.context, plugin, edge_id)
        return

    # If the az-name property exists - handle the edges of this AZ only
    az_name = properties.get('az-name')
    if az_name:
        nsx_redo_metadata_cfg_for_az(edgeapi.context, plugin, az_name)
        return

    LOG.error('Cannot parse properties %s', properties)
|
||||
|
||||
|
||||
def nsx_redo_metadata_cfg_for_edge(context, plugin, edge_id):
    """Restore the metadata configuration of a single edge.

    The edge is skipped, with an error log, when it has no router binding,
    when its availability zone is unknown or does not support metadata,
    when the AZ's metadata infrastructure cannot be found, when the edge is
    itself a metadata proxy, or when its router id carries the backup, PLR
    or loadbalancer resource prefix.

    :param context: context whose ``session`` is used for the DB lookups
    :param plugin: plugin object passed on to the edge handler
    :param edge_id: backend id of the edge to handle
    """
    binding = nsxv_db.get_nsxv_router_binding_by_edge(context.session, edge_id)
    if not binding:
        LOG.error('No edge binding found for edge %s', edge_id)
        return

    az_name = binding['availability_zone']
    conf_az = nsx_az.NsxVAvailabilityZones()
    az = conf_az.availability_zones.get(az_name)
    if not az:
        LOG.error('Availability zone %s not found', az_name)
        return
    if not az.supports_metadata():
        LOG.error('Edge %(edge)s belongs to az %(az)s which does not '
                  'support metadata',
                  {'az': az_name, 'edge': edge_id})
        return

    edge_internal_ips, md_rtr_ids = _get_internal_edge_ips(context,
                                                           az_name)
    # The helper returns (None, None) when nothing usable was found
    if not edge_internal_ips or not md_rtr_ids:
        LOG.error("Metadata infrastructure is missing or broken. "
                  "It is recommended to restart neutron service before "
                  "proceeding with configuration restoration")
        return

    router_id = binding['router_id']
    if router_id in md_rtr_ids:
        LOG.error('Edge %s is a metadata proxy', edge_id)
        return

    non_md_prefixes = (vcns_constants.BACKUP_ROUTER_PREFIX,
                       vcns_constants.PLR_EDGE_PREFIX,
                       lb_common.RESOURCE_ID_PFX)
    if router_id.startswith(non_md_prefixes):
        LOG.error('Edge %s is not a metadata delivery appliance', edge_id)
        return

    _handle_edge(context, plugin, az_name, edge_id, edge_internal_ips)
|
||||
|
||||
|
||||
@admin_utils.output_header
|
||||
def nsx_redo_metadata_cfg_all(context, plugin):
    """Restore the metadata configuration of every availability zone.

    The user is asked for confirmation first. Availability zones which do
    not support metadata are skipped with an info log.

    :param context: context passed on to the per-AZ handler
    :param plugin: plugin object passed on to the per-AZ handler
    """
    user_confirm = admin_utils.query_yes_no("Do you want to setup metadata "
                                            "infrastructure for all the edges",
                                            default="no")
    if not user_confirm:
        LOG.info("Metadata infrastructure setup aborted by user")
        return

    config.register_nsxv_azs(cfg.CONF, cfg.CONF.nsxv.availability_zones)
    conf_az = nsx_az.NsxVAvailabilityZones()
    for az in conf_az.list_availability_zones_objects():
        if az.supports_metadata():
            # Metadata support was just verified, so skip the AZ check
            nsx_redo_metadata_cfg_for_az(context, plugin, az.name, False)
        else:
            LOG.info("Skipping availability zone: %s - no metadata "
                     "configuration", az.name)
|
||||
|
||||
|
||||
def nsx_redo_metadata_cfg_for_az(context, plugin, az_name, check_az=True):
    """Restore the metadata configuration of an availability zone's edges.

    Every service edge bound in the AZ is handled, except the metadata
    proxy edges themselves and edges whose router id carries the backup,
    PLR or loadbalancer resource prefix.

    :param context: context whose ``session`` is used for the DB lookups
    :param plugin: plugin object passed on to the edge handler
    :param az_name: name of the availability zone
    :param check_az: when True, verify that the AZ exists and supports
                     metadata before doing anything
    """
    LOG.info("Updating MetaData for availability zone: %s", az_name)

    if check_az:
        conf_az = nsx_az.NsxVAvailabilityZones()
        az = conf_az.availability_zones.get(az_name)
        if not az:
            LOG.error('Availability zone %s not found', az_name)
            return
        if not az.supports_metadata():
            LOG.error('Availability zone %s is not configured with metadata',
                      az_name)
            return

    edge_internal_ips, md_rtr_ids = _get_internal_edge_ips(context,
                                                           az_name)
    if not edge_internal_ips and not md_rtr_ids:
        LOG.error("Metadata infrastructure is missing or broken. "
                  "It is recommended to restart neutron service before "
                  "proceeding with configuration restoration")
        return

    router_bindings = nsxv_db.get_nsxv_router_bindings(
        context.session,
        filters={'edge_type': [nsxv_constants.SERVICE_EDGE],
                 'availability_zone': [az_name]})

    # Built once instead of once per binding
    md_rtr_id_set = set(md_rtr_ids)
    non_md_prefixes = (vcns_constants.BACKUP_ROUTER_PREFIX,
                       vcns_constants.PLR_EDGE_PREFIX,
                       lb_common.RESOURCE_ID_PFX)
    # A set, as several bindings may point at the same edge
    edge_ids = {binding['edge_id'] for binding in router_bindings
                if binding['router_id'] not in md_rtr_id_set and
                not binding['router_id'].startswith(non_md_prefixes)}

    for edge_id in edge_ids:
        _handle_edge(context, plugin, az_name, edge_id, edge_internal_ips)
|
||||
|
||||
|
||||
@admin_utils.output_header
|
||||
|
Loading…
Reference in New Issue
Block a user