browbeat/utils/rally_cleanup.py
Sanjay Chari ab300f91f6 Rally cleanup script fix
This patch introduces the following changes.
1. rally_cleanup.py has been moved to the utils directory.
2. The rally_cleanup.py script was getting stuck in an infinite loop
as some functions were modified to clean up only rally objects while
other functions were not modified. This patch modifies the first function
that is called in traceback to clean up only rally objects, to avoid such
issues.

Change-Id: Ie0b5972b62b9dd591506f735191724f1b30c5038
2022-01-07 11:48:53 +05:30

346 lines
13 KiB
Python
Executable File

#!/usr/bin/env python
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import argparse
import subprocess
import yaml
from multiprocessing import Pool
from rally.common import cfg
from rally.common import logging
from rally_openstack import osclients
# Module-level logger obtained from rally's logging wrapper.
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
# Raise the OpenStack client HTTP timeout to avoid tracebacks on slow
# requests (value is presumably in seconds - confirm against the rally option).
CONF.openstack_client_http_timeout = 180.0
# Get OpenStack clients. They are built from the environment
# (create_from_env) when this module is loaded and are shared by every
# function below.
osclient = osclients.Clients.create_from_env()
nova_client = osclient.nova(version="2.73")
neutron_client = osclient.neutron()
keystone_client = osclient.keystone()
# device_owner values that delete_router_ports accepts as router interfaces.
# Note: _cleanup_neutron_router_ports only lists ports owned by
# "network:router_interface", so DVR and HA interfaces are currently
# not checked or deleted.
ROUTER_INTERFACE_OWNERS = ("network:router_interface",
"network:router_interface_distributed",
"network:ha_router_replicated_interface")
# Networks with these names are never deleted
NETWORK_EXCLUDE = ["public", "lb-mgmt-net"]
# Number of processes spawned to delete resources concurrently
CONCURRENCY = 16
# Maximum number of cleanup rounds attempted (used by cleanup_neutron_networks)
MAX_ATTEMPTS = 7
# Number of times we check whether a resource still exists after issuing a
# delete request (the delete_* helpers sleep 5 seconds between checks)
MAX_CHECK = 6
# Default config file for browbeat; a relative path, so it is resolved
# against the current working directory when opened
_config_file = '../browbeat-config.yaml'
def delete_server(resource):
    """Delete one Nova server and poll until it can no longer be fetched.

    resource: object whose ``id`` attribute holds the server id.

    Returns the same ``resource`` object, whether or not the server vanished
    within the polling window. Errors from the initial lookup or from the
    delete request itself are not caught here.
    """
    pid = os.getpid()
    server_id = resource.id
    server = nova_client.servers.get(server_id)
    print("pid {} deleting server id {} name {}".format(
        pid, server_id, server.name))
    nova_client.servers.delete(server)
    # Poll at most MAX_CHECK times, 5 seconds apart. Any exception raised by
    # the lookup is taken to mean the server is gone.
    for _ in range(MAX_CHECK):
        try:
            nova_client.servers.get(server_id)
            print("pid {} server {} still exists".format(pid, server_id))
        except Exception:
            print("pid {} server {} succesfully deleted".format(pid, server_id))
            break
        time.sleep(5)
    return resource
def delete_network(resource):
    """Delete one Neutron network and poll until it can no longer be shown.

    resource: object whose ``id`` attribute holds the network id.

    Returns the same ``resource`` object. Errors from the initial lookup or
    from the delete request are not caught here.
    """
    pid = os.getpid()
    network_id = resource.id
    network = neutron_client.show_network(network_id)["network"]
    print("pid {} deleting network id {} name {}".format(
        pid, network_id, network["name"]))
    neutron_client.delete_network(network["id"])
    # Confirm the deletion: up to MAX_CHECK lookups, 5 seconds apart. A
    # lookup that raises is treated as "network no longer exists".
    for _ in range(MAX_CHECK):
        try:
            neutron_client.show_network(network_id)["network"]
            print("pid {} network {} still exists".format(pid, network_id))
        except Exception:
            print("pid {} network {} succesfully deleted".format(pid, network_id))
            break
        time.sleep(5)
    return resource
def delete_security_group(resource):
    """Delete one Neutron security group and poll until it is gone.

    resource: object whose ``id`` attribute holds the security group id.

    Returns the same ``resource`` object. Errors from the initial lookup or
    from the delete request are not caught here.
    """
    pid = os.getpid()
    group_id = resource.id
    group = neutron_client.show_security_group(group_id)["security_group"]
    print("pid {} deleting security_group id {} name {}".format(
        pid, group_id, group["name"]))
    neutron_client.delete_security_group(group["id"])
    # Confirm the deletion: up to MAX_CHECK lookups, 5 seconds apart. A
    # lookup that raises is treated as "security group no longer exists".
    for _ in range(MAX_CHECK):
        try:
            neutron_client.show_security_group(group_id)["security_group"]
            print("pid {} security_group {} still exists".format(pid, group_id))
        except Exception:
            print("pid {} security_group {} succesfully deleted".format(pid, group_id))
            break
        time.sleep(5)
    return resource
def delete_floatingip(resource):
    """Delete one Neutron floating IP and poll until it is gone.

    resource: object whose ``id`` attribute holds the floating IP id.

    Returns the same ``resource`` object. Errors from the initial lookup or
    from the delete request are not caught here.
    """
    pid = os.getpid()
    fip_id = resource.id
    fip = neutron_client.show_floatingip(fip_id)["floatingip"]
    print("pid {} deleting floatingip id {} address {}".format(
        pid, fip_id, fip["floating_ip_address"]))
    neutron_client.delete_floatingip(fip_id)
    # Confirm the deletion: up to MAX_CHECK lookups, 5 seconds apart. A
    # lookup that raises is treated as "floating IP no longer exists".
    for _ in range(MAX_CHECK):
        try:
            neutron_client.show_floatingip(fip_id)["floatingip"]
            print("pid {} floatingip {} still exists".format(pid, fip_id))
        except Exception:
            print("pid {} floatingip {} succesfully deleted".format(pid, fip_id))
            break
        time.sleep(5)
    return resource
def delete_router(resource):
    """Detach the gateway of one Neutron router, delete it and poll until gone.

    resource: object whose ``id`` attribute holds the router id.

    Returns the same ``resource`` object. A failing gateway removal is only
    reported; errors from the initial lookup or from the delete request are
    not caught here.
    """
    pid = os.getpid()
    router_id = resource.id
    router = neutron_client.show_router(router_id)["router"]
    print("pid {} deleting router id {} name {}".format(
        pid, router_id, router["name"]))
    try:
        neutron_client.remove_gateway_router(router_id)
    except Exception:
        # Best effort: any failure here is reported as "no gateway".
        print("pid {} router id {} gateway doesn't exist".format(pid, router_id))
    # Pause before issuing the delete request.
    time.sleep(5)
    neutron_client.delete_router(router["id"])
    # Confirm the deletion: up to MAX_CHECK lookups, 5 seconds apart. A
    # lookup that raises is treated as "router no longer exists".
    for _ in range(MAX_CHECK):
        try:
            neutron_client.show_router(router_id)["router"]
            print("pid {} router {} still exists".format(pid, router_id))
        except Exception:
            print("pid {} router {} succesfully deleted".format(pid, router_id))
            break
        time.sleep(5)
    return resource
def delete_router_ports(resource):
    """Detach one router interface port from its router and delete the port.

    resource: object whose ``id`` attribute holds the port id.

    Ports whose ``device_owner`` is not listed in ROUTER_INTERFACE_OWNERS are
    left untouched. Returns the same ``resource`` object in every case.
    """
    pid = os.getpid()
    port_id = resource.id
    port = neutron_client.show_port(port_id)["port"]
    is_router_interface = port["device_owner"] in ROUTER_INTERFACE_OWNERS
    if not is_router_interface:
        return resource
    router_id = port["device_id"]
    print("pid {} deleting router {} port id {}".format(
        pid, router_id, port_id))
    neutron_client.remove_interface_router(router_id, {"port_id": port["id"]})
    time.sleep(5)
    # NOTE(review): if Neutron already removes the port while detaching the
    # interface, this call raises and the worker fails - confirm intended.
    neutron_client.delete_port(port["id"])
    # Confirm the deletion: up to MAX_CHECK lookups, 5 seconds apart. A
    # lookup that raises is treated as "port no longer exists".
    for _ in range(MAX_CHECK):
        try:
            neutron_client.show_port(port_id)["port"]
            print("pid {} router port {} still exists".format(pid, port_id))
        except Exception:
            print("pid {} router port {} succesfully deleted".format(pid, port_id))
            break
        time.sleep(5)
    return resource
class Resource:
    """Small holder for a resource id, passed to the pool worker functions."""

    def __init__(self, id):
        # The only state carried by an instance.
        self.id = id

    def __getnewargs__(self):
        """Return the one-element argument tuple ``(id,)`` used by pickle."""
        return (self.id,)
# Creates a pool of CONCURRENCY worker processes. "map_async" hands every
# element of "resources" to exactly one call of "cleanup_fun", so no two
# calls work on the same resource. For example, to delete networks, each
# call deletes one distinct network.
def cleanup_with_concurrency(cleanup_fun, resources, timeout=60):
    """Run ``cleanup_fun`` once per element of ``resources`` in a process pool.

    cleanup_fun: callable taking a single resource; it is executed in the
        worker processes.
    resources: iterable of resources, one per call.
    timeout: seconds allowed for each of the two waits below, so the worst
        case is about twice this value. Defaults to 60.

    Returns True when every call completed without raising inside the time
    budget, False otherwise. Failures derived from ``Exception`` are reported
    on stdout instead of being raised.
    """
    ret = True
    with Pool(CONCURRENCY) as pool:
        result = pool.map_async(cleanup_fun, resources)
        # https://stackoverflow.com/questions/26063877/python-multiprocessing-module-join-processes-with-timeout
        # Wait up to `timeout` seconds for the whole batch to finish; the
        # timeout is cumulative, not per worker.
        result.wait(timeout=timeout)
        try:
            # Re-raises an exception from a worker call, or raises a timeout
            # error if the batch is still not finished.
            result.get(timeout=timeout)
        except Exception as exc:
            # Report the reason so that a failed batch is visible to the
            # operator instead of being silently retried by the caller.
            print("pid {} cleanup batch {} failed: {!r}".format(
                os.getpid(), getattr(cleanup_fun, "__name__", cleanup_fun), exc))
            ret = False
        try:
            # Stops the workers, whether they finished or are still running.
            pool.terminate()
        except Exception:
            # Fall back to a regular close for any leftover child processes.
            pool.close()
        pool.join()
    return ret
def cleanup_nova_vms():
    """Delete Nova servers whose name contains "rally", batch after batch.

    Keeps going until a listing contains no such server; the number of
    rounds is not bounded and the outcome of each batch is not inspected.
    """
    while True:
        # NOTE(review): limit=100 is applied to the listing before the name
        # filter, so presumably rally servers beyond the first 100 entries
        # are not seen - confirm against novaclient paging behaviour.
        listing = nova_client.servers.list(
            detailed=True, search_opts={"all_tenants": 1}, limit=100)
        rally_servers = [srv for srv in listing if "rally" in srv.name]
        if not rally_servers:
            break
        print("Deleting {} servers".format(len(rally_servers)))
        batch = [Resource(srv.id) for srv in rally_servers]
        cleanup_with_concurrency(delete_server, batch)
        time.sleep(5)
def cleanup_neutron_networks():
    """Delete Neutron networks whose name contains "rally".

    Networks named in NETWORK_EXCLUDE are skipped. At most MAX_ATTEMPTS
    rounds are made; the loop ends early when nothing is left to delete or
    when a batch completes successfully.
    """
    for _ in range(MAX_ATTEMPTS):
        batch = [
            Resource(net["id"])
            for net in neutron_client.list_networks()["networks"]
            if net["name"] not in NETWORK_EXCLUDE and "rally" in net["name"]
        ]
        # An empty listing also yields an empty batch, so one check suffices.
        if not batch:
            break
        print("Deleting {} networks".format(len(batch)))
        if cleanup_with_concurrency(delete_network, batch):
            break
        time.sleep(5)
def get_admin_security_group():
    """Return the id of the "default" security group of the "admin" project.

    Raises IndexError when no project named "admin" exists or when the
    listing contains no security group named "default".
    """
    admin_project_ids = [
        proj.id for proj in keystone_client.projects.list()
        if proj.name == "admin"
    ]
    admin_project = admin_project_ids[0]
    listing = neutron_client.list_security_groups(project=admin_project)
    default_ids = [
        group["id"] for group in listing["security_groups"]
        if group["name"] == "default"
    ]
    return default_ids[0]
def cleanup_neutron_security_groups():
    """Delete Neutron security groups whose name contains "rally".

    The admin project's default security group is never deleted. The loop
    ends when nothing is left to delete or when a batch completes
    successfully; otherwise it retries after 5 seconds.
    """
    # we shouldn't cleanup default security group created by admin
    protected_id = get_admin_security_group()
    while True:
        batch = [
            Resource(group["id"])
            for group in neutron_client.list_security_groups()["security_groups"]
            if "rally" in group["name"] and group["id"] != protected_id
        ]
        if not batch:
            break
        print("Deleting {} security_groups".format(len(batch)))
        if cleanup_with_concurrency(delete_security_group, batch):
            break
        time.sleep(5)
def cleanup_neutron_floatingips():
    """Delete the floating IPs returned by the Neutron listing.

    Unlike the other cleanup helpers, no "rally" name filter is applied
    here: every listed floating IP is deleted. The loop ends when the
    listing is empty or when a batch completes successfully; otherwise it
    retries after 5 seconds.
    """
    while True:
        listed = neutron_client.list_floatingips()["floatingips"]
        if not listed:
            break
        print("Deleting {} floatingips".format(len(listed)))
        batch = [Resource(fip["id"]) for fip in listed]
        if cleanup_with_concurrency(delete_floatingip, batch):
            break
        time.sleep(5)
def _cleanup_neutron_router_ports():
    """Delete router interface ports whose name contains "rally".

    Only ports listed with device_owner "network:router_interface" are
    considered. The loop ends when no such port is left or when a batch
    completes successfully; otherwise it retries after 5 seconds.
    """
    while True:
        listed = neutron_client.list_ports(
            device_owner='network:router_interface')["ports"]
        rally_ports = [entry for entry in listed if "rally" in entry["name"]]
        if not rally_ports:
            break
        print("Deleting {} router ports".format(len(rally_ports)))
        batch = [Resource(entry["id"]) for entry in rally_ports]
        if cleanup_with_concurrency(delete_router_ports, batch):
            break
        time.sleep(5)
def cleanup_neutron_routers():
    """Delete Neutron routers whose name contains "rally".

    Router interface ports are cleaned up first. The loop then ends when no
    matching router is left or when a batch completes successfully;
    otherwise it retries after 5 seconds.
    """
    _cleanup_neutron_router_ports()
    while True:
        listed = neutron_client.list_routers()["routers"]
        rally_routers = [entry for entry in listed if "rally" in entry["name"]]
        if not rally_routers:
            break
        print("Deleting {} routers".format(len(rally_routers)))
        batch = [Resource(entry["id"]) for entry in rally_routers]
        if cleanup_with_concurrency(delete_router, batch):
            break
        time.sleep(5)
# browbeat/config.py cannot be used from rally-venv because it needs
# modules that only exist in browbeat-venv. The config file is therefore
# loaded here without validation; it would already have been validated
# while running the workloads.
def load_browbeat_config(path):
    """Parse the YAML file at ``path`` with ``yaml.safe_load`` and return it."""
    with open(path, "r") as stream:
        return yaml.safe_load(stream)
def cleanup_external_network_links(_config):
    """Delete the vlan links of enabled dynamic-workloads entries.

    _config: loaded browbeat configuration; its "workloads" list is walked
        in reverse order.

    For every enabled workload named "dynamic-workloads" whose first
    scenario is enabled, runs ``sudo ip link delete <iface_name>.<n>`` for n
    from 1 to ``num_external_networks`` and prints the outcome of each
    command. Returns None.
    """
    for workload in reversed(_config["workloads"]):
        if workload["name"] != "dynamic-workloads" or not workload["enabled"]:
            continue
        scenario = workload["scenarios"][0]
        if not scenario["enabled"]:
            continue
        iface_name = scenario["iface_name"]
        num_external_networks = scenario["num_external_networks"]
        for vlan_id in range(1, num_external_networks + 1):
            command = ["sudo", "ip", "link", "delete",
                       "{}.{}".format(iface_name, vlan_id)]
            # wait() blocks until the command exits and returns its status.
            returncode = subprocess.Popen(command).wait()
            if returncode == 0:
                print("Deleting vlan {}.{} was successful".format(
                    iface_name, vlan_id))
            else:
                print("Deleting vlan {}.{} failed".format(
                    iface_name, vlan_id))
def cleanup_resources():
    """Run every cleanup step in order.

    Order: servers, floating IPs, routers (including their interface
    ports), security groups, networks.
    """
    steps = (
        cleanup_nova_vms,
        cleanup_neutron_floatingips,
        cleanup_neutron_routers,
        cleanup_neutron_security_groups,
        cleanup_neutron_networks,
    )
    for step in steps:
        step()
def build_parser(default_config):
    """Build the command-line parser of the cleanup script.

    default_config: path used for -s/--setup when the option is absent or
        given without a value.

    Every option is registered before anything is parsed. Parsing before
    -s/--setup is registered makes argparse reject "-e -s FILE" as an
    unrecognized argument.
    """
    parser = argparse.ArgumentParser(
        description="Rally cleanup script")
    # Dynamic workloads creates link files for each
    # vlan external network from rally context. This flag
    # cleans up the link files.
    parser.add_argument(
        '-e', '--cleanup_external_network_links', action='store_true',
        help='Flag to cleanup rally context external network NICs')
    # Only consulted together with -e. `const` makes a bare "-s" fall back
    # to the default path instead of yielding None.
    parser.add_argument(
        '-s', '--setup', nargs='?', default=default_config, const=default_config,
        help='Provide Browbeat YAML configuration file. Default is ./{}'.format(default_config))
    return parser


def main():
    """Parse the command line and run the cleanup.

    With -e, the vlan links described in the browbeat config file are
    removed first. The OpenStack resource cleanup always runs.
    """
    args = build_parser(_config_file).parse_args()
    if args.cleanup_external_network_links:
        _config = load_browbeat_config(args.setup)
        cleanup_external_network_links(_config)
    cleanup_resources()


# Guard the entry point so that importing this module (which multiprocessing
# may do in worker processes, depending on the start method) does not parse
# the command line or start another cleanup.
if __name__ == "__main__":
    main()