Merge "Add rolling downtime simulation tools"
This commit is contained in:
commit
eacb01c0a0
62
bowling_ball/README.rst
Normal file
62
bowling_ball/README.rst
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
Bowling Ball - OpenStack-Ansible Rolling Downtime Simulator
|
||||||
|
###########################################################
|
||||||
|
:date: 2017-03-09
|
||||||
|
:tags: rackspace, openstack, ansible
|
||||||
|
:category: \*openstack, \*nix
|
||||||
|
|
||||||
|
About
|
||||||
|
-----
|
||||||
|
|
||||||
|
This project aims to test for issues with rolling downtime on
|
||||||
|
OpenStack-Ansible deployments. It consists of two main components:
|
||||||
|
|
||||||
|
* The ``rolling_restart.py`` script
|
||||||
|
* The ``tests`` directory
|
||||||
|
|
||||||
|
The ``rolling_restart.py`` script will stop containers from a specified group
|
||||||
|
in a rolling fashion - node 1 will stop, then start, then node 2, then
|
||||||
|
node 3 and so on. This script runs from the *deployment host*.
|
||||||
|
|
||||||
|
The ``tests`` directory contains scripts to generate traffic against the
|
||||||
|
target services. These vary per service, but attempt to apply usage to a
|
||||||
|
system that will be restarted by ``rolling_restart.py`` in order to
|
||||||
|
measure the effects. These scripts run from a *utility container*.
|
||||||
|
|
||||||
|
|
||||||
|
Usage
|
||||||
|
-----
|
||||||
|
|
||||||
|
#. Start your test script from the utility container. ``keystone.py``
|
||||||
|
will request a session and a list of projects in an infinite loop, for
|
||||||
|
example.
|
||||||
|
#. From the deployment node, run ``rolling_restart.py`` in the playbooks
|
||||||
|
directory (necessary to find the inventory script). Specify the service
|
||||||
|
you're targeting with the ``-s`` parameter.
|
||||||
|
|
||||||
|
``rolling_restart.py -s keystone_container``
|
||||||
|
|
||||||
|
You can specify a wait time in seconds between stopping and starting
|
||||||
|
individual nodes.
|
||||||
|
|
||||||
|
``rolling_restart.py -s keystone_container -w 60``
|
||||||
|
|
||||||
|
|
||||||
|
Assumptions
|
||||||
|
-----------
|
||||||
|
|
||||||
|
These tools are currently coupled to OSA, and they assume paths to files
|
||||||
|
as specified by the ``multi-node-aio`` scripts.
|
||||||
|
|
||||||
|
Container stopping and starting is done with an ansible command, and the
|
||||||
|
physical host to target is derived from the current inventory.
|
||||||
|
|
||||||
|
``rolling_restart.py`` must currently be run from the ``playbooks``
|
||||||
|
directory. This will be fixed later.
|
||||||
|
|
||||||
|
You must source ``openrc`` before running ``keystone.py``.
|
||||||
|
|
||||||
|
|
||||||
|
Why the name?
|
||||||
|
-------------
|
||||||
|
|
||||||
|
It sets 'em up and knocks 'em down.
|
128
bowling_ball/rolling_restart.py
Normal file
128
bowling_ball/rolling_restart.py
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright 2017, Rackspace US, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
# (c) 2017, Nolan Brubaker <nolan.brubaker@rackspace.com>
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Locations of the OpenStack-Ansible deployment configuration and playbooks.
CONF_DIR = os.path.join('/', 'etc', 'openstack_deploy')
INVENTORY_FILE = os.path.join(CONF_DIR, 'openstack_inventory.json')
CONF_FILE = os.path.join(CONF_DIR, 'openstack_user_config.yml')
PLAYBOOK_DIR = os.path.join('/', 'opt', 'openstack_ansible', 'playbooks')

# Ad-hoc ansible command lines used to stop/start a single LXC container on
# its physical host. They are filled in with ``str.format(container=...,
# host=...)``. ``-i inventory`` is a relative path, so the command has to be
# run from a directory that contains ``inventory``.
#
# The pieces are joined with implicit string concatenation rather than a
# backslash continuation inside the literal, so the resulting command never
# depends on how the source happens to be indented.
STOP_TEMPLATE = ('ansible -i inventory -m shell -a '
                 '"lxc-stop -n {container}" {host}')
START_TEMPLATE = ('ansible -i inventory -m shell -a '
                  '"lxc-start -dn {container}" {host}')
|
||||||
|
|
||||||
|
|
||||||
|
def args(arg_list):
    """Parse the command-line arguments for the rolling restart tool.

    :param arg_list: list of argument strings, e.g. ``sys.argv[1:]``
    :returns: dict with the keys ``service`` (str, required on the command
              line) and ``wait`` (int number of seconds, default 120)
    """
    parser = argparse.ArgumentParser(
        usage='%(prog)s',
        description='OpenStack-Ansible Rolling Update Simulator',
        epilog='Licensed "Apache 2.0"')

    parser.add_argument(
        '-s',
        '--service',
        help='Name of the service to rolling restart.',
        required=True,
    )

    parser.add_argument(
        '-w',
        '--wait',
        help=("Number of seconds to wait between stopping and starting. "
              "Default: 120"),
        # Without an explicit type argparse hands back the raw string, which
        # time.sleep() rejects with a TypeError.
        type=int,
        default=120,
    )

    return vars(parser.parse_args(arg_list))
|
||||||
|
|
||||||
|
|
||||||
|
def read_inventory(inventory_file):
    """Load the JSON inventory at ``inventory_file`` and return it as a dict"""
    with open(inventory_file, 'r') as handle:
        return json.load(handle)
|
||||||
|
|
||||||
|
|
||||||
|
def get_similar_groups(target_group, inventory):
    """
    Suggest group names

    Returns every top-level inventory key that contains ``target_group``
    as a substring, in the inventory's own iteration order.
    """
    return [name for name in inventory.keys() if target_group in name]
|
||||||
|
|
||||||
|
|
||||||
|
def get_containers(target_group, inventory):
    """Get the sorted container names in the relevant group.

    If ``target_group`` is not a key of ``inventory``, an error message and
    any similarly named groups are printed and the process exits with
    status 1.

    :param target_group: name of the inventory group to look up
    :param inventory: parsed inventory dictionary
    :returns: a new, sorted list of the group's ``hosts`` entries; empty if
              the group has no ``hosts`` key
    """
    group = inventory.get(target_group)

    if group is None:
        groups = get_similar_groups(target_group, inventory)
        print("No group {} found.".format(target_group))
        if groups:
            print("Maybe try one of these:")
            print("\n".join(groups))
        sys.exit(1)

    # sorted() builds a new list, so the caller's inventory is left untouched
    # (list.sort() would reorder it in place). .get() tolerates a group entry
    # that carries no 'hosts' key.
    return sorted(group.get('hosts', []))
|
||||||
|
|
||||||
|
|
||||||
|
def rolling_restart(containers, inventory, wait=120):
    """Restart containers one at a time, in the order given.

    For each container the physical host is read from
    ``inventory['_meta']['hostvars'][container]['physical_host']``; the
    container is stopped via ``STOP_TEMPLATE``, the function sleeps for
    ``wait`` seconds, and the container is started via ``START_TEMPLATE``.

    :param containers: iterable of container names
    :param inventory: parsed inventory dictionary
    :param wait: number of seconds to wait between stopping and starting a
                 container; a number or a numeric string
    :raises ValueError: if ``wait`` is not numeric or is negative
    :raises TypeError: if ``wait`` cannot be converted to a number
    :raises subprocess.CalledProcessError: if a stop/start command exits
                                           with a non-zero status
    """
    # Validate the delay before touching anything: a bad value discovered
    # only at time.sleep() would leave the first container stopped.
    wait = float(wait)
    if wait < 0:
        raise ValueError("wait must be non-negative, got {}".format(wait))

    for container in containers:
        host = inventory['_meta']['hostvars'][container]['physical_host']

        # The templates are complete shell command lines (they contain
        # quoting), hence shell=True. Names come from the local inventory.
        stop_cmd = STOP_TEMPLATE.format(container=container, host=host)
        print("Stopping {container}".format(container=container))
        subprocess.check_call(stop_cmd, shell=True)

        time.sleep(wait)

        start_cmd = START_TEMPLATE.format(container=container, host=host)
        subprocess.check_call(start_cmd, shell=True)
        print("Started {container}".format(container=container))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Entry point: parse arguments, load the inventory, restart the group.

    Reads the ``service`` and ``wait`` options from the command line, loads
    ``INVENTORY_FILE``, resolves the containers of the requested group and
    performs the rolling restart on them.
    """
    options = args(sys.argv[1:])
    target, delay = options['service'], options['wait']

    inventory = read_inventory(INVENTORY_FILE)
    rolling_restart(get_containers(target, inventory), inventory, delay)


if __name__ == "__main__":
    main()
|
62
bowling_ball/tests/keystone.py
Normal file
62
bowling_ball/tests/keystone.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright 2017, Rackspace US, Inc.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
# (c) 2017, Nolan Brubaker <nolan.brubaker@rackspace.com>
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
from keystoneauth1.identity import v3
|
||||||
|
from keystoneauth1 import session
|
||||||
|
from keystoneauth1.exceptions.connection import ConnectFailure
|
||||||
|
from keystoneauth1.exceptions.http import InternalServerError
|
||||||
|
from keystoneclient.v3 import client
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Credentials come from the environment, so ``openrc`` must be sourced first;
# a missing variable raises KeyError here.
auth_url = os.environ['OS_AUTH_URL']
password = os.environ['OS_PASSWORD']

auth = v3.Password(auth_url=auth_url, username="admin",
                   password=password, project_name="admin",
                   user_domain_id="default", project_domain_id="default")

# Timestamp of the first failed request of the current outage, or None while
# the service is reachable.
disconnected = None
try:
    while True:
        try:
            # Pause for a bit so we're not generating more data than we
            # can handle
            time.sleep(1)
            start_time = datetime.datetime.now()

            # A fresh session and client are created for every iteration
            # before the project list is requested.
            sess = session.Session(auth=auth)
            keystone = client.Client(session=sess)
            keystone.projects.list()

            end_time = datetime.datetime.now()

            if disconnected:
                # First success after an outage: report how long it lasted.
                dis_delta = end_time - disconnected
                disconnected = None
                print("Reconnect {}s".format(dis_delta.total_seconds()))

            delta = end_time - start_time

            # The placeholder was previously written as "{]", which made
            # str.format() raise ValueError on the first successful request.
            print("New list: {}s.".format(delta.total_seconds()))
        except (ConnectFailure, InternalServerError):
            # Remember only the start of the outage; later failures within
            # the same outage must not reset the timestamp.
            if not disconnected:
                disconnected = datetime.datetime.now()
except KeyboardInterrupt:
    # Ctrl-C is the normal way to stop the loop; exit without a traceback.
    sys.exit()
|
Loading…
x
Reference in New Issue
Block a user