Retire TripleO: remove repo content
The TripleO project is retiring (see https://review.opendev.org/c/openstack/governance/+/905145); this commit removes the content of this project's repository. Change-Id: I5080dd23acbf6beca58e70e6ae1f1bc3d1843161
parent 74eec6791c
commit a1119eb2d8
.gitignore (vendored, 62 lines removed)
@@ -1,62 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
doc/build/

# PyBuilder
target/

# virtualenv
.venv/

# Files created by releasenotes build
releasenotes/build
LICENSE (202 lines removed)
@@ -1,202 +0,0 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright {yyyy} {name of copyright owner}

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
README.md (144 lines removed)
@@ -1,144 +0,0 @@
Utility roles and docs for TripleO
==================================

These Ansible roles are a set of useful tools to be used on top of TripleO
deployments. They can also be used together with
[tripleo-quickstart](https://github.com/openstack/tripleo-quickstart) (and
[tripleo-quickstart-extras](https://github.com/openstack/tripleo-quickstart-extras)).

The documentation of each role is located in the individual role folders.
General usage information about *tripleo-quickstart* can be found in the
[project documentation](https://docs.openstack.org/tripleo-quickstart/latest/).

Using the playbook on an existing TripleO environment
-----------------------------------------------------

The playbooks can be launched directly from the **undercloud** machine of the
**TripleO** deployment. The described steps are expected to be run inside the
*/home/stack* directory.

First of all, a clone of the *tripleo-ha-utils* repository must be created:

    git clone https://github.com/openstack/tripleo-ha-utils

then three environment variables need to be exported, pointing to three files:

    export ANSIBLE_CONFIG="/home/stack/ansible.cfg"
    export ANSIBLE_INVENTORY="/home/stack/hosts"
    export ANSIBLE_SSH_ARGS="-F /home/stack/ssh.config.ansible"

These files are:

**ansible.cfg** which must contain at least these lines:

    [defaults]
    roles_path = /home/stack/tripleo-ha-utils/roles

**hosts** which must be configured depending on the deployed environment,
reflecting these sections:

    undercloud ansible_host=undercloud ansible_user=stack ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-compute-1 ansible_host=overcloud-compute-1 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-compute-0 ansible_host=overcloud-compute-0 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-controller-2 ansible_host=overcloud-controller-2 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-controller-1 ansible_host=overcloud-controller-1 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-controller-0 ansible_host=overcloud-controller-0 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa

    [compute]
    overcloud-compute-1
    overcloud-compute-0

    [undercloud]
    undercloud

    [overcloud]
    overcloud-compute-1
    overcloud-compute-0
    overcloud-controller-2
    overcloud-controller-1
    overcloud-controller-0

    [controller]
    overcloud-controller-2
    overcloud-controller-1
    overcloud-controller-0

**ssh.config.ansible** which can be generated by these code lines:

    cat /home/stack/.ssh/id_rsa.pub >> /home/stack/.ssh/authorized_keys
    echo -e "Host undercloud\n Hostname 127.0.0.1\n IdentityFile /home/stack/.ssh/id_rsa\n User stack\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n" > ssh.config.ansible
    . /home/stack/stackrc
    openstack server list -c Name -c Networks | awk '/ctlplane/ {print $2, $4}' | sed s/ctlplane=//g | while read node; do node_name=$(echo $node | cut -f 1 -d " "); node_ip=$(echo $node | cut -f 2 -d " "); echo -e "Host $node_name\n Hostname $node_ip\n IdentityFile /home/stack/.ssh/id_rsa\n User heat-admin\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n"; done >> ssh.config.ansible
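The generation one-liner above is dense; the following is an equivalent sketch of the same loop, split over multiple lines purely for readability (same commands and output, nothing new assumed):

```bash
. /home/stack/stackrc
# One "Host" block per overcloud node, appended to ssh.config.ansible.
openstack server list -c Name -c Networks | \
    awk '/ctlplane/ {print $2, $4}' | sed 's/ctlplane=//g' | \
while read node_name node_ip; do
    cat >> ssh.config.ansible <<EOF
Host $node_name
 Hostname $node_ip
 IdentityFile /home/stack/.ssh/id_rsa
 User heat-admin
 StrictHostKeyChecking no
 UserKnownHostsFile=/dev/null

EOF
done
```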
The *ssh.config.ansible* file can *optionally* contain specific per-host connection options, like these:

    ...
    ...
    Host overcloud-controller-0
      ProxyCommand ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=60 -F /home/stack/ssh.config.ansible undercloud -W 192.168.24.16:22
      IdentityFile /home/stack/.ssh/id_rsa
      User heat-admin
      StrictHostKeyChecking no
      UserKnownHostsFile=/dev/null
    ...
    ...

In this example, to connect to overcloud-controller-0, Ansible will use
*undercloud* as a *ProxyHost*.

With this setup in place it is then possible to launch the playbook:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release=newton

Using the playbooks on a tripleo-quickstart provided environment
-----------------------------------------------------------------

The *tripleo-ha-utils* project can be set as a *tripleo-quickstart*
extra requirement, so all the code will be automatically downloaded and
available.
Inside the requirements.txt file you will need a line pointing to this repo:

    echo "https://github.com/openstack/tripleo-ha-utils/#egg=tripleo-ha-utils" >> tripleo-quickstart/quickstart-extras-requirements.txt

Supposing the environment was successfully provided with a previous quickstart
execution, to use one of the utils playbooks a command line like this one can
be used:

    ./quickstart.sh \
      --retain-inventory \
      --teardown none \
      --playbook overcloud-instance-ha.yml \
      --working-dir /path/to/workdir \
      --config /path/to/config.yml \
      --release <RELEASE> \
      --tags all \
      <VIRTHOST HOSTNAME or IP>

Basically this command:

- **Keeps** existing data on the repo (by keeping the inventory and all the
  virtual machines)
- Uses the *overcloud-instance-ha.yml* playbook
- Uses the same workdir where quickstart was first deployed
- Selects the specific config file (optional)
- Specifies the release (mitaka, newton, or "master" for ocata)
- Performs all the tasks in the playbook overcloud-instance-ha.yml

**Important note**

You might need to export *ANSIBLE_SSH_ARGS* with the path of the
*ssh.config.ansible* file to make the command work, like this:

    export ANSIBLE_SSH_ARGS="-F /path/to/quickstart/workdir/ssh.config.ansible"

License
-------

Licensed under the Apache License, Version 2.0. You may obtain a copy of the
License at <http://www.apache.org/licenses/LICENSE-2.0>

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
README.rst (new file, 10 lines)
@@ -0,0 +1,10 @@
This project is no longer maintained.

The contents of this repository are still available in the Git
source code management system. To see the contents of this
repository before it reached its end of life, please check out the
previous commit with "git checkout HEAD^1".

For any further questions, please email
openstack-discuss@lists.openstack.org or join #openstack-dev on
OFTC.
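The notice above points at the Git history for anything that used to live here; a minimal sketch of retrieving the pre-retirement tree (repository URL taken from the README that this change removes):

```bash
# Clone the retired repository and look at the last two commits:
# the retirement commit and the last commit that still carried content.
git clone https://github.com/openstack/tripleo-ha-utils
cd tripleo-ha-utils
git log --oneline -2

# Check out the tree as it was before the repository reached end of life.
git checkout HEAD^1
```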
@@ -1,174 +0,0 @@
Multi Virtual Undercloud
========================

This document describes a way to deploy multiple virtual underclouds on the
same host. This is mainly for environments in which you want to manage multiple
baremetal overclouds without having one baremetal machine dedicated to each one
you deploy.

Requirements
------------

**Physical switches**

The switch(es) must support VLAN tagging and all the ports must be configured
as trunks, so that the dedicated network interface on the physical host (in the
examples the secondary interface, eth1) is able to offer PXE and DHCP to all the
overcloud machines via the undercloud virtual machine's bridged interface.

**Host hardware**

The main requirement to make this kind of setup work is to have a host
powerful enough to run virtual machines with at least 16GB of RAM and 8 CPUs.
The more power you have, the more undercloud machines you can spawn without
impacting performance.

**Host network topology**

The host is reachable via ssh from the machine launching quickstart and is
configured with two main network interfaces:

- **eth0**: bridged on **br0**, pointing to the LAN (underclouds will own an IP
  to be reachable via ssh);
- **eth1**: connected to the dedicated switch that supports all the VLANs that
  will be used in the deployment;

Over eth1, for each undercloud virtual machine two VLAN interfaces are created,
with associated bridges:

- **Control plane network bridge** (i.e. br2100) built over a VLAN interface
  (i.e. eth1.2100) that will be eth1 on the undercloud virtual machine, used by
  TripleO as br-ctlplane;
- **External network bridge** (i.e. br2105) built over a VLAN interface (i.e.
  eth1.2105) that will be eth2 on the undercloud virtual machine, used by
  TripleO as the external network device;

![network-topology](./multi-virtual-undercloud_network-topology.png "Multi Virtual Undercloud - Network Topology")
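How the per-undercloud VLAN interfaces and bridges above get created is left to the operator. A minimal, non-persistent sketch with iproute2, assuming VLAN ID 2100 for the control plane and the br2100/eth1.2100 naming convention used above:

```bash
# VLAN interface on top of the trunked physical NIC.
ip link add link eth1 name eth1.2100 type vlan id 2100

# Bridge that the undercloud VM's ctlplane NIC will attach to.
ip link add name br2100 type bridge
ip link set eth1.2100 master br2100

ip link set eth1.2100 up
ip link set br2100 up
```

The external network bridge (br2105 over eth1.2105 above) follows the same pattern; persistent configuration (ifcfg files or equivalent) is not shown.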
Quickstart configuration
------------------------

The virtual undercloud machine is treated as a baremetal one, and the Quickstart
command relies on the baremetal undercloud role and its playbook.
This means that any playbook similar to [baremetal-undercloud.yml](https://github.com/openstack/tripleo-quickstart-extras/blob/master/playbooks/baremetal-undercloud.yml "Baremetal undercloud playbook") should be okay.

The configuration file has two specific sections that need attention:

- Additional interface for the external network to route overcloud traffic:

  ```yaml
  undercloud_networks:
    external:
      address: 172.20.0.254
      netmask: 255.255.255.0
      device_type: ethernet
      device_name: eth2
  ```

  **NOTE:** in this configuration eth2 also acts as the default router for
  the external network.

- Baremetal provision script, which will be a helper for the
  [multi-virtual-undercloud.sh](./multi-virtual-undercloud.sh) script on the <VIRTHOST>:

  ```yaml
  baremetal_provisioning_script: "/path/to/multi-virtual-undercloud-provisioner.sh <VIRTHOST> <DISTRO> <UNDERCLOUD-NAME> <UNDERCLOUD IP> <UNDERCLOUD NETMASK> <UNDERCLOUD GATEWAY> <CTLPLANE VLAN> <EXTERNAL NETWORK VLAN>"
  ```

The supported parameters, with the exception of VIRTHOST, are the same ones
that are passed to the script that lives (and runs) on the VIRTHOST,
*multi-virtual-undercloud.sh*.
This helper script launches the remote command on the VIRTHOST host and ensures
that the machine is reachable via ssh before proceeding.

The multi virtual undercloud script
-----------------------------------

The [multi-virtual-undercloud.sh](./multi-virtual-undercloud.sh) script is
placed on the VIRTHOST and needs these parameters:

1. **DISTRO**: this must be the name (without extension) of one of the images
   present inside the */images* dir on the VIRTHOST;
2. **VMNAME**: the name of the undercloud virtual machine (the name that will
   be used by libvirt);
3. **VMETH0IP**: IP of the virtual undercloud primary interface to which
   quickstart (and users) will connect via ssh;
4. **VMETH0NM**: Netmask of the virtual undercloud primary interface;
5. **VMETH0GW**: Gateway of the virtual undercloud primary interface;
6. **VMSSHKEY**: Public key to be enabled on the virtual undercloud;
7. **UCVLAN**: VLAN of the overcloud's ctlplane network;
8. **UCEXTVLAN**: VLAN of the overcloud's external network;

The script's actions are basically:

1. Destroy and undefine any existing machine named as the one we want to create;
2. Prepare the image on which the virtual undercloud will be created by copying
   the available distro image and preparing it to be ready for the TripleO
   installation, fixing size, network interfaces, packages and ssh keys;
3. Create and launch the virtual undercloud machine;

**Note**: on the VIRTHOST there must exist an */images* directory containing
images suitable for the deploy, with a directory structure like this:

```console
[root@VIRTHOST ~]# ls -l /images/
total 1898320
lrwxrwxrwx. 1 root root         34 14 feb 09.20 centos-7.qcow2 -> CentOS-7-x86_64-GenericCloud.qcow2
-rw-r--r--. 1 root root 1361182720 15 feb 10.57 CentOS-7-x86_64-GenericCloud.qcow2
lrwxrwxrwx. 1 root root         36 14 feb 09.20 rhel-7.qcow2 -> rhel-guest-image-7.3-33.x86_64.qcow2
-rw-r--r--. 1 root root  582695936 19 ott 18.44 rhel-guest-image-7.3-33.x86_64.qcow2
```

This helps with updating the images, since one can leave config files pointing
to *centos-7* and, in case of updates, make the symlink point to a newer image.
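Updating an image then only requires repointing the symlink; a sketch (the newer image file name is illustrative):

```bash
cd /images
# After placing the newer cloud image alongside the old one:
ln -sf CentOS-7-x86_64-GenericCloud-1905.qcow2 centos-7.qcow2
```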
Quickstart command
------------------

A typical invocation of the TripleO Quickstart command is something similar to
this:

```console
/path/to/tripleo-quickstart/quickstart.sh \
  --bootstrap \
  --ansible-debug \
  --no-clone \
  --playbook baremetal-undercloud.yml \
  --working-dir /path/to/workdir \
  --config /path/to/config.yml \
  --release $RELEASE \
  --tags "all" \
  $VIRTHOST
```

So nothing is different from a normal quickstart deploy command line; the
difference here is made by the config.yml described above, with its provision
script.

Conclusions
-----------

This approach can be considered useful for testing multiple environments with
TripleO for three reasons:

* It is *fast*: it takes the same time to install the undercloud but less to
  provide it, since you don't have to wait for the physical undercloud
  provisioning;
* It is *isolated*: using VLANs to separate the traffic keeps each environment
  completely isolated from the others;
* It is *reliable*: you can have the undercloud on shared storage and think
  about putting the undercloud vm in HA, live migrating it with libvirt,
  pacemaker, whatever...

There are no macroscopic cons, except for the initial configuration on the
VIRTHOST, which is made only one time, at the beginning.

License
-------

GPL

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
@@ -1,46 +0,0 @@
#!/bin/bash

set -eux

VIRTHOST=$1
DISTRO=$2
VMNAME=$3
VMETH0IP=$4
VMETH0NM=$5
VMETH0GW=$6
VMSSHKEY=$7
UCVLAN=$8
UCEXTVLAN=$9

function wait_machine_status {
  UNDERCLOUD=$1
  STATUS=$2
  while true
   do
    nc $UNDERCLOUD 22 < /dev/null &> /dev/null
    NCSTATUS=$?
    if [ "$STATUS" == "up" ]
     then
      [ $NCSTATUS -eq 0 ] && break || (sleep 5; echo -n ".")
     else
      [ $NCSTATUS -ne 0 ] && break || (sleep 5; echo -n ".")
    fi
   done
}

# Copying public key on VIRTHOST
echo -n "$(date) - Copying $VMSSHKEY on $VIRTHOST: "
scp $VMSSHKEY root@$VIRTHOST:$VMNAME\_key.pub
echo "Done."

# Providing the machine
echo -n "$(date) - Starting provision of $VMNAME ($VMETH0IP) on $VIRTHOST: "
ssh root@$VIRTHOST /root/multi-virtual-undercloud.sh $DISTRO $VMNAME $VMETH0IP $VMETH0NM $VMETH0GW $VMNAME\_key.pub $UCVLAN $UCEXTVLAN
echo "Done."

set +e

# Wait for machine to come up
echo -n "$(date) - Waiting for $VMNAME to come up again after update: "
wait_machine_status $VMETH0IP "up"
echo "Done."
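An illustrative invocation of the helper above (argument order follows the variable assignments at the top of the script; every value here is a made-up example):

```bash
# VIRTHOST DISTRO VMNAME VMETH0IP VMETH0NM VMETH0GW VMSSHKEY UCVLAN UCEXTVLAN
./multi-virtual-undercloud-provisioner.sh \
    virthost.example.com \
    centos-7 \
    undercloud-1 \
    10.0.0.51 \
    255.255.255.0 \
    10.0.0.254 \
    /home/stack/.ssh/id_rsa.pub \
    2100 \
    2105
```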
@@ -1,115 +0,0 @@
#!/bin/bash

set -eux

DISTRO=$1
CLONEFROM=/images/$DISTRO\.qcow2
VMNAME=$2
VMIMG=/vms/$VMNAME\.qcow2
VMIMGCOPY=/vms/ORIG-$VMNAME\.qcow2
VMETH0IP=$3
VMETH0NM=$4
VMETH0GW=$5
VMSSHKEY=$6
VMDISKADD=50G
UCVLAN=$7
UCEXTVLAN=$8
WORKDIR=/tmp/virt-undercloud-$(date +%s)

mkdir -p $WORKDIR
pushd $WORKDIR

# Destroy the machine if it is running
ISRUNNING=$(virsh list | grep $VMNAME || true)
[ "x$ISRUNNING" != "x" ] && virsh destroy $VMNAME

# Undefine the vm if it is defined
ISDEFINED=$(virsh list --all | grep $VMNAME || true)
[ "x$ISDEFINED" != "x" ] && virsh undefine $VMNAME

# Copy qcow2 base image
cp -v $CLONEFROM $VMIMG

echo "$(date) - Adding $VMDISKADD to $VMIMG: "
qemu-img resize $VMIMG +$VMDISKADD

echo "$(date) - Resizing filesystem of $VMIMG: "
cp -v $VMIMG $VMIMGCOPY
virt-resize --expand /dev/sda1 $VMIMGCOPY $VMIMG
rm -fv $VMIMGCOPY

echo "$(date) - Checking status of $VMIMG: "
qemu-img info $VMIMG
virt-filesystems --long -h --all -a $VMIMG

cat > ifcfg-eth0 <<EOF
NAME=eth0
DEVICE=eth0
ONBOOT=yes
BOOTPROTO=static
IPADDR=$VMETH0IP
NETMASK=$VMETH0NM
GATEWAY=$VMETH0GW
PEERDNS=yes
DNS1=8.8.8.8
TYPE=Ethernet
EOF

cat > ifcfg-eth1 <<EOF
NAME=eth1
DEVICE=eth1
ONBOOT=yes
BOOTPROTO=none
TYPE=Ethernet
EOF

cat $VMSSHKEY >> ./authorized_keys

case "$DISTRO" in
  "centos-7") virt-customize -a $VMIMG \
                --root-password password:redhat \
                --install openssh-server \
                --run-command "xfs_growfs /" \
                --run-command "echo 'GRUB_CMDLINE_LINUX=\"console=tty0 crashkernel=auto no_timer_check net.ifnames=0 console=ttyS0,115200n8\"' >> /etc/default/grub" \
                --run-command "grubby --update-kernel=ALL --args=net.ifnames=0" \
                --run-command "systemctl enable sshd" \
                --mkdir /root/.ssh \
                --copy-in ifcfg-eth0:/etc/sysconfig/network-scripts/ \
                --copy-in ifcfg-eth1:/etc/sysconfig/network-scripts/ \
                --copy-in ./authorized_keys:/root/.ssh/ \
                --selinux-relabel
              ;;
  "rhel-7") virt-customize -a $VMIMG \
              --root-password password:redhat \
              --run-command "curl -o rhos-release-latest.noarch.rpm http://rhos-release.virt.bos.redhat.com/repos/rhos-release/rhos-release-latest.noarch.rpm" \
              --run-command "rpm -Uvh rhos-release-latest.noarch.rpm" \
              --run-command "rhos-release rhel-7.3" \
              --install openssh-server \
              --run-command "systemctl enable sshd" \
              --run-command "rpm -e rhos-release" \
              --run-command "sed -i -e '/\[rhelosp-rhel-7.3-server-opt\]/,/^\[/s/enabled=0/enabled=1/' /etc/yum.repos.d/rhos-release-rhel-7.3.repo" \
              --mkdir /root/.ssh \
              --copy-in ifcfg-eth0:/etc/sysconfig/network-scripts/ \
              --copy-in ifcfg-eth1:/etc/sysconfig/network-scripts/ \
              --copy-in ./authorized_keys:/root/.ssh/ \
              --selinux-relabel
            ;;
esac

# Deploy the vm
virt-install \
  --import \
  --name $VMNAME \
  --ram 16192 \
  --disk path=$VMIMG \
  --vcpus 8 \
  --os-type linux \
  --os-variant generic \
  --network bridge=br0 \
  --network bridge=br$UCVLAN \
  --network bridge=br$UCEXTVLAN \
  --graphics none \
  --noautoconsole

rm -rf $WORKDIR
popd
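Once virt-install returns, the new undercloud VM can be reached on its serial console or over ssh with the key injected above (VM name and IP here are whatever was passed as VMNAME and VMETH0IP; the values shown are examples):

```bash
# Serial console via libvirt:
virsh console undercloud-1

# Or ssh as root using the key copied into /root/.ssh/authorized_keys:
ssh -i /home/stack/.ssh/id_rsa root@10.0.0.51
```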
Binary image file not shown (224 KiB).
@@ -1,52 +0,0 @@
InfraRed Instance-HA Plugin Playbook
====================================

This plugin deploys Instance-HA on OpenStack using InfraRed.

The tasks in infrared_instance-ha_plugin_main.yml, along with the
plugin.spec at tripleo-ha-utils/plugin.spec, provide support
for running this repo's roles and playbooks as an InfraRed plugin.

[InfraRed](http://infrared.readthedocs.io/en/stable/) is a plugin based system
that aims to provide an easy-to-use CLI for Ansible based projects and
OpenStack deployment.

The plugin provides InfraRed plugin integration for
two OpenStack high-availability features:

[instance-ha](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/instance-ha)

[stonith-config](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/stonith-config)

Usage:
======

**Installation and deployment:**

[Setup InfraRed](http://infrared.readthedocs.io/en/stable/bootstrap.html)

    ir plugin add https://github.com/openstack/tripleo-ha-utils

    export ANSIBLE_ROLES_PATH='plugins/tripleo-ha-utils/roles'

    ir instance-ha-deploy -v --release 12 --stonith_devices all

*Notice: a failure and a warning will be issued if the plugin's specific ANSIBLE_ROLES_PATH is not defined.*

**Plugin help:**

    ir instance-ha-deploy -h

**Plugin uninstall:**

    ir plugin remove instance-ha-deploy

Author Information
------------------

Pini Komarov pkomarov@redhat.com
@@ -1,128 +0,0 @@
---
- name: check ANSIBLE_ROLES_PATH variable
  hosts: localhost
  tasks:

    - shell: |
        echo $ANSIBLE_ROLES_PATH
      name: check $ANSIBLE_ROLES_PATH variable
      register: ansible_roles_path_out

    - name: check $ANSIBLE_ROLES_PATH is set
      fail:
        msg: Please export ANSIBLE_ROLES_PATH='plugins/tripleo-ha-utils/roles' ; Before running this playbook with infrared.
      when: '"tripleo-ha-utils" not in ansible_roles_path_out.stdout'


# manual override because of https://github.com/ansible/ansible/issues/26336
#- name: Configure Instance HA
#  hosts: undercloud
#  gather_facts: yes
#
#  tasks:
#
#  - include_role:
#      name: instance-ha

# This creates the clouds.yaml file from undercloud/overcloud credentials
# for use in pythonsdk api for osp connection and querying:

- name: create clouds.yaml for pythonsdk api
  hosts: undercloud
  tasks:

    - name: get undercloud variables
      shell: |
        for key in $( set | awk '{FS="="} /^OS_/ {print $1}' ); do unset $key ; done
        source /home/stack/stackrc
        echo -n "undercloud: {'auth': { 'auth_url': '$OS_AUTH_URL', 'username': '$OS_USERNAME', 'password': '$OS_PASSWORD', 'project_name': '${OS_PROJECT_NAME:-$OS_TENANT_NAME}', 'project_domain_name': '$OS_PROJECT_DOMAIN_NAME', 'user_domain_name': '$OS_USER_DOMAIN_NAME'}}"
      register: cloud_details

    - name: create clouds.yaml if doesn't exist
      blockinfile:
        content: 'clouds:'
        dest: /home/stack/clouds.yaml
        marker: "#{mark} HEADER"
        create: yes

    - name: insert undercloud parameters
      blockinfile:
        dest: /home/stack/clouds.yaml
        block: |5
            {{ cloud_details.stdout|from_yaml|to_nice_yaml(indent=4) }}
        insertbefore: "#END undercloud SECTION"
        marker: "#{mark} undercloud PARAMETERS"

    - name: get overcloud variables
      shell: |
        for key in $( set | awk '{FS="="} /^OS_/ {print $1}' ); do unset $key ; done
        source /home/stack/overcloudrc
        echo -n "overcloud: {'auth': { 'auth_url': '$OS_AUTH_URL', 'username': '$OS_USERNAME', 'password': '$OS_PASSWORD', 'project_name': '${OS_PROJECT_NAME:-$OS_TENANT_NAME}', 'project_domain_name': '$OS_PROJECT_DOMAIN_NAME', 'user_domain_name': '$OS_USER_DOMAIN_NAME' }}"
      register: cloud_details

    - name: create clouds.yaml if doesn't exist
      blockinfile:
        content: 'clouds:'
        dest: /home/stack/clouds.yaml
        marker: "#{mark} HEADER"
        create: yes

    - name: insert overcloud parameters
      blockinfile:
        dest: /home/stack/clouds.yaml
        block: |5
            {{ cloud_details.stdout|from_yaml|to_nice_yaml(indent=4) }}
        insertbefore: "#END overcloud SECTION"
        marker: "#{mark} overcloud PARAMETERS"


# This executes all from the undercloud itself:

- name: Configure Instance HA
  hosts: undercloud
  gather_facts: no
  tasks:

    - name: create ansible hosts file
      template:
        src: templates/ansible_hosts.yml.j2
        dest: /home/stack/hosts
        owner: stack
        group: stack
        mode: 0644

    - name: create ssh.config.ansible file
      shell: |
        source /home/stack/stackrc
        echo -e "Host undercloud\n Hostname 127.0.0.1\n IdentityFile /home/stack/.ssh/id_rsa\n User stack\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n" > ssh.config.ansible
        openstack server list -c Name -c Networks | awk '/ctlplane/ {print $2, $4}' | sed s/ctlplane=//g | while read node; do node_name=$(echo $node | cut -f 1 -d " "); node_ip=$(echo $node | cut -f 2 -d " "); echo -e "Host $node_name\n Hostname $node_ip\n IdentityFile /home/stack/.ssh/id_rsa\n User heat-admin\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n"; done >> ssh.config.ansible

    - name: get tripleo-ha-utils repo
      git:
        repo: 'https://github.com/openstack/tripleo-ha-utils.git'
        dest: /home/stack/tripleo-ha-utils

    - name: create ansible env file
      shell: |
        cat >/home/stack/ansible_ha.env<<EOF
        export ANSIBLE_INVENTORY="/home/stack/hosts"
        export ANSIBLE_SSH_ARGS="-F /home/stack/ssh.config.ansible"
        export ANSIBLE_CONFIG="/home/stack/ansible.cfg"

    - name: create ansible.cfg file
      shell: |
        printf "[defaults]\nroles_path = /home/stack/tripleo-ha-utils/roles" > /home/stack/ansible.cfg

    - name: run instance-ha deploy script
      shell: |
        source /home/stack/ansible_ha.env
        ansible-playbook -v /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release={{release}} -e stonith_devices={{stonith_devices}} -e instance_ha_shared_storage={{instance_ha_shared_storage}} -e instance_ha_action={{instance_ha_action}}
      register: instance_ha_deploy_outcome
      vars:
        instance_ha_action: '{{ install.instance_ha_action }}'
        release: '{{ install.release }}'
        stonith_devices: '{{ install.stonith_devices }}'
        instance_ha_shared_storage: '{{ install.instance_ha_shared_storage }}'
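Once the playbook above has written /home/stack/clouds.yaml, the two named cloud entries can be used for a quick sanity check with python-openstackclient's --os-cloud option (run from /home/stack so the file is picked up; this check is not part of the original playbook):

```bash
cd /home/stack
# Query the undercloud and the overcloud through the generated clouds.yaml.
openstack --os-cloud undercloud server list
openstack --os-cloud overcloud network list
```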
@@ -1,27 +0,0 @@
undercloud ansible_host=undercloud ansible_user=stack ansible_private_key_file=/home/stack/.ssh/id_rsa

{% for overcloud_host in groups['overcloud_nodes'] %}
{{overcloud_host}} ansible_host={{overcloud_host}} ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
{% endfor %}

{% for overcloud_host in groups['overcloud_nodes'] %}
{{overcloud_host}} ansible_host={{overcloud_host}} ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
{% endfor %}

[compute]
{% for overcloud_host in groups['compute'] %}
{{overcloud_host}}
{% endfor %}

[undercloud]
undercloud

[overcloud]
{% for overcloud_host in groups['overcloud_nodes'] %}
{{overcloud_host}}
{% endfor %}

[controller]
{% for overcloud_host in groups['controller'] %}
{{overcloud_host}}
{% endfor %}
@@ -1,148 +0,0 @@
---
- name: Baremetal undercloud install
  hosts: localhost
  roles:
    - baremetal-undercloud
  tags:
    - baremetal-undercloud

- name: Add the undercloud node to the generated inventory
  hosts: localhost
  gather_facts: yes
  roles:
    - tripleo-inventory
  tags:
    - undercloud-inventory

- name: Setup repositories
  hosts: undercloud
  gather_facts: yes
  roles:
    - repo-setup
  tags:
    - undercloud-repo-setup

- name: Install packages
  hosts: undercloud
  gather_facts: no
  roles:
    - baremetal-undercloud/packages
  tags:
    - undercloud-pkgs-install

- name: Deploy the undercloud
  hosts: undercloud
  gather_facts: no
  roles:
    - undercloud-deploy
  tags:
    - undercloud-deploy

- name: Prepare baremetal for the overcloud deployment
  hosts: undercloud
  roles:
    - baremetal-prep-overcloud
  tags:
    - baremetal-prep-overcloud

- name: Prepare configuration files for the overcloud deployment
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-config
  tags:
    - overcloud-prep-config

- name: Prepare overcloud containers
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-containers
  tags:
    - overcloud-prep-containers

- name: Fetch the overcloud images
  hosts: undercloud
  gather_facts: no
  become: true
  roles:
    - fetch-images
  tags:
    - overcloud-fetch-images

- name: Prepare the overcloud images for deployment
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-images
  tags:
    - overcloud-prep-images

- name: Prepare overcloud flavors
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-flavors
  tags:
    - overcloud-prep-flavors

- name: Prepare the undercloud networks for the overcloud deployment
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-network
  tags:
    - overcloud-prep-network

- name: Prepare SSL for the overcloud
  hosts: undercloud
  gather_facts: yes
  roles:
    - overcloud-ssl
  tags:
    - overcloud-ssl

- name: Deploy the overcloud
  hosts: undercloud
  gather_facts: yes
  roles:
    - overcloud-deploy
  tags:
    - overcloud-deploy

- name: Add the overcloud nodes to the generated inventory
  hosts: undercloud
  gather_facts: yes
  vars:
    inventory: all
  roles:
    - tripleo-inventory
  tags:
    - overcloud-inventory

- name: Check the result of the deployment
  hosts: localhost
  tasks:
    - name: ensure the deployment result has been read into memory
      include_vars: "{{ local_working_dir }}/overcloud_deployment_result.json"

    # overcloud_deploy_result = ["failed", "passed"]
    - name: did the deployment pass or fail?
      debug: var=overcloud_deploy_result
      failed_when: overcloud_deploy_result == "failed"
  tags:
    - overcloud-deploy-check

- name: Gather undercloud and overcloud facts
  hosts: undercloud overcloud
  gather_facts: yes
  tags:
    - overcloud-validate-ha

- name: Validate the overcloud using HA tests
  hosts: undercloud
  gather_facts: no
  roles:
    - validate-ha
  tags:
    - overcloud-validate-ha
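Every play above carries a tag, so individual stages can be re-run in isolation with ansible-playbook's --tags option; a sketch (the playbook path placeholder must be replaced with this file's location in a checkout of the repository):

```bash
# Re-run only the overcloud deployment and the HA validation stages.
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/<this-playbook>.yml \
    --tags overcloud-deploy,overcloud-validate-ha
```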
@@ -1,10 +0,0 @@
---
- name: Gather undercloud and overcloud facts
  hosts: undercloud overcloud
  gather_facts: yes

- name: Configure Instance HA
  hosts: undercloud
  gather_facts: no
  roles:
    - instance-ha
@@ -1,7 +0,0 @@
---

- name: Configure STONITH for all the hosts on the overcloud
  hosts: undercloud
  gather_facts: yes
  roles:
    - stonith-config
@@ -1,14 +0,0 @@
---
- name: Gather undercloud and overcloud facts
  hosts: undercloud overcloud
  gather_facts: yes
  tags:
    - overcloud-validate-ha

- name: Validate overcloud HA status
  hosts: undercloud
  gather_facts: yes
  tags:
    - overcloud-validate-ha
  roles:
    - validate-ha
plugin.spec (37 lines removed)
@@ -1,37 +0,0 @@
---
config:
  entry_point: ./infrared/infrared_instance-ha_plugin_main.yml
  plugin_type: install
subparsers:
  instance-ha-deploy:
    description: Collection of instance-ha configuration tasks
    include_groups: ["Ansible options", "Inventory", "Common options", "Answers file"]
    groups:

      - title: Instance HA
        options:
          instance_ha_action:
            type: Value
            default: install
            help: |
              Can be 'install' or 'uninstall'

          release:
            type: Value
            help: |
              A rhos release - version_number.
              Example: "rhos-10".
            required: yes
          stonith_devices:
            type: Value
            default: controllers
            help: |
              Can be all, controllers or computes

          instance_ha_shared_storage:
            type: Bool
            help: |
              Do we have a shared storage or not?
            default: False
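Combining the options defined in this spec, a full InfraRed invocation could look like the sketch below; the base command and the --release/--stonith_devices usage come from the plugin README earlier in this change, and the remaining values mirror the defaults declared above:

```bash
export ANSIBLE_ROLES_PATH='plugins/tripleo-ha-utils/roles'

# Deploy instance HA, configuring STONITH only for controllers (the default),
# with no shared storage.
ir instance-ha-deploy -v \
    --release 12 \
    --instance_ha_action install \
    --stonith_devices controllers \
    --instance_ha_shared_storage False
```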
@@ -1,53 +0,0 @@
Rally tests
===========

This directory contains all the files available to use Rally for testing the
behavior of the TripleO environment.
For example you can test if instance HA is behaving correctly inside the
overcloud environment in which it was configured.

Requirements
------------

A working and accessible TripleO environment, as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md),
so a *hosts* file containing the whole environment inventory and, if needed, a
*ssh.config.ansible* with all the information to access nodes.

How to use Rally to test Instance HA
------------------------------------

If you want to launch a Rally test session to check how Instance HA is behaving
in the overcloud, you can rely on a command like this one:

    ansible-playbook -i hosts \
      -e public_physical_network="public" \
      -e floating_ip_cidr="192.168.99.0/24" \
      -e public_net_pool_start="192.168.99.211" \
      -e public_net_pool_end="192.168.99.216" \
      -e public_net_gateway="192.168.99.254" \
      tripleo-ha-utils/rally/instance-ha.yml

This command can be launched from the *undercloud* machine or from a jump host
(which must have all the required files locally).
The requested parameters refer to the network settings in which the instances
will be spawned.

This will execute the tests contained in the template yaml:

* *InstanceHA.recover_instance_fip_and_volume*: spawn an instance, stop the
  compute it's running on, check it migrates, check node recovers;
* *InstanceHA.recover_stopped_instance_fip*: spawn an instance, put it in
  stopped status, stop the compute it's running on, check it migrates, check
  node recovers;
* *InstanceHA.recover_instance_two_cycles*: do as in the first step, but two
  times;

License
-------

GPL

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
@@ -1,99 +0,0 @@
---
- hosts: undercloud
  gather_facts: no
  become: yes
  become_method: sudo
  tasks:
    - name: Install Rally dependencies
      shell: |
        # Python pip
        wget https://bootstrap.pypa.io/get-pip.py -O get-pip.py
        python get-pip.py
        # Dependencies
        yum install -y gmp-devel libffi-devel libxml2-devel libxslt-devel openssl-devel postgresql-devel gcc python-devel

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Install Rally
      shell: |
        # Install Rally from upstream
        wget -q -O- https://raw.githubusercontent.com/openstack/rally/master/install_rally.sh | bash |& tee rally-install.log
        mkdir -p .rally/plugins
    - name: Check Rally installation
      shell: |
        source /home/stack/rally/bin/activate
        rally --version

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Copy instance-ha Rally plugin to remote rally directory
      copy:
        src: plugins/instanceha.py
        dest: .rally/plugins

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Install Rally environment and create deployment
      shell: |
        source /home/stack/overcloudrc
        source /home/stack/rally/bin/activate
        export OS_INSECURE=True
        rally deployment create --fromenv --name overcloud |& tee rally-instance-ha-deployment-create.log
        rally deployment use overcloud

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Prepare overcloud env
      shell: |
        source /home/stack/overcloudrc

        projectid=$(openstack project list | awk '/admin/ {print $2}')
        wget -O /tmp/cirros-0.3.4-x86_64-disk.img http://download.cirros-cloud.net/0.3.4/cirros-0.3.4-x86_64-disk.img
        glance --os-project-id=$projectid image-create --name cirros --container-format bare --disk-format raw --file /tmp/cirros-0.3.4-x86_64-disk.img --visibility public

        nova flavor-create --ephemeral 0 --is-public True m1.tiny overcloud-instance-test-small-flavor 2048 20 1

        neutron net-create {{ public_physical_network }}-network --router:external=True --provider:physical_network {{ public_physical_network }} --provider:network_type flat
        neutron subnet-create --name {{ public_physical_network }}-subnet --disable-dhcp --allocation-pool start={{ public_net_pool_start }},end={{ public_net_pool_end }} --gateway {{ public_net_gateway }} {{ public_physical_network }}-network {{ floating_ip_cidr }}

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Copy Rally task file
      template:
        src: templates/instance-ha.yaml.j2
        dest: "/home/stack/instance-ha.yaml"
        mode: 0666

    - name: Start Rally task
      shell: |
        source /home/stack/rally/bin/activate
        rally task start --task /home/stack/instance-ha.yaml --deployment overcloud |& tee rally-instance-ha-run.log

    - name: Create Report JUnit
      shell: |
        source /home/stack/rally/bin/activate
        rally task report --junit --out /home/stack/nosetests.xml |& tee rally-instance-ha-report.log

    - fetch:
        src: "/home/stack/nosetests.xml"
        dest: "{{ lookup('env', 'PWD') }}/nosetests.xml"
        flat: yes

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Remove overcloud env
      shell: |
        source /home/stack/overcloudrc

        projectid=$(openstack project list | awk '/admin/ {print $2}')
        glance --os-project-id=$projectid image-delete $(glance --os-project-id=$projectid image-list | awk '/cirros/ {print $2}')

        nova flavor-delete overcloud-instance-test-small-flavor

        neutron net-delete {{ public_physical_network }}-network
@ -1,458 +0,0 @@
|
|||||||
from os import path
|
|
||||||
import socket
|
|
||||||
import time
|
|
||||||
|
|
||||||
|
|
||||||
from rally.common import logging
|
|
||||||
from rally.common import sshutils
|
|
||||||
from rally import exceptions
|
|
||||||
from rally_openstack import consts
|
|
||||||
from rally_openstack import scenario
|
|
||||||
from rally_openstack.scenarios.vm import utils as vm_utils
|
|
||||||
from rally_openstack.scenarios.cinder import utils as cinder_utils
|
|
||||||
from rally.task import atomic
|
|
||||||
from rally.task import types
|
|
||||||
from rally.task import validation
|
|
||||||
from rally.task import utils as task_utils
|
|
||||||
import six
|
|
||||||
|
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def failover(self, host, command, port=22, username="", password="",
|
|
||||||
key_filename=None, pkey=None):
|
|
||||||
"""Trigger failover at host
|
|
||||||
:param host:
|
|
||||||
:param command:
|
|
||||||
:return:
|
|
||||||
"""
|
|
||||||
if key_filename:
|
|
||||||
key_filename = path.expanduser(key_filename)
|
|
||||||
LOG.info("Host: %s. Injecting Failover %s" % (host,
|
|
||||||
command))
|
|
||||||
try:
|
|
||||||
code, out, err = _run_command(self, server_ip=host, port=port,
|
|
||||||
username=username,
|
|
||||||
password=password,
|
|
||||||
key_filename=key_filename,
|
|
||||||
pkey=pkey, command=command
|
|
||||||
)
|
|
||||||
if code and code > 0:
|
|
||||||
raise exceptions.ScriptError(
|
|
||||||
"Error running command %(command)s. "
|
|
||||||
"Error %(code)s: %(error)s" % {
|
|
||||||
"command": command, "code": code, "error": err})
|
|
||||||
except exceptions.SSHTimeout:
|
|
||||||
LOG.debug("SSH session of disruptor command timeouted, continue...")
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def _run_command(self, server_ip, port, username, password, command,
|
|
||||||
pkey=None, key_filename=None):
|
|
||||||
"""Run command via SSH on server.
|
|
||||||
Create SSH connection for server, wait for server to become available
|
|
||||||
(there is a delay between server being set to ACTIVE and sshd being
|
|
||||||
available). Then call run_command_over_ssh to actually execute the
|
|
||||||
command.
|
|
||||||
Note: Shadows vm.utils.VMScenario._run_command to support key_filename.
|
|
||||||
:param server_ip: server ip address
|
|
||||||
:param port: ssh port for SSH connection
|
|
||||||
:param username: str. ssh username for server
|
|
||||||
:param password: Password for SSH authentication
|
|
||||||
:param command: Dictionary specifying command to execute.
|
|
||||||
See `rally info find VMTasks.boot_runcommand_delete' parameter
|
|
||||||
`command' docstring for explanation.
|
|
||||||
:param key_filename: private key filename for SSH authentication
|
|
||||||
:param pkey: key for SSH authentication
|
|
||||||
:returns: tuple (exit_status, stdout, stderr)
|
|
||||||
"""
|
|
||||||
if not key_filename:
|
|
||||||
pkey = pkey or self.context["user"]["keypair"]["private"]
|
|
||||||
ssh = sshutils.SSH(username, server_ip, port=port,
|
|
||||||
pkey=pkey, password=password,
|
|
||||||
key_filename=key_filename)
|
|
||||||
self._wait_for_ssh(ssh)
|
|
||||||
return _run_command_over_ssh(self, ssh, command)
|
|
||||||
|
|
||||||
|
|
||||||
@atomic.action_timer("vm.run_command_over_ssh")
|
|
||||||
def _run_command_over_ssh(self, ssh, command):
|
|
||||||
"""Run command inside an instance.
|
|
||||||
This is a separate function so that only script execution is timed.
|
|
||||||
:param ssh: A SSHClient instance.
|
|
||||||
:param command: Dictionary specifying command to execute.
|
|
||||||
See `rally info find VMTasks.boot_runcommand_delete' parameter
|
|
||||||
`command' docstring for explanation.
|
|
||||||
:returns: tuple (exit_status, stdout, stderr)
|
|
||||||
"""
|
|
||||||
cmd, stdin = [], None
|
|
||||||
|
|
||||||
interpreter = command.get("interpreter") or []
|
|
||||||
if interpreter:
|
|
||||||
if isinstance(interpreter, six.string_types):
|
|
||||||
interpreter = [interpreter]
|
|
||||||
elif type(interpreter) != list:
|
|
||||||
raise ValueError("command 'interpreter' value must be str "
|
|
||||||
"or list type")
|
|
||||||
cmd.extend(interpreter)
|
|
||||||
|
|
||||||
remote_path = command.get("remote_path") or []
|
|
||||||
if remote_path:
|
|
||||||
if isinstance(remote_path, six.string_types):
|
|
||||||
remote_path = [remote_path]
|
|
||||||
elif type(remote_path) != list:
|
|
||||||
raise ValueError("command 'remote_path' value must be str "
|
|
||||||
"or list type")
|
|
||||||
cmd.extend(remote_path)
|
|
||||||
if command.get("local_path"):
|
|
||||||
ssh.put_file(os.path.expanduser(
|
|
||||||
command["local_path"]), remote_path[-1],
|
|
||||||
mode=self.USER_RWX_OTHERS_RX_ACCESS_MODE)
|
|
||||||
|
|
||||||
if command.get("script_file"):
|
|
||||||
stdin = open(os.path.expanduser(command["script_file"]), "rb")
|
|
||||||
|
|
||||||
elif command.get("script_inline"):
|
|
||||||
stdin = six.moves.StringIO(command["script_inline"])
|
|
||||||
|
|
||||||
cmd.extend(command.get("command_args") or [])
|
|
||||||
|
|
||||||
return ssh.execute(cmd, stdin=stdin, timeout=10)
|
|
||||||
|
|
||||||
|
|
||||||
def one_killing_iteration(self, server, fip, computes, disruptor_cmd,
|
|
||||||
stop_instance):
|
|
||||||
"""Find the host where instance is hosted, disrupt the host and
|
|
||||||
verify status of the instance after the failover"""
|
|
||||||
|
|
||||||
server_admin = self.admin_clients("nova").servers.get(server.id)
|
|
||||||
host_name_pre = getattr(server_admin, "OS-EXT-SRV-ATTR:host")
|
|
||||||
host_name_ext = host_name_pre.split('.')[0] + ".external"
|
|
||||||
hypervisors = self.admin_clients("nova").hypervisors.list()
|
|
||||||
hostnames = []
|
|
||||||
for hypervisor in hypervisors:
|
|
||||||
hostnames.append(getattr(hypervisor, "hypervisor_hostname"))
|
|
||||||
if getattr(hypervisor, "hypervisor_hostname") == host_name_pre:
|
|
||||||
hypervisor_id = getattr(hypervisor, "id")
|
|
||||||
hypervisor = self.admin_clients("nova").hypervisors.get(hypervisor_id)
|
|
||||||
hypervisor_ip = socket.gethostbyname(host_name_ext.strip())
|
|
||||||
|
|
||||||
if not disruptor_cmd:
|
|
||||||
disruptor_cmd = {
|
|
||||||
"script_inline": "sudo sh -c \"echo b > /proc/sysrq-trigger\"",
|
|
||||||
"interpreter": "/bin/sh"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Trigger failover of compute node hosting the instance
|
|
||||||
failover(self, host=hypervisor_ip,
|
|
||||||
command=disruptor_cmd,
|
|
||||||
port=computes.get("port", 22),
|
|
||||||
username=computes.get("username"),
|
|
||||||
password=computes.get("password"),
|
|
||||||
key_filename=computes.get("key_filename"),
|
|
||||||
pkey=computes.get("pkey")
|
|
||||||
)
|
|
||||||
# Wait for instance to be moved to different host
|
|
||||||
hostnames.remove(host_name_pre)
|
|
||||||
task_utils.wait_for(
|
|
||||||
server_admin,
|
|
||||||
status_attr="OS-EXT-SRV-ATTR:host",
|
|
||||||
ready_statuses=hostnames,
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=120,
|
|
||||||
check_interval=5
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check the instance is SHUTOFF in the case of stopped instance or
|
|
||||||
# that the instance is pingable
|
|
||||||
if stop_instance:
|
|
||||||
task_utils.wait_for(
|
|
||||||
server,
|
|
||||||
ready_statuses=["SHUTOFF"],
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=60,
|
|
||||||
check_interval=2
|
|
||||||
)
|
|
||||||
#server_admin = self.admin_clients("nova").servers.get(server.id)
|
|
||||||
#host_name_post = getattr(server_admin, "OS-EXT-SRV-ATTR:host")
|
|
||||||
#if host_name_post in host_name_pre:
|
|
||||||
#raise exceptions.InvalidHostException()
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
if self.wait_for_ping:
|
|
||||||
self._wait_for_ping(fip["ip"])
|
|
||||||
except exceptions.TimeoutException:
|
|
||||||
console_logs = self._get_server_console_output(server,
|
|
||||||
None)
|
|
||||||
LOG.debug("VM console logs:\n%s", console_logs)
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
def recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
stop_instance=False,
|
|
||||||
disruptor_cmd=None,
|
|
||||||
iterations=1,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
"""Boot a server, trigger failover of host and verify instance.
|
|
||||||
|
|
||||||
:param image: glance image name to use for the vm
|
|
||||||
:param flavor: VM flavor name
|
|
||||||
:param computes: dictionary with credentials to the compute nodes
|
|
||||||
consisting of username, password, port, key_filename, disruptor
|
|
||||||
command and pkey.
|
|
||||||
Examples::
|
|
||||||
computes: {
|
|
||||||
username: heat-admin,
|
|
||||||
key_filename: /path/to/ssh/id_rsa.pub
|
|
||||||
port: 22
|
|
||||||
}
|
|
||||||
:param volume_args: volume args for booting server from volume
|
|
||||||
:param floating_network: external network name, for floating ip
|
|
||||||
:param use_floating_ip: bool, floating or fixed IP for SSH connection
|
|
||||||
:param force_delete: whether to use force_delete for servers
|
|
||||||
:param stop_instance: whether to stop instance before disruptor command
|
|
||||||
:param disruptor_cmd: command to be send to hosting compute node
|
|
||||||
:param iterations: number of compute node killing iteration
|
|
||||||
:param wait_for_ping: whether to check connectivity on server creation
|
|
||||||
:param **kwargs: extra arguments for booting the server
|
|
||||||
:param max_log_length: The number of tail nova console-log lines user
|
|
||||||
would like to retrieve
|
|
||||||
:returns:
|
|
||||||
"""
|
|
||||||
|
|
||||||
self.wait_for_ping = wait_for_ping
|
|
||||||
|
|
||||||
if volume_args:
|
|
||||||
volume = self.cinder.create_volume(volume_args["size"], imageRef=None)
|
|
||||||
kwargs["block_device_mapping"] = {"vdrally": "%s:::1" % volume.id}
|
|
||||||
|
|
||||||
server, fip = self._boot_server_with_fip(
|
|
||||||
image, flavor, use_floating_ip=use_floating_ip,
|
|
||||||
floating_network=floating_network,
|
|
||||||
key_name=self.context["user"]["keypair"]["name"],
|
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
task_utils.wait_for(
|
|
||||||
server,
|
|
||||||
ready_statuses=["ACTIVE"],
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=120,
|
|
||||||
check_interval=2
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
if self.wait_for_ping:
|
|
||||||
self._wait_for_ping(fip["ip"])
|
|
||||||
except exceptions.TimeoutException:
|
|
||||||
console_logs = self._get_server_console_output(server,
|
|
||||||
max_log_length)
|
|
||||||
LOG.debug("VM console logs:\n%s", console_logs)
|
|
||||||
raise
|
|
||||||
|
|
||||||
if stop_instance:
|
|
||||||
self._stop_server(server)
|
|
||||||
task_utils.wait_for(
|
|
||||||
server,
|
|
||||||
ready_statuses=["SHUTOFF"],
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=120,
|
|
||||||
check_interval=2
|
|
||||||
)
|
|
||||||
|
|
||||||
# Wait a little before killing the compute
|
|
||||||
# If we do not wait, backing image will get corrupted which was reported as bug
|
|
||||||
time.sleep(30)
|
|
||||||
|
|
||||||
for iteration in range(1, iterations+1):
|
|
||||||
one_killing_iteration(self, server, fip, computes,
|
|
||||||
disruptor_cmd, stop_instance)
|
|
||||||
# Give cluster some time to recover original compute node
|
|
||||||
LOG.info("Wait for compute nodes to come online after previous disruption")
|
|
||||||
time.sleep(360)
|
|
||||||
|
|
||||||
if stop_instance:
|
|
||||||
# Start instance If It was stopped.
|
|
||||||
self._start_server(server)
|
|
||||||
|
|
||||||
task_utils.wait_for(
|
|
||||||
server,
|
|
||||||
ready_statuses=["ACTIVE"],
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=120,
|
|
||||||
check_interval=2
|
|
||||||
)
|
|
||||||
self._delete_server_with_fip(server, fip, force_delete=force_delete)
|
|
||||||
|
|
||||||
|
|
||||||
@types.convert(image={"type": "glance_image"},
|
|
||||||
flavor={"type": "nova_flavor"})
|
|
||||||
@validation.add("image_valid_on_flavor",
|
|
||||||
flavor_param="flavor", image_param="image")
|
|
||||||
@validation.add("valid_command", param_name="command", required=False)
|
|
||||||
@validation.add("number", param_name="port", minval=1, maxval=65535,
|
|
||||||
nullable=True, integer_only=True)
|
|
||||||
@validation.add("external_network_exists", param_name="floating_network")
|
|
||||||
@validation.add("required_services",
|
|
||||||
services=[consts.Service.NOVA, consts.Service.CINDER])
|
|
||||||
@validation.add("required_platform", platform="openstack",
|
|
||||||
users=True, admin=True)
|
|
||||||
@scenario.configure(context={"cleanup@openstack": ["nova", "cinder"],
|
|
||||||
"keypair@openstack": {}, "allow_ssh@openstack": None},
|
|
||||||
name="InstanceHA.recover_instance_fip_and_volume",
|
|
||||||
platform="openstack")
|
|
||||||
class InstanceHARecoverFIPAndVolume(vm_utils.VMScenario, cinder_utils.CinderBasic):
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(InstanceHARecoverFIPAndVolume, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def run(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
|
|
||||||
recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=volume_args,
|
|
||||||
floating_network=floating_network,
|
|
||||||
use_floating_ip=use_floating_ip,
|
|
||||||
force_delete=force_delete,
|
|
||||||
wait_for_ping=wait_for_ping,
|
|
||||||
max_log_length=max_log_length,
|
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@types.convert(image={"type": "glance_image"},
|
|
||||||
flavor={"type": "nova_flavor"})
|
|
||||||
@validation.add("image_valid_on_flavor",
|
|
||||||
flavor_param="flavor", image_param="image")
|
|
||||||
@validation.add("valid_command", param_name="command", required=False)
|
|
||||||
@validation.add("number", param_name="port", minval=1, maxval=65535,
|
|
||||||
nullable=True, integer_only=True)
|
|
||||||
@validation.add("external_network_exists", param_name="floating_network")
|
|
||||||
@validation.add("required_services",
|
|
||||||
services=[consts.Service.NOVA, consts.Service.CINDER])
|
|
||||||
@validation.add("required_platform", platform="openstack",
|
|
||||||
users=True, admin=True)
|
|
||||||
@scenario.configure(context={"cleanup@openstack": ["nova", "cinder"],
|
|
||||||
"keypair@openstack": {}, "allow_ssh@openstack": None},
|
|
||||||
name="InstanceHA.recover_instance_two_cycles",
|
|
||||||
platform="openstack")
|
|
||||||
class InstanceHARecoverTwoCycle(vm_utils.VMScenario, cinder_utils.CinderBasic):
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(InstanceHARecoverTwoCycle, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def run(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
|
|
||||||
recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=volume_args,
|
|
||||||
floating_network=floating_network,
|
|
||||||
use_floating_ip=use_floating_ip,
|
|
||||||
force_delete=force_delete,
|
|
||||||
iterations=2,
|
|
||||||
wait_for_ping=wait_for_ping,
|
|
||||||
max_log_length=max_log_length,
|
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@types.convert(image={"type": "glance_image"},
|
|
||||||
flavor={"type": "nova_flavor"})
|
|
||||||
@validation.add("image_valid_on_flavor",
|
|
||||||
flavor_param="flavor", image_param="image")
|
|
||||||
@validation.add("valid_command", param_name="command", required=False)
|
|
||||||
@validation.add("number", param_name="port", minval=1, maxval=65535,
|
|
||||||
nullable=True, integer_only=True)
|
|
||||||
@validation.add("external_network_exists", param_name="floating_network")
|
|
||||||
@validation.add("required_services",
|
|
||||||
services=[consts.Service.NOVA, consts.Service.CINDER])
|
|
||||||
@validation.add("required_platform", platform="openstack",
|
|
||||||
users=True, admin=True)
|
|
||||||
@scenario.configure(context={"cleanup@openstack": ["nova", "cinder"],
|
|
||||||
"keypair@openstack": {}, "allow_ssh@openstack": None},
|
|
||||||
name="InstanceHA.recover_stopped_instance_fip",
|
|
||||||
platform="openstack")
|
|
||||||
class InstanceHARecoverStopped(vm_utils.VMScenario, cinder_utils.CinderBasic):
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(InstanceHARecoverStopped, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def run(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
|
|
||||||
recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=volume_args,
|
|
||||||
floating_network=floating_network,
|
|
||||||
use_floating_ip=use_floating_ip,
|
|
||||||
force_delete=force_delete,
|
|
||||||
stop_instance=True,
|
|
||||||
wait_for_ping=wait_for_ping,
|
|
||||||
max_log_length=max_log_length,
|
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@types.convert(image={"type": "glance_image"},
|
|
||||||
flavor={"type": "nova_flavor"})
|
|
||||||
@validation.add("image_valid_on_flavor",
|
|
||||||
flavor_param="flavor", image_param="image")
|
|
||||||
@validation.add("valid_command", param_name="command", required=False)
|
|
||||||
@validation.add("number", param_name="port", minval=1, maxval=65535,
|
|
||||||
nullable=True, integer_only=True)
|
|
||||||
@validation.add("external_network_exists", param_name="floating_network")
|
|
||||||
@validation.add("required_services",
|
|
||||||
services=[consts.Service.NOVA, consts.Service.CINDER])
|
|
||||||
@validation.add("required_platform", platform="openstack",
|
|
||||||
users=True, admin=True)
|
|
||||||
@scenario.configure(context={"cleanup@openstack": ["nova", "cinder"],
|
|
||||||
"keypair@openstack": {}, "allow_ssh@openstack": None},
|
|
||||||
name="InstanceHA.recover_instance_nova_compute",
|
|
||||||
platform="openstack")
|
|
||||||
class InstanceHARecoverNovaCompute(vm_utils.VMScenario, cinder_utils.CinderBasic):
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(InstanceHARecoverNovaCompute, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def run(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
|
|
||||||
disruptor_cmd = {
|
|
||||||
"script_inline": "sudo kill -9 $(ps -ef | grep ^nova* | awk \'{print$2}\'); echo {}",
|
|
||||||
"interpreter": "/bin/sh"
|
|
||||||
}
|
|
||||||
recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=volume_args,
|
|
||||||
floating_network=floating_network,
|
|
||||||
use_floating_ip=use_floating_ip,
|
|
||||||
force_delete=force_delete,
|
|
||||||
disruptor_cmd=disruptor_cmd,
|
|
||||||
wait_for_ping=wait_for_ping,
|
|
||||||
max_log_length=max_log_length,
|
|
||||||
**kwargs)
|
|
@ -1,81 +0,0 @@
|
|||||||
---
|
|
||||||
InstanceHA.recover_instance_fip_and_volume:
|
|
||||||
-
|
|
||||||
args:
|
|
||||||
flavor:
|
|
||||||
name: "m1.tiny"
|
|
||||||
image:
|
|
||||||
name: cirros
|
|
||||||
volume_args:
|
|
||||||
size: 1
|
|
||||||
floating_network: "{{ public_physical_network }}-network"
|
|
||||||
force_delete: false
|
|
||||||
wait_for_ping: false
|
|
||||||
computes:
|
|
||||||
username: "heat-admin"
|
|
||||||
key_filename: "/home/stack/.ssh/id_rsa"
|
|
||||||
port: 22
|
|
||||||
runner:
|
|
||||||
type: "constant"
|
|
||||||
times: 1
|
|
||||||
concurrency: 1
|
|
||||||
context:
|
|
||||||
users:
|
|
||||||
tenants: 2
|
|
||||||
users_per_tenant: 1
|
|
||||||
network: {}
|
|
||||||
sla:
|
|
||||||
failure_rate:
|
|
||||||
max: 0.0
|
|
||||||
InstanceHA.recover_stopped_instance_fip:
|
|
||||||
-
|
|
||||||
args:
|
|
||||||
flavor:
|
|
||||||
name: "m1.tiny"
|
|
||||||
image:
|
|
||||||
name: cirros
|
|
||||||
floating_network: "{{ public_physical_network }}-network"
|
|
||||||
force_delete: false
|
|
||||||
wait_for_ping: false
|
|
||||||
computes:
|
|
||||||
username: "heat-admin"
|
|
||||||
key_filename: "/home/stack/.ssh/id_rsa"
|
|
||||||
port: 22
|
|
||||||
runner:
|
|
||||||
type: "constant"
|
|
||||||
times: 1
|
|
||||||
concurrency: 1
|
|
||||||
context:
|
|
||||||
users:
|
|
||||||
tenants: 2
|
|
||||||
users_per_tenant: 1
|
|
||||||
network: {}
|
|
||||||
sla:
|
|
||||||
failure_rate:
|
|
||||||
max: 0.0
|
|
||||||
InstanceHA.recover_instance_two_cycles:
|
|
||||||
-
|
|
||||||
args:
|
|
||||||
flavor:
|
|
||||||
name: "m1.tiny"
|
|
||||||
image:
|
|
||||||
name: cirros
|
|
||||||
floating_network: "{{ public_physical_network }}-network"
|
|
||||||
force_delete: false
|
|
||||||
wait_for_ping: false
|
|
||||||
computes:
|
|
||||||
username: "heat-admin"
|
|
||||||
key_filename: "/home/stack/.ssh/id_rsa"
|
|
||||||
port: 22
|
|
||||||
runner:
|
|
||||||
type: "constant"
|
|
||||||
times: 1
|
|
||||||
concurrency: 1
|
|
||||||
context:
|
|
||||||
users:
|
|
||||||
tenants: 2
|
|
||||||
users_per_tenant: 1
|
|
||||||
network: {}
|
|
||||||
sla:
|
|
||||||
failure_rate:
|
|
||||||
max: 0.0
|
|
@ -1,226 +0,0 @@
|
|||||||
instance-ha
|
|
||||||
===========
|
|
||||||
|
|
||||||
This role aims to automate all the steps needed to configure instance HA on a
|
|
||||||
deployed TripleO overcloud environment.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
The TripleO environment must be prepared as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).
|
|
||||||
|
|
||||||
**NOTE**: Instance-HA depends on STONITH. This means that all the steps
|
|
||||||
performed by this role make sense only if on the overcloud STONITH has been
|
|
||||||
configured. There is a dedicated role that automates the STONITH
|
|
||||||
configuration, named [stonith-config](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/stonith-config).
|
|
||||||
|
|
||||||
Instance HA
|
|
||||||
-----------
|
|
||||||
|
|
||||||
Instance HA is a feature that gives a certain degree of high-availability to the
|
|
||||||
instances spawned by an OpenStack deployment. Namely, if a compute node on which
|
|
||||||
an instance is running breaks for whatever reason, this configuration will spawn
|
|
||||||
the instances that were running on the broken node onto a functioning one.
|
|
||||||
This role automates are all the necessary steps needed to configure Pacemaker
|
|
||||||
cluster to support this functionality. A typical cluster configuration on a
|
|
||||||
clean stock **newton** (or **osp10**) deployment is something like this:
|
|
||||||
|
|
||||||
Online: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
|
|
||||||
Full list of resources:
|
|
||||||
|
|
||||||
ip-192.168.24.10 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
ip-172.18.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
ip-172.20.0.19 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
ip-172.17.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
ip-172.19.0.12 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
Clone Set: haproxy-clone [haproxy]
|
|
||||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Master/Slave Set: galera-master [galera]
|
|
||||||
Masters: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
ip-172.17.0.18 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
Clone Set: rabbitmq-clone [rabbitmq]
|
|
||||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Master/Slave Set: redis-master [redis]
|
|
||||||
Masters: [ overcloud-controller-0 ]
|
|
||||||
Slaves: [ overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
openstack-cinder-volume (systemd:openstack-cinder-volume): Started overcloud-controller-0
|
|
||||||
|
|
||||||
As you can see we have 3 controllers, six IP resources, four *core* resources
|
|
||||||
(*haproxy*, *galera*, *rabbitmq* and *redis*) and one last resource which is
|
|
||||||
*openstack-cinder-volume* that needs to run as a single active/passive resource
|
|
||||||
inside the cluster. This role configures all the additional resources needed
|
|
||||||
to have a working instance HA setup. Once the playbook is executed, the
|
|
||||||
configuration will be something like this:
|
|
||||||
|
|
||||||
Online: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
RemoteOnline: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
|
|
||||||
Full list of resources:
|
|
||||||
|
|
||||||
ip-192.168.24.10 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
ip-172.18.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
ip-172.20.0.19 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
ip-172.17.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
ip-172.19.0.12 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
Clone Set: haproxy-clone [haproxy]
|
|
||||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
Master/Slave Set: galera-master [galera]
|
|
||||||
Masters: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
ip-172.17.0.18 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
Clone Set: rabbitmq-clone [rabbitmq]
|
|
||||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
Master/Slave Set: redis-master [redis]
|
|
||||||
Masters: [ overcloud-controller-0 ]
|
|
||||||
Slaves: [ overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
openstack-cinder-volume (systemd:openstack-cinder-volume): Started overcloud-controller-0
|
|
||||||
ipmilan-overcloud-compute-0 (stonith:fence_ipmilan): Started overcloud-controller-1
|
|
||||||
ipmilan-overcloud-controller-2 (stonith:fence_ipmilan): Started overcloud-controller-0
|
|
||||||
ipmilan-overcloud-controller-0 (stonith:fence_ipmilan): Started overcloud-controller-0
|
|
||||||
ipmilan-overcloud-controller-1 (stonith:fence_ipmilan): Started overcloud-controller-1
|
|
||||||
ipmilan-overcloud-compute-1 (stonith:fence_ipmilan): Started overcloud-controller-1
|
|
||||||
nova-evacuate (ocf::openstack:NovaEvacuate): Started overcloud-controller-0
|
|
||||||
Clone Set: nova-compute-checkevacuate-clone [nova-compute-checkevacuate]
|
|
||||||
Started: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
Stopped: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Clone Set: nova-compute-clone [nova-compute]
|
|
||||||
Started: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
Stopped: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
fence-nova (stonith:fence_compute): Started overcloud-controller-0
|
|
||||||
overcloud-compute-1 (ocf::pacemaker:remote): Started overcloud-controller-0
|
|
||||||
overcloud-compute-0 (ocf::pacemaker:remote): Started overcloud-controller-1
|
|
||||||
|
|
||||||
How Instance HA works
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
There are three key resource agents you need to consider. Here's the list:
|
|
||||||
|
|
||||||
- *fence_compute* (named **fence-nova** inside the cluster): which takes care
|
|
||||||
of marking a compute node with the attribute "evacuate" set to yes;
|
|
||||||
- *NovaEvacuate* (named **nova-evacuate** inside the cluster): which takes care
|
|
||||||
of the effective evacuation of the instances and runs on one of the
|
|
||||||
controllers;
|
|
||||||
- *nova-compute-wait* (named **nova-compute-checkevacuate** inside the
|
|
||||||
cluster): which waits for eventual evacuation before starting nova compute
|
|
||||||
services and runs on each compute nodes;
|
|
||||||
|
|
||||||
Looking at the role you will notice that other systemd resources will be added
|
|
||||||
into the cluster on the compute nodes, especially in older release like mitaka
|
|
||||||
(*neutron-openvswitch-agent*, *libvirtd*, *openstack-ceilometer-compute* and
|
|
||||||
*nova-compute*), but the keys for the correct instance HA comprehension are the
|
|
||||||
aforementioned three resources.
|
|
||||||
|
|
||||||
Evacuation
|
|
||||||
----------
|
|
||||||
|
|
||||||
The principle under which Instance HA works is *evacuation*. This means that
|
|
||||||
when a host becomes unavailablea for whatever reason, instances on it are
|
|
||||||
evacuated to another available host.
|
|
||||||
Instance HA works both on shared storage and local storage environments, which
|
|
||||||
means that evacuated instances will maintain the same network setup (static ip,
|
|
||||||
floating ip and so on) and characteristics inside the new host, even if they
|
|
||||||
will be spawned from scratch.
|
|
||||||
|
|
||||||
What happens when a compute node is lost
|
|
||||||
----------------------------------------
|
|
||||||
|
|
||||||
Once configured, how does the system behaves when evacuation is needed? The
|
|
||||||
following sequence describes the actions taken by the cluster and the OpenStack
|
|
||||||
components:
|
|
||||||
|
|
||||||
1. A compute node (say overcloud-compute-1) which is running instances goes
|
|
||||||
down for some reason (power outage, kernel panic, manual intervention);
|
|
||||||
2. The cluster starts the action sequence to fence this host, since it needs
|
|
||||||
to be sure that the host is *really* down before driving any other operation
|
|
||||||
(otherwise there is potential for data corruption or multiple identical VMs
|
|
||||||
running at the same time in the infrastructure). Setup is configured to have
|
|
||||||
two levels of fencing for the compute hosts:
|
|
||||||
|
|
||||||
* **IPMI**: which will occur first and will take care of physically
|
|
||||||
resetting the host and hence assuring that the machine is really powered
|
|
||||||
off;
|
|
||||||
* **fence-nova**: which will occur afterwards and will take care of marking
|
|
||||||
with a cluster per-node attribute "evacuate=yes";
|
|
||||||
|
|
||||||
So the host gets reset and on the cluster a new node-property like the
|
|
||||||
following will appear:
|
|
||||||
|
|
||||||
[root@overcloud-controller-0 ~]# attrd_updater -n evacuate -A
|
|
||||||
name="evacuate" host="overcloud-compute-1.localdomain" value="yes"
|
|
||||||
|
|
||||||
3. At this point the resource **nova-evacuate** which constantly monitors the
|
|
||||||
attributes of the cluster in search of the evacuate tag will find out that
|
|
||||||
the *overcloud-compute-1* host needs evacuation, and by internally using
|
|
||||||
*nova-compute commands*, will start the evactuation of the instances towards
|
|
||||||
another host;
|
|
||||||
4. In the meantime, while compute-1 is booting up again,
|
|
||||||
**nova-compute-checkevacuate** will wait (with a default timeout of 120
|
|
||||||
seconds) for the evacuation to complete before starting the chain via the
|
|
||||||
*NovaCompute* resource that will enable the fenced host to become available
|
|
||||||
again for running instances;
|
|
||||||
|
|
||||||
What to look for when something is not working
|
|
||||||
----------------------------------------------
|
|
||||||
|
|
||||||
Here there are some tips to follow once you need to debug why instance HA is
|
|
||||||
not working:
|
|
||||||
|
|
||||||
1. Check credentials: many resources require access data the the overcloud
|
|
||||||
coming form the overcloudrc file, so it's not so difficult to do copy
|
|
||||||
errors;
|
|
||||||
2. Check connectivity: stonith is essential for cluster and if for some reason
|
|
||||||
the cluster is not able to fence the compute nodes, the whole instance HA
|
|
||||||
environment will not work;
|
|
||||||
3. Check errors: inside the controller's cluster log
|
|
||||||
(*/var/log/cluster/corosync.log*) some errors may catch the eye.
|
|
||||||
|
|
||||||
Examples on how to invoke the playbook via ansible
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
This command line will install the whole instance-ha solution, with controller
|
|
||||||
stonith, compute stonith and all the instance ha steps in:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10"
|
|
||||||
|
|
||||||
By default the playbook will install the instance-ha solution with the shared
|
|
||||||
storage configuration, but it is possible to make the installation in a no
|
|
||||||
shared storage environment, passing the **instance_ha_shared_storage** variable
|
|
||||||
as **false**:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e instance_ha_shared_storage=false
|
|
||||||
|
|
||||||
If a user configured the overcloud with a specific domain it is possible to
|
|
||||||
override the default "localdomain" value by passing the **overcloud_domain**
|
|
||||||
variable to the playbook:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e overcloud_domain="mydomain"
|
|
||||||
|
|
||||||
If a user already installed STONITH for controllers and wants just to apply all
|
|
||||||
the instance HA steps with STONITH for the compute nodes can launch this:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e stonith_devices="computes"
|
|
||||||
|
|
||||||
To uninstall the whole instance HA solution:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e instance_ha_action="uninstall"
|
|
||||||
|
|
||||||
Or if you a user needs to omit STONITH for the controllers:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e stonith_devices="computes" -e instance_ha_action="uninstall"
|
|
||||||
|
|
||||||
Is it also possible to totally omit STONITH configuration by passing "none" as
|
|
||||||
the value of *stonith_devices*.
|
|
||||||
|
|
||||||
License
|
|
||||||
-------
|
|
||||||
|
|
||||||
GPL
|
|
||||||
|
|
||||||
Author Information
|
|
||||||
------------------
|
|
||||||
|
|
||||||
Raoul Scarazzini <rasca@redhat.com>
|
|
@ -1,13 +0,0 @@
|
|||||||
---
|
|
||||||
|
|
||||||
overcloud_working_dir: "/home/heat-admin"
|
|
||||||
working_dir: "/home/stack"
|
|
||||||
|
|
||||||
# Can be install or uninstall
|
|
||||||
instance_ha_action: "install"
|
|
||||||
|
|
||||||
# Do we have a shared storage or not?
|
|
||||||
instance_ha_shared_storage: true
|
|
||||||
|
|
||||||
# Set overcloud domain
|
|
||||||
overcloud_domain: "localdomain"
|
|
@ -1,386 +0,0 @@
|
|||||||
---
|
|
||||||
- name: Apply STONITH for compute nodes
|
|
||||||
include_role:
|
|
||||||
name: stonith-config
|
|
||||||
vars:
|
|
||||||
stonith_devices: "computes"
|
|
||||||
when:
|
|
||||||
- stonith_devices in ["all","computes"]
|
|
||||||
|
|
||||||
- name: Disable openstack-nova-compute on compute
|
|
||||||
service:
|
|
||||||
name: openstack-nova-compute
|
|
||||||
state: stopped
|
|
||||||
enabled: no
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Disable neutron-openvswitch-agent on compute
|
|
||||||
service:
|
|
||||||
name: neutron-openvswitch-agent
|
|
||||||
state: stopped
|
|
||||||
enabled: no
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Disable openstack-ceilometer-compute on compute
|
|
||||||
service:
|
|
||||||
name: openstack-ceilometer-compute
|
|
||||||
state: stopped
|
|
||||||
enabled: no
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Disable libvirtd on compute
|
|
||||||
become: yes
|
|
||||||
service:
|
|
||||||
name: libvirtd
|
|
||||||
state: stopped
|
|
||||||
enabled: no
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Generate authkey for remote pacemaker
|
|
||||||
shell: |
|
|
||||||
dd if=/dev/urandom of="/tmp/authkey" bs=4096 count=1
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Make sure pacemaker config dir exists
|
|
||||||
become: yes
|
|
||||||
file:
|
|
||||||
path: /etc/pacemaker
|
|
||||||
state: directory
|
|
||||||
mode: 0750
|
|
||||||
group: "haclient"
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['controller'] }}"
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Copy authkey on all the overcloud nodes
|
|
||||||
become: yes
|
|
||||||
copy:
|
|
||||||
src: /tmp/authkey
|
|
||||||
dest: /etc/pacemaker/authkey
|
|
||||||
mode: 0640
|
|
||||||
group: "haclient"
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['controller'] }}"
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Remove authkey from local dir
|
|
||||||
file:
|
|
||||||
path: /tmp/authkey
|
|
||||||
state: absent
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Enable iptables traffic for pacemaker_remote
|
|
||||||
become: yes
|
|
||||||
shell: |
|
|
||||||
iptables -I INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['controller'] }}"
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Make iptables pacemaker_remote rule permanent
|
|
||||||
become: yes
|
|
||||||
lineinfile:
|
|
||||||
path: /etc/sysconfig/iptables
|
|
||||||
line: "-A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT"
|
|
||||||
insertafter: ":OUTPUT ACCEPT"
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['controller'] }}"
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Start pacemaker remote service on compute nodes
|
|
||||||
become: yes
|
|
||||||
service:
|
|
||||||
name: pacemaker_remote
|
|
||||||
enabled: yes
|
|
||||||
state: started
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Get the name of the stack
|
|
||||||
shell: |
|
|
||||||
source {{ working_dir }}/stackrc
|
|
||||||
openstack stack list -f value -c 'Stack Name'
|
|
||||||
register: stack_name
|
|
||||||
|
|
||||||
- name: Check if a v3 overcloud's rc file exists
|
|
||||||
stat:
|
|
||||||
path: "{{ working_dir }}/{{ stack_name.stdout }}rc.v3"
|
|
||||||
register: v3_rc_file_stat
|
|
||||||
|
|
||||||
- name: Get the contents of the overcloud's rc file v3
|
|
||||||
set_fact:
|
|
||||||
overcloudrc: "{{ stack_name.stdout }}rc.v3"
|
|
||||||
when: v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- name: Get the contents of the overcloud's rc file
|
|
||||||
set_fact:
|
|
||||||
overcloudrc: "{{ stack_name.stdout }}rc"
|
|
||||||
when: not v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- block:
|
|
||||||
- name: Get OS_USERNAME from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_USERNAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USERNAME=//g'
|
|
||||||
register: "OS_USERNAME"
|
|
||||||
|
|
||||||
- name: Get OS_PASSWORD from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_PASSWORD {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PASSWORD=//g'
|
|
||||||
register: "OS_PASSWORD"
|
|
||||||
|
|
||||||
- name: Get OS_AUTH_URL from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_AUTH_URL {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_AUTH_URL=//g'
|
|
||||||
register: "OS_AUTH_URL"
|
|
||||||
|
|
||||||
- name: Get OS_PROJECT_NAME or OS_TENANT_NAME from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep -E 'OS_PROJECT_NAME|OS_TENANT_NAME' {{ working_dir }}/{{ overcloudrc }} | tail -1 | sed 's/export OS_.*_NAME=//g'
|
|
||||||
register: "OS_TENANT_NAME"
|
|
||||||
|
|
||||||
- name: Get OS_USER_DOMAIN_NAME from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_USER_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USER_DOMAIN_NAME=//g'
|
|
||||||
register: "OS_USER_DOMAIN_NAME"
|
|
||||||
when: v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- name: Get OS_PROJECT_DOMAIN_NAME from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_PROJECT_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PROJECT_DOMAIN_NAME=//g'
|
|
||||||
register: "OS_PROJECT_DOMAIN_NAME"
|
|
||||||
when: v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- name: Define variable for pcs additional options for overcloud's rc file v3
|
|
||||||
set_fact:
|
|
||||||
pcs_v3_rc_file_opts: ""
|
|
||||||
|
|
||||||
- name: Define variable for pcs additional options for no_shared_storage
|
|
||||||
set_fact:
|
|
||||||
pcs_NovaEvacuate_no_shared_storage_opts: ""
|
|
||||||
pcs_fence_compute_no_shared_storage_opts: ""
|
|
||||||
|
|
||||||
- name: Set pcs additional options for overcloud's rc file v3
|
|
||||||
set_fact:
|
|
||||||
pcs_v3_rc_file_opts: "project_domain=$OS_PROJECT_DOMAIN_NAME user_domain=$OS_USER_DOMAIN_NAME"
|
|
||||||
when: v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- name: Set pcs additional options for no_shared_storage
|
|
||||||
set_fact:
|
|
||||||
pcs_NovaEvacuate_no_shared_storage_opts: "no_shared_storage=1"
|
|
||||||
pcs_fence_compute_no_shared_storage_opts: "no-shared-storage=True"
|
|
||||||
when: not instance_ha_shared_storage|bool
|
|
||||||
|
|
||||||
- block:
|
|
||||||
- name: Create resource nova-evacuate
|
|
||||||
shell: |
|
|
||||||
pcs resource create nova-evacuate ocf:openstack:NovaEvacuate auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME {{ pcs_v3_rc_file_opts }} {{ pcs_NovaEvacuate_no_shared_storage_opts }} --force
|
|
||||||
|
|
||||||
- name: Create pacemaker constraint to start nova-evacuate only on non compute nodes
|
|
||||||
shell: |
|
|
||||||
pcs constraint location nova-evacuate rule resource-discovery=never score=-INFINITY osprole eq compute
|
|
||||||
|
|
||||||
- name: Create pacemaker constraints to start VIP resources before nova-evacuate
|
|
||||||
shell: |
|
|
||||||
for i in $(pcs status | grep IP | awk '{ print $1 }')
|
|
||||||
do pcs constraint order start $i then nova-evacuate
|
|
||||||
done
|
|
||||||
|
|
||||||
- name: Create pacemaker constraints to start openstack services before nova-evacuate
|
|
||||||
shell: "pcs constraint order start {{ item }} then nova-evacuate require-all=false"
|
|
||||||
with_items:
|
|
||||||
- openstack-glance-api-clone
|
|
||||||
- neutron-metadata-agent-clone
|
|
||||||
- openstack-nova-conductor-clone
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Disable keystone resource
|
|
||||||
shell: "pcs resource disable openstack-keystone --wait=900"
|
|
||||||
when: release in [ 'liberty', 'rhos-8' ]
|
|
||||||
|
|
||||||
# Keystone resource was replaced by openstack-core resource in RHOS9
|
|
||||||
- name: Disable openstack-core resource
|
|
||||||
shell: "pcs resource disable openstack-core --wait=900"
|
|
||||||
when: release in [ 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Set controller pacemaker property on controllers
|
|
||||||
shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=controller"
|
|
||||||
with_items: "{{ groups['controller'] }}"
|
|
||||||
|
|
||||||
- name: Get stonith devices
|
|
||||||
shell: "pcs stonith | awk '{print $1}' | tr '\n' ' '"
|
|
||||||
register: stonithdevs
|
|
||||||
|
|
||||||
- name: Setup stonith devices
|
|
||||||
shell: |
|
|
||||||
for i in $(cibadmin -Q --xpath //primitive --node-path | awk -F "id='" '{print $2}' | awk -F "'" '{print $1}' | uniq); do
|
|
||||||
found=0
|
|
||||||
if [ -n "{{ stonithdevs.stdout }}" ]; then
|
|
||||||
for x in {{ stonithdevs.stdout }}; do
|
|
||||||
if [ "$x" == "$i" ]; then
|
|
||||||
found=1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
if [ $found = 0 ]; then
|
|
||||||
pcs constraint location $i rule resource-discovery=exclusive score=0 osprole eq controller
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create compute pacemaker resources and constraints
|
|
||||||
shell: |
|
|
||||||
pcs resource create nova-compute-checkevacuate ocf:openstack:nova-compute-wait auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} op start timeout=300 --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location nova-compute-checkevacuate-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs resource create nova-compute systemd:openstack-nova-compute op start timeout=60s --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location nova-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs constraint order start nova-compute-checkevacuate-clone then nova-compute-clone require-all=true
|
|
||||||
pcs constraint order start nova-compute-clone then nova-evacuate require-all=false
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create compute pacemaker resources and constraints
|
|
||||||
shell: |
|
|
||||||
pcs resource create neutron-openvswitch-agent-compute systemd:neutron-openvswitch-agent --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location neutron-openvswitch-agent-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs resource create libvirtd-compute systemd:libvirtd --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location libvirtd-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs constraint order start neutron-openvswitch-agent-compute-clone then libvirtd-compute-clone
|
|
||||||
pcs constraint colocation add libvirtd-compute-clone with neutron-openvswitch-agent-compute-clone
|
|
||||||
pcs resource create ceilometer-compute systemd:openstack-ceilometer-compute --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location ceilometer-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs constraint order start libvirtd-compute-clone then ceilometer-compute-clone
|
|
||||||
pcs constraint colocation add ceilometer-compute-clone with libvirtd-compute-clone
|
|
||||||
pcs constraint order start libvirtd-compute-clone then nova-compute-clone
|
|
||||||
pcs constraint colocation add nova-compute-clone with libvirtd-compute-clone
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Create pacemaker constraint for neutron-server, nova-conductor and ceilometer-notification
|
|
||||||
shell: |
|
|
||||||
pcs constraint order start neutron-server-clone then neutron-openvswitch-agent-compute-clone require-all=false
|
|
||||||
pcs constraint order start openstack-ceilometer-notification-clone then ceilometer-compute-clone require-all=false
|
|
||||||
pcs constraint order start openstack-nova-conductor-clone then nova-compute-checkevacuate-clone require-all=false
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Set requires to fencing as default for all resources (Pike/RHOS-12)
|
|
||||||
shell: "pcs resource defaults requires=fencing"
|
|
||||||
when: release in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create fence-nova pacemaker resource (no shared storage)
|
|
||||||
shell: "pcs stonith create fence-nova fence_compute auth_url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} record_only=1 {{ pcs_fence_compute_no_shared_storage_opts }} --force"
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create fence-nova pacemaker resource (Pike/RHOS-12)
|
|
||||||
shell: "pcs stonith create fence-nova fence_compute auth_url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} record_only=1 {{ pcs_fence_compute_no_shared_storage_opts }} meta provides=unfencing --force"
|
|
||||||
when: release in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create pacemaker constraint for fence-nova to fix it on controller node and set resource-discovery never
|
|
||||||
shell: "pcs constraint location fence-nova rule resource-discovery=never score=0 osprole eq controller"
|
|
||||||
|
|
||||||
- name: Create pacemaker constraint for fence-nova to start after galera
|
|
||||||
shell: "pcs constraint order promote galera-master then fence-nova require-all=false"
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create nova-compute order constraint on fence-nova
|
|
||||||
shell: "pcs constraint order start fence-nova then nova-compute-clone"
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Set cluster recheck interval to 1 minute
|
|
||||||
shell: "pcs property set cluster-recheck-interval=1min"
|
|
||||||
|
|
||||||
- name: Create pacemaker remote resource on compute nodes
|
|
||||||
shell: "pcs resource create {{ hostvars[item]['ansible_hostname'] }} ocf:pacemaker:remote reconnect_interval=240 op monitor interval=20"
|
|
||||||
with_items: "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Set osprole for compute nodes
|
|
||||||
shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=compute"
|
|
||||||
with_items: "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Add STONITH level definitions for compute nodes
|
|
||||||
shell: |
|
|
||||||
compute_stonith_name=$(cibadmin --query --xpath "//primitive[@class='stonith']/instance_attributes/nvpair[@value='{{ item }}']" | sed 's/.*id="\(.*\)-instance_attributes-pcmk_host_list".*/\1/g')
|
|
||||||
pcs stonith level add 1 {{ item }} $compute_stonith_name,fence-nova
|
|
||||||
with_items: "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Enable keystone resource
|
|
||||||
shell: "pcs resource enable openstack-keystone"
|
|
||||||
when: release in [ 'liberty', 'rhos-8' ]
|
|
||||||
|
|
||||||
- name: Enable openstack-core resource
|
|
||||||
shell: "pcs resource enable openstack-core"
|
|
||||||
when: release in [ 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Wait for httpd service to be started
|
|
||||||
shell: "systemctl show httpd --property=ActiveState"
|
|
||||||
register: httpd_status_result
|
|
||||||
until: httpd_status_result.stdout.find('inactive') == -1 and httpd_status_result.stdout.find('activating') == -1
|
|
||||||
retries: 30
|
|
||||||
delay: 10
|
|
||||||
when: release not in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Enable compute nodes resources (nova)
|
|
||||||
shell: "pcs resource enable {{ item }}"
|
|
||||||
with_items:
|
|
||||||
- nova-compute-checkevacuate
|
|
||||||
- nova-compute
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create compute unfence resource to override default resource requires (Pike/RHOS-12)
|
|
||||||
shell: |
|
|
||||||
pcs resource create compute-unfence-trigger ocf:pacemaker:Dummy op start requires="unfencing" --clone --disabled
|
|
||||||
pcs constraint location compute-unfence-trigger-clone rule resource-discovery=never score=-INFINITY osprole ne compute
|
|
||||||
pcs resource enable compute-unfence-trigger
|
|
||||||
when: release in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Enable compute nodes resources (others)
|
|
||||||
shell: "pcs resource enable {{ item }}"
|
|
||||||
with_items:
|
|
||||||
- neutron-openvswitch-agent-compute
|
|
||||||
- libvirtd-compute
|
|
||||||
- ceilometer-compute
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
environment:
|
|
||||||
OS_USERNAME: "{{ OS_USERNAME.stdout }}"
|
|
||||||
OS_PASSWORD: "{{ OS_PASSWORD.stdout }}"
|
|
||||||
OS_AUTH_URL: "{{ OS_AUTH_URL.stdout }}"
|
|
||||||
OS_TENANT_NAME: "{{ OS_TENANT_NAME.stdout }}"
|
|
||||||
OS_USER_DOMAIN_NAME: "{{ OS_USER_DOMAIN_NAME.stdout }}"
|
|
||||||
OS_PROJECT_DOMAIN_NAME: "{{ OS_PROJECT_DOMAIN_NAME.stdout }}"
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ groups.controller[0] }}"
|
|
||||||
|
|
||||||
- name: Cleanup (if any) failed resources
|
|
||||||
shell: |
|
|
||||||
for resource in $(pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort |uniq)
|
|
||||||
do
|
|
||||||
pcs resource cleanup $resource
|
|
||||||
done
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ groups.controller[0] }}"
|
|
||||||
|
|
||||||
- name: Wait for (if any) failed resources to recover
|
|
||||||
shell: pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort |uniq
|
|
||||||
register: failed_resources
|
|
||||||
until: failed_resources.stdout != []
|
|
||||||
retries: 10
|
|
||||||
delay: 10
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ groups.controller[0] }}"
|
|
@ -1,31 +0,0 @@
---
- name: Check if Instance HA steps were already applied
  include: pre-checks.yml
  when:
    - instance_ha_action == "install"

- name: Apply STONITH for controller nodes
  include_role:
    name: stonith-config
  when:
    - instance_ha_action == "install"
    - stonith_devices in ["all","controllers"]

- name: Apply Instance High Availability steps
  include: apply.yml
  when:
    - instance_ha_action == "install"

- name: Undo Instance High Availability steps
  include: undo.yml
  when:
    - instance_ha_action == "uninstall"

- name: Remove STONITH for controller nodes
  include_role:
    name: stonith-config
  vars:
    stonith_action: "uninstall"
  when:
    - instance_ha_action == "uninstall"
    - stonith_devices in ["all","controllers"]
@ -1,25 +0,0 @@
---
- block:
    - name: Check if STONITH resources already exist
      shell: |
        pcs stonith show | grep {{ item }}
      with_items:
        - fence-nova
      register: pre_existing_stonith
      failed_when: pre_existing_stonith.rc == 0

    - name: Check if IHA resources already exist
      shell: |
        pcs resource show | grep "{{ item }}"
      with_items:
        - compute-unfence-trigger
        - nova-compute-checkevacuate
        - nova-compute
        - nova-evacuate
        - neutron-openvswitch-agent-compute
        - libvirtd-compute
        - ceilometer-compute
      register: pre_existing_resources
      failed_when: pre_existing_resources.rc == 0
  become: yes
  delegate_to: "{{ groups.controller[0] }}"
@ -1,168 +0,0 @@
---
- block:
    - name: Remove STONITH level definitions for compute nodes
      shell: |
        compute_stonith_name=$(cibadmin --query --xpath "//primitive[@class='stonith']/instance_attributes/nvpair[@value='{{ item }}']" | sed 's/.*id="\(.*\)-instance_attributes-pcmk_host_list".*/\1/g')
        for stonith_level in $(cibadmin --query --xpath "//configuration/fencing-topology/fencing-level[@devices='$compute_stonith_name,fence-nova'][@index='1'][@target='{{ item }}']" --node-path)
        do
          pcs stonith level delete 1 {{ item }} $compute_stonith_name,fence-nova
        done
      with_items: "{{ groups['compute'] }}"

    - name: Remove fence-nova STONITH device
      shell: |
        for stonithid in $(pcs stonith show | awk '/fence_compute/ {print $1}')
        do
          pcs stonith delete fence-nova
        done

    - name: Remove resources associated to remote nodes
      shell: |
        for resourceid in $(pcs resource show | grep compute | grep 'Clone Set:' | awk '{print $3}')
        do
          pcs resource cleanup $resourceid
          pcs --force resource delete $resourceid
        done

    - name: Remove NovaEvacuate resource
      shell: |
        for resourceid in $(pcs resource show | grep NovaEvacuate | awk '/NovaEvacuate/ {print $1}')
        do
          pcs resource cleanup $resourceid
          pcs --force resource delete $resourceid
        done

    - name: Remove pacemaker remote resource
      shell: |
        for resourceid in $(pcs resource show | awk '/:remote/ {print $1}')
        do
          pcs resource cleanup $resourceid
          pcs --force resource delete $resourceid
        done

    - name: Remove constraints related to role controller
      shell: |
        for constraintid in $(pcs config show | grep -B 3 "osprole eq controller" | awk '/Constraint/ {print $2}')
        do
          pcs constraint delete $constraintid
        done

    - name: Unset controller pacemaker property on controllers
      shell: |
        for nodeid in $(pcs property | awk '/osprole/ { print $1 }' | cut -d: -f1)
        do
          pcs property unset --node $nodeid osprole
        done

    - name: Unset cluster recheck interval
      shell: |
        for propertyid in $(pcs property | awk '/cluster-recheck-interval/ { print $1 }' | cut -d: -f1)
        do
          pcs property unset cluster-recheck-interval
        done
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Cleanup failed resources (if any)
  shell: |
    for resource in $(pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq)
    do
      pcs resource cleanup $resource
    done
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Wait for failed resources to recover (if any)
  shell: pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq
  register: failed_resources
  until: failed_resources.stdout == ""
  retries: 10
  delay: 10
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Enable openstack-nova-compute on compute
  service:
    name: openstack-nova-compute
    state: started
    enabled: yes
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release not in [ 'pike', 'rhos-12' ]

- name: Enable neutron-openvswitch-agent on compute
  service:
    name: neutron-openvswitch-agent
    state: started
    enabled: yes
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Enable openstack-ceilometer-compute on compute
  service:
    name: openstack-ceilometer-compute
    state: started
    enabled: yes
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Enable libvirtd on compute
  become: yes
  service:
    name: libvirtd
    state: started
    enabled: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Stop pacemaker remote service on compute nodes
  become: yes
  service:
    name: pacemaker_remote
    enabled: no
    state: stopped
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"

- name: Disable iptables traffic for pacemaker_remote
  become: yes
  shell: |
    while [ $(iptables-save | grep -c "\-A INPUT \-p tcp \-m state \-\-state NEW \-m tcp \-\-dport 3121 \-j ACCEPT") -ne 0 ]
    do
      iptables -D INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT
    done
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Remove iptables pacemaker_remote permanent rule
  become: yes
  lineinfile:
    path: /etc/sysconfig/iptables
    line: "-A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT"
    state: absent
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Undo STONITH for compute nodes
  include_role:
    name: stonith-config
  vars:
    stonith_action: "uninstall"
    stonith_devices: "computes"
  when:
    - stonith_devices in ["all","computes"]
@ -1,90 +0,0 @@
stonith-config
==============

This role acts on an already deployed tripleo environment, setting up STONITH
(Shoot The Other Node In The Head) inside the Pacemaker configuration for all
the hosts that are part of the overcloud.

Requirements
------------

The TripleO environment must be prepared as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).

STONITH
-------

STONITH is the mechanism a Pacemaker cluster uses to be certain that a node is
powered off. STONITH is the only way to use a shared storage environment
without worrying about concurrent writes on disks. Inside TripleO environments
STONITH is also a prerequisite for features like Instance HA because, before
evacuating any machine, the system needs to be sure that the "move from"
machine is off.
STONITH configuration relies on the **instackenv.json** file, used by TripleO
also to configure Ironic and the whole provisioning process.
Basically this role enables STONITH on the Pacemaker cluster and takes all the
information from the mentioned file, creating a STONITH resource for each host
on the overcloud.
After running this playbook the cluster configuration will have these properties:

    $ sudo pcs property
    Cluster Properties:
     cluster-infrastructure: corosync
     cluster-name: tripleo_cluster
     ...
     ...
     **stonith-enabled: true**

And something like this, depending on how many nodes there are in the overcloud:

    sudo pcs stonith
     ipmilan-overcloud-compute-0   (stonith:fence_ipmilan):   Started overcloud-controller-1
     ipmilan-overcloud-controller-2   (stonith:fence_ipmilan):   Started overcloud-controller-0
     ipmilan-overcloud-controller-0   (stonith:fence_ipmilan):   Started overcloud-controller-0
     ipmilan-overcloud-controller-1   (stonith:fence_ipmilan):   Started overcloud-controller-1
     ipmilan-overcloud-compute-1   (stonith:fence_ipmilan):   Started overcloud-controller-1
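For reference, each of those devices is created by the role with a command of
this shape, generated from the instackenv.json data (a sketch: the IPMI login
and password below are placeholders, while the address matches the sample
instackenv.json shown later in this repository):

    pcs stonith create ipmilan-overcloud-compute-0 fence_ipmilan \
      pcmk_host_list="overcloud-compute-0" ipaddr="10.1.8.102" login="admin" \
      passwd="secret" lanplus="true" delay=20 op monitor interval=60s
    pcs constraint location ipmilan-overcloud-compute-0 avoids overcloud-compute-0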
Having all this in place is a requirement for a reliable HA solution and for
configuring special OpenStack features like [Instance HA](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/instance-ha).

**Note**: by default this role configures STONITH for the controller nodes,
but it is possible to configure all the nodes or to limit it to just the
computes, by setting the **stonith_devices** variable, which by default is set
to "controllers", but can also be "*all*" or "*computes*".

Limitations
-----------

The only kind of STONITH device supported **for the moment** is IPMI.

Examples on how to invoke the playbook via ansible
--------------------------------------------------

This command line will install the STONITH devices for the controller nodes:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml

If a user wants to install the STONITH devices for all the nodes:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml -e stonith_devices="all"

To uninstall the STONITH devices for the controllers:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml -e stonith_action="uninstall"

To uninstall the STONITH devices just for the computes:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml -e stonith_action="uninstall" -e stonith_devices="computes"

The STONITH role also supports "none" as a valid value for *stonith_devices*,
which can be useful when configuring Instance HA in an environment already
configured with STONITH for both controllers and computes.

License
-------

GPL

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
@ -1,13 +0,0 @@
---

overcloud_working_dir: "/home/heat-admin"
working_dir: "/home/stack"
instack_env_file: "{{ working_dir }}/instackenv.json"

config_stonith_python_script: config-stonith-from-instackenv.py.j2

# Can be install, uninstall or none
stonith_action: "install"

# Can be all, controllers or computes
stonith_devices: controllers
@ -1,32 +0,0 @@
---
- name: Load the STONITH creation script on the undercloud
  template:
    src: "{{ config_stonith_python_script }}"
    dest: "{{ working_dir }}/config_stonith_from_instackenv.py"
    mode: 0755

- name: Generate STONITH script
  shell: |
    source {{ working_dir }}/stackrc
    {{ working_dir }}/config_stonith_from_instackenv.py {{ instack_env_file }} {{ stonith_action }} {{ stonith_devices }}
  register: stonith_script

- name: Delete the STONITH script on the overcloud (if it exists)
  file:
    path: "{{ overcloud_working_dir }}/config-stonith.sh"
    state: absent
  delegate_to: "{{ groups.controller[0] }}"

- name: Create the STONITH script on the overcloud
  lineinfile:
    destfile: "{{ overcloud_working_dir }}/config-stonith.sh"
    line: "{{ stonith_script.stdout }}"
    create: yes
    mode: 0755
  delegate_to: "{{ groups.controller[0] }}"

- name: Execute STONITH script
  become: true
  delegate_to: "{{ groups.controller[0] }}"
  shell: >
    {{ overcloud_working_dir }}/config-stonith.sh &> config_stonith.log
@ -1,94 +0,0 @@
#!/bin/python

import os
import json
import sys
from keystoneauth1.identity import v2
from keystoneauth1 import session
from pprint import pprint
from novaclient import client

# JSON file as first parameter
jdata = open(sys.argv[1])
data = json.load(jdata)

# install, uninstall, none
fence_config = sys.argv[2]
# controllers, computes, all or none
fence_devices = sys.argv[3]

# Define variables to connect to nova
os_username = os.environ['OS_USERNAME']
os_password = os.environ['OS_PASSWORD']
os_auth_url = os.environ['OS_AUTH_URL']
try:
    os_tenant_name = os.environ['OS_TENANT_NAME']
except:
    os_project_name = os.environ['OS_PROJECT_NAME']
    os_project_domain_name = os.environ['OS_PROJECT_DOMAIN_NAME']
    os_user_domain_name = os.environ['OS_USER_DOMAIN_NAME']
os_compute_api_version = os.environ['COMPUTE_API_VERSION']

# If fence_devices includes controllers then we act on the overall
# stonith-enabled property of the cluster
if (fence_devices in ['controllers', 'all']):
    # If we're uninstalling then we disable stonith
    if (fence_config == 'uninstall'):
        print('pcs property set stonith-enabled=false')
    # If we're installing then we enable it
    elif (fence_config == 'install'):
        print('pcs property set stonith-enabled=true')

# Connect to nova
try:
    # Liberty/OSP-8, Mitaka/OSP-9, Newton/OSP-10
    nt = client.Client(2,
                       os_username,
                       os_password,
                       os_tenant_name,
                       os_auth_url)
    nt.hypervisors.list()
except:
    try:
        # Ocata/OSP-11
        nt = client.Client(2,
                           username=os_username,
                           password=os_password,
                           project_name=os_tenant_name,
                           auth_url=os_auth_url)
        nt.hypervisors.list()
    except:
        # Pike/OSP-12
        nt = client.Client(2,
                           auth_url=os_auth_url,
                           username=os_username,
                           password=os_password,
                           project_name=os_project_name,
                           project_domain_name=os_project_domain_name,
                           user_domain_name=os_user_domain_name)
        nt.hypervisors.list()

# Parse instances
for instance in nt.servers.list():
    for node in data["nodes"]:
        if (node["mac"][0].lower() == instance.addresses['ctlplane'][0]['OS-EXT-IPS-MAC:mac_addr']
                and
                (
                    ('controller' in instance.name and fence_devices in ['controllers', 'all'])
                    or
                    ('compute' in instance.name and fence_devices in ['computes', 'all'])
                )):
            if (fence_config == 'uninstall'):
                print('pcs stonith delete ipmilan-{} || /bin/true'.format(instance.name))
            elif (fence_config == 'install'):
                try:
                    print('pcs stonith create ipmilan-{} fence_ipmilan pcmk_host_list="{}" ipaddr="{}" login="{}" passwd="{}" ipport={} lanplus="true" delay=20 op monitor interval=60s'
                          .format(instance.name, instance.name, node["pm_addr"], node["pm_user"], node["pm_password"], node["pm_port"]))
                except:
                    print('pcs stonith create ipmilan-{} fence_ipmilan pcmk_host_list="{}" ipaddr="{}" login="{}" passwd="{}" lanplus="true" delay=20 op monitor interval=60s'
                          .format(instance.name, instance.name, node["pm_addr"], node["pm_user"], node["pm_password"]))
                print('pcs constraint location ipmilan-{} avoids {}'
                      .format(instance.name, instance.name))

# Close the instackenv.json file
jdata.close()
@ -1,60 +0,0 @@
################
# Python imports
################
import os
import json
import sys
# The below will be enabled once OS_AUTH_URL=http://192.0.2.1:5000/v3
#from keystoneauth1.identity import v3
from keystoneauth1.identity import v2
from keystoneauth1 import session
from pprint import pprint
from novaclient import client

##########################################################
# Environment variables (need to source before launching):
##########################################################
export NOVA_VERSION=1.1
export OS_PASSWORD=$(sudo hiera admin_password)
# If v3:
export OS_AUTH_URL=http://192.0.2.1:5000/v3
# else
export OS_AUTH_URL=http://192.0.2.1:5000/v2.0
export OS_USERNAME=admin
export OS_TENANT_NAME=admin
export COMPUTE_API_VERSION=1.1
export OS_NO_CACHE=True

##############
# JSON format:
##############
{ "nodes": [
    {
      "mac": [
        "b8:ca:3a:66:e3:82"
      ],
      "_comment":"host12-rack03.scale.openstack.engineering.redhat.com",
      "cpu": "",
      "memory": "",
      "disk": "",
      "arch": "x86_64",
      "pm_type":"pxe_ipmitool",
      "pm_user":"qe-scale",
      "pm_password":"d0ckingSt4tion",
      "pm_addr":"10.1.8.102"
    },
    ...

#########################################################################
# To make the below work, os_auth_url must be http://192.0.2.1:5000/v3
#########################################################################
auth = v3.Password(auth_url=os_auth_url,
                   username=os_username,
                   password=os_password,
{% if release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ] %}
                   tenant_name=os_tenant_name,
{% else %}
                   project_name=os_tenant_name,
{% endif %}
                   user_domain_id='default',
                   project_domain_id='default')
@ -1,119 +0,0 @@
validate-ha
===========

This role acts on an already deployed tripleo environment, testing HA related
functionalities of the installation.

Requirements
------------

The TripleO environment must be prepared as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).

This role also tests instance spawning and, to make this work, the
definition of the floating network must be passed.
It can be contained in a config file, like this:

    private_network_cidr: "192.168.1.0/24"
    public_physical_network: "floating"
    floating_ip_cidr: "10.0.0.0/24"
    public_net_pool_start: "10.0.0.191"
    public_net_pool_end: "10.0.0.198"
    public_net_gateway: "10.0.0.254"

Or passed directly to the ansible command line (see examples below).

HA tests
--------

HA tests are meant to check the behavior of the environment in front of
circumstances that involve service interruption, loss of a node and in general
actions that stress the OpenStack installation with unexpected failures.
Each test is associated to a global variable that, if true, makes the test
happen.
Tests are grouped and performed by default depending on the OpenStack release.
This is the list of the supported variables, with test description and name of
the release on which the test is performed:

- **test_ha_failed_actions**: Look for failed actions (**all**)
- **test_ha_master_slave**: Stop master slave resources (galera and redis), all
  the resources should come down (**all**)
- **test_ha_keystone_constraint_removal**: Stop keystone resource (by stopping
  httpd), check no other resource is stopped (**mitaka**)
- Next generation cluster checks (**newton**, **ocata**, **master**):
  - **test_ha_ng_a**: Stop every systemd resource, stop Galera and Rabbitmq,
    start every systemd resource
  - **test_ha_ng_b**: Stop Galera and Rabbitmq, stop every systemd resource,
    start every systemd resource
  - **test_ha_ng_c**: Stop Galera and Rabbitmq, wait 20 minutes to see if
    something fails

It is also possible to omit (or add) tests not made for the specific release,
using the above vars, by passing to the command line variables like this:

    ...
    -e test_ha_failed_actions=false \
    -e test_ha_ng_a=true \
    ...

In this case we will not check for failed actions, a test that otherwise would
have been done in mitaka, and we will force the execution of the "ng_a" test
described earlier, which is originally executed just in newton versions or
above.

All tests are performed using the tool [ha-test-suite](https://github.com/openstack/tripleo-ha-utils/tree/master/tools/ha-test-suite).
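Under the hood, each enabled test runs that suite on the first controller with
an invocation of this shape (a sketch: the working directory comes from this
role's defaults, and the test and recovery names change per test):

    /home/heat-admin/ha-test-suite/ha-test-suite.sh \
      -t /home/heat-admin/ha-test-suite/test/test_master-slave \
      -r /home/heat-admin/ha-test-suite/recovery/recovery_master-slave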
Applying latency
----------------

It is possible to add an arbitrary amount of milliseconds of latency on each
overcloud node to check whether the environment can pass the HA validation in
any case.
Adding the latency is a matter of passing two variables:

* **latency_ms**: the number of additional milliseconds to be added to the
  interface;
* **latency_eth_interface**: the physical interface to which the user wants to
  apply the latency; this must be present on all the overcloud nodes;

So a typical command line in which a user wants to add 20ms of latency on the
ethernet device eth0 will contain something like this:

    ...
    -e latency_ms=20 \
    -e latency_eth_interface=eth0 \
    ...

The latency will be applied before the tests execution and removed right after.
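On each overcloud node this translates into a netem queueing discipline on the
chosen interface, roughly equivalent to running (shown here with the example
values above; the role performs the add before the tests and the del afterwards):

    # add 20ms of delay on eth0, then remove it once the tests are done
    /usr/sbin/tc qdisc add dev eth0 root netem delay 20ms
    /usr/sbin/tc qdisc del dev eth0 root netem delay 20ms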
Examples on how to invoke the playbook via ansible
--------------------------------------------------

Here's a way to invoke the tests from an *undercloud* machine prepared as
described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-validate-ha.yml \
      -e release=ocata \
      -e local_working_dir=/home/stack \
      -e private_net_cidr="192.168.1.0/24" \
      -e public_physical_network="floating" \
      -e floating_ip_cidr="10.0.0.0/24" \
      -e public_net_pool_start="10.0.0.191" \
      -e public_net_pool_end="10.0.0.198" \
      -e public_net_gateway="10.0.0.254"

Note that the variables above can be declared inside a config.yml file that can
be passed to the ansible-playbook command like this:

    ansible-playbook -vvvv /home/stack/tripleo-ha-utils/playbooks/overcloud-validate-ha.yml -e @/home/stack/config.yml

The result will be the same.

License
-------

GPL

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
@ -1,25 +0,0 @@
---

working_dir: "/home/stack"
validate_ha_logs_dir: "{{ working_dir }}/validate_ha_logs"
overcloud_working_dir: "/home/heat-admin"

validate_ha_heat_environment: "validate-ha-heat-environment.yaml.j2"
validate_ha_heat_template: "validate-ha-heat-template.yaml.j2"
validate_ha_heat_instance_image_format: "qcow2"
validate_ha_heat_instance_image_location: "http://download.cirros-cloud.net/0.4.0/cirros-0.4.0-x86_64-disk.img"
validate_ha_heat_instance_volume_gb: 1

private_net_name: "private-network"
private_subnet_name: "private-subnet"
public_net_name: "public-network"
public_subnet_name: "public-subnet"
private_net_cidr: "10.1.1.0/24"
public_physical_network: "datacentre"
public_network_type: "flat"
floating_ip_cidr: "{{ undercloud_network_cidr|default('192.0.2.0/24') }}"
floating_ip_start: "{{ floating_ip_cidr|nthhost(100) }}"
floating_ip_end: "{{ floating_ip_cidr|nthhost(120) }}"
external_network_gateway: "{{ floating_ip_cidr|nthhost(1) }}"

latency_ms: 0
@ -1,26 +0,0 @@
---

# Execute ha-test-suite test
- block:
    - name: "Testing {{ ha_test_name }} with recovery {{ ha_recovery_name }}"
      delegate_to: "{{ groups.controller[0] }}"
      shell: >
        {{ overcloud_working_dir }}/ha-test-suite/ha-test-suite.sh \
        -t {{ overcloud_working_dir }}/ha-test-suite/test/{{ ha_test_name }} \
        -r {{ overcloud_working_dir }}/ha-test-suite/recovery/{{ ha_recovery_name }}
      register: ha_test_cmd

    - include_tasks: heat-validation-create.yml
    - include_tasks: heat-validation-check.yml
    - include_tasks: heat-validation-delete.yml

  vars:
    stack_name: "stack_{{ ha_test_name }}"

  always:
    - name: Copy stdout for test {{ ha_test_name }} to undercloud
      copy: content="{{ ha_test_cmd.stdout }}" dest="{{ validate_ha_logs_dir }}/{{ ha_test_name }}_stdout.log"
  rescue:
    - name: Copy stderr for test {{ ha_test_name }} to undercloud
      copy: content="{{ ha_test_cmd.stderr }}" dest="{{ validate_ha_logs_dir }}/{{ ha_test_name }}_stderr.log"
    - fail: msg="{{ ha_test_cmd.stderr }}"
@ -1,7 +0,0 @@
---

- name: Wait up to five minutes for the instance to be reachable
  wait_for:
    host: "{{ vars[ stack_name + '_instance_ip'].stdout }}"
    port: 22
    timeout: 300
@ -1,30 +0,0 @@
---

- name: Load image in Glance to be used by Heat
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack image create \
      --disk-format {{ validate_ha_heat_instance_image_format }} \
      --file {{ working_dir }}/{{ heat_image_name }} \
      --format value \
      --column "id" \
      validate_ha_image > \
      {{ validate_ha_logs_dir }}/{{ ha_test_name }}_image-create.log 2>&1

- name: Execute environment validation via Heat
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack stack create \
      --environment validate-ha-heat-environment.yaml \
      --template validate-ha-heat-template.yaml \
      --wait \
      {{ stack_name }} > \
      {{ validate_ha_logs_dir }}/{{ ha_test_name }}_heat-create.log 2>&1

- name: Get instance IP
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack stack show -c outputs -f json {{ stack_name }} | \
      jq --raw-output '.outputs[] | select( .output_key == "server_public_ip") | .output_value' 2>&1 | \
      tee {{ validate_ha_logs_dir }}/{{ ha_test_name }}_heat-instance-ip.log
  register: "{{ stack_name }}_instance_ip"
@ -1,16 +0,0 @@
---

- name: Clean the created stack
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack stack delete \
      --yes \
      --wait \
      {{ stack_name }} > \
      {{ validate_ha_logs_dir }}/{{ ha_test_name }}_heat-delete.log 2>&1

- name: Clean image in Glance
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack image delete validate_ha_image > \
      {{ validate_ha_logs_dir }}/{{ ha_test_name }}_image-delete.log 2>&1
@ -1,147 +0,0 @@
---

- name: Include test sequence depending on release
  include_vars:
    dir: "vars"
    files_matching: "test_list_{{ release }}.yml"

- name: Create directory on the undercloud to store test results
  file: path={{ validate_ha_logs_dir }} state=directory

- name: Copy ha-test-suite on controllers
  shell: >
    {% if (undercloud_user == 'zuul') and (zuul.projects is defined) -%}
    /usr/bin/rsync --delay-updates -F --compress --archive -e 'ssh -F {{ local_working_dir }}/ssh.config.ansible' /home/{{ undercloud_user }}/src/opendev.org/openstack/tripleo-ha-utils/tools/ha-test-suite {{ hostvars[item]['ansible_hostname'] }}:
    {%- else -%}
    /usr/bin/rsync --delay-updates -F --compress --archive -e 'ssh -F {{ local_working_dir }}/ssh.config.ansible' {{ local_working_dir }}/tripleo-ha-utils/tools/ha-test-suite {{ hostvars[item]['ansible_hostname'] }}:
    {%- endif -%}
  delegate_to: "localhost"
  with_items:
    - "{{ groups['controller'] }}"

- name: Apply latency (if defined)
  vars:
    latency_action: "add"
  include_tasks: manage-latency.yml
  when: latency_ms|int > 0

- name: Create the environment template on undercloud
  template:
    src: "{{ validate_ha_heat_environment }}"
    dest: "{{ working_dir }}/validate-ha-heat-environment.yaml"
    mode: 0600

- name: Create the test template on undercloud
  template:
    src: "{{ validate_ha_heat_template }}"
    dest: "{{ working_dir }}/validate-ha-heat-template.yaml"
    mode: 0600

- name: Download and uncompress (if necessary) image file for Heat
  shell: |
    image_url="{{ validate_ha_heat_instance_image_location }}"
    image_file=$(basename $image_url)

    curl -s -o $image_file $image_url

    case "$image_file" in
      *.tar)
        image_name=$(tar xvf $image_file)
        ;;
      *.tar.gz|*.tgz)
        image_name=$(tar xzvf $image_file)
        ;;
      *.tar.bz2|*.tbz2)
        image_name=$(tar xjvf $image_file)
        ;;
      *.tar.xz|*.txz)
        image_name=$(tar xJf $image_file)
        ;;
      *.bz2)
        bunzip2 --force --quiet $image_file
        image_name=${image_file%.*};
        ;;
      *.gz)
        gunzip --force --quiet $image_file
        image_name=${image_file%.*};
        ;;
      *.xz)
        xz --force --quiet --decompress $image_file
        image_name=${image_file%.*};
        ;;
      *) image_name=$image_file
        ;;
    esac

    echo $image_name
  register: image_name

- set_fact:
    heat_image_name: "{{ image_name.stdout }}"

# Test: failed actions
- name: HA test - Failed actions
  vars:
    ha_test_name: "test_ha_failed_actions"
    ha_recovery_name: ""
  include_tasks: ha-test-suite.yml
  when: test_ha_failed_actions|bool

# Test: Master/Slave
- name: HA test - Master/Slave core resource stop and start
  vars:
    ha_test_name: "test_master-slave"
    ha_recovery_name: "recovery_master-slave"
  include_tasks: ha-test-suite.yml
  when: test_ha_master_slave|bool

# Test: Keystone stop
- name: HA test - Keystone stop
  vars:
    ha_test_name: "test_keystone-stop"
    ha_recovery_name: "recovery_keystone-stop"
  include_tasks: ha-test-suite.yml
  when: test_ha_keystone_stop|bool

# Test: Keystone removal
- name: HA test - Keystone constraint removal
  vars:
    ha_test_name: "test_keystone-constraint-removal"
    ha_recovery_name: "recovery_keystone-constraint-removal"
  include_tasks: ha-test-suite.yml
  when: test_ha_keystone_constraint_removal|bool

# Test: NG A
- name: HA test - Pacemaker light test A
  vars:
    ha_test_name: "test_pacemaker-light-a"
    ha_recovery_name: "recovery_pacemaker-light"
  include_tasks: ha-test-suite.yml
  when: test_ha_ng_a|bool

# Test: NG B
- name: HA test - Pacemaker light test B
  vars:
    ha_test_name: "test_pacemaker-light-b"
    ha_recovery_name: "recovery_pacemaker-light"
  include_tasks: ha-test-suite.yml
  when: test_ha_ng_b|bool

# Test: NG C
- name: HA test - Pacemaker light test C
  vars:
    ha_test_name: "test_pacemaker-light-c"
    ha_recovery_name: "recovery_pacemaker-light"
  include_tasks: ha-test-suite.yml
  when: test_ha_ng_c|bool

- name: Remove image file
  file:
    path: "{{ working_dir }}/{{ heat_image_name }}"
    state: absent

- name: Remove latency (if defined)
  vars:
    latency_action: "del"
  include_tasks: manage-latency.yml
  when: latency_ms|int > 0
@ -1,12 +0,0 @@
# Manage latency on all nodes
- name: "Manage latency on all nodes"
  shell: |
    /usr/sbin/tc qdisc {{ latency_action }} dev {{ latency_eth_interface }} root netem delay {{ latency_ms }}ms
  delegate_to: "{{ item }}"
  become: true
  with_items:
    - "{{ groups['overcloud'] }}"
  when:
    - latency_action in [ "add", "del" ]
    - latency_eth_interface is defined
    - latency_ms|int > 0
@ -1,13 +0,0 @@
# Heat template parameters
parameters:
  private_net_name: "{{ private_net_name }}"
  private_subnet_name: "{{ private_subnet_name }}"
  private_net_cidr: "{{ private_net_cidr }}"
  public_net_name: "{{ public_net_name }}"
  public_subnet_name: "{{ public_subnet_name }}"
  public_physical_network: "{{ public_physical_network }}"
  public_network_type: "{{ public_network_type }}"
  public_net_cidr: "{{ floating_ip_cidr }}"
  public_net_gateway: "{{ public_net_gateway }}"
  public_net_pool_start: "{{ public_net_pool_start }}"
  public_net_pool_end: "{{ public_net_pool_end }}"
@ -1,192 +0,0 @@
heat_template_version: 2016-10-14
description: spawning a server

parameters:
  private_net_name:
    type: string
    default: "private"
    description: Name of private network into which servers get deployed
  private_subnet_name:
    type: string
    default: private_subnet
    description: Name of private subnet into which servers get deployed
  private_net_cidr:
    type: string
    description: Private network address (CIDR notation)
  public_physical_network:
    type: string
    default: "datacentre"
    description: Physical network name
  public_network_type:
    type: string
    default: "flat"
    description: Type of the physical network (flat or vlan)
    constraints:
      - allowed_values:
          - vlan
          - flat
  public_net_name:
    type: string
    default: public
    description: Name of public network into which servers get deployed
  public_subnet_name:
    type: string
    default: public_subnet
    description: Name of public subnet into which servers get deployed
  public_net_cidr:
    type: string
    description: Public network address (CIDR notation)
  public_net_gateway:
    type: string
    description: Public network gateway address
  public_net_pool_start:
    type: string
    description: Start of public network IP address allocation pool
  public_net_pool_end:
    type: string
    description: End of public network IP address allocation pool

resources:

  ###########
  # Network #
  ###########

  private_net:
    type: OS::Neutron::Net
    properties:
      name: { get_param: private_net_name }

  private_subnet:
    type: OS::Neutron::Subnet
    properties:
      name: { get_param: private_subnet_name }
      network_id: { get_resource: private_net }
      cidr: { get_param: private_net_cidr }

  public_net:
    type: OS::Neutron::ProviderNet
    properties:
      name: { get_param: public_net_name }
      router_external: true
      physical_network: { get_param: public_physical_network }
      network_type: { get_param: public_network_type }

  public_subnet:
    type: OS::Neutron::Subnet
    properties:
      name: { get_param: public_subnet_name }
      network_id: { get_resource: public_net }
      cidr: { get_param: public_net_cidr }
      gateway_ip: { get_param: public_net_gateway }
      allocation_pools:
        - start: { get_param: public_net_pool_start }
          end: { get_param: public_net_pool_end }

  router:
    type: OS::Neutron::Router
    properties:
      external_gateway_info:
        network: { get_resource: public_net }

  router_interface:
    type: OS::Neutron::RouterInterface
    properties:
      router_id: { get_resource: router }
      subnet_id: { get_resource: private_subnet }

  public_net_port:
    type: OS::Neutron::Port
    properties:
      network: { get_resource: private_net }
      fixed_ips:
        - subnet: { get_resource: private_subnet }
      security_groups: [{ get_resource: public_security_group }]

  public_floating_ip:
    type: OS::Neutron::FloatingIP
    properties:
      floating_network: { get_resource: public_net }
      port_id: { get_resource: public_net_port }

  public_security_group:
    type: OS::Neutron::SecurityGroup
    properties:
      description: Add security group rules for the multi-tier architecture
      name: pingandssh
      rules:
        - remote_ip_prefix: 0.0.0.0/0
          protocol: tcp
          port_range_min: 22
          port_range_max: 22
        - remote_ip_prefix: 0.0.0.0/0
          protocol: tcp
          port_range_min: 80
          port_range_max: 80
        - remote_ip_prefix: 0.0.0.0/0
          protocol: icmp

  ###########
  # Volume  #
  ###########

  instance_volume:
    type: OS::Cinder::Volume
    properties:
      name: "instance_volume"
      size: {{ validate_ha_heat_instance_volume_gb }}
      image: "validate_ha_image"

  ###########
  # Keypair #
  ###########

  instance_keypair:
    type: OS::Nova::KeyPair
    properties:
      name: "instance_keypair"
      save_private_key: "true"

  ###########
  # Flavor  #
  ###########

  instance_flavor:
    type: OS::Nova::Flavor
    properties:
      name: "instance_flavor"
      ephemeral: 0
      ram: 2048
      disk: 10
      vcpus: 2

  ###########
  # Server  #
  ###########

  instance:
    type: OS::Nova::Server
    properties:
      name: "validate_ha_instance"
      flavor: { get_resource: instance_flavor }
      key_name: { get_resource: instance_keypair }
      networks:
        - port: { get_resource: public_net_port }
      block_device_mapping: [{ device_name: "vda", volume_id : { get_resource : instance_volume }, delete_on_termination : "true" }]

outputs:
  server_private_ip:
    description: IP address of first web server in private network
    value: { get_attr: [ instance, first_address ] }

  server_public_ip:
    description: Floating IP address of the web server
    value: { get_attr: [ public_floating_ip, floating_ip_address ] }

  public_key:
    description: The public key of the keypair.
    value: { get_attr: [instance_keypair, public_key] }

  private_key:
    description: The private key of the keypair.
    value: { get_attr: [instance_keypair, private_key] }
@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: true
test_ha_keystone_constraint_removal: false
test_ha_ng_a: false
test_ha_ng_b: false
test_ha_ng_c: false

@ -1 +0,0 @@
test_list_rocky.yml

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: true
test_ha_ng_a: false
test_ha_ng_b: false
test_ha_ng_c: false

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true

@ -1 +0,0 @@
test_list_newton.yml

@ -1 +0,0 @@
test_list_ocata.yml

@ -1 +0,0 @@
test_list_pike.yml

@ -1 +0,0 @@
test_list_queens.yml

@ -1 +0,0 @@
test_list_liberty.yml

@ -1 +0,0 @@
test_list_mitaka.yml

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true
38
setup.cfg
@ -1,38 +0,0 @@
[metadata]
name = tripleo-ha-utils
summary = Give a set of tools to test TripleO HA capabilities
description_file =
    README.md
long_description_content_type = text/markdown
author = Raoul Scarazzini
author_email = rasca@redhat.com
home_page = https://github.com/openstack/tripleo-ha-utils/
classifier =
    License :: OSI Approved :: Apache Software License
    Development Status :: 4 - Beta
    Intended Audience :: Developers
    Intended Audience :: System Administrators
    Intended Audience :: Information Technology
    Topic :: Utilities

[build_sphinx]
all_files = 1
build-dir = doc/build
source-dir = doc/source

[global]
setup-hooks =
    pbr.hooks.setup_hook

[files]
data_files =
    config = config/*
    playbooks = playbooks/*
    usr/local/share/ansible/roles = roles/*

[wheel]
universal = 1

[pbr]
skip_authors = True
skip_changelog = True
20
setup.py
@ -1,20 +0,0 @@
# Copyright Red Hat, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import setuptools

setuptools.setup(
    setup_requires=['pbr'],
    py_modules=[],
    pbr=True)
@ -1,145 +0,0 @@
# OpenStack TripleO HA Test Suite

This project is a modular and customizable test suite to be applied in an
Overcloud OpenStack environment deployed via TripleO upstream or Red Hat
OpenStack Director (OSPd).

## Usage

The script needs at least a test file (-t) which must contain the sequence of
the operations to be done. A recovery file (-r), with the sequence of the
operations needed to recover the environment, can also be passed. So a typical
invocation will be something like this:

```console
[heat-admin@overcloud-controller-0 overcloud-ha-test-suite]$ ./overcloud-ha-test-suite.sh -t test/test_keystone-constraint-removal -r recovery/recovery_keystone-constraint-removal
Fri May 20 15:27:19 UTC 2016 - Populating overcloud elements...OK
Fri May 20 15:27:22 UTC 2016 - Test: Stop keystone resource (by stopping httpd), check no other resource is stopped
Fri May 20 15:27:22 UTC 2016 * Step 1: disable keystone resource via httpd stop
Fri May 20 15:27:22 UTC 2016 - Performing action disable on resource httpd ..OK
Fri May 20 15:27:26 UTC 2016 - List of cluster's failed actions:
Cluster is OK.
Fri May 20 15:27:29 UTC 2016 * Step 2: check resource status
Fri May 20 15:27:29 UTC 2016 - Cycling for 10 minutes polling every minute the status of the resources
Fri May 20 15:28:29 UTC 2016 - Polling...
delay -> OK
galera -> OK
...
...
openstack-sahara-engine -> OK
rabbitmq -> OK
redis -> OK
Fri May 20 15:41:00 UTC 2016 - List of cluster's failed actions:
Cluster is OK.
Fri May 20 15:41:03 UTC 2016 - Waiting 10 seconds to recover environment
Fri May 20 15:41:13 UTC 2016 - Recovery: Enable keystone via httpd and check for failed actions
Fri May 20 15:41:13 UTC 2016 * Step 1: enable keystone resource via httpd
Fri May 20 15:41:13 UTC 2016 - Performing action enable on resource httpd-clone OK
Fri May 20 15:41:15 UTC 2016 - List of cluster's failed actions:
Cluster is OK.
Fri May 20 15:41:17 UTC 2016 - End
```

The exit status depends on the result of the operations: if a disable
operation fails, if failed actions appear, or if recovery does not end with
success, the exit status will not be 0.
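Since the exit status reflects the outcome, the suite can be wrapped directly
in shell logic, for instance (a minimal sketch reusing the invocation above):

```bash
./overcloud-ha-test-suite.sh \
  -t test/test_keystone-constraint-removal \
  -r recovery/recovery_keystone-constraint-removal \
  || { echo "HA test failed"; exit 1; }
```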
## Test and recoveries

Tests and recoveries are bash script portions that are included inside the
main script. Some functions and variables are available to help with recurring
operations. These functions are listed here:

- **check_failed_actions**: will print failed actions and return an error in
  case some of them are present;
- **check_resources_process_status**: will check the process status of the
  resources on the system (not in the cluster), i.e. will check if there is a
  process for the mysql daemon;
- **wait_resource_status**: will wait until a default timeout
  ($RESOURCE_CHANGE_STATUS_TIMEOUT) for a resource to reach a status;
- **check_resource_status**: will check a resource status, i.e. if you want to
  check whether the httpd resource is started;
- **wait_cluster_start**: will wait until a timeout
  ($RESOURCE_CHANGE_STATUS_TIMEOUT) for the cluster to be started, specifically
  will wait for all resources to be in state "Started";
- **play_on_resources**: will set the status of a resource;

The variables are:

- **OVERCLOUD_CORE_RESOURCES**: which are galera and rabbitmq
- **OVERCLOUD_RESOURCES**: which are *all* the resources
- **OVERCLOUD_SYSTEMD_RESOURCES**: which are the resources managed via systemd
  by pacemaker;

These can be used in combination to write test and recovery files.
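For instance, a recovery file could be sketched with nothing more than these
helpers (a hypothetical fragment, not one shipped with the suite):

```bash
# Recovery: Re-enable the core resources and wait for the cluster to settle
echo "$(date) * Step 1: enable core resources"
play_on_resources "enable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) * Step 2: wait for the cluster to start and check for failed actions"
wait_cluster_start
check_failed_actions
```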
### Test file contents

A typical test file, say test/test_keystone-constraint-removal, will contain
something like this:

```bash
# Test: Stop keystone resource (by stopping httpd), check no other resource is stopped

echo "$(date) * Step 1: disable keystone resource via httpd stop"
play_on_resources "disable" "httpd"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: check resource status"
# Define resource list without httpd
OVERCLOUD_RESOURCES_NO_KEYSTONE="$(echo $OVERCLOUD_RESOURCES | sed 's/httpd/ /g')"
# Define number of minutes to look for status
MINUTES=10
# Cycling for $MINUTES minutes polling every minute the status of the resources
echo "$(date) - Cycling for 10 minutes polling every minute the status of the resources"
i=0
while [ $i -lt $MINUTES ]
do
  # Wait a minute
  sleep 60
  echo "$(date) - Polling..."
  for resource in $OVERCLOUD_RESOURCES_NO_KEYSTONE
  do
    echo -n "$resource -> "
    check_resource_status "$resource" "Started"
    [ $? -eq 0 ] && echo "OK" || (FAILURES=1; echo "Error!")
  done
  let "i++"
done

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
```

The code is commented and should be self explaining, but in short:

- the first commented line, after "# Test: ", is read as the test title;
- using play_on_resources it disables the httpd resource;
- it checks for failed actions;
- it defines a variable named OVERCLOUD_RESOURCES_NO_KEYSTONE containing all
  the resources but httpd;
- it cycles for 10 minutes, polling every minute the status of all the
  resources;

If any of these steps fails for some reason, then the overall test will be
considered failed and the exit status will not be 0.

### Recovery file contents

A typical recovery file, say recovery/recovery_keystone-constraint-removal,
will contain something like this:

```bash
# Recovery: Enable keystone via httpd and check for failed actions

echo "$(date) * Step 1: enable keystone resource via httpd"
play_on_resources "enable" "httpd-clone"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
```

Again:

- the first commented line, after "# Recovery: ", is read as the recovery title;
- using play_on_resources it enables the httpd resource;
- it checks for failed actions;
@ -1,80 +0,0 @@
#!/bin/bash

# Raoul Scarazzini (rasca@redhat.com)
# This script provides a testing suite for TripleO HA environments

# Define main workdir
WORKDIR=$(dirname $0)

# Source function library.
. $WORKDIR/include/functions

# Fixed parameters
# How long to wait, in seconds, for a resource to change status (i.e. from started to stopped)
RESOURCE_CHANGE_STATUS_TIMEOUT=600
# How long to wait, in seconds, before starting recovery
DEFAULT_RECOVERY_WAIT_TIME=10

# Command line parameters
if [ $# -gt 0 ]
then
 while :; do
  case $1 in
   -h|-\?|--help)
    usage
    exit
    ;;
   -t|--test)
    test_sequence="$2"
    shift
    ;;
   -r|--recover)
    recovery_sequence="$2"
    shift
    ;;
   --)
    shift
    break
    ;;
   -?*)
    usage
    exit 1
    ;;
   *)
    break
  esac
  shift
 done
else
 usage
 exit 1
fi

# Populating overcloud elements
echo -n "$(date) - Populating overcloud elements..."
OVERCLOUD_CORE_RESOURCES="galera redis rabbitmq"
OVERCLOUD_RESOURCES=$(sudo pcs resource show | egrep '^ (C|[a-Z])' | sed 's/.* \[\(.*\)\]/\1/g' | sed 's/ \(.*\)(.*):.*/\1/g' | sort)
OVERCLOUD_SYSTEMD_RESOURCES=$(sudo pcs config show | egrep "Resource:.*systemd" | grep -v "haproxy" | awk '{print $2}')
echo "OK"

if [ -f "$test_sequence" ]
then
 echo "$(date) - Test: $(grep '^#.*Test:' $test_sequence | sed 's/^#.*Test: //')"
 . $test_sequence
else
 echo "No test file passed or unable to read test file."
fi

if [ -f "$recovery_sequence" ]
then
 echo "$(date) - Waiting $DEFAULT_RECOVERY_WAIT_TIME seconds to recover environment"
 sleep $DEFAULT_RECOVERY_WAIT_TIME

 echo "$(date) - Recovery: $(grep '^#.*Recovery:' $recovery_sequence | sed 's/^#.*Recovery: //')"
 . $recovery_sequence
else
 echo "No recovery file passed or unable to read recovery file."
fi

echo "$(date) - End"
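
As an aside on the resource discovery above: OVERCLOUD_RESOURCES is built by
stripping the pcs output down to bare resource names. A minimal, self-contained
sketch of what the first sed expression keeps, run against a hypothetical line
in the "Clone Set: name-clone [name]" format the pipeline is written for:

```bash
# Hypothetical sample line mimicking "pcs resource show" output of that era.
sample=" Clone Set: rabbitmq-clone [rabbitmq]"
echo "$sample" | sed 's/.* \[\(.*\)\]/\1/g'
# -> rabbitmq
```
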
@ -1,151 +0,0 @@
# Raoul Scarazzini (rasca@redhat.com)
# This file provides the functions used by the testing suite for
# TripleO/Director OpenStack HA (i.e. Pacemaker based) environments.

function usage {
 echo "Usage: $0 -t <testfile> [-r <recoverfile>] [-u]
  -t, --test <testfile>        Specify which file contains the test to run
  -r, --recover <recoverfile>  Specify which file (if any) should be used for recovery
  -u, --undercloud             Test will be performed on undercloud
 "
}

function check_failed_actions {
 resource=$1

 sudo pcs status | grep "Failed Actions:" &> /dev/null
 if [ $? -eq 0 ]
 then
  if [ "x$resource" == "x" ]
  then
   echo "Cluster has failed actions:"
   sudo pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq
   exit 1
  else
   errors=$(sudo pcs status | sed -n -e '/Failed Actions:/,/^$/p' | grep -A1 $resource)
   if [ $? -eq 0 ]
   then
    echo "Resource $resource has failed actions:"
    echo $errors
    exit 1
   else
    echo "No failed actions for $resource."
    return 0
   fi
  fi
 else
  [ "x$resource" == "x" ] && echo "Cluster is OK." || echo "No failed actions for $resource."
  return 0
 fi
}

function check_resources_process_status {
 for resource in $OVERCLOUD_RESOURCES
 do
  echo -n "$resource -> "

  case $resource in
   ip-*) #ip_addr=$(pcs resource show $resource | grep Attributes | sed 's/.*ip=\(.*\) cidr.*/\1/g')
    ip_addr=$(echo $resource | sed 's/ip-//g')
    sudo ip a s | grep $ip_addr &> /dev/null
    ;;
   rabbitmq) sudo /usr/sbin/rabbitmqctl cluster_status &> /dev/null
    ;;
   redis) pidof /usr/bin/redis-server &> /dev/null
    ;;
   galera) pidof /usr/libexec/mysqld &> /dev/null
    ;;
   *cleanup*|delay) echo -n "no need to check if it's "
    ;;
   *) systemctl is-active $resource &> /dev/null
    ;;
  esac

  [ $? -eq 0 ] && echo "active" || echo "inactive"

 done
}

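The ip-* branch above relies on Pacemaker VIP resources being named after the
address they carry. A minimal sketch of that naming assumption (the address is
illustrative, from the documentation range):

```bash
# Hypothetical VIP resource name; stripping the "ip-" prefix leaves the address
# that should show up in "ip a s" output on the node holding the VIP.
resource="ip-192.0.2.10"
ip_addr=$(echo $resource | sed 's/ip-//g')
sudo ip a s | grep "$ip_addr" &> /dev/null && echo "active" || echo "inactive"
```
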
function wait_resource_status {
 resource=$1
 status=$2
 i=1

 while [ $i -lt $RESOURCE_CHANGE_STATUS_TIMEOUT ]
 do
  output=$(sudo pcs status resources | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [a-Z]/p" | head -n -1 | tail -n +2 | egrep -v "$status\:")
  if [ "x$output" == "x" ]
  then
   return 0
   break
  else
   echo -n "."
   sleep 1
   let "i++"
  fi
 done
 check_failed_actions
 exit 1
}

function check_resource_status {
 resource=$1
 status=$2

 output=$(sudo pcs status resources | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [a-Z]/p" | head -n -1 | tail -n +2 | egrep -v "$status\:")
 # Since we are checking a specific status, if we have output from above it
 # means that for some reason the resource is not in the state we are expecting
 [ "x$output" == "x" ] && return 0 || (check_failed_actions; exit 1)
}

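To make the parsing above concrete: for a clone resource that is fully started,
the block isolated by the sed/head/tail stages contains only "Started:" lines,
so filtering them out leaves nothing and the function returns 0. A minimal
sketch with a hypothetical, already-isolated block (node names are made up):

```bash
# Hypothetical block for the httpd clone, as it would look once isolated from
# "pcs status resources" output.
block="     Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]"
leftover=$(echo "$block" | egrep -v "Started:")
[ "x$leftover" == "x" ] && echo "httpd -> OK" || echo "httpd -> not fully Started"
```
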
function wait_cluster_start {
 i=1
 while true; do
  [ $i -eq $RESOURCE_CHANGE_STATUS_TIMEOUT ] && break

  # Check for failed actions
  sudo pcs status | egrep "Failed" &> /dev/null
  [ $? -eq 0 ] && break

  # If we have stopped resources let's wait
  sudo pcs status | egrep "Stopped" &> /dev/null
  if [ $? -eq 0 ]
  then
   echo -n "."
  else
   echo "All cluster resources are started."
   return 0
   break
  fi
  sleep 1
  let "i++"
 done

 # If we are here then we have problems: we hit the timeout or we still have
 # stopped resources
 echo "Problems found. There are stopped or failed resources!"
 check_failed_actions
 exit 1
}

function play_on_resources {
 action=$1
 resources=$2

 for resource in $resources
 do
  echo -n "$(date) - Performing action $action on resource $resource "
  # Do the action on the resource
  sudo pcs resource $action $resource --wait=$RESOURCE_CHANGE_STATUS_TIMEOUT
  if [ $? -ne 0 ]
  then
   echo "FAILURE!"
   check_failed_actions $resource
   exit 1
  else
   echo "OK"
  fi
 done
 return 0
}

@ -1,13 +0,0 @@
# Recovery: Enable all systemd and core resources, cleanup failed actions

echo "$(date) * Step 1: enable all the cluster resources"
play_on_resources "enable" "$OVERCLOUD_RESOURCES"

echo "$(date) * Step 2: Cleaning up failed resources"
sudo pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_TIMEOUT|not running' | awk '{print $2}' | cut -f1 -d_ | sort | uniq | while read RES; do echo "Cleaning $RES"; sudo pcs resource cleanup $RES; done

echo "$(date) * Step 3: Waiting all resources to start"
wait_cluster_start

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
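
The cleanup one-liner in Step 2 above is dense; as a readability aid, here is a
sketch of the same pipeline broken out stage by stage (intended to behave
identically, but treat it as illustrative rather than a drop-in replacement):

```bash
# 1. keep only the "Failed Actions:" block of pcs status
# 2. keep lines reporting OCF_TIMEOUT or "not running"
# 3. field 2 looks like "<resource>_<operation>_<interval>", so cutting at "_"
#    keeps just the resource name
# 4. de-duplicate and run a cleanup for each resource found
sudo pcs status |
  sed -n -e '/Failed Actions:/,/^$/p' |
  egrep 'OCF_TIMEOUT|not running' |
  awk '{print $2}' |
  cut -f1 -d_ |
  sort | uniq |
  while read RES; do
    echo "Cleaning $RES"
    sudo pcs resource cleanup "$RES"
  done
```
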
@ -1,7 +0,0 @@
# Recovery: Enable keystone via httpd and check for failed actions

echo "$(date) * Step 1: enable keystone resource via httpd"
play_on_resources "enable" "httpd-clone"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,10 +0,0 @@
# Recovery: Enable openstack-keystone and check for failed actions

echo "$(date) * Step 1: enable openstack-keystone resource"
play_on_resources "enable" "openstack-keystone-clone"

echo "$(date) - Checking for Stopped resources:"
wait_cluster_start

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,7 +0,0 @@
# Recovery: Enable master slave resources (galera and redis), all the resources should come up

echo "$(date) * Step 1: enable galera, redis and rabbitmq"
play_on_resources "enable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,7 +0,0 @@
# Recovery: Enable mongo and check for failed actions

echo "$(date) * Step 1: enable mongo"
play_on_resources "enable" "mongo"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,13 +0,0 @@
# Recovery: Enable all systemd and core resources, cleanup failed actions

echo "$(date) * Step 1: enable core resources"
play_on_resources "enable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) * Step 2: enable all the systemd resources"
play_on_resources "enable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) * Step 3: Waiting all resources to start"
wait_cluster_start

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,10 +0,0 @@
# Recovery: Start cluster again

echo "$(date) * Step 1: start the cluster"
sudo pcs cluster start --all

echo "$(date) * Step 2: Waiting all resources to start"
wait_cluster_start

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,3 +0,0 @@
# Test: Wait cluster start and look for failed actions

echo "$(date) - Waiting for cluster start and checking for failed resources:"
wait_cluster_start
@ -1,40 +0,0 @@
# Test: Stop keystone resource (by stopping httpd), check no other resource is stopped

echo "$(date) * Step 1: disable keystone resource via httpd stop"
play_on_resources "disable" "httpd"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: check resource status"
# Define resource list without httpd
OVERCLOUD_RESOURCES_NO_KEYSTONE="$(echo $OVERCLOUD_RESOURCES | sed 's/httpd/ /g')"
# Define number of minutes to look for status
MINUTES=10
# Cycling for $MINUTES minutes polling every minute the status of the resources
echo "$(date) - Cycling for 10 minutes polling every minute the status of the resources"
i=0
while [ $i -lt $MINUTES ]
do
 # Wait a minute
 sleep 60
 echo "$(date) - Polling..."
 for resource in $OVERCLOUD_RESOURCES_NO_KEYSTONE
 do
  echo -n "$resource -> "
  # If the resource is a multi state like galera or redis, do a different check
  case $resource in
   "galera") check_resource_status "$resource" "Masters"
    ;;
   "redis") check_resource_status "$resource" "(Masters|Slaves)"
    ;;
   *) check_resource_status "$resource" "Started"
    ;;
  esac
  [ $? -eq 0 ] && echo "OK" || (FAILURES=1; echo "Error!"; break)
 done
 let "i++"
done

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,7 +0,0 @@
# Test: Stop openstack-keystone and look for failed actions

echo "$(date) * Step 1: disable openstack-keystone resource"
play_on_resources "disable" "openstack-keystone-clone"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,7 +0,0 @@
# Test: Stop master slave resources (galera and redis), all the resources should come down

echo "$(date) * Step 1: disable galera, redis and rabbitmq"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,43 +0,0 @@
# Test: Stop mongo resource, check related systemd resources are fine

echo "$(date) * Step 1: disable mongo"
play_on_resources "disable" "mongo"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: check resource status"
# Define related resources
OVERCLOUD_RESOURCES="openstack-aodh-evaluator openstack-aodh-listener openstack-aodh-notifier openstack-ceilometer-central.service openstack-ceilometer-collector.service openstack-ceilometer-notification.service"
# Define number of minutes to look for status
MINUTES=10
# Cycling for $MINUTES minutes polling every minute the status of the resources
echo "$(date) - Cycling for 10 minutes polling every minute the status of the resources"
i=0
while [ $i -lt $MINUTES ]
do
 # Wait a minute
 sleep 60
 echo "$(date) - Polling..."
 for resource in $OVERCLOUD_RESOURCES
 do
  echo -n "$resource -> "
  # Check if the resource is active for the system
  systemctl is-active $resource
  if [ $? -ne 0 ]
  then
   # Show status of the resource
   echo "Error! Resource $resource is not active anymore."
   systemctl status $resource
   # Check in any case cluster's failed actions
   echo "$(date) - List of cluster's failed actions:"
   check_failed_actions
   # Now exit with an error
   exit 1
  fi
 done
 let "i++"
done

# If we are here, test was successful
echo "$(date) - Test was successful"
@ -1,19 +0,0 @@
# Test: Stop every systemd resource, stop Galera and Rabbitmq, Start every systemd resource

echo "$(date) * Step 1: disable all the systemd resources"
play_on_resources "disable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: disable core services"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 3: enable each resource one by one and check the status"
play_on_resources "enable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,19 +0,0 @@
# Test: Stop Galera and Rabbitmq, stop every systemd resource, Start every systemd resource

echo "$(date) * Step 1: disable core services"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: disable all the systemd resources"
play_on_resources "disable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 3: enable all the systemd resources"
play_on_resources "enable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,22 +0,0 @@
# Test: Stop Galera and Rabbitmq, wait 20 minutes to see if something fails

echo "$(date) * Step 1: disable core services"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: poll every minute for twenty minutes the status of the resources"
for i in $(seq 1 20)
do
 check_failed_actions
 if [ $? -ne 0 ]
 then
  echo "Errors found, test is over."
  break
 fi
 sleep 60
done

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,10 +0,0 @@
# Test: Check active processes after cluster stop

echo "$(date) * Step 1: checking actual process status"
check_resources_process_status

echo "$(date) * Step 2: stopping cluster"
sudo pcs cluster stop --all

echo "$(date) * Step 3: checking actual process status"
check_resources_process_status