Retire TripleO: remove repo content
The TripleO project is retiring (see https://review.opendev.org/c/openstack/governance/+/905145); this commit removes the content of this project's repository. Change-Id: I5080dd23acbf6beca58e70e6ae1f1bc3d1843161
parent 74eec6791c
commit a1119eb2d8
.gitignore (vendored, 62 lines removed)
@@ -1,62 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
doc/build/

# PyBuilder
target/

# virtualenv
.venv/

# Files created by releasenotes build
releasenotes/build
LICENSE (202 lines removed)
@@ -1,202 +0,0 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright {yyyy} {name of copyright owner}

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
README.md (144 lines removed)
@@ -1,144 +0,0 @@
Utility roles and docs for TripleO
==================================

These Ansible roles are a set of useful tools to be used on top of TripleO
deployments. They can also be used together with
[tripleo-quickstart](https://github.com/openstack/tripleo-quickstart) (and
[tripleo-quickstart-extras](https://github.com/openstack/tripleo-quickstart-extras)).

The documentation of each role is located in the individual role folders.
General usage information about *tripleo-quickstart* can be found in the
[project documentation](https://docs.openstack.org/tripleo-quickstart/latest/).

Using the playbook on an existing TripleO environment
-----------------------------------------------------

The playbooks can be launched directly from the **undercloud** machine of the
**TripleO** deployment. The described steps are expected to be run inside the
*/home/stack* directory.

First of all, a clone of the *tripleo-ha-utils* repository must be created:

    git clone https://github.com/openstack/tripleo-ha-utils

then three environment variables need to be exported, pointing to three files:

    export ANSIBLE_CONFIG="/home/stack/ansible.cfg"
    export ANSIBLE_INVENTORY="/home/stack/hosts"
    export ANSIBLE_SSH_ARGS="-F /home/stack/ssh.config.ansible"

These files are:

**ansible.cfg** which must contain at least these lines:

    [defaults]
    roles_path = /home/stack/tripleo-ha-utils/roles

**hosts** which must be configured depending on the deployed environment,
reflecting these sections:

    undercloud ansible_host=undercloud ansible_user=stack ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-compute-1 ansible_host=overcloud-compute-1 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-compute-0 ansible_host=overcloud-compute-0 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-controller-2 ansible_host=overcloud-controller-2 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-controller-1 ansible_host=overcloud-controller-1 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
    overcloud-controller-0 ansible_host=overcloud-controller-0 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa

    [compute]
    overcloud-compute-1
    overcloud-compute-0

    [undercloud]
    undercloud

    [overcloud]
    overcloud-compute-1
    overcloud-compute-0
    overcloud-controller-2
    overcloud-controller-1
    overcloud-controller-0

    [controller]
    overcloud-controller-2
    overcloud-controller-1
    overcloud-controller-0

**ssh.config.ansible** which can be generated by these code lines:

    cat /home/stack/.ssh/id_rsa.pub >> /home/stack/.ssh/authorized_keys
    echo -e "Host undercloud\n Hostname 127.0.0.1\n IdentityFile /home/stack/.ssh/id_rsa\n User stack\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n" > ssh.config.ansible
    . /home/stack/stackrc
    openstack server list -c Name -c Networks | awk '/ctlplane/ {print $2, $4}' | sed s/ctlplane=//g | while read node; do node_name=$(echo $node | cut -f 1 -d " "); node_ip=$(echo $node | cut -f 2 -d " "); echo -e "Host $node_name\n Hostname $node_ip\n IdentityFile /home/stack/.ssh/id_rsa\n User heat-admin\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n"; done >> ssh.config.ansible
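The generation one-liner above is dense; the following is an equivalent sketch of the same loop, split over multiple lines purely for readability (same commands and output, nothing new assumed):

```bash
. /home/stack/stackrc
# One "Host" block per overcloud node, appended to ssh.config.ansible.
openstack server list -c Name -c Networks | \
    awk '/ctlplane/ {print $2, $4}' | sed 's/ctlplane=//g' | \
while read node_name node_ip; do
    cat >> ssh.config.ansible <<EOF
Host $node_name
 Hostname $node_ip
 IdentityFile /home/stack/.ssh/id_rsa
 User heat-admin
 StrictHostKeyChecking no
 UserKnownHostsFile=/dev/null

EOF
done
```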
The *ssh.config.ansible* file can *optionally* contain specific per-host connection options, like these:

    ...
    ...
    Host overcloud-controller-0
      ProxyCommand ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=60 -F /home/stack/ssh.config.ansible undercloud -W 192.168.24.16:22
      IdentityFile /home/stack/.ssh/id_rsa
      User heat-admin
      StrictHostKeyChecking no
      UserKnownHostsFile=/dev/null
    ...
    ...

In this example, to connect to overcloud-controller-0, Ansible will use
*undercloud* as a *ProxyHost*.

With this setup in place it is then possible to launch the playbook:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release=newton

Using the playbooks on a tripleo-quickstart provided environment
-----------------------------------------------------------------

The *tripleo-ha-utils* project can be set as a *tripleo-quickstart*
extra requirement, so all the code will be automatically downloaded and
available.
Inside the requirements.txt file you will need a line pointing to this repo:

    echo "https://github.com/openstack/tripleo-ha-utils/#egg=tripleo-ha-utils" >> tripleo-quickstart/quickstart-extras-requirements.txt

Supposing the environment was successfully provided with a previous quickstart
execution, to use one of the utils playbooks a command line like this one can
be used:

    ./quickstart.sh \
      --retain-inventory \
      --teardown none \
      --playbook overcloud-instance-ha.yml \
      --working-dir /path/to/workdir \
      --config /path/to/config.yml \
      --release <RELEASE> \
      --tags all \
      <VIRTHOST HOSTNAME or IP>

Basically this command:

- **Keeps** existing data on the repo (by keeping the inventory and all the
  virtual machines)
- Uses the *overcloud-instance-ha.yml* playbook
- Uses the same workdir where quickstart was first deployed
- Selects the specific config file (optional)
- Specifies the release (mitaka, newton, or "master" for ocata)
- Performs all the tasks in the playbook overcloud-instance-ha.yml

**Important note**

You might need to export *ANSIBLE_SSH_ARGS* with the path of the
*ssh.config.ansible* file to make the command work, like this:

    export ANSIBLE_SSH_ARGS="-F /path/to/quickstart/workdir/ssh.config.ansible"

License
-------

Licensed under the Apache License, Version 2.0. You may obtain a copy of the
License at <http://www.apache.org/licenses/LICENSE-2.0>

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
README.rst (new file, 10 lines)
@@ -0,0 +1,10 @@
This project is no longer maintained.

The contents of this repository are still available in the Git
source code management system. To see the contents of this
repository before it reached its end of life, please check out the
previous commit with "git checkout HEAD^1".

For any further questions, please email
openstack-discuss@lists.openstack.org or join #openstack-dev on
OFTC.
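The notice above points at the Git history for anything that used to live here; a minimal sketch of retrieving the pre-retirement tree (repository URL taken from the README that this change removes):

```bash
# Clone the retired repository and look at the last two commits:
# the retirement commit and the last commit that still carried content.
git clone https://github.com/openstack/tripleo-ha-utils
cd tripleo-ha-utils
git log --oneline -2

# Check out the tree as it was before the repository reached end of life.
git checkout HEAD^1
```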
@@ -1,174 +0,0 @@
Multi Virtual Undercloud
========================

This document describes a way to deploy multiple virtual underclouds on the
same host. This is mainly for environments in which you want to manage multiple
baremetal overclouds without having one baremetal machine dedicated to each one
you deploy.

Requirements
------------

**Physical switches**

The switch(es) must support VLAN tagging and all the ports must be configured
as trunks, so that the dedicated network interface on the physical host (in the
examples the secondary interface, eth1) is able to offer PXE and DHCP to all the
overcloud machines via the undercloud virtual machine's bridged interface.

**Host hardware**

The main requirement to make this kind of setup work is to have a host
powerful enough to run virtual machines with at least 16GB of RAM and 8 CPUs.
The more power you have, the more undercloud machines you can spawn without
impacting performance.

**Host network topology**

The host is reachable via ssh from the machine launching quickstart and is
configured with two main network interfaces:

- **eth0**: bridged on **br0**, pointing to the LAN (underclouds will own an IP
  to be reachable via ssh);
- **eth1**: connected to the dedicated switch that supports all the VLANs that
  will be used in the deployment;

Over eth1, for each undercloud virtual machine two VLAN interfaces are created,
with associated bridges:

- **Control plane network bridge** (i.e. br2100) built over a VLAN interface
  (i.e. eth1.2100) that will be eth1 on the undercloud virtual machine, used by
  TripleO as br-ctlplane;
- **External network bridge** (i.e. br2105) built over a VLAN interface (i.e.
  eth1.2105) that will be eth2 on the undercloud virtual machine, used by
  TripleO as the external network device;

![network-topology](./multi-virtual-undercloud_network-topology.png "Multi Virtual Undercloud - Network Topology")
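How the per-undercloud VLAN interfaces and bridges above get created is left to the operator. A minimal, non-persistent sketch with iproute2, assuming VLAN ID 2100 for the control plane and the br2100/eth1.2100 naming convention used above:

```bash
# VLAN interface on top of the trunked physical NIC.
ip link add link eth1 name eth1.2100 type vlan id 2100

# Bridge that the undercloud VM's ctlplane NIC will attach to.
ip link add name br2100 type bridge
ip link set eth1.2100 master br2100

ip link set eth1.2100 up
ip link set br2100 up
```

The external network bridge (br2105 over eth1.2105 above) follows the same pattern; persistent configuration (ifcfg files or equivalent) is not shown.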
Quickstart configuration
------------------------

The virtual undercloud machine is treated as a baremetal one, and the Quickstart
command relies on the baremetal undercloud role and its playbook.
This means that any playbook similar to [baremetal-undercloud.yml](https://github.com/openstack/tripleo-quickstart-extras/blob/master/playbooks/baremetal-undercloud.yml "Baremetal undercloud playbook") should be okay.

The configuration file has two specific sections that need attention:

- Additional interface for the external network to route overcloud traffic:

  ```yaml
  undercloud_networks:
    external:
      address: 172.20.0.254
      netmask: 255.255.255.0
      device_type: ethernet
      device_name: eth2
  ```

  **NOTE:** in this configuration eth2 also acts as the default router for
  the external network.

- Baremetal provision script, which will be a helper for the
  [multi-virtual-undercloud.sh](./multi-virtual-undercloud.sh) script on the <VIRTHOST>:

  ```yaml
  baremetal_provisioning_script: "/path/to/multi-virtual-undercloud-provisioner.sh <VIRTHOST> <DISTRO> <UNDERCLOUD-NAME> <UNDERCLOUD IP> <UNDERCLOUD NETMASK> <UNDERCLOUD GATEWAY> <CTLPLANE VLAN> <EXTERNAL NETWORK VLAN>"
  ```

The supported parameters, with the exception of VIRTHOST, are the same ones
that are passed to the script that lives (and runs) on the VIRTHOST,
*multi-virtual-undercloud.sh*.
This helper script launches the remote command on the VIRTHOST host and ensures
that the machine is reachable via ssh before proceeding.

The multi virtual undercloud script
-----------------------------------

The [multi-virtual-undercloud.sh](./multi-virtual-undercloud.sh) script is
placed on the VIRTHOST and needs these parameters:

1. **DISTRO**: this must be the name (without extension) of one of the images
   present inside the */images* dir on the VIRTHOST;
2. **VMNAME**: the name of the undercloud virtual machine (the name that will
   be used by libvirt);
3. **VMETH0IP**: IP of the virtual undercloud primary interface to which
   quickstart (and users) will connect via ssh;
4. **VMETH0NM**: Netmask of the virtual undercloud primary interface;
5. **VMETH0GW**: Gateway of the virtual undercloud primary interface;
6. **VMSSHKEY**: Public key to be enabled on the virtual undercloud;
7. **UCVLAN**: VLAN of the overcloud's ctlplane network;
8. **UCEXTVLAN**: VLAN of the overcloud's external network;

The script's actions are basically:

1. Destroy and undefine any existing machine named as the one we want to create;
2. Prepare the image on which the virtual undercloud will be created by copying
   the available distro image and preparing it to be ready for the TripleO
   installation, fixing size, network interfaces, packages and ssh keys;
3. Create and launch the virtual undercloud machine;

**Note**: on the VIRTHOST there must exist an */images* directory containing
images suitable for the deploy, with a directory structure like this:

```console
[root@VIRTHOST ~]# ls -l /images/
total 1898320
lrwxrwxrwx. 1 root root         34 14 feb 09.20 centos-7.qcow2 -> CentOS-7-x86_64-GenericCloud.qcow2
-rw-r--r--. 1 root root 1361182720 15 feb 10.57 CentOS-7-x86_64-GenericCloud.qcow2
lrwxrwxrwx. 1 root root         36 14 feb 09.20 rhel-7.qcow2 -> rhel-guest-image-7.3-33.x86_64.qcow2
-rw-r--r--. 1 root root  582695936 19 ott 18.44 rhel-guest-image-7.3-33.x86_64.qcow2
```

This helps with updating the images, since one can leave config files pointing
to *centos-7* and, in case of updates, make the symlink point to a newer image.
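Updating an image then only requires repointing the symlink; a sketch (the newer image file name is illustrative):

```bash
cd /images
# After placing the newer cloud image alongside the old one:
ln -sf CentOS-7-x86_64-GenericCloud-1905.qcow2 centos-7.qcow2
```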
Quickstart command
------------------

A typical invocation of the TripleO Quickstart command is something similar to
this:

```console
/path/to/tripleo-quickstart/quickstart.sh \
  --bootstrap \
  --ansible-debug \
  --no-clone \
  --playbook baremetal-undercloud.yml \
  --working-dir /path/to/workdir \
  --config /path/to/config.yml \
  --release $RELEASE \
  --tags "all" \
  $VIRTHOST
```

So nothing is different from a normal quickstart deploy command line; the
difference here is made by the config.yml described above, with its provision
script.

Conclusions
-----------

This approach can be considered useful for testing multiple environments with
TripleO for three reasons:

* It is *fast*: it takes the same time to install the undercloud but less to
  provide it, since you don't have to wait for the physical undercloud
  provisioning;
* It is *isolated*: using VLANs to separate the traffic keeps each environment
  completely isolated from the others;
* It is *reliable*: you can have the undercloud on shared storage and think
  about putting the undercloud vm in HA, live migrating it with libvirt,
  pacemaker, whatever...

There are no macroscopic cons, except for the initial configuration on the
VIRTHOST, which is made only one time, at the beginning.

License
-------

GPL

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
@@ -1,46 +0,0 @@
#!/bin/bash

set -eux

VIRTHOST=$1
DISTRO=$2
VMNAME=$3
VMETH0IP=$4
VMETH0NM=$5
VMETH0GW=$6
VMSSHKEY=$7
UCVLAN=$8
UCEXTVLAN=$9

function wait_machine_status {
  UNDERCLOUD=$1
  STATUS=$2
  while true
   do
    nc $UNDERCLOUD 22 < /dev/null &> /dev/null
    NCSTATUS=$?
    if [ "$STATUS" == "up" ]
     then
      [ $NCSTATUS -eq 0 ] && break || (sleep 5; echo -n ".")
     else
      [ $NCSTATUS -ne 0 ] && break || (sleep 5; echo -n ".")
    fi
   done
}

# Copying public key on VIRTHOST
echo -n "$(date) - Copying $VMSSHKEY on $VIRTHOST: "
scp $VMSSHKEY root@$VIRTHOST:$VMNAME\_key.pub
echo "Done."

# Providing the machine
echo -n "$(date) - Starting provision of $VMNAME ($VMETH0IP) on $VIRTHOST: "
ssh root@$VIRTHOST /root/multi-virtual-undercloud.sh $DISTRO $VMNAME $VMETH0IP $VMETH0NM $VMETH0GW $VMNAME\_key.pub $UCVLAN $UCEXTVLAN
echo "Done."

set +e

# Wait for machine to come up
echo -n "$(date) - Waiting for $VMNAME to come up again after update: "
wait_machine_status $VMETH0IP "up"
echo "Done."
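An illustrative invocation of the helper above (argument order follows the variable assignments at the top of the script; every value here is a made-up example):

```bash
# VIRTHOST DISTRO VMNAME VMETH0IP VMETH0NM VMETH0GW VMSSHKEY UCVLAN UCEXTVLAN
./multi-virtual-undercloud-provisioner.sh \
    virthost.example.com \
    centos-7 \
    undercloud-1 \
    10.0.0.51 \
    255.255.255.0 \
    10.0.0.254 \
    /home/stack/.ssh/id_rsa.pub \
    2100 \
    2105
```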
@@ -1,115 +0,0 @@
#!/bin/bash

set -eux

DISTRO=$1
CLONEFROM=/images/$DISTRO\.qcow2
VMNAME=$2
VMIMG=/vms/$VMNAME\.qcow2
VMIMGCOPY=/vms/ORIG-$VMNAME\.qcow2
VMETH0IP=$3
VMETH0NM=$4
VMETH0GW=$5
VMSSHKEY=$6
VMDISKADD=50G
UCVLAN=$7
UCEXTVLAN=$8
WORKDIR=/tmp/virt-undercloud-$(date +%s)

mkdir -p $WORKDIR
pushd $WORKDIR

# Destroy the machine if it is running
ISRUNNING=$(virsh list | grep $VMNAME || true)
[ "x$ISRUNNING" != "x" ] && virsh destroy $VMNAME

# Undefine the vm if it is defined
ISDEFINED=$(virsh list --all | grep $VMNAME || true)
[ "x$ISDEFINED" != "x" ] && virsh undefine $VMNAME

# Copy qcow2 base image
cp -v $CLONEFROM $VMIMG

echo "$(date) - Adding $VMDISKADD to $VMIMG: "
qemu-img resize $VMIMG +$VMDISKADD

echo "$(date) - Resizing filesystem of $VMIMG: "
cp -v $VMIMG $VMIMGCOPY
virt-resize --expand /dev/sda1 $VMIMGCOPY $VMIMG
rm -fv $VMIMGCOPY

echo "$(date) - Checking status of $VMIMG: "
qemu-img info $VMIMG
virt-filesystems --long -h --all -a $VMIMG

cat > ifcfg-eth0 <<EOF
NAME=eth0
DEVICE=eth0
ONBOOT=yes
BOOTPROTO=static
IPADDR=$VMETH0IP
NETMASK=$VMETH0NM
GATEWAY=$VMETH0GW
PEERDNS=yes
DNS1=8.8.8.8
TYPE=Ethernet
EOF

cat > ifcfg-eth1 <<EOF
NAME=eth1
DEVICE=eth1
ONBOOT=yes
BOOTPROTO=none
TYPE=Ethernet
EOF

cat $VMSSHKEY >> ./authorized_keys

case "$DISTRO" in
  "centos-7") virt-customize -a $VMIMG \
                --root-password password:redhat \
                --install openssh-server \
                --run-command "xfs_growfs /" \
                --run-command "echo 'GRUB_CMDLINE_LINUX=\"console=tty0 crashkernel=auto no_timer_check net.ifnames=0 console=ttyS0,115200n8\"' >> /etc/default/grub" \
                --run-command "grubby --update-kernel=ALL --args=net.ifnames=0" \
                --run-command "systemctl enable sshd" \
                --mkdir /root/.ssh \
                --copy-in ifcfg-eth0:/etc/sysconfig/network-scripts/ \
                --copy-in ifcfg-eth1:/etc/sysconfig/network-scripts/ \
                --copy-in ./authorized_keys:/root/.ssh/ \
                --selinux-relabel
              ;;
  "rhel-7") virt-customize -a $VMIMG \
              --root-password password:redhat \
              --run-command "curl -o rhos-release-latest.noarch.rpm http://rhos-release.virt.bos.redhat.com/repos/rhos-release/rhos-release-latest.noarch.rpm" \
              --run-command "rpm -Uvh rhos-release-latest.noarch.rpm" \
              --run-command "rhos-release rhel-7.3" \
              --install openssh-server \
              --run-command "systemctl enable sshd" \
              --run-command "rpm -e rhos-release" \
              --run-command "sed -i -e '/\[rhelosp-rhel-7.3-server-opt\]/,/^\[/s/enabled=0/enabled=1/' /etc/yum.repos.d/rhos-release-rhel-7.3.repo" \
              --mkdir /root/.ssh \
              --copy-in ifcfg-eth0:/etc/sysconfig/network-scripts/ \
              --copy-in ifcfg-eth1:/etc/sysconfig/network-scripts/ \
              --copy-in ./authorized_keys:/root/.ssh/ \
              --selinux-relabel
            ;;
esac

# Deploy the vm
virt-install \
  --import \
  --name $VMNAME \
  --ram 16192 \
  --disk path=$VMIMG \
  --vcpus 8 \
  --os-type linux \
  --os-variant generic \
  --network bridge=br0 \
  --network bridge=br$UCVLAN \
  --network bridge=br$UCEXTVLAN \
  --graphics none \
  --noautoconsole

rm -rf $WORKDIR
popd
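Once virt-install returns, the new undercloud VM can be reached on its serial console or over ssh with the key injected above (VM name and IP here are whatever was passed as VMNAME and VMETH0IP; the values shown are examples):

```bash
# Serial console via libvirt:
virsh console undercloud-1

# Or ssh as root using the key copied into /root/.ssh/authorized_keys:
ssh -i /home/stack/.ssh/id_rsa root@10.0.0.51
```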
Binary image file not shown (224 KiB).
@@ -1,52 +0,0 @@
InfraRed Instance-HA Plugin Playbook
====================================

This plugin deploys Instance-HA on OpenStack using InfraRed.

The tasks in infrared_instance-ha_plugin_main.yml, along with the
plugin.spec at tripleo-ha-utils/plugin.spec, provide support
for running this repo's roles and playbooks as an InfraRed plugin.

[InfraRed](http://infrared.readthedocs.io/en/stable/) is a plugin based system
that aims to provide an easy-to-use CLI for Ansible based projects and
OpenStack deployment.

The plugin provides InfraRed plugin integration for
two OpenStack high-availability features:

[instance-ha](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/instance-ha)

[stonith-config](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/stonith-config)

Usage:
======

**Installation and deployment:**

[Setup InfraRed](http://infrared.readthedocs.io/en/stable/bootstrap.html)

    ir plugin add https://github.com/openstack/tripleo-ha-utils

    export ANSIBLE_ROLES_PATH='plugins/tripleo-ha-utils/roles'

    ir instance-ha-deploy -v --release 12 --stonith_devices all

*Notice: a failure and a warning will be issued if the plugin's specific ANSIBLE_ROLES_PATH is not defined.*

**Plugin help:**

    ir instance-ha-deploy -h

**Plugin uninstall:**

    ir plugin remove instance-ha-deploy

Author Information
------------------

Pini Komarov pkomarov@redhat.com
@@ -1,128 +0,0 @@
---
- name: check ANSIBLE_ROLES_PATH variable
  hosts: localhost
  tasks:

    - shell: |
        echo $ANSIBLE_ROLES_PATH
      name: check $ANSIBLE_ROLES_PATH variable
      register: ansible_roles_path_out

    - name: check $ANSIBLE_ROLES_PATH is set
      fail:
        msg: Please export ANSIBLE_ROLES_PATH='plugins/tripleo-ha-utils/roles' ; Before running this playbook with infrared.
      when: '"tripleo-ha-utils" not in ansible_roles_path_out.stdout'


# manual override because of https://github.com/ansible/ansible/issues/26336
#- name: Configure Instance HA
#  hosts: undercloud
#  gather_facts: yes
#
#  tasks:
#
#  - include_role:
#      name: instance-ha

# This creates the clouds.yaml file from undercloud/overcloud credentials
# for use in pythonsdk api for osp connection and querying:

- name: create clouds.yaml for pythonsdk api
  hosts: undercloud
  tasks:

    - name: get undercloud variables
      shell: |
        for key in $( set | awk '{FS="="} /^OS_/ {print $1}' ); do unset $key ; done
        source /home/stack/stackrc
        echo -n "undercloud: {'auth': { 'auth_url': '$OS_AUTH_URL', 'username': '$OS_USERNAME', 'password': '$OS_PASSWORD', 'project_name': '${OS_PROJECT_NAME:-$OS_TENANT_NAME}', 'project_domain_name': '$OS_PROJECT_DOMAIN_NAME', 'user_domain_name': '$OS_USER_DOMAIN_NAME'}}"
      register: cloud_details

    - name: create clouds.yaml if doesn't exist
      blockinfile:
        content: 'clouds:'
        dest: /home/stack/clouds.yaml
        marker: "#{mark} HEADER"
        create: yes

    - name: insert undercloud parameters
      blockinfile:
        dest: /home/stack/clouds.yaml
        block: |5
            {{ cloud_details.stdout|from_yaml|to_nice_yaml(indent=4) }}
        insertbefore: "#END undercloud SECTION"
        marker: "#{mark} undercloud PARAMETERS"

    - name: get overcloud variables
      shell: |
        for key in $( set | awk '{FS="="} /^OS_/ {print $1}' ); do unset $key ; done
        source /home/stack/overcloudrc
        echo -n "overcloud: {'auth': { 'auth_url': '$OS_AUTH_URL', 'username': '$OS_USERNAME', 'password': '$OS_PASSWORD', 'project_name': '${OS_PROJECT_NAME:-$OS_TENANT_NAME}', 'project_domain_name': '$OS_PROJECT_DOMAIN_NAME', 'user_domain_name': '$OS_USER_DOMAIN_NAME' }}"
      register: cloud_details

    - name: create clouds.yaml if doesn't exist
      blockinfile:
        content: 'clouds:'
        dest: /home/stack/clouds.yaml
        marker: "#{mark} HEADER"
        create: yes

    - name: insert overcloud parameters
      blockinfile:
        dest: /home/stack/clouds.yaml
        block: |5
            {{ cloud_details.stdout|from_yaml|to_nice_yaml(indent=4) }}
        insertbefore: "#END overcloud SECTION"
        marker: "#{mark} overcloud PARAMETERS"


# This executes all from the undercloud itself:

- name: Configure Instance HA
  hosts: undercloud
  gather_facts: no
  tasks:

    - name: create ansible hosts file
      template:
        src: templates/ansible_hosts.yml.j2
        dest: /home/stack/hosts
        owner: stack
        group: stack
        mode: 0644

    - name: create ssh.config.ansible file
      shell: |
        source /home/stack/stackrc
        echo -e "Host undercloud\n Hostname 127.0.0.1\n IdentityFile /home/stack/.ssh/id_rsa\n User stack\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n" > ssh.config.ansible
        openstack server list -c Name -c Networks | awk '/ctlplane/ {print $2, $4}' | sed s/ctlplane=//g | while read node; do node_name=$(echo $node | cut -f 1 -d " "); node_ip=$(echo $node | cut -f 2 -d " "); echo -e "Host $node_name\n Hostname $node_ip\n IdentityFile /home/stack/.ssh/id_rsa\n User heat-admin\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n"; done >> ssh.config.ansible

    - name: get tripleo-ha-utils repo
      git:
        repo: 'https://github.com/openstack/tripleo-ha-utils.git'
        dest: /home/stack/tripleo-ha-utils

    - name: create ansible env file
      shell: |
        cat >/home/stack/ansible_ha.env<<EOF
        export ANSIBLE_INVENTORY="/home/stack/hosts"
        export ANSIBLE_SSH_ARGS="-F /home/stack/ssh.config.ansible"
        export ANSIBLE_CONFIG="/home/stack/ansible.cfg"

    - name: create ansible.cfg file
      shell: |
        printf "[defaults]\nroles_path = /home/stack/tripleo-ha-utils/roles" > /home/stack/ansible.cfg

    - name: run instance-ha deploy script
      shell: |
        source /home/stack/ansible_ha.env
        ansible-playbook -v /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release={{release}} -e stonith_devices={{stonith_devices}} -e instance_ha_shared_storage={{instance_ha_shared_storage}} -e instance_ha_action={{instance_ha_action}}
      register: instance_ha_deploy_outcome
      vars:
        instance_ha_action: '{{ install.instance_ha_action }}'
        release: '{{ install.release }}'
        stonith_devices: '{{ install.stonith_devices }}'
        instance_ha_shared_storage: '{{ install.instance_ha_shared_storage }}'
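Once the playbook above has written /home/stack/clouds.yaml, the two named cloud entries can be used for a quick sanity check with python-openstackclient's --os-cloud option (run from /home/stack so the file is picked up; this check is not part of the original playbook):

```bash
cd /home/stack
# Query the undercloud and the overcloud through the generated clouds.yaml.
openstack --os-cloud undercloud server list
openstack --os-cloud overcloud network list
```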
@@ -1,27 +0,0 @@
undercloud ansible_host=undercloud ansible_user=stack ansible_private_key_file=/home/stack/.ssh/id_rsa

{% for overcloud_host in groups['overcloud_nodes'] %}
{{overcloud_host}} ansible_host={{overcloud_host}} ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
{% endfor %}

{% for overcloud_host in groups['overcloud_nodes'] %}
{{overcloud_host}} ansible_host={{overcloud_host}} ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
{% endfor %}

[compute]
{% for overcloud_host in groups['compute'] %}
{{overcloud_host}}
{% endfor %}

[undercloud]
undercloud

[overcloud]
{% for overcloud_host in groups['overcloud_nodes'] %}
{{overcloud_host}}
{% endfor %}

[controller]
{% for overcloud_host in groups['controller'] %}
{{overcloud_host}}
{% endfor %}
@@ -1,148 +0,0 @@
---
- name: Baremetal undercloud install
  hosts: localhost
  roles:
    - baremetal-undercloud
  tags:
    - baremetal-undercloud

- name: Add the undercloud node to the generated inventory
  hosts: localhost
  gather_facts: yes
  roles:
    - tripleo-inventory
  tags:
    - undercloud-inventory

- name: Setup repositories
  hosts: undercloud
  gather_facts: yes
  roles:
    - repo-setup
  tags:
    - undercloud-repo-setup

- name: Install packages
  hosts: undercloud
  gather_facts: no
  roles:
    - baremetal-undercloud/packages
  tags:
    - undercloud-pkgs-install

- name: Deploy the undercloud
  hosts: undercloud
  gather_facts: no
  roles:
    - undercloud-deploy
  tags:
    - undercloud-deploy

- name: Prepare baremetal for the overcloud deployment
  hosts: undercloud
  roles:
    - baremetal-prep-overcloud
  tags:
    - baremetal-prep-overcloud

- name: Prepare configuration files for the overcloud deployment
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-config
  tags:
    - overcloud-prep-config

- name: Prepare overcloud containers
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-containers
  tags:
    - overcloud-prep-containers

- name: Fetch the overcloud images
  hosts: undercloud
  gather_facts: no
  become: true
  roles:
    - fetch-images
  tags:
    - overcloud-fetch-images

- name: Prepare the overcloud images for deployment
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-images
  tags:
    - overcloud-prep-images

- name: Prepare overcloud flavors
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-flavors
  tags:
    - overcloud-prep-flavors

- name: Prepare the undercloud networks for the overcloud deployment
  hosts: undercloud
  gather_facts: no
  roles:
    - overcloud-prep-network
  tags:
    - overcloud-prep-network

- name: Prepare SSL for the overcloud
  hosts: undercloud
  gather_facts: yes
  roles:
    - overcloud-ssl
  tags:
    - overcloud-ssl

- name: Deploy the overcloud
  hosts: undercloud
  gather_facts: yes
  roles:
    - overcloud-deploy
  tags:
    - overcloud-deploy

- name: Add the overcloud nodes to the generated inventory
  hosts: undercloud
  gather_facts: yes
  vars:
    inventory: all
  roles:
    - tripleo-inventory
  tags:
    - overcloud-inventory

- name: Check the result of the deployment
  hosts: localhost
  tasks:
    - name: ensure the deployment result has been read into memory
      include_vars: "{{ local_working_dir }}/overcloud_deployment_result.json"

    # overcloud_deploy_result = ["failed", "passed"]
    - name: did the deployment pass or fail?
      debug: var=overcloud_deploy_result
      failed_when: overcloud_deploy_result == "failed"
  tags:
    - overcloud-deploy-check

- name: Gather undercloud and overcloud facts
  hosts: undercloud overcloud
  gather_facts: yes
  tags:
    - overcloud-validate-ha

- name: Validate the overcloud using HA tests
  hosts: undercloud
  gather_facts: no
  roles:
    - validate-ha
  tags:
    - overcloud-validate-ha
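Every play above carries a tag, so individual stages can be re-run in isolation with ansible-playbook's --tags option; a sketch (the playbook path placeholder must be replaced with this file's location in a checkout of the repository):

```bash
# Re-run only the overcloud deployment and the HA validation stages.
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/<this-playbook>.yml \
    --tags overcloud-deploy,overcloud-validate-ha
```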
@@ -1,10 +0,0 @@
---
- name: Gather undercloud and overcloud facts
  hosts: undercloud overcloud
  gather_facts: yes

- name: Configure Instance HA
  hosts: undercloud
  gather_facts: no
  roles:
    - instance-ha
@@ -1,7 +0,0 @@
---

- name: Configure STONITH for all the hosts on the overcloud
  hosts: undercloud
  gather_facts: yes
  roles:
    - stonith-config
@@ -1,14 +0,0 @@
---
- name: Gather undercloud and overcloud facts
  hosts: undercloud overcloud
  gather_facts: yes
  tags:
    - overcloud-validate-ha

- name: Validate overcloud HA status
  hosts: undercloud
  gather_facts: yes
  tags:
    - overcloud-validate-ha
  roles:
    - validate-ha
plugin.spec (37 lines removed)
@@ -1,37 +0,0 @@
---
config:
  entry_point: ./infrared/infrared_instance-ha_plugin_main.yml
  plugin_type: install
subparsers:
  instance-ha-deploy:
    description: Collection of instance-ha configuration tasks
    include_groups: ["Ansible options", "Inventory", "Common options", "Answers file"]
    groups:

      - title: Instance HA
        options:
          instance_ha_action:
            type: Value
            default: install
            help: |
              Can be 'install' or 'uninstall'

          release:
            type: Value
            help: |
              A rhos release - version_number.
              Example: "rhos-10".
            required: yes
          stonith_devices:
            type: Value
            default: controllers
            help: |
              Can be all, controllers or computes

          instance_ha_shared_storage:
            type: Bool
            help: |
              Do we have a shared storage or not?
            default: False
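Combining the options defined in this spec, a full InfraRed invocation could look like the sketch below; the base command and the --release/--stonith_devices usage come from the plugin README earlier in this change, and the remaining values mirror the defaults declared above:

```bash
export ANSIBLE_ROLES_PATH='plugins/tripleo-ha-utils/roles'

# Deploy instance HA, configuring STONITH only for controllers (the default),
# with no shared storage.
ir instance-ha-deploy -v \
    --release 12 \
    --instance_ha_action install \
    --stonith_devices controllers \
    --instance_ha_shared_storage False
```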
@@ -1,53 +0,0 @@
Rally tests
===========

This directory contains all the files available to use Rally for testing the
behavior of the TripleO environment.
For example you can test if instance HA is behaving correctly inside the
overcloud environment in which it was configured.

Requirements
------------

A working and accessible TripleO environment, as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md),
so a *hosts* file containing the whole environment inventory and, if needed, a
*ssh.config.ansible* with all the information to access nodes.

How to use Rally to test Instance HA
------------------------------------

If you want to launch a Rally test session to check how Instance HA is behaving
in the overcloud, you can rely on a command like this one:

    ansible-playbook -i hosts \
      -e public_physical_network="public" \
      -e floating_ip_cidr="192.168.99.0/24" \
      -e public_net_pool_start="192.168.99.211" \
      -e public_net_pool_end="192.168.99.216" \
      -e public_net_gateway="192.168.99.254" \
      tripleo-ha-utils/rally/instance-ha.yml

This command can be launched from the *undercloud* machine or from a jump host
(which must have all the required files locally).
The requested parameters refer to the network settings in which the instances
will be spawned.

This will execute the tests contained in the template yaml:

* *InstanceHA.recover_instance_fip_and_volume*: spawn an instance, stop the
  compute it's running on, check it migrates, check node recovers;
* *InstanceHA.recover_stopped_instance_fip*: spawn an instance, put it in
  stopped status, stop the compute it's running on, check it migrates, check
  node recovers;
* *InstanceHA.recover_instance_two_cycles*: do as in the first step, but two
  times;

License
-------

GPL

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
@@ -1,99 +0,0 @@
---
- hosts: undercloud
  gather_facts: no
  become: yes
  become_method: sudo
  tasks:
    - name: Install Rally dependencies
      shell: |
        # Python pip
        wget https://bootstrap.pypa.io/get-pip.py -O get-pip.py
        python get-pip.py
        # Dependencies
        yum install -y gmp-devel libffi-devel libxml2-devel libxslt-devel openssl-devel postgresql-devel gcc python-devel

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Install Rally
      shell: |
        # Install Rally from upstream
        wget -q -O- https://raw.githubusercontent.com/openstack/rally/master/install_rally.sh | bash |& tee rally-install.log
        mkdir -p .rally/plugins
    - name: Check Rally installation
      shell: |
        source /home/stack/rally/bin/activate
        rally --version

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Copy instance-ha Rally plugin to remote rally directory
      copy:
        src: plugins/instanceha.py
        dest: .rally/plugins

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Install Rally environment and create deployment
      shell: |
        source /home/stack/overcloudrc
        source /home/stack/rally/bin/activate
        export OS_INSECURE=True
        rally deployment create --fromenv --name overcloud |& tee rally-instance-ha-deployment-create.log
        rally deployment use overcloud

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Prepare overcloud env
      shell: |
        source /home/stack/overcloudrc

        projectid=$(openstack project list | awk '/admin/ {print $2}')
        wget -O /tmp/cirros-0.3.4-x86_64-disk.img http://download.cirros-cloud.net/0.3.4/cirros-0.3.4-x86_64-disk.img
        glance --os-project-id=$projectid image-create --name cirros --container-format bare --disk-format raw --file /tmp/cirros-0.3.4-x86_64-disk.img --visibility public

        nova flavor-create --ephemeral 0 --is-public True m1.tiny overcloud-instance-test-small-flavor 2048 20 1

        neutron net-create {{ public_physical_network }}-network --router:external=True --provider:physical_network {{ public_physical_network }} --provider:network_type flat
        neutron subnet-create --name {{ public_physical_network }}-subnet --disable-dhcp --allocation-pool start={{ public_net_pool_start }},end={{ public_net_pool_end }} --gateway {{ public_net_gateway }} {{ public_physical_network }}-network {{ floating_ip_cidr }}

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Copy Rally task file
      template:
        src: templates/instance-ha.yaml.j2
        dest: "/home/stack/instance-ha.yaml"
        mode: 0666

    - name: Start Rally task
      shell: |
        source /home/stack/rally/bin/activate
        rally task start --task /home/stack/instance-ha.yaml --deployment overcloud |& tee rally-instance-ha-run.log

    - name: Create Report JUnit
      shell: |
        source /home/stack/rally/bin/activate
        rally task report --junit --out /home/stack/nosetests.xml |& tee rally-instance-ha-report.log

    - fetch:
        src: "/home/stack/nosetests.xml"
        dest: "{{ lookup('env', 'PWD') }}/nosetests.xml"
        flat: yes

- hosts: undercloud
  gather_facts: no
  tasks:
    - name: Remove overcloud env
      shell: |
        source /home/stack/overcloudrc

        projectid=$(openstack project list | awk '/admin/ {print $2}')
        glance --os-project-id=$projectid image-delete $(glance --os-project-id=$projectid image-list | awk '/cirros/ {print $2}')

        nova flavor-delete overcloud-instance-test-small-flavor

        neutron net-delete {{ public_physical_network }}-network
@ -1,458 +0,0 @@
|
|||||||
from os import path
|
|
||||||
import socket
|
|
||||||
import time
|
|
||||||
|
|
||||||
|
|
||||||
from rally.common import logging
|
|
||||||
from rally.common import sshutils
|
|
||||||
from rally import exceptions
|
|
||||||
from rally_openstack import consts
|
|
||||||
from rally_openstack import scenario
|
|
||||||
from rally_openstack.scenarios.vm import utils as vm_utils
|
|
||||||
from rally_openstack.scenarios.cinder import utils as cinder_utils
|
|
||||||
from rally.task import atomic
|
|
||||||
from rally.task import types
|
|
||||||
from rally.task import validation
|
|
||||||
from rally.task import utils as task_utils
|
|
||||||
import six
|
|
||||||
|
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def failover(self, host, command, port=22, username="", password="",
|
|
||||||
key_filename=None, pkey=None):
|
|
||||||
"""Trigger failover at host
|
|
||||||
:param host:
|
|
||||||
:param command:
|
|
||||||
:return:
|
|
||||||
"""
|
|
||||||
if key_filename:
|
|
||||||
key_filename = path.expanduser(key_filename)
|
|
||||||
LOG.info("Host: %s. Injecting Failover %s" % (host,
|
|
||||||
command))
|
|
||||||
try:
|
|
||||||
code, out, err = _run_command(self, server_ip=host, port=port,
|
|
||||||
username=username,
|
|
||||||
password=password,
|
|
||||||
key_filename=key_filename,
|
|
||||||
pkey=pkey, command=command
|
|
||||||
)
|
|
||||||
if code and code > 0:
|
|
||||||
raise exceptions.ScriptError(
|
|
||||||
"Error running command %(command)s. "
|
|
||||||
"Error %(code)s: %(error)s" % {
|
|
||||||
"command": command, "code": code, "error": err})
|
|
||||||
except exceptions.SSHTimeout:
|
|
||||||
LOG.debug("SSH session of disruptor command timeouted, continue...")
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def _run_command(self, server_ip, port, username, password, command,
|
|
||||||
pkey=None, key_filename=None):
|
|
||||||
"""Run command via SSH on server.
|
|
||||||
Create SSH connection for server, wait for server to become available
|
|
||||||
(there is a delay between server being set to ACTIVE and sshd being
|
|
||||||
available). Then call run_command_over_ssh to actually execute the
|
|
||||||
command.
|
|
||||||
Note: Shadows vm.utils.VMScenario._run_command to support key_filename.
|
|
||||||
:param server_ip: server ip address
|
|
||||||
:param port: ssh port for SSH connection
|
|
||||||
:param username: str. ssh username for server
|
|
||||||
:param password: Password for SSH authentication
|
|
||||||
:param command: Dictionary specifying command to execute.
|
|
||||||
See `rally info find VMTasks.boot_runcommand_delete' parameter
|
|
||||||
`command' docstring for explanation.
|
|
||||||
:param key_filename: private key filename for SSH authentication
|
|
||||||
:param pkey: key for SSH authentication
|
|
||||||
:returns: tuple (exit_status, stdout, stderr)
|
|
||||||
"""
|
|
||||||
if not key_filename:
|
|
||||||
pkey = pkey or self.context["user"]["keypair"]["private"]
|
|
||||||
ssh = sshutils.SSH(username, server_ip, port=port,
|
|
||||||
pkey=pkey, password=password,
|
|
||||||
key_filename=key_filename)
|
|
||||||
self._wait_for_ssh(ssh)
|
|
||||||
return _run_command_over_ssh(self, ssh, command)
|
|
||||||
|
|
||||||
|
|
||||||
@atomic.action_timer("vm.run_command_over_ssh")
|
|
||||||
def _run_command_over_ssh(self, ssh, command):
|
|
||||||
"""Run command inside an instance.
|
|
||||||
This is a separate function so that only script execution is timed.
|
|
||||||
:param ssh: A SSHClient instance.
|
|
||||||
:param command: Dictionary specifying command to execute.
|
|
||||||
See `rally info find VMTasks.boot_runcommand_delete' parameter
|
|
||||||
`command' docstring for explanation.
|
|
||||||
:returns: tuple (exit_status, stdout, stderr)
|
|
||||||
"""
|
|
||||||
cmd, stdin = [], None
|
|
||||||
|
|
||||||
interpreter = command.get("interpreter") or []
|
|
||||||
if interpreter:
|
|
||||||
if isinstance(interpreter, six.string_types):
|
|
||||||
interpreter = [interpreter]
|
|
||||||
elif type(interpreter) != list:
|
|
||||||
raise ValueError("command 'interpreter' value must be str "
|
|
||||||
"or list type")
|
|
||||||
cmd.extend(interpreter)
|
|
||||||
|
|
||||||
remote_path = command.get("remote_path") or []
|
|
||||||
if remote_path:
|
|
||||||
if isinstance(remote_path, six.string_types):
|
|
||||||
remote_path = [remote_path]
|
|
||||||
elif type(remote_path) != list:
|
|
||||||
raise ValueError("command 'remote_path' value must be str "
|
|
||||||
"or list type")
|
|
||||||
cmd.extend(remote_path)
|
|
||||||
if command.get("local_path"):
|
|
||||||
ssh.put_file(os.path.expanduser(
|
|
||||||
command["local_path"]), remote_path[-1],
|
|
||||||
mode=self.USER_RWX_OTHERS_RX_ACCESS_MODE)
|
|
||||||
|
|
||||||
if command.get("script_file"):
|
|
||||||
stdin = open(os.path.expanduser(command["script_file"]), "rb")
|
|
||||||
|
|
||||||
elif command.get("script_inline"):
|
|
||||||
stdin = six.moves.StringIO(command["script_inline"])
|
|
||||||
|
|
||||||
cmd.extend(command.get("command_args") or [])
|
|
||||||
|
|
||||||
return ssh.execute(cmd, stdin=stdin, timeout=10)
|
|
||||||
|
|
||||||
|
|
||||||
def one_killing_iteration(self, server, fip, computes, disruptor_cmd,
|
|
||||||
stop_instance):
|
|
||||||
"""Find the host where instance is hosted, disrupt the host and
|
|
||||||
verify status of the instance after the failover"""
|
|
||||||
|
|
||||||
server_admin = self.admin_clients("nova").servers.get(server.id)
|
|
||||||
host_name_pre = getattr(server_admin, "OS-EXT-SRV-ATTR:host")
|
|
||||||
host_name_ext = host_name_pre.split('.')[0] + ".external"
|
|
||||||
hypervisors = self.admin_clients("nova").hypervisors.list()
|
|
||||||
hostnames = []
|
|
||||||
for hypervisor in hypervisors:
|
|
||||||
hostnames.append(getattr(hypervisor, "hypervisor_hostname"))
|
|
||||||
if getattr(hypervisor, "hypervisor_hostname") == host_name_pre:
|
|
||||||
hypervisor_id = getattr(hypervisor, "id")
|
|
||||||
hypervisor = self.admin_clients("nova").hypervisors.get(hypervisor_id)
|
|
||||||
hypervisor_ip = socket.gethostbyname(host_name_ext.strip())
|
|
||||||
|
|
||||||
if not disruptor_cmd:
|
|
||||||
disruptor_cmd = {
|
|
||||||
"script_inline": "sudo sh -c \"echo b > /proc/sysrq-trigger\"",
|
|
||||||
"interpreter": "/bin/sh"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Trigger failover of compute node hosting the instance
|
|
||||||
failover(self, host=hypervisor_ip,
|
|
||||||
command=disruptor_cmd,
|
|
||||||
port=computes.get("port", 22),
|
|
||||||
username=computes.get("username"),
|
|
||||||
password=computes.get("password"),
|
|
||||||
key_filename=computes.get("key_filename"),
|
|
||||||
pkey=computes.get("pkey")
|
|
||||||
)
|
|
||||||
# Wait for instance to be moved to different host
|
|
||||||
hostnames.remove(host_name_pre)
|
|
||||||
task_utils.wait_for(
|
|
||||||
server_admin,
|
|
||||||
status_attr="OS-EXT-SRV-ATTR:host",
|
|
||||||
ready_statuses=hostnames,
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=120,
|
|
||||||
check_interval=5
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check the instance is SHUTOFF in the case of stopped instance or
|
|
||||||
# that the instance is pingable
|
|
||||||
if stop_instance:
|
|
||||||
task_utils.wait_for(
|
|
||||||
server,
|
|
||||||
ready_statuses=["SHUTOFF"],
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=60,
|
|
||||||
check_interval=2
|
|
||||||
)
|
|
||||||
#server_admin = self.admin_clients("nova").servers.get(server.id)
|
|
||||||
#host_name_post = getattr(server_admin, "OS-EXT-SRV-ATTR:host")
|
|
||||||
#if host_name_post in host_name_pre:
|
|
||||||
#raise exceptions.InvalidHostException()
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
if self.wait_for_ping:
|
|
||||||
self._wait_for_ping(fip["ip"])
|
|
||||||
except exceptions.TimeoutException:
|
|
||||||
console_logs = self._get_server_console_output(server,
|
|
||||||
None)
|
|
||||||
LOG.debug("VM console logs:\n%s", console_logs)
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
def recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
stop_instance=False,
|
|
||||||
disruptor_cmd=None,
|
|
||||||
iterations=1,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
"""Boot a server, trigger failover of host and verify instance.
|
|
||||||
|
|
||||||
:param image: glance image name to use for the vm
|
|
||||||
:param flavor: VM flavor name
|
|
||||||
:param computes: dictionary with credentials to the compute nodes
|
|
||||||
consisting of username, password, port, key_filename, disruptor
|
|
||||||
command and pkey.
|
|
||||||
Examples::
|
|
||||||
computes: {
|
|
||||||
username: heat-admin,
|
|
||||||
key_filename: /path/to/ssh/id_rsa.pub
|
|
||||||
port: 22
|
|
||||||
}
|
|
||||||
:param volume_args: volume args for booting server from volume
|
|
||||||
:param floating_network: external network name, for floating ip
|
|
||||||
:param use_floating_ip: bool, floating or fixed IP for SSH connection
|
|
||||||
:param force_delete: whether to use force_delete for servers
|
|
||||||
:param stop_instance: whether to stop instance before disruptor command
|
|
||||||
:param disruptor_cmd: command to be send to hosting compute node
|
|
||||||
:param iterations: number of compute node killing iteration
|
|
||||||
:param wait_for_ping: whether to check connectivity on server creation
|
|
||||||
:param **kwargs: extra arguments for booting the server
|
|
||||||
:param max_log_length: The number of tail nova console-log lines user
|
|
||||||
would like to retrieve
|
|
||||||
:returns:
|
|
||||||
"""
|
|
||||||
|
|
||||||
self.wait_for_ping = wait_for_ping
|
|
||||||
|
|
||||||
if volume_args:
|
|
||||||
volume = self.cinder.create_volume(volume_args["size"], imageRef=None)
|
|
||||||
kwargs["block_device_mapping"] = {"vdrally": "%s:::1" % volume.id}
|
|
||||||
|
|
||||||
server, fip = self._boot_server_with_fip(
|
|
||||||
image, flavor, use_floating_ip=use_floating_ip,
|
|
||||||
floating_network=floating_network,
|
|
||||||
key_name=self.context["user"]["keypair"]["name"],
|
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
task_utils.wait_for(
|
|
||||||
server,
|
|
||||||
ready_statuses=["ACTIVE"],
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=120,
|
|
||||||
check_interval=2
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
if self.wait_for_ping:
|
|
||||||
self._wait_for_ping(fip["ip"])
|
|
||||||
except exceptions.TimeoutException:
|
|
||||||
console_logs = self._get_server_console_output(server,
|
|
||||||
max_log_length)
|
|
||||||
LOG.debug("VM console logs:\n%s", console_logs)
|
|
||||||
raise
|
|
||||||
|
|
||||||
if stop_instance:
|
|
||||||
self._stop_server(server)
|
|
||||||
task_utils.wait_for(
|
|
||||||
server,
|
|
||||||
ready_statuses=["SHUTOFF"],
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=120,
|
|
||||||
check_interval=2
|
|
||||||
)
|
|
||||||
|
|
||||||
# Wait a little before killing the compute
|
|
||||||
# If we do not wait, backing image will get corrupted which was reported as bug
|
|
||||||
time.sleep(30)
|
|
||||||
|
|
||||||
for iteration in range(1, iterations+1):
|
|
||||||
one_killing_iteration(self, server, fip, computes,
|
|
||||||
disruptor_cmd, stop_instance)
|
|
||||||
# Give cluster some time to recover original compute node
|
|
||||||
LOG.info("Wait for compute nodes to come online after previous disruption")
|
|
||||||
time.sleep(360)
|
|
||||||
|
|
||||||
if stop_instance:
|
|
||||||
# Start instance If It was stopped.
|
|
||||||
self._start_server(server)
|
|
||||||
|
|
||||||
task_utils.wait_for(
|
|
||||||
server,
|
|
||||||
ready_statuses=["ACTIVE"],
|
|
||||||
update_resource=task_utils.get_from_manager(),
|
|
||||||
timeout=120,
|
|
||||||
check_interval=2
|
|
||||||
)
|
|
||||||
self._delete_server_with_fip(server, fip, force_delete=force_delete)
|
|
||||||
|
|
||||||
|
|
||||||
@types.convert(image={"type": "glance_image"},
|
|
||||||
flavor={"type": "nova_flavor"})
|
|
||||||
@validation.add("image_valid_on_flavor",
|
|
||||||
flavor_param="flavor", image_param="image")
|
|
||||||
@validation.add("valid_command", param_name="command", required=False)
|
|
||||||
@validation.add("number", param_name="port", minval=1, maxval=65535,
|
|
||||||
nullable=True, integer_only=True)
|
|
||||||
@validation.add("external_network_exists", param_name="floating_network")
|
|
||||||
@validation.add("required_services",
|
|
||||||
services=[consts.Service.NOVA, consts.Service.CINDER])
|
|
||||||
@validation.add("required_platform", platform="openstack",
|
|
||||||
users=True, admin=True)
|
|
||||||
@scenario.configure(context={"cleanup@openstack": ["nova", "cinder"],
|
|
||||||
"keypair@openstack": {}, "allow_ssh@openstack": None},
|
|
||||||
name="InstanceHA.recover_instance_fip_and_volume",
|
|
||||||
platform="openstack")
|
|
||||||
class InstanceHARecoverFIPAndVolume(vm_utils.VMScenario, cinder_utils.CinderBasic):
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(InstanceHARecoverFIPAndVolume, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def run(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
|
|
||||||
recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=volume_args,
|
|
||||||
floating_network=floating_network,
|
|
||||||
use_floating_ip=use_floating_ip,
|
|
||||||
force_delete=force_delete,
|
|
||||||
wait_for_ping=wait_for_ping,
|
|
||||||
max_log_length=max_log_length,
|
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@types.convert(image={"type": "glance_image"},
|
|
||||||
flavor={"type": "nova_flavor"})
|
|
||||||
@validation.add("image_valid_on_flavor",
|
|
||||||
flavor_param="flavor", image_param="image")
|
|
||||||
@validation.add("valid_command", param_name="command", required=False)
|
|
||||||
@validation.add("number", param_name="port", minval=1, maxval=65535,
|
|
||||||
nullable=True, integer_only=True)
|
|
||||||
@validation.add("external_network_exists", param_name="floating_network")
|
|
||||||
@validation.add("required_services",
|
|
||||||
services=[consts.Service.NOVA, consts.Service.CINDER])
|
|
||||||
@validation.add("required_platform", platform="openstack",
|
|
||||||
users=True, admin=True)
|
|
||||||
@scenario.configure(context={"cleanup@openstack": ["nova", "cinder"],
|
|
||||||
"keypair@openstack": {}, "allow_ssh@openstack": None},
|
|
||||||
name="InstanceHA.recover_instance_two_cycles",
|
|
||||||
platform="openstack")
|
|
||||||
class InstanceHARecoverTwoCycle(vm_utils.VMScenario, cinder_utils.CinderBasic):
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(InstanceHARecoverTwoCycle, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def run(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
|
|
||||||
recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=volume_args,
|
|
||||||
floating_network=floating_network,
|
|
||||||
use_floating_ip=use_floating_ip,
|
|
||||||
force_delete=force_delete,
|
|
||||||
iterations=2,
|
|
||||||
wait_for_ping=wait_for_ping,
|
|
||||||
max_log_length=max_log_length,
|
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@types.convert(image={"type": "glance_image"},
|
|
||||||
flavor={"type": "nova_flavor"})
|
|
||||||
@validation.add("image_valid_on_flavor",
|
|
||||||
flavor_param="flavor", image_param="image")
|
|
||||||
@validation.add("valid_command", param_name="command", required=False)
|
|
||||||
@validation.add("number", param_name="port", minval=1, maxval=65535,
|
|
||||||
nullable=True, integer_only=True)
|
|
||||||
@validation.add("external_network_exists", param_name="floating_network")
|
|
||||||
@validation.add("required_services",
|
|
||||||
services=[consts.Service.NOVA, consts.Service.CINDER])
|
|
||||||
@validation.add("required_platform", platform="openstack",
|
|
||||||
users=True, admin=True)
|
|
||||||
@scenario.configure(context={"cleanup@openstack": ["nova", "cinder"],
|
|
||||||
"keypair@openstack": {}, "allow_ssh@openstack": None},
|
|
||||||
name="InstanceHA.recover_stopped_instance_fip",
|
|
||||||
platform="openstack")
|
|
||||||
class InstanceHARecoverStopped(vm_utils.VMScenario, cinder_utils.CinderBasic):
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(InstanceHARecoverStopped, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def run(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
|
|
||||||
recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=volume_args,
|
|
||||||
floating_network=floating_network,
|
|
||||||
use_floating_ip=use_floating_ip,
|
|
||||||
force_delete=force_delete,
|
|
||||||
stop_instance=True,
|
|
||||||
wait_for_ping=wait_for_ping,
|
|
||||||
max_log_length=max_log_length,
|
|
||||||
**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
@types.convert(image={"type": "glance_image"},
|
|
||||||
flavor={"type": "nova_flavor"})
|
|
||||||
@validation.add("image_valid_on_flavor",
|
|
||||||
flavor_param="flavor", image_param="image")
|
|
||||||
@validation.add("valid_command", param_name="command", required=False)
|
|
||||||
@validation.add("number", param_name="port", minval=1, maxval=65535,
|
|
||||||
nullable=True, integer_only=True)
|
|
||||||
@validation.add("external_network_exists", param_name="floating_network")
|
|
||||||
@validation.add("required_services",
|
|
||||||
services=[consts.Service.NOVA, consts.Service.CINDER])
|
|
||||||
@validation.add("required_platform", platform="openstack",
|
|
||||||
users=True, admin=True)
|
|
||||||
@scenario.configure(context={"cleanup@openstack": ["nova", "cinder"],
|
|
||||||
"keypair@openstack": {}, "allow_ssh@openstack": None},
|
|
||||||
name="InstanceHA.recover_instance_nova_compute",
|
|
||||||
platform="openstack")
|
|
||||||
class InstanceHARecoverNovaCompute(vm_utils.VMScenario, cinder_utils.CinderBasic):
|
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(InstanceHARecoverNovaCompute, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def run(self, image, flavor, computes,
|
|
||||||
volume_args=None,
|
|
||||||
floating_network=None,
|
|
||||||
use_floating_ip=True,
|
|
||||||
force_delete=False,
|
|
||||||
wait_for_ping=True,
|
|
||||||
max_log_length=None,
|
|
||||||
**kwargs):
|
|
||||||
|
|
||||||
disruptor_cmd = {
|
|
||||||
"script_inline": "sudo kill -9 $(ps -ef | grep ^nova* | awk \'{print$2}\'); echo {}",
|
|
||||||
"interpreter": "/bin/sh"
|
|
||||||
}
|
|
||||||
recover_instance_ha(self, image, flavor, computes,
|
|
||||||
volume_args=volume_args,
|
|
||||||
floating_network=floating_network,
|
|
||||||
use_floating_ip=use_floating_ip,
|
|
||||||
force_delete=force_delete,
|
|
||||||
disruptor_cmd=disruptor_cmd,
|
|
||||||
wait_for_ping=wait_for_ping,
|
|
||||||
max_log_length=max_log_length,
|
|
||||||
**kwargs)
|
|
@ -1,81 +0,0 @@
|
|||||||
---
|
|
||||||
InstanceHA.recover_instance_fip_and_volume:
|
|
||||||
-
|
|
||||||
args:
|
|
||||||
flavor:
|
|
||||||
name: "m1.tiny"
|
|
||||||
image:
|
|
||||||
name: cirros
|
|
||||||
volume_args:
|
|
||||||
size: 1
|
|
||||||
floating_network: "{{ public_physical_network }}-network"
|
|
||||||
force_delete: false
|
|
||||||
wait_for_ping: false
|
|
||||||
computes:
|
|
||||||
username: "heat-admin"
|
|
||||||
key_filename: "/home/stack/.ssh/id_rsa"
|
|
||||||
port: 22
|
|
||||||
runner:
|
|
||||||
type: "constant"
|
|
||||||
times: 1
|
|
||||||
concurrency: 1
|
|
||||||
context:
|
|
||||||
users:
|
|
||||||
tenants: 2
|
|
||||||
users_per_tenant: 1
|
|
||||||
network: {}
|
|
||||||
sla:
|
|
||||||
failure_rate:
|
|
||||||
max: 0.0
|
|
||||||
InstanceHA.recover_stopped_instance_fip:
|
|
||||||
-
|
|
||||||
args:
|
|
||||||
flavor:
|
|
||||||
name: "m1.tiny"
|
|
||||||
image:
|
|
||||||
name: cirros
|
|
||||||
floating_network: "{{ public_physical_network }}-network"
|
|
||||||
force_delete: false
|
|
||||||
wait_for_ping: false
|
|
||||||
computes:
|
|
||||||
username: "heat-admin"
|
|
||||||
key_filename: "/home/stack/.ssh/id_rsa"
|
|
||||||
port: 22
|
|
||||||
runner:
|
|
||||||
type: "constant"
|
|
||||||
times: 1
|
|
||||||
concurrency: 1
|
|
||||||
context:
|
|
||||||
users:
|
|
||||||
tenants: 2
|
|
||||||
users_per_tenant: 1
|
|
||||||
network: {}
|
|
||||||
sla:
|
|
||||||
failure_rate:
|
|
||||||
max: 0.0
|
|
||||||
InstanceHA.recover_instance_two_cycles:
|
|
||||||
-
|
|
||||||
args:
|
|
||||||
flavor:
|
|
||||||
name: "m1.tiny"
|
|
||||||
image:
|
|
||||||
name: cirros
|
|
||||||
floating_network: "{{ public_physical_network }}-network"
|
|
||||||
force_delete: false
|
|
||||||
wait_for_ping: false
|
|
||||||
computes:
|
|
||||||
username: "heat-admin"
|
|
||||||
key_filename: "/home/stack/.ssh/id_rsa"
|
|
||||||
port: 22
|
|
||||||
runner:
|
|
||||||
type: "constant"
|
|
||||||
times: 1
|
|
||||||
concurrency: 1
|
|
||||||
context:
|
|
||||||
users:
|
|
||||||
tenants: 2
|
|
||||||
users_per_tenant: 1
|
|
||||||
network: {}
|
|
||||||
sla:
|
|
||||||
failure_rate:
|
|
||||||
max: 0.0
|
|
@ -1,226 +0,0 @@
|
|||||||
instance-ha
|
|
||||||
===========
|
|
||||||
|
|
||||||
This role aims to automate all the steps needed to configure instance HA on a
|
|
||||||
deployed TripleO overcloud environment.
|
|
||||||
|
|
||||||
Requirements
|
|
||||||
------------
|
|
||||||
|
|
||||||
The TripleO environment must be prepared as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).
|
|
||||||
|
|
||||||
**NOTE**: Instance-HA depends on STONITH. This means that all the steps
|
|
||||||
performed by this role make sense only if on the overcloud STONITH has been
|
|
||||||
configured. There is a dedicated role that automates the STONITH
|
|
||||||
configuration, named [stonith-config](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/stonith-config).
|
|
||||||
|
|
||||||
Instance HA
|
|
||||||
-----------
|
|
||||||
|
|
||||||
Instance HA is a feature that gives a certain degree of high-availability to the
|
|
||||||
instances spawned by an OpenStack deployment. Namely, if a compute node on which
|
|
||||||
an instance is running breaks for whatever reason, this configuration will spawn
|
|
||||||
the instances that were running on the broken node onto a functioning one.
|
|
||||||
This role automates are all the necessary steps needed to configure Pacemaker
|
|
||||||
cluster to support this functionality. A typical cluster configuration on a
|
|
||||||
clean stock **newton** (or **osp10**) deployment is something like this:
|
|
||||||
|
|
||||||
Online: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
|
|
||||||
Full list of resources:
|
|
||||||
|
|
||||||
ip-192.168.24.10 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
ip-172.18.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
ip-172.20.0.19 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
ip-172.17.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
ip-172.19.0.12 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
Clone Set: haproxy-clone [haproxy]
|
|
||||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Master/Slave Set: galera-master [galera]
|
|
||||||
Masters: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
ip-172.17.0.18 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
Clone Set: rabbitmq-clone [rabbitmq]
|
|
||||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Master/Slave Set: redis-master [redis]
|
|
||||||
Masters: [ overcloud-controller-0 ]
|
|
||||||
Slaves: [ overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
openstack-cinder-volume (systemd:openstack-cinder-volume): Started overcloud-controller-0
|
|
||||||
|
|
||||||
As you can see we have 3 controllers, six IP resources, four *core* resources
|
|
||||||
(*haproxy*, *galera*, *rabbitmq* and *redis*) and one last resource which is
|
|
||||||
*openstack-cinder-volume* that needs to run as a single active/passive resource
|
|
||||||
inside the cluster. This role configures all the additional resources needed
|
|
||||||
to have a working instance HA setup. Once the playbook is executed, the
|
|
||||||
configuration will be something like this:
|
|
||||||
|
|
||||||
Online: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
RemoteOnline: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
|
|
||||||
Full list of resources:
|
|
||||||
|
|
||||||
ip-192.168.24.10 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
ip-172.18.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
ip-172.20.0.19 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
ip-172.17.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
ip-172.19.0.12 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
|
||||||
Clone Set: haproxy-clone [haproxy]
|
|
||||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
Master/Slave Set: galera-master [galera]
|
|
||||||
Masters: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
ip-172.17.0.18 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
|
||||||
Clone Set: rabbitmq-clone [rabbitmq]
|
|
||||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
Master/Slave Set: redis-master [redis]
|
|
||||||
Masters: [ overcloud-controller-0 ]
|
|
||||||
Slaves: [ overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
openstack-cinder-volume (systemd:openstack-cinder-volume): Started overcloud-controller-0
|
|
||||||
ipmilan-overcloud-compute-0 (stonith:fence_ipmilan): Started overcloud-controller-1
|
|
||||||
ipmilan-overcloud-controller-2 (stonith:fence_ipmilan): Started overcloud-controller-0
|
|
||||||
ipmilan-overcloud-controller-0 (stonith:fence_ipmilan): Started overcloud-controller-0
|
|
||||||
ipmilan-overcloud-controller-1 (stonith:fence_ipmilan): Started overcloud-controller-1
|
|
||||||
ipmilan-overcloud-compute-1 (stonith:fence_ipmilan): Started overcloud-controller-1
|
|
||||||
nova-evacuate (ocf::openstack:NovaEvacuate): Started overcloud-controller-0
|
|
||||||
Clone Set: nova-compute-checkevacuate-clone [nova-compute-checkevacuate]
|
|
||||||
Started: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
Stopped: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
Clone Set: nova-compute-clone [nova-compute]
|
|
||||||
Started: [ overcloud-compute-0 overcloud-compute-1 ]
|
|
||||||
Stopped: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
|
||||||
fence-nova (stonith:fence_compute): Started overcloud-controller-0
|
|
||||||
overcloud-compute-1 (ocf::pacemaker:remote): Started overcloud-controller-0
|
|
||||||
overcloud-compute-0 (ocf::pacemaker:remote): Started overcloud-controller-1
|
|
||||||
|
|
||||||
How Instance HA works
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
There are three key resource agents you need to consider. Here's the list:
|
|
||||||
|
|
||||||
- *fence_compute* (named **fence-nova** inside the cluster): which takes care
|
|
||||||
of marking a compute node with the attribute "evacuate" set to yes;
|
|
||||||
- *NovaEvacuate* (named **nova-evacuate** inside the cluster): which takes care
|
|
||||||
of the effective evacuation of the instances and runs on one of the
|
|
||||||
controllers;
|
|
||||||
- *nova-compute-wait* (named **nova-compute-checkevacuate** inside the
|
|
||||||
cluster): which waits for eventual evacuation before starting nova compute
|
|
||||||
services and runs on each compute nodes;
|
|
||||||
|
|
||||||
Looking at the role you will notice that other systemd resources will be added
|
|
||||||
into the cluster on the compute nodes, especially in older release like mitaka
|
|
||||||
(*neutron-openvswitch-agent*, *libvirtd*, *openstack-ceilometer-compute* and
|
|
||||||
*nova-compute*), but the keys for the correct instance HA comprehension are the
|
|
||||||
aforementioned three resources.
|
|
||||||
|
|
||||||
Evacuation
|
|
||||||
----------
|
|
||||||
|
|
||||||
The principle under which Instance HA works is *evacuation*. This means that
|
|
||||||
when a host becomes unavailablea for whatever reason, instances on it are
|
|
||||||
evacuated to another available host.
|
|
||||||
Instance HA works both on shared storage and local storage environments, which
|
|
||||||
means that evacuated instances will maintain the same network setup (static ip,
|
|
||||||
floating ip and so on) and characteristics inside the new host, even if they
|
|
||||||
will be spawned from scratch.
|
|
||||||
|
|
||||||
What happens when a compute node is lost
|
|
||||||
----------------------------------------
|
|
||||||
|
|
||||||
Once configured, how does the system behaves when evacuation is needed? The
|
|
||||||
following sequence describes the actions taken by the cluster and the OpenStack
|
|
||||||
components:
|
|
||||||
|
|
||||||
1. A compute node (say overcloud-compute-1) which is running instances goes
|
|
||||||
down for some reason (power outage, kernel panic, manual intervention);
|
|
||||||
2. The cluster starts the action sequence to fence this host, since it needs
|
|
||||||
to be sure that the host is *really* down before driving any other operation
|
|
||||||
(otherwise there is potential for data corruption or multiple identical VMs
|
|
||||||
running at the same time in the infrastructure). Setup is configured to have
|
|
||||||
two levels of fencing for the compute hosts:
|
|
||||||
|
|
||||||
* **IPMI**: which will occur first and will take care of physically
|
|
||||||
resetting the host and hence assuring that the machine is really powered
|
|
||||||
off;
|
|
||||||
* **fence-nova**: which will occur afterwards and will take care of marking
|
|
||||||
with a cluster per-node attribute "evacuate=yes";
|
|
||||||
|
|
||||||
So the host gets reset and on the cluster a new node-property like the
|
|
||||||
following will appear:
|
|
||||||
|
|
||||||
[root@overcloud-controller-0 ~]# attrd_updater -n evacuate -A
|
|
||||||
name="evacuate" host="overcloud-compute-1.localdomain" value="yes"
|
|
||||||
|
|
||||||
3. At this point the resource **nova-evacuate** which constantly monitors the
|
|
||||||
attributes of the cluster in search of the evacuate tag will find out that
|
|
||||||
the *overcloud-compute-1* host needs evacuation, and by internally using
|
|
||||||
*nova-compute commands*, will start the evactuation of the instances towards
|
|
||||||
another host;
|
|
||||||
4. In the meantime, while compute-1 is booting up again,
|
|
||||||
**nova-compute-checkevacuate** will wait (with a default timeout of 120
|
|
||||||
seconds) for the evacuation to complete before starting the chain via the
|
|
||||||
*NovaCompute* resource that will enable the fenced host to become available
|
|
||||||
again for running instances;
|
|
||||||
|
|
||||||
What to look for when something is not working
|
|
||||||
----------------------------------------------
|
|
||||||
|
|
||||||
Here there are some tips to follow once you need to debug why instance HA is
|
|
||||||
not working:
|
|
||||||
|
|
||||||
1. Check credentials: many resources require access data the the overcloud
|
|
||||||
coming form the overcloudrc file, so it's not so difficult to do copy
|
|
||||||
errors;
|
|
||||||
2. Check connectivity: stonith is essential for cluster and if for some reason
|
|
||||||
the cluster is not able to fence the compute nodes, the whole instance HA
|
|
||||||
environment will not work;
|
|
||||||
3. Check errors: inside the controller's cluster log
|
|
||||||
(*/var/log/cluster/corosync.log*) some errors may catch the eye.
|
|
||||||
|
|
||||||
Examples on how to invoke the playbook via ansible
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
This command line will install the whole instance-ha solution, with controller
|
|
||||||
stonith, compute stonith and all the instance ha steps in:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10"
|
|
||||||
|
|
||||||
By default the playbook will install the instance-ha solution with the shared
|
|
||||||
storage configuration, but it is possible to make the installation in a no
|
|
||||||
shared storage environment, passing the **instance_ha_shared_storage** variable
|
|
||||||
as **false**:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e instance_ha_shared_storage=false
|
|
||||||
|
|
||||||
If a user configured the overcloud with a specific domain it is possible to
|
|
||||||
override the default "localdomain" value by passing the **overcloud_domain**
|
|
||||||
variable to the playbook:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e overcloud_domain="mydomain"
|
|
||||||
|
|
||||||
If a user already installed STONITH for controllers and wants just to apply all
|
|
||||||
the instance HA steps with STONITH for the compute nodes can launch this:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e stonith_devices="computes"
|
|
||||||
|
|
||||||
To uninstall the whole instance HA solution:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e instance_ha_action="uninstall"
|
|
||||||
|
|
||||||
Or if you a user needs to omit STONITH for the controllers:
|
|
||||||
|
|
||||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e stonith_devices="computes" -e instance_ha_action="uninstall"
|
|
||||||
|
|
||||||
Is it also possible to totally omit STONITH configuration by passing "none" as
|
|
||||||
the value of *stonith_devices*.
|
|
||||||
|
|
||||||
License
|
|
||||||
-------
|
|
||||||
|
|
||||||
GPL
|
|
||||||
|
|
||||||
Author Information
|
|
||||||
------------------
|
|
||||||
|
|
||||||
Raoul Scarazzini <rasca@redhat.com>
|
|
@ -1,13 +0,0 @@
|
|||||||
---
|
|
||||||
|
|
||||||
overcloud_working_dir: "/home/heat-admin"
|
|
||||||
working_dir: "/home/stack"
|
|
||||||
|
|
||||||
# Can be install or uninstall
|
|
||||||
instance_ha_action: "install"
|
|
||||||
|
|
||||||
# Do we have a shared storage or not?
|
|
||||||
instance_ha_shared_storage: true
|
|
||||||
|
|
||||||
# Set overcloud domain
|
|
||||||
overcloud_domain: "localdomain"
|
|
@ -1,386 +0,0 @@
|
|||||||
---
|
|
||||||
- name: Apply STONITH for compute nodes
|
|
||||||
include_role:
|
|
||||||
name: stonith-config
|
|
||||||
vars:
|
|
||||||
stonith_devices: "computes"
|
|
||||||
when:
|
|
||||||
- stonith_devices in ["all","computes"]
|
|
||||||
|
|
||||||
- name: Disable openstack-nova-compute on compute
|
|
||||||
service:
|
|
||||||
name: openstack-nova-compute
|
|
||||||
state: stopped
|
|
||||||
enabled: no
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Disable neutron-openvswitch-agent on compute
|
|
||||||
service:
|
|
||||||
name: neutron-openvswitch-agent
|
|
||||||
state: stopped
|
|
||||||
enabled: no
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Disable openstack-ceilometer-compute on compute
|
|
||||||
service:
|
|
||||||
name: openstack-ceilometer-compute
|
|
||||||
state: stopped
|
|
||||||
enabled: no
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Disable libvirtd on compute
|
|
||||||
become: yes
|
|
||||||
service:
|
|
||||||
name: libvirtd
|
|
||||||
state: stopped
|
|
||||||
enabled: no
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Generate authkey for remote pacemaker
|
|
||||||
shell: |
|
|
||||||
dd if=/dev/urandom of="/tmp/authkey" bs=4096 count=1
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Make sure pacemaker config dir exists
|
|
||||||
become: yes
|
|
||||||
file:
|
|
||||||
path: /etc/pacemaker
|
|
||||||
state: directory
|
|
||||||
mode: 0750
|
|
||||||
group: "haclient"
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['controller'] }}"
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Copy authkey on all the overcloud nodes
|
|
||||||
become: yes
|
|
||||||
copy:
|
|
||||||
src: /tmp/authkey
|
|
||||||
dest: /etc/pacemaker/authkey
|
|
||||||
mode: 0640
|
|
||||||
group: "haclient"
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['controller'] }}"
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Remove authkey from local dir
|
|
||||||
file:
|
|
||||||
path: /tmp/authkey
|
|
||||||
state: absent
|
|
||||||
delegate_to: localhost
|
|
||||||
|
|
||||||
- name: Enable iptables traffic for pacemaker_remote
|
|
||||||
become: yes
|
|
||||||
shell: |
|
|
||||||
iptables -I INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['controller'] }}"
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Make iptables pacemaker_remote rule permanent
|
|
||||||
become: yes
|
|
||||||
lineinfile:
|
|
||||||
path: /etc/sysconfig/iptables
|
|
||||||
line: "-A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT"
|
|
||||||
insertafter: ":OUTPUT ACCEPT"
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['controller'] }}"
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Start pacemaker remote service on compute nodes
|
|
||||||
become: yes
|
|
||||||
service:
|
|
||||||
name: pacemaker_remote
|
|
||||||
enabled: yes
|
|
||||||
state: started
|
|
||||||
delegate_to: "{{ item }}"
|
|
||||||
with_items:
|
|
||||||
- "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Get the name of the stack
|
|
||||||
shell: |
|
|
||||||
source {{ working_dir }}/stackrc
|
|
||||||
openstack stack list -f value -c 'Stack Name'
|
|
||||||
register: stack_name
|
|
||||||
|
|
||||||
- name: Check if a v3 overcloud's rc file exists
|
|
||||||
stat:
|
|
||||||
path: "{{ working_dir }}/{{ stack_name.stdout }}rc.v3"
|
|
||||||
register: v3_rc_file_stat
|
|
||||||
|
|
||||||
- name: Get the contents of the overcloud's rc file v3
|
|
||||||
set_fact:
|
|
||||||
overcloudrc: "{{ stack_name.stdout }}rc.v3"
|
|
||||||
when: v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- name: Get the contents of the overcloud's rc file
|
|
||||||
set_fact:
|
|
||||||
overcloudrc: "{{ stack_name.stdout }}rc"
|
|
||||||
when: not v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- block:
|
|
||||||
- name: Get OS_USERNAME from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_USERNAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USERNAME=//g'
|
|
||||||
register: "OS_USERNAME"
|
|
||||||
|
|
||||||
- name: Get OS_PASSWORD from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_PASSWORD {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PASSWORD=//g'
|
|
||||||
register: "OS_PASSWORD"
|
|
||||||
|
|
||||||
- name: Get OS_AUTH_URL from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_AUTH_URL {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_AUTH_URL=//g'
|
|
||||||
register: "OS_AUTH_URL"
|
|
||||||
|
|
||||||
- name: Get OS_PROJECT_NAME or OS_TENANT_NAME from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep -E 'OS_PROJECT_NAME|OS_TENANT_NAME' {{ working_dir }}/{{ overcloudrc }} | tail -1 | sed 's/export OS_.*_NAME=//g'
|
|
||||||
register: "OS_TENANT_NAME"
|
|
||||||
|
|
||||||
- name: Get OS_USER_DOMAIN_NAME from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_USER_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USER_DOMAIN_NAME=//g'
|
|
||||||
register: "OS_USER_DOMAIN_NAME"
|
|
||||||
when: v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- name: Get OS_PROJECT_DOMAIN_NAME from overcloudrc
|
|
||||||
shell: |
|
|
||||||
grep OS_PROJECT_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PROJECT_DOMAIN_NAME=//g'
|
|
||||||
register: "OS_PROJECT_DOMAIN_NAME"
|
|
||||||
when: v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- name: Define variable for pcs additional options for overcloud's rc file v3
|
|
||||||
set_fact:
|
|
||||||
pcs_v3_rc_file_opts: ""
|
|
||||||
|
|
||||||
- name: Define variable for pcs additional options for no_shared_storage
|
|
||||||
set_fact:
|
|
||||||
pcs_NovaEvacuate_no_shared_storage_opts: ""
|
|
||||||
pcs_fence_compute_no_shared_storage_opts: ""
|
|
||||||
|
|
||||||
- name: Set pcs additional options for overcloud's rc file v3
|
|
||||||
set_fact:
|
|
||||||
pcs_v3_rc_file_opts: "project_domain=$OS_PROJECT_DOMAIN_NAME user_domain=$OS_USER_DOMAIN_NAME"
|
|
||||||
when: v3_rc_file_stat.stat.exists
|
|
||||||
|
|
||||||
- name: Set pcs additional options for no_shared_storage
|
|
||||||
set_fact:
|
|
||||||
pcs_NovaEvacuate_no_shared_storage_opts: "no_shared_storage=1"
|
|
||||||
pcs_fence_compute_no_shared_storage_opts: "no-shared-storage=True"
|
|
||||||
when: not instance_ha_shared_storage|bool
|
|
||||||
|
|
||||||
- block:
|
|
||||||
- name: Create resource nova-evacuate
|
|
||||||
shell: |
|
|
||||||
pcs resource create nova-evacuate ocf:openstack:NovaEvacuate auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME {{ pcs_v3_rc_file_opts }} {{ pcs_NovaEvacuate_no_shared_storage_opts }} --force
|
|
||||||
|
|
||||||
- name: Create pacemaker constraint to start nova-evacuate only on non compute nodes
|
|
||||||
shell: |
|
|
||||||
pcs constraint location nova-evacuate rule resource-discovery=never score=-INFINITY osprole eq compute
|
|
||||||
|
|
||||||
- name: Create pacemaker constraints to start VIP resources before nova-evacuate
|
|
||||||
shell: |
|
|
||||||
for i in $(pcs status | grep IP | awk '{ print $1 }')
|
|
||||||
do pcs constraint order start $i then nova-evacuate
|
|
||||||
done
|
|
||||||
|
|
||||||
- name: Create pacemaker constraints to start openstack services before nova-evacuate
|
|
||||||
shell: "pcs constraint order start {{ item }} then nova-evacuate require-all=false"
|
|
||||||
with_items:
|
|
||||||
- openstack-glance-api-clone
|
|
||||||
- neutron-metadata-agent-clone
|
|
||||||
- openstack-nova-conductor-clone
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Disable keystone resource
|
|
||||||
shell: "pcs resource disable openstack-keystone --wait=900"
|
|
||||||
when: release in [ 'liberty', 'rhos-8' ]
|
|
||||||
|
|
||||||
# Keystone resource was replaced by openstack-core resource in RHOS9
|
|
||||||
- name: Disable openstack-core resource
|
|
||||||
shell: "pcs resource disable openstack-core --wait=900"
|
|
||||||
when: release in [ 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Set controller pacemaker property on controllers
|
|
||||||
shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=controller"
|
|
||||||
with_items: "{{ groups['controller'] }}"
|
|
||||||
|
|
||||||
- name: Get stonith devices
|
|
||||||
shell: "pcs stonith | awk '{print $1}' | tr '\n' ' '"
|
|
||||||
register: stonithdevs
|
|
||||||
|
|
||||||
- name: Setup stonith devices
|
|
||||||
shell: |
|
|
||||||
for i in $(cibadmin -Q --xpath //primitive --node-path | awk -F "id='" '{print $2}' | awk -F "'" '{print $1}' | uniq); do
|
|
||||||
found=0
|
|
||||||
if [ -n "{{ stonithdevs.stdout }}" ]; then
|
|
||||||
for x in {{ stonithdevs.stdout }}; do
|
|
||||||
if [ "$x" == "$i" ]; then
|
|
||||||
found=1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
if [ $found = 0 ]; then
|
|
||||||
pcs constraint location $i rule resource-discovery=exclusive score=0 osprole eq controller
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create compute pacemaker resources and constraints
|
|
||||||
shell: |
|
|
||||||
pcs resource create nova-compute-checkevacuate ocf:openstack:nova-compute-wait auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} op start timeout=300 --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location nova-compute-checkevacuate-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs resource create nova-compute systemd:openstack-nova-compute op start timeout=60s --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location nova-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs constraint order start nova-compute-checkevacuate-clone then nova-compute-clone require-all=true
|
|
||||||
pcs constraint order start nova-compute-clone then nova-evacuate require-all=false
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create compute pacemaker resources and constraints
|
|
||||||
shell: |
|
|
||||||
pcs resource create neutron-openvswitch-agent-compute systemd:neutron-openvswitch-agent --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location neutron-openvswitch-agent-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs resource create libvirtd-compute systemd:libvirtd --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location libvirtd-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs constraint order start neutron-openvswitch-agent-compute-clone then libvirtd-compute-clone
|
|
||||||
pcs constraint colocation add libvirtd-compute-clone with neutron-openvswitch-agent-compute-clone
|
|
||||||
pcs resource create ceilometer-compute systemd:openstack-ceilometer-compute --clone interleave=true --disabled --force
|
|
||||||
pcs constraint location ceilometer-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
|
||||||
pcs constraint order start libvirtd-compute-clone then ceilometer-compute-clone
|
|
||||||
pcs constraint colocation add ceilometer-compute-clone with libvirtd-compute-clone
|
|
||||||
pcs constraint order start libvirtd-compute-clone then nova-compute-clone
|
|
||||||
pcs constraint colocation add nova-compute-clone with libvirtd-compute-clone
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Create pacemaker constraint for neutron-server, nova-conductor and ceilometer-notification
|
|
||||||
shell: |
|
|
||||||
pcs constraint order start neutron-server-clone then neutron-openvswitch-agent-compute-clone require-all=false
|
|
||||||
pcs constraint order start openstack-ceilometer-notification-clone then ceilometer-compute-clone require-all=false
|
|
||||||
pcs constraint order start openstack-nova-conductor-clone then nova-compute-checkevacuate-clone require-all=false
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Set requires to fencing as default for all resources (Pike/RHOS-12)
|
|
||||||
shell: "pcs resource defaults requires=fencing"
|
|
||||||
when: release in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create fence-nova pacemaker resource (no shared storage)
|
|
||||||
shell: "pcs stonith create fence-nova fence_compute auth_url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} record_only=1 {{ pcs_fence_compute_no_shared_storage_opts }} --force"
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create fence-nova pacemaker resource (Pike/RHOS-12)
|
|
||||||
shell: "pcs stonith create fence-nova fence_compute auth_url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} record_only=1 {{ pcs_fence_compute_no_shared_storage_opts }} meta provides=unfencing --force"
|
|
||||||
when: release in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create pacemaker constraint for fence-nova to fix it on controller node and set resource-discovery never
|
|
||||||
shell: "pcs constraint location fence-nova rule resource-discovery=never score=0 osprole eq controller"
|
|
||||||
|
|
||||||
- name: Create pacemaker constraint for fence-nova to start after galera
|
|
||||||
shell: "pcs constraint order promote galera-master then fence-nova require-all=false"
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create nova-compute order constraint on fence-nova
|
|
||||||
shell: "pcs constraint order start fence-nova then nova-compute-clone"
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Set cluster recheck interval to 1 minute
|
|
||||||
shell: "pcs property set cluster-recheck-interval=1min"
|
|
||||||
|
|
||||||
- name: Create pacemaker remote resource on compute nodes
|
|
||||||
shell: "pcs resource create {{ hostvars[item]['ansible_hostname'] }} ocf:pacemaker:remote reconnect_interval=240 op monitor interval=20"
|
|
||||||
with_items: "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Set osprole for compute nodes
|
|
||||||
shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=compute"
|
|
||||||
with_items: "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Add STONITH level definitions for compute nodes
|
|
||||||
shell: |
|
|
||||||
compute_stonith_name=$(cibadmin --query --xpath "//primitive[@class='stonith']/instance_attributes/nvpair[@value='{{ item }}']" | sed 's/.*id="\(.*\)-instance_attributes-pcmk_host_list".*/\1/g')
|
|
||||||
pcs stonith level add 1 {{ item }} $compute_stonith_name,fence-nova
|
|
||||||
with_items: "{{ groups['compute'] }}"
|
|
||||||
|
|
||||||
- name: Enable keystone resource
|
|
||||||
shell: "pcs resource enable openstack-keystone"
|
|
||||||
when: release in [ 'liberty', 'rhos-8' ]
|
|
||||||
|
|
||||||
- name: Enable openstack-core resource
|
|
||||||
shell: "pcs resource enable openstack-core"
|
|
||||||
when: release in [ 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Wait for httpd service to be started
|
|
||||||
shell: "systemctl show httpd --property=ActiveState"
|
|
||||||
register: httpd_status_result
|
|
||||||
until: httpd_status_result.stdout.find('inactive') == -1 and httpd_status_result.stdout.find('activating') == -1
|
|
||||||
retries: 30
|
|
||||||
delay: 10
|
|
||||||
when: release not in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
|
|
||||||
- name: Enable compute nodes resources (nova)
|
|
||||||
shell: "pcs resource enable {{ item }}"
|
|
||||||
with_items:
|
|
||||||
- nova-compute-checkevacuate
|
|
||||||
- nova-compute
|
|
||||||
when: release not in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Create compute unfence resource to override default resource requires (Pike/RHOS-12)
|
|
||||||
shell: |
|
|
||||||
pcs resource create compute-unfence-trigger ocf:pacemaker:Dummy op start requires="unfencing" --clone --disabled
|
|
||||||
pcs constraint location compute-unfence-trigger-clone rule resource-discovery=never score=-INFINITY osprole ne compute
|
|
||||||
pcs resource enable compute-unfence-trigger
|
|
||||||
when: release in [ 'pike', 'rhos-12' ]
|
|
||||||
|
|
||||||
- name: Enable compute nodes resources (others)
|
|
||||||
shell: "pcs resource enable {{ item }}"
|
|
||||||
with_items:
|
|
||||||
- neutron-openvswitch-agent-compute
|
|
||||||
- libvirtd-compute
|
|
||||||
- ceilometer-compute
|
|
||||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
|
||||||
environment:
|
|
||||||
OS_USERNAME: "{{ OS_USERNAME.stdout }}"
|
|
||||||
OS_PASSWORD: "{{ OS_PASSWORD.stdout }}"
|
|
||||||
OS_AUTH_URL: "{{ OS_AUTH_URL.stdout }}"
|
|
||||||
OS_TENANT_NAME: "{{ OS_TENANT_NAME.stdout }}"
|
|
||||||
OS_USER_DOMAIN_NAME: "{{ OS_USER_DOMAIN_NAME.stdout }}"
|
|
||||||
OS_PROJECT_DOMAIN_NAME: "{{ OS_PROJECT_DOMAIN_NAME.stdout }}"
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ groups.controller[0] }}"
|
|
||||||
|
|
||||||
- name: Cleanup (if any) failed resources
|
|
||||||
shell: |
|
|
||||||
for resource in $(pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort |uniq)
|
|
||||||
do
|
|
||||||
pcs resource cleanup $resource
|
|
||||||
done
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ groups.controller[0] }}"
|
|
||||||
|
|
||||||
- name: Wait for (if any) failed resources to recover
|
|
||||||
shell: pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort |uniq
|
|
||||||
register: failed_resources
|
|
||||||
until: failed_resources.stdout != []
|
|
||||||
retries: 10
|
|
||||||
delay: 10
|
|
||||||
become: yes
|
|
||||||
delegate_to: "{{ groups.controller[0] }}"
|
|
@ -1,31 +0,0 @@
---
- name: Check if Instance HA steps were already applied
  include: pre-checks.yml
  when:
    - instance_ha_action == "install"

- name: Apply STONITH for controller nodes
  include_role:
    name: stonith-config
  when:
    - instance_ha_action == "install"
    - stonith_devices in ["all","controllers"]

- name: Apply Instance High Availability steps
  include: apply.yml
  when:
    - instance_ha_action == "install"

- name: Undo Instance High Availability steps
  include: undo.yml
  when:
    - instance_ha_action == "uninstall"

- name: Remove STONITH for controller nodes
  include_role:
    name: stonith-config
  vars:
    stonith_action: "uninstall"
  when:
    - instance_ha_action == "uninstall"
    - stonith_devices in ["all","controllers"]
@ -1,25 +0,0 @@
---
- block:
    - name: Check if STONITH resources already exist
      shell: |
        pcs stonith show | grep {{ item }}
      with_items:
        - fence-nova
      register: pre_existing_stonith
      failed_when: pre_existing_stonith.rc == 0

    - name: Check if IHA resources already exist
      shell: |
        pcs resource show | grep "{{ item }}"
      with_items:
        - compute-unfence-trigger
        - nova-compute-checkevacuate
        - nova-compute
        - nova-evacuate
        - neutron-openvswitch-agent-compute
        - libvirtd-compute
        - ceilometer-compute
      register: pre_existing_resources
      failed_when: pre_existing_resources.rc == 0
  become: yes
  delegate_to: "{{ groups.controller[0] }}"
@ -1,168 +0,0 @@
---
- block:
    - name: Remove STONITH level definitions for compute nodes
      shell: |
        compute_stonith_name=$(cibadmin --query --xpath "//primitive[@class='stonith']/instance_attributes/nvpair[@value='{{ item }}']" | sed 's/.*id="\(.*\)-instance_attributes-pcmk_host_list".*/\1/g')
        for stonith_level in $(cibadmin --query --xpath "//configuration/fencing-topology/fencing-level[@devices='$compute_stonith_name,fence-nova'][@index='1'][@target='{{ item }}']" --node-path)
        do
          pcs stonith level delete 1 {{ item }} $compute_stonith_name,fence-nova
        done
      with_items: "{{ groups['compute'] }}"

    - name: Remove fence-nova STONITH device
      shell: |
        for stonithid in $(pcs stonith show | awk '/fence_compute/ {print $1}')
        do
          pcs stonith delete fence-nova
        done

    - name: Remove resources associated to remote nodes
      shell: |
        for resourceid in $(pcs resource show | grep compute | grep 'Clone Set:' | awk '{print $3}')
        do
          pcs resource cleanup $resourceid
          pcs --force resource delete $resourceid
        done

    - name: Remove NovaEvacuate resource
      shell: |
        for resourceid in $(pcs resource show | grep NovaEvacuate | awk '/NovaEvacuate/ {print $1}')
        do
          pcs resource cleanup $resourceid
          pcs --force resource delete $resourceid
        done

    - name: Remove pacemaker remote resource
      shell: |
        for resourceid in $(pcs resource show | awk '/:remote/ {print $1}')
        do
          pcs resource cleanup $resourceid
          pcs --force resource delete $resourceid
        done

    - name: Remove constraints related to role controller
      shell: |
        for constraintid in $(pcs config show | grep -B 3 "osprole eq controller" | awk '/Constraint/ {print $2}')
        do
          pcs constraint delete $constraintid
        done

    - name: Unset controller pacemaker property on controllers
      shell: |
        for nodeid in $(pcs property | awk '/osprole/ { print $1 }' | cut -d: -f1)
        do
          pcs property unset --node $nodeid osprole
        done

    - name: Unset cluster recheck interval
      shell: |
        for propertyid in $(pcs property | awk '/cluster-recheck-interval/ { print $1 }' | cut -d: -f1)
        do
          pcs property unset cluster-recheck-interval
        done
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Cleanup failed resources (if any)
  shell: |
    for resource in $(pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq)
    do
      pcs resource cleanup $resource
    done
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Wait for failed resources to recover (if any)
  shell: pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq
  register: failed_resources
  until: failed_resources.stdout == ""
  retries: 10
  delay: 10
  become: yes
  delegate_to: "{{ groups.controller[0] }}"

- name: Enable openstack-nova-compute on compute
  service:
    name: openstack-nova-compute
    state: started
    enabled: yes
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release not in [ 'pike', 'rhos-12' ]

- name: Enable neutron-openvswitch-agent on compute
  service:
    name: neutron-openvswitch-agent
    state: started
    enabled: yes
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Enable openstack-ceilometer-compute on compute
  service:
    name: openstack-ceilometer-compute
    state: started
    enabled: yes
  become: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Enable libvirtd on compute
  become: yes
  service:
    name: libvirtd
    state: started
    enabled: yes
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"
  when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]

- name: Stop pacemaker remote service on compute nodes
  become: yes
  service:
    name: pacemaker_remote
    enabled: no
    state: stopped
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['compute'] }}"

- name: Disable iptables traffic for pacemaker_remote
  become: yes
  shell: |
    while [ $(iptables-save | grep -c "\-A INPUT \-p tcp \-m state \-\-state NEW \-m tcp \-\-dport 3121 \-j ACCEPT") -ne 0 ]
    do
      iptables -D INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT
    done
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Remove iptables pacemaker_remote permanent rule
  become: yes
  lineinfile:
    path: /etc/sysconfig/iptables
    line: "-A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT"
    state: absent
  delegate_to: "{{ item }}"
  with_items:
    - "{{ groups['controller'] }}"
    - "{{ groups['compute'] }}"

- name: Undo STONITH for compute nodes
  include_role:
    name: stonith-config
  vars:
    stonith_action: "uninstall"
    stonith_devices: "computes"
  when:
    - stonith_devices in ["all","computes"]
@ -1,90 +0,0 @@
stonith-config
==============

This role acts on an already deployed tripleo environment, setting up STONITH
(Shoot The Other Node In The Head) inside the Pacemaker configuration for all
the hosts that are part of the overcloud.

Requirements
------------

The TripleO environment must be prepared as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).

STONITH
-------

STONITH is the mechanism a Pacemaker cluster uses to be certain that a node is
powered off. STONITH is the only way to use a shared storage environment
without worrying about concurrent writes on disks. Inside TripleO environments
STONITH is also a prerequisite for features like Instance HA because, before
evacuating any machine, the system needs to be sure that the "move from"
machine is off.
STONITH configuration relies on the **instackenv.json** file, used by TripleO
also to configure Ironic and the whole provisioning process.
Basically this role enables STONITH on the Pacemaker cluster and takes all the
information from the mentioned file, creating a STONITH resource for each host
on the overcloud.
After running this playbook the cluster configuration will have these properties:

    $ sudo pcs property
    Cluster Properties:
     cluster-infrastructure: corosync
     cluster-name: tripleo_cluster
     ...
     ...
     **stonith-enabled: true**

And something like this, depending on how many nodes there are in the overcloud:

    sudo pcs stonith
     ipmilan-overcloud-compute-0   (stonith:fence_ipmilan):   Started overcloud-controller-1
     ipmilan-overcloud-controller-2   (stonith:fence_ipmilan):   Started overcloud-controller-0
     ipmilan-overcloud-controller-0   (stonith:fence_ipmilan):   Started overcloud-controller-0
     ipmilan-overcloud-controller-1   (stonith:fence_ipmilan):   Started overcloud-controller-1
     ipmilan-overcloud-compute-1   (stonith:fence_ipmilan):   Started overcloud-controller-1
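For reference, each of those devices is created by the role with a command of
this shape, generated from the instackenv.json data (a sketch: the IPMI login
and password below are placeholders, while the address matches the sample
instackenv.json shown later in this repository):

    pcs stonith create ipmilan-overcloud-compute-0 fence_ipmilan \
      pcmk_host_list="overcloud-compute-0" ipaddr="10.1.8.102" login="admin" \
      passwd="secret" lanplus="true" delay=20 op monitor interval=60s
    pcs constraint location ipmilan-overcloud-compute-0 avoids overcloud-compute-0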
Having all this in place is a requirement for a reliable HA solution and for
configuring special OpenStack features like [Instance HA](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/instance-ha).

**Note**: by default this role configures STONITH for the controller nodes,
but it is possible to configure all the nodes or to limit it to just the
computes, by setting the **stonith_devices** variable, which by default is set
to "controllers", but can also be "*all*" or "*computes*".

Limitations
-----------

The only kind of STONITH device supported **for the moment** is IPMI.

Examples on how to invoke the playbook via ansible
--------------------------------------------------

This command line will install the STONITH devices for the controller nodes:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml

If a user wants to install the STONITH devices for all the nodes:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml -e stonith_devices="all"

To uninstall the STONITH devices for the controllers:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml -e stonith_action="uninstall"

To uninstall the STONITH devices just for the computes:

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml -e stonith_action="uninstall" -e stonith_devices="computes"

The STONITH role also supports "none" as a valid value for *stonith_devices*,
which can be useful when configuring Instance HA in an environment already
configured with STONITH for both controllers and computes.

License
-------

GPL

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
@ -1,13 +0,0 @@
---

overcloud_working_dir: "/home/heat-admin"
working_dir: "/home/stack"
instack_env_file: "{{ working_dir }}/instackenv.json"

config_stonith_python_script: config-stonith-from-instackenv.py.j2

# Can be install, uninstall or none
stonith_action: "install"

# Can be all, controllers or computes
stonith_devices: controllers
@ -1,32 +0,0 @@
---
- name: Load the STONITH creation script on the undercloud
  template:
    src: "{{ config_stonith_python_script }}"
    dest: "{{ working_dir }}/config_stonith_from_instackenv.py"
    mode: 0755

- name: Generate STONITH script
  shell: |
    source {{ working_dir }}/stackrc
    {{ working_dir }}/config_stonith_from_instackenv.py {{ instack_env_file }} {{ stonith_action }} {{ stonith_devices }}
  register: stonith_script

- name: Delete the STONITH script on the overcloud (if it exists)
  file:
    path: "{{ overcloud_working_dir }}/config-stonith.sh"
    state: absent
  delegate_to: "{{ groups.controller[0] }}"

- name: Create the STONITH script on the overcloud
  lineinfile:
    destfile: "{{ overcloud_working_dir }}/config-stonith.sh"
    line: "{{ stonith_script.stdout }}"
    create: yes
    mode: 0755
  delegate_to: "{{ groups.controller[0] }}"

- name: Execute STONITH script
  become: true
  delegate_to: "{{ groups.controller[0] }}"
  shell: >
    {{ overcloud_working_dir }}/config-stonith.sh &> config_stonith.log
@ -1,94 +0,0 @@
#!/bin/python

import os
import json
import sys
from keystoneauth1.identity import v2
from keystoneauth1 import session
from pprint import pprint
from novaclient import client

# JSON file as first parameter
jdata = open(sys.argv[1])
data = json.load(jdata)

# install, uninstall, none
fence_config = sys.argv[2]
# controllers, computes, all or none
fence_devices = sys.argv[3]

# Define variables to connect to nova
os_username = os.environ['OS_USERNAME']
os_password = os.environ['OS_PASSWORD']
os_auth_url = os.environ['OS_AUTH_URL']
try:
    os_tenant_name = os.environ['OS_TENANT_NAME']
except:
    os_project_name = os.environ['OS_PROJECT_NAME']
    os_project_domain_name = os.environ['OS_PROJECT_DOMAIN_NAME']
    os_user_domain_name = os.environ['OS_USER_DOMAIN_NAME']
os_compute_api_version = os.environ['COMPUTE_API_VERSION']

# If fence_devices includes controllers then we act on the overall
# stonith-enabled property of the cluster
if (fence_devices in ['controllers', 'all']):
    # If we're uninstalling then we disable stonith
    if (fence_config == 'uninstall'):
        print('pcs property set stonith-enabled=false')
    # If we're installing then we enable it
    elif (fence_config == 'install'):
        print('pcs property set stonith-enabled=true')

# Connect to nova
try:
    # Liberty/OSP-8, Mitaka/OSP-9, Newton/OSP-10
    nt = client.Client(2,
                       os_username,
                       os_password,
                       os_tenant_name,
                       os_auth_url)
    nt.hypervisors.list()
except:
    try:
        # Ocata/OSP-11
        nt = client.Client(2,
                           username=os_username,
                           password=os_password,
                           project_name=os_tenant_name,
                           auth_url=os_auth_url)
        nt.hypervisors.list()
    except:
        # Pike/OSP-12
        nt = client.Client(2,
                           auth_url=os_auth_url,
                           username=os_username,
                           password=os_password,
                           project_name=os_project_name,
                           project_domain_name=os_project_domain_name,
                           user_domain_name=os_user_domain_name)
        nt.hypervisors.list()

# Parse instances
for instance in nt.servers.list():
    for node in data["nodes"]:
        if (node["mac"][0].lower() == instance.addresses['ctlplane'][0]['OS-EXT-IPS-MAC:mac_addr']
                and
                (
                    ('controller' in instance.name and fence_devices in ['controllers', 'all'])
                    or
                    ('compute' in instance.name and fence_devices in ['computes', 'all'])
                )):
            if (fence_config == 'uninstall'):
                print('pcs stonith delete ipmilan-{} || /bin/true'.format(instance.name))
            elif (fence_config == 'install'):
                try:
                    print('pcs stonith create ipmilan-{} fence_ipmilan pcmk_host_list="{}" ipaddr="{}" login="{}" passwd="{}" ipport={} lanplus="true" delay=20 op monitor interval=60s'
                          .format(instance.name, instance.name, node["pm_addr"], node["pm_user"], node["pm_password"], node["pm_port"]))
                except:
                    print('pcs stonith create ipmilan-{} fence_ipmilan pcmk_host_list="{}" ipaddr="{}" login="{}" passwd="{}" lanplus="true" delay=20 op monitor interval=60s'
                          .format(instance.name, instance.name, node["pm_addr"], node["pm_user"], node["pm_password"]))
                print('pcs constraint location ipmilan-{} avoids {}'
                      .format(instance.name, instance.name))

# Close the instackenv.json file
jdata.close()
@ -1,60 +0,0 @@
################
# Python imports
################
import os
import json
import sys
# The below will be enabled once OS_AUTH_URL=http://192.0.2.1:5000/v3
#from keystoneauth1.identity import v3
from keystoneauth1.identity import v2
from keystoneauth1 import session
from pprint import pprint
from novaclient import client

##########################################################
# Environment variables (need to source before launching):
##########################################################
export NOVA_VERSION=1.1
export OS_PASSWORD=$(sudo hiera admin_password)
# If v3:
export OS_AUTH_URL=http://192.0.2.1:5000/v3
# else
export OS_AUTH_URL=http://192.0.2.1:5000/v2.0
export OS_USERNAME=admin
export OS_TENANT_NAME=admin
export COMPUTE_API_VERSION=1.1
export OS_NO_CACHE=True

##############
# JSON format:
##############
{ "nodes": [
    {
      "mac": [
        "b8:ca:3a:66:e3:82"
      ],
      "_comment":"host12-rack03.scale.openstack.engineering.redhat.com",
      "cpu": "",
      "memory": "",
      "disk": "",
      "arch": "x86_64",
      "pm_type":"pxe_ipmitool",
      "pm_user":"qe-scale",
      "pm_password":"d0ckingSt4tion",
      "pm_addr":"10.1.8.102"
    },
    ...

#########################################################################
# To make the below work, os_auth_url must be http://192.0.2.1:5000/v3
#########################################################################
auth = v3.Password(auth_url=os_auth_url,
                   username=os_username,
                   password=os_password,
{% if release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ] %}
                   tenant_name=os_tenant_name,
{% else %}
                   project_name=os_tenant_name,
{% endif %}
                   user_domain_id='default',
                   project_domain_id='default')
@ -1,119 +0,0 @@
validate-ha
===========

This role acts on an already deployed tripleo environment, testing HA related
functionalities of the installation.

Requirements
------------

The TripleO environment must be prepared as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).

This role also tests instance spawning and, to make this work, the
definition of the floating network must be passed.
It can be contained in a config file, like this:

    private_network_cidr: "192.168.1.0/24"
    public_physical_network: "floating"
    floating_ip_cidr: "10.0.0.0/24"
    public_net_pool_start: "10.0.0.191"
    public_net_pool_end: "10.0.0.198"
    public_net_gateway: "10.0.0.254"

Or passed directly to the ansible command line (see examples below).

HA tests
--------

HA tests are meant to check the behavior of the environment in front of
circumstances that involve service interruption, loss of a node and in general
actions that stress the OpenStack installation with unexpected failures.
Each test is associated to a global variable that, if true, makes the test
happen.
Tests are grouped and performed by default depending on the OpenStack release.
This is the list of the supported variables, with test description and name of
the release on which the test is performed:

- **test_ha_failed_actions**: Look for failed actions (**all**)
- **test_ha_master_slave**: Stop master slave resources (galera and redis), all
  the resources should come down (**all**)
- **test_ha_keystone_constraint_removal**: Stop keystone resource (by stopping
  httpd), check no other resource is stopped (**mitaka**)
- Next generation cluster checks (**newton**, **ocata**, **master**):
  - **test_ha_ng_a**: Stop every systemd resource, stop Galera and Rabbitmq,
    start every systemd resource
  - **test_ha_ng_b**: Stop Galera and Rabbitmq, stop every systemd resource,
    start every systemd resource
  - **test_ha_ng_c**: Stop Galera and Rabbitmq, wait 20 minutes to see if
    something fails

It is also possible to omit (or add) tests not made for the specific release,
using the above vars, by passing to the command line variables like this:

    ...
    -e test_ha_failed_actions=false \
    -e test_ha_ng_a=true \
    ...

In this case we will not check for failed actions, a test that otherwise would
have been done in mitaka, and we will force the execution of the "ng_a" test
described earlier, which is originally executed just in newton versions or
above.

All tests are performed using the tool [ha-test-suite](https://github.com/openstack/tripleo-ha-utils/tree/master/tools/ha-test-suite).
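Under the hood, each enabled test runs that suite on the first controller with
an invocation of this shape (a sketch: the working directory comes from this
role's defaults, and the test and recovery names change per test):

    /home/heat-admin/ha-test-suite/ha-test-suite.sh \
      -t /home/heat-admin/ha-test-suite/test/test_master-slave \
      -r /home/heat-admin/ha-test-suite/recovery/recovery_master-slave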
Applying latency
----------------

It is possible to add an arbitrary amount of milliseconds of latency on each
overcloud node to check whether the environment can pass the HA validation in
any case.
Adding the latency is a matter of passing two variables:

* **latency_ms**: the number of additional milliseconds to be added to the
  interface;
* **latency_eth_interface**: the physical interface to which the user wants to
  apply the latency; this must be present on all the overcloud nodes;

So a typical command line in which a user wants to add 20ms of latency on the
ethernet device eth0 will contain something like this:

    ...
    -e latency_ms=20 \
    -e latency_eth_interface=eth0 \
    ...

The latency will be applied before the tests execution and removed right after.
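On each overcloud node this translates into a netem queueing discipline on the
chosen interface, roughly equivalent to running (shown here with the example
values above; the role performs the add before the tests and the del afterwards):

    # add 20ms of delay on eth0, then remove it once the tests are done
    /usr/sbin/tc qdisc add dev eth0 root netem delay 20ms
    /usr/sbin/tc qdisc del dev eth0 root netem delay 20ms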
Examples on how to invoke the playbook via ansible
--------------------------------------------------

Here's a way to invoke the tests from an *undercloud* machine prepared as
described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).

    ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-validate-ha.yml \
      -e release=ocata \
      -e local_working_dir=/home/stack \
      -e private_net_cidr="192.168.1.0/24" \
      -e public_physical_network="floating" \
      -e floating_ip_cidr="10.0.0.0/24" \
      -e public_net_pool_start="10.0.0.191" \
      -e public_net_pool_end="10.0.0.198" \
      -e public_net_gateway="10.0.0.254"

Note that the variables above can be declared inside a config.yml file that can
be passed to the ansible-playbook command like this:

    ansible-playbook -vvvv /home/stack/tripleo-ha-utils/playbooks/overcloud-validate-ha.yml -e @/home/stack/config.yml

The result will be the same.

License
-------

GPL

Author Information
------------------

Raoul Scarazzini <rasca@redhat.com>
@ -1,25 +0,0 @@
---

working_dir: "/home/stack"
validate_ha_logs_dir: "{{ working_dir }}/validate_ha_logs"
overcloud_working_dir: "/home/heat-admin"

validate_ha_heat_environment: "validate-ha-heat-environment.yaml.j2"
validate_ha_heat_template: "validate-ha-heat-template.yaml.j2"
validate_ha_heat_instance_image_format: "qcow2"
validate_ha_heat_instance_image_location: "http://download.cirros-cloud.net/0.4.0/cirros-0.4.0-x86_64-disk.img"
validate_ha_heat_instance_volume_gb: 1

private_net_name: "private-network"
private_subnet_name: "private-subnet"
public_net_name: "public-network"
public_subnet_name: "public-subnet"
private_net_cidr: "10.1.1.0/24"
public_physical_network: "datacentre"
public_network_type: "flat"
floating_ip_cidr: "{{ undercloud_network_cidr|default('192.0.2.0/24') }}"
floating_ip_start: "{{ floating_ip_cidr|nthhost(100) }}"
floating_ip_end: "{{ floating_ip_cidr|nthhost(120) }}"
external_network_gateway: "{{ floating_ip_cidr|nthhost(1) }}"

latency_ms: 0
@ -1,26 +0,0 @@
---

# Execute ha-test-suite test
- block:
    - name: "Testing {{ ha_test_name }} with recovery {{ ha_recovery_name }}"
      delegate_to: "{{ groups.controller[0] }}"
      shell: >
        {{ overcloud_working_dir }}/ha-test-suite/ha-test-suite.sh \
        -t {{ overcloud_working_dir }}/ha-test-suite/test/{{ ha_test_name }} \
        -r {{ overcloud_working_dir }}/ha-test-suite/recovery/{{ ha_recovery_name }}
      register: ha_test_cmd

    - include_tasks: heat-validation-create.yml
    - include_tasks: heat-validation-check.yml
    - include_tasks: heat-validation-delete.yml

  vars:
    stack_name: "stack_{{ ha_test_name }}"

  always:
    - name: Copy stdout for test {{ ha_test_name }} to undercloud
      copy: content="{{ ha_test_cmd.stdout }}" dest="{{ validate_ha_logs_dir }}/{{ ha_test_name }}_stdout.log"
  rescue:
    - name: Copy stderr for test {{ ha_test_name }} to undercloud
      copy: content="{{ ha_test_cmd.stderr }}" dest="{{ validate_ha_logs_dir }}/{{ ha_test_name }}_stderr.log"
    - fail: msg="{{ ha_test_cmd.stderr }}"
@ -1,7 +0,0 @@
---

- name: Wait up to five minutes for the instance to be reachable
  wait_for:
    host: "{{ vars[ stack_name + '_instance_ip'].stdout }}"
    port: 22
    timeout: 300
@ -1,30 +0,0 @@
---

- name: Load image in Glance to be used by Heat
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack image create \
      --disk-format {{ validate_ha_heat_instance_image_format }} \
      --file {{ working_dir }}/{{ heat_image_name }} \
      --format value \
      --column "id" \
      validate_ha_image > \
      {{ validate_ha_logs_dir }}/{{ ha_test_name }}_image-create.log 2>&1

- name: Execute environment validation via Heat
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack stack create \
      --environment validate-ha-heat-environment.yaml \
      --template validate-ha-heat-template.yaml \
      --wait \
      {{ stack_name }} > \
      {{ validate_ha_logs_dir }}/{{ ha_test_name }}_heat-create.log 2>&1

- name: Get instance IP
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack stack show -c outputs -f json {{ stack_name }} | \
      jq --raw-output '.outputs[] | select( .output_key == "server_public_ip") | .output_value' 2>&1 | \
      tee {{ validate_ha_logs_dir }}/{{ ha_test_name }}_heat-instance-ip.log
  register: "{{ stack_name }}_instance_ip"
@ -1,16 +0,0 @@
---

- name: Clean the created stack
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack stack delete \
      --yes \
      --wait \
      {{ stack_name }} > \
      {{ validate_ha_logs_dir }}/{{ ha_test_name }}_heat-delete.log 2>&1

- name: Clean image in Glance
  shell: |
    source {{ working_dir }}/overcloudrc
    openstack image delete validate_ha_image > \
      {{ validate_ha_logs_dir }}/{{ ha_test_name }}_image-delete.log 2>&1
@ -1,147 +0,0 @@
---

- name: Include test sequence depending on release
  include_vars:
    dir: "vars"
    files_matching: "test_list_{{ release }}.yml"

- name: Create directory on the undercloud to store test results
  file: path={{ validate_ha_logs_dir }} state=directory

- name: Copy ha-test-suite on controllers
  shell: >
    {% if (undercloud_user == 'zuul') and (zuul.projects is defined) -%}
    /usr/bin/rsync --delay-updates -F --compress --archive -e 'ssh -F {{ local_working_dir }}/ssh.config.ansible' /home/{{ undercloud_user }}/src/opendev.org/openstack/tripleo-ha-utils/tools/ha-test-suite {{ hostvars[item]['ansible_hostname'] }}:
    {%- else -%}
    /usr/bin/rsync --delay-updates -F --compress --archive -e 'ssh -F {{ local_working_dir }}/ssh.config.ansible' {{ local_working_dir }}/tripleo-ha-utils/tools/ha-test-suite {{ hostvars[item]['ansible_hostname'] }}:
    {%- endif -%}
  delegate_to: "localhost"
  with_items:
    - "{{ groups['controller'] }}"

- name: Apply latency (if defined)
  vars:
    latency_action: "add"
  include_tasks: manage-latency.yml
  when: latency_ms|int > 0

- name: Create the environment template on undercloud
  template:
    src: "{{ validate_ha_heat_environment }}"
    dest: "{{ working_dir }}/validate-ha-heat-environment.yaml"
    mode: 0600

- name: Create the test template on undercloud
  template:
    src: "{{ validate_ha_heat_template }}"
    dest: "{{ working_dir }}/validate-ha-heat-template.yaml"
    mode: 0600

- name: Download and uncompress (if necessary) image file for Heat
  shell: |
    image_url="{{ validate_ha_heat_instance_image_location }}"
    image_file=$(basename $image_url)

    curl -s -o $image_file $image_url

    case "$image_file" in
      *.tar)
        image_name=$(tar xvf $image_file)
        ;;
      *.tar.gz|*.tgz)
        image_name=$(tar xzvf $image_file)
        ;;
      *.tar.bz2|*.tbz2)
        image_name=$(tar xjvf $image_file)
        ;;
      *.tar.xz|*.txz)
        image_name=$(tar xJf $image_file)
        ;;
      *.bz2)
        bunzip2 --force --quiet $image_file
        image_name=${image_file%.*};
        ;;
      *.gz)
        gunzip --force --quiet $image_file
        image_name=${image_file%.*};
        ;;
      *.xz)
        xz --force --quiet --decompress $image_file
        image_name=${image_file%.*};
        ;;
      *) image_name=$image_file
        ;;
    esac

    echo $image_name
  register: image_name

- set_fact:
    heat_image_name: "{{ image_name.stdout }}"

# Test: failed actions
- name: HA test - Failed actions
  vars:
    ha_test_name: "test_ha_failed_actions"
    ha_recovery_name: ""
  include_tasks: ha-test-suite.yml
  when: test_ha_failed_actions|bool

# Test: Master/Slave
- name: HA test - Master/Slave core resource stop and start
  vars:
    ha_test_name: "test_master-slave"
    ha_recovery_name: "recovery_master-slave"
  include_tasks: ha-test-suite.yml
  when: test_ha_master_slave|bool

# Test: Keystone stop
- name: HA test - Keystone stop
  vars:
    ha_test_name: "test_keystone-stop"
    ha_recovery_name: "recovery_keystone-stop"
  include_tasks: ha-test-suite.yml
  when: test_ha_keystone_stop|bool

# Test: Keystone removal
- name: HA test - Keystone constraint removal
  vars:
    ha_test_name: "test_keystone-constraint-removal"
    ha_recovery_name: "recovery_keystone-constraint-removal"
  include_tasks: ha-test-suite.yml
  when: test_ha_keystone_constraint_removal|bool

# Test: NG A
- name: HA test - Pacemaker light test A
  vars:
    ha_test_name: "test_pacemaker-light-a"
    ha_recovery_name: "recovery_pacemaker-light"
  include_tasks: ha-test-suite.yml
  when: test_ha_ng_a|bool

# Test: NG B
- name: HA test - Pacemaker light test B
  vars:
    ha_test_name: "test_pacemaker-light-b"
    ha_recovery_name: "recovery_pacemaker-light"
  include_tasks: ha-test-suite.yml
  when: test_ha_ng_b|bool

# Test: NG C
- name: HA test - Pacemaker light test C
  vars:
    ha_test_name: "test_pacemaker-light-c"
    ha_recovery_name: "recovery_pacemaker-light"
  include_tasks: ha-test-suite.yml
  when: test_ha_ng_c|bool

- name: Remove image file
  file:
    path: "{{ working_dir }}/{{ heat_image_name }}"
    state: absent

- name: Remove latency (if defined)
  vars:
    latency_action: "del"
  include_tasks: manage-latency.yml
  when: latency_ms|int > 0
@ -1,12 +0,0 @@
# Manage latency on all nodes
- name: "Manage latency on all nodes"
  shell: |
    /usr/sbin/tc qdisc {{ latency_action }} dev {{ latency_eth_interface }} root netem delay {{ latency_ms }}ms
  delegate_to: "{{ item }}"
  become: true
  with_items:
    - "{{ groups['overcloud'] }}"
  when:
    - latency_action in [ "add", "del" ]
    - latency_eth_interface is defined
    - latency_ms|int > 0
@ -1,13 +0,0 @@
# Heat template parameters
parameters:
  private_net_name: "{{ private_net_name }}"
  private_subnet_name: "{{ private_subnet_name }}"
  private_net_cidr: "{{ private_net_cidr }}"
  public_net_name: "{{ public_net_name }}"
  public_subnet_name: "{{ public_subnet_name }}"
  public_physical_network: "{{ public_physical_network }}"
  public_network_type: "{{ public_network_type }}"
  public_net_cidr: "{{ floating_ip_cidr }}"
  public_net_gateway: "{{ public_net_gateway }}"
  public_net_pool_start: "{{ public_net_pool_start }}"
  public_net_pool_end: "{{ public_net_pool_end }}"
@ -1,192 +0,0 @@
heat_template_version: 2016-10-14
description: spawning a server

parameters:
  private_net_name:
    type: string
    default: "private"
    description: Name of private network into which servers get deployed
  private_subnet_name:
    type: string
    default: private_subnet
    description: Name of private subnet into which servers get deployed
  private_net_cidr:
    type: string
    description: Private network address (CIDR notation)
  public_physical_network:
    type: string
    default: "datacentre"
    description: Physical network name
  public_network_type:
    type: string
    default: "flat"
    description: Type of the physical network (flat or vlan)
    constraints:
      - allowed_values:
          - vlan
          - flat
  public_net_name:
    type: string
    default: public
    description: Name of public network into which servers get deployed
  public_subnet_name:
    type: string
    default: public_subnet
    description: Name of public subnet into which servers get deployed
  public_net_cidr:
    type: string
    description: Public network address (CIDR notation)
  public_net_gateway:
    type: string
    description: Public network gateway address
  public_net_pool_start:
    type: string
    description: Start of public network IP address allocation pool
  public_net_pool_end:
    type: string
    description: End of public network IP address allocation pool

resources:

  ###########
  # Network #
  ###########

  private_net:
    type: OS::Neutron::Net
    properties:
      name: { get_param: private_net_name }

  private_subnet:
    type: OS::Neutron::Subnet
    properties:
      name: { get_param: private_subnet_name }
      network_id: { get_resource: private_net }
      cidr: { get_param: private_net_cidr }

  public_net:
    type: OS::Neutron::ProviderNet
    properties:
      name: { get_param: public_net_name }
      router_external: true
      physical_network: { get_param: public_physical_network }
      network_type: { get_param: public_network_type }

  public_subnet:
    type: OS::Neutron::Subnet
    properties:
      name: { get_param: public_subnet_name }
      network_id: { get_resource: public_net }
      cidr: { get_param: public_net_cidr }
      gateway_ip: { get_param: public_net_gateway }
      allocation_pools:
        - start: { get_param: public_net_pool_start }
          end: { get_param: public_net_pool_end }

  router:
    type: OS::Neutron::Router
    properties:
      external_gateway_info:
        network: { get_resource: public_net }

  router_interface:
    type: OS::Neutron::RouterInterface
    properties:
      router_id: { get_resource: router }
      subnet_id: { get_resource: private_subnet }

  public_net_port:
    type: OS::Neutron::Port
    properties:
      network: { get_resource: private_net }
      fixed_ips:
        - subnet: { get_resource: private_subnet }
      security_groups: [{ get_resource: public_security_group }]

  public_floating_ip:
    type: OS::Neutron::FloatingIP
    properties:
      floating_network: { get_resource: public_net }
      port_id: { get_resource: public_net_port }

  public_security_group:
    type: OS::Neutron::SecurityGroup
    properties:
      description: Add security group rules for the multi-tier architecture
      name: pingandssh
      rules:
        - remote_ip_prefix: 0.0.0.0/0
          protocol: tcp
          port_range_min: 22
          port_range_max: 22
        - remote_ip_prefix: 0.0.0.0/0
          protocol: tcp
          port_range_min: 80
          port_range_max: 80
        - remote_ip_prefix: 0.0.0.0/0
          protocol: icmp

  ###########
  # Volume  #
  ###########

  instance_volume:
    type: OS::Cinder::Volume
    properties:
      name: "instance_volume"
      size: {{ validate_ha_heat_instance_volume_gb }}
      image: "validate_ha_image"

  ###########
  # Keypair #
  ###########

  instance_keypair:
    type: OS::Nova::KeyPair
    properties:
      name: "instance_keypair"
      save_private_key: "true"

  ###########
  # Flavor  #
  ###########

  instance_flavor:
    type: OS::Nova::Flavor
    properties:
      name: "instance_flavor"
      ephemeral: 0
      ram: 2048
      disk: 10
      vcpus: 2

  ###########
  # Server  #
  ###########

  instance:
    type: OS::Nova::Server
    properties:
      name: "validate_ha_instance"
      flavor: { get_resource: instance_flavor }
      key_name: { get_resource: instance_keypair }
      networks:
        - port: { get_resource: public_net_port }
      block_device_mapping: [{ device_name: "vda", volume_id : { get_resource : instance_volume }, delete_on_termination : "true" }]

outputs:
  server_private_ip:
    description: IP address of first web server in private network
    value: { get_attr: [ instance, first_address ] }

  server_public_ip:
    description: Floating IP address of the web server
    value: { get_attr: [ public_floating_ip, floating_ip_address ] }

  public_key:
    description: The public key of the keypair.
    value: { get_attr: [instance_keypair, public_key] }

  private_key:
    description: The private key of the keypair.
    value: { get_attr: [instance_keypair, private_key] }
@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: true
test_ha_keystone_constraint_removal: false
test_ha_ng_a: false
test_ha_ng_b: false
test_ha_ng_c: false

@ -1 +0,0 @@
test_list_rocky.yml

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: true
test_ha_ng_a: false
test_ha_ng_b: false
test_ha_ng_c: false

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true

@ -1 +0,0 @@
test_list_newton.yml

@ -1 +0,0 @@
test_list_ocata.yml

@ -1 +0,0 @@
test_list_pike.yml

@ -1 +0,0 @@
test_list_queens.yml

@ -1 +0,0 @@
test_list_liberty.yml

@ -1 +0,0 @@
test_list_mitaka.yml

@ -1,7 +0,0 @@
test_ha_failed_actions: true
test_ha_master_slave: true
test_ha_keystone_stop: false
test_ha_keystone_constraint_removal: false
test_ha_ng_a: true
test_ha_ng_b: true
test_ha_ng_c: true
38
setup.cfg
@ -1,38 +0,0 @@
[metadata]
name = tripleo-ha-utils
summary = Give a set of tools to test TripleO HA capabilities
description_file =
    README.md
long_description_content_type = text/markdown
author = Raoul Scarazzini
author_email = rasca@redhat.com
home_page = https://github.com/openstack/tripleo-ha-utils/
classifier =
    License :: OSI Approved :: Apache Software License
    Development Status :: 4 - Beta
    Intended Audience :: Developers
    Intended Audience :: System Administrators
    Intended Audience :: Information Technology
    Topic :: Utilities

[build_sphinx]
all_files = 1
build-dir = doc/build
source-dir = doc/source

[global]
setup-hooks =
    pbr.hooks.setup_hook

[files]
data_files =
    config = config/*
    playbooks = playbooks/*
    usr/local/share/ansible/roles = roles/*

[wheel]
universal = 1

[pbr]
skip_authors = True
skip_changelog = True
20
setup.py
@ -1,20 +0,0 @@
# Copyright Red Hat, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import setuptools

setuptools.setup(
    setup_requires=['pbr'],
    py_modules=[],
    pbr=True)
@ -1,145 +0,0 @@
# OpenStack TripleO HA Test Suite

This project is a modular and customizable test suite to be applied in an
Overcloud OpenStack environment deployed via TripleO upstream or Red Hat
OpenStack Director (OSPd).

## Usage

The script needs at least a test file (-t) which must contain the sequence of
the operations to be done. A recovery file (-r), with the sequence of the
operations needed to recover the environment, can also be passed. So a typical
invocation will be something like this:

```console
[heat-admin@overcloud-controller-0 overcloud-ha-test-suite]$ ./overcloud-ha-test-suite.sh -t test/test_keystone-constraint-removal -r recovery/recovery_keystone-constraint-removal
Fri May 20 15:27:19 UTC 2016 - Populating overcloud elements...OK
Fri May 20 15:27:22 UTC 2016 - Test: Stop keystone resource (by stopping httpd), check no other resource is stopped
Fri May 20 15:27:22 UTC 2016 * Step 1: disable keystone resource via httpd stop
Fri May 20 15:27:22 UTC 2016 - Performing action disable on resource httpd ..OK
Fri May 20 15:27:26 UTC 2016 - List of cluster's failed actions:
Cluster is OK.
Fri May 20 15:27:29 UTC 2016 * Step 2: check resource status
Fri May 20 15:27:29 UTC 2016 - Cycling for 10 minutes polling every minute the status of the resources
Fri May 20 15:28:29 UTC 2016 - Polling...
delay -> OK
galera -> OK
...
...
openstack-sahara-engine -> OK
rabbitmq -> OK
redis -> OK
Fri May 20 15:41:00 UTC 2016 - List of cluster's failed actions:
Cluster is OK.
Fri May 20 15:41:03 UTC 2016 - Waiting 10 seconds to recover environment
Fri May 20 15:41:13 UTC 2016 - Recovery: Enable keystone via httpd and check for failed actions
Fri May 20 15:41:13 UTC 2016 * Step 1: enable keystone resource via httpd
Fri May 20 15:41:13 UTC 2016 - Performing action enable on resource httpd-clone OK
Fri May 20 15:41:15 UTC 2016 - List of cluster's failed actions:
Cluster is OK.
Fri May 20 15:41:17 UTC 2016 - End
```

The exit status depends on the result of the operations: if a disable
operation fails, if failed actions appear, or if recovery does not end with
success, the exit status will not be 0.
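Since the exit status reflects the outcome, the suite can be wrapped directly
in shell logic, for instance (a minimal sketch reusing the invocation above):

```bash
./overcloud-ha-test-suite.sh \
  -t test/test_keystone-constraint-removal \
  -r recovery/recovery_keystone-constraint-removal \
  || { echo "HA test failed"; exit 1; }
```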
## Test and recoveries

Tests and recoveries are bash script portions that are included inside the
main script. Some functions and variables are available to help with recurring
operations. These functions are listed here:

- **check_failed_actions**: will print failed actions and return an error in
  case some of them are present;
- **check_resources_process_status**: will check the process status of the
  resources on the system (not in the cluster), i.e. will check if there is a
  process for the mysql daemon;
- **wait_resource_status**: will wait until a default timeout
  ($RESOURCE_CHANGE_STATUS_TIMEOUT) for a resource to reach a status;
- **check_resource_status**: will check a resource status, i.e. if you want to
  check whether the httpd resource is started;
- **wait_cluster_start**: will wait until a timeout
  ($RESOURCE_CHANGE_STATUS_TIMEOUT) for the cluster to be started, specifically
  will wait for all resources to be in state "Started";
- **play_on_resources**: will set the status of a resource;

The variables are:

- **OVERCLOUD_CORE_RESOURCES**: which are galera and rabbitmq
- **OVERCLOUD_RESOURCES**: which are *all* the resources
- **OVERCLOUD_SYSTEMD_RESOURCES**: which are the resources managed via systemd
  by pacemaker;

These can be used in combination to write test and recovery files.
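For instance, a recovery file could be sketched with nothing more than these
helpers (a hypothetical fragment, not one shipped with the suite):

```bash
# Recovery: Re-enable the core resources and wait for the cluster to settle
echo "$(date) * Step 1: enable core resources"
play_on_resources "enable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) * Step 2: wait for the cluster to start and check for failed actions"
wait_cluster_start
check_failed_actions
```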
### Test file contents

A typical test file, say test/test_keystone-constraint-removal, will contain
something like this:

```bash
# Test: Stop keystone resource (by stopping httpd), check no other resource is stopped

echo "$(date) * Step 1: disable keystone resource via httpd stop"
play_on_resources "disable" "httpd"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: check resource status"
# Define resource list without httpd
OVERCLOUD_RESOURCES_NO_KEYSTONE="$(echo $OVERCLOUD_RESOURCES | sed 's/httpd/ /g')"
# Define number of minutes to look for status
MINUTES=10
# Cycling for $MINUTES minutes polling every minute the status of the resources
echo "$(date) - Cycling for 10 minutes polling every minute the status of the resources"
i=0
while [ $i -lt $MINUTES ]
do
  # Wait a minute
  sleep 60
  echo "$(date) - Polling..."
  for resource in $OVERCLOUD_RESOURCES_NO_KEYSTONE
  do
    echo -n "$resource -> "
    check_resource_status "$resource" "Started"
    [ $? -eq 0 ] && echo "OK" || (FAILURES=1; echo "Error!")
  done
  let "i++"
done

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
```

The code is commented and should be self explaining, but in short:

- the first commented line, after "# Test: ", is read as the test title;
- using play_on_resources it disables the httpd resource;
- it checks for failed actions;
- it defines a variable named OVERCLOUD_RESOURCES_NO_KEYSTONE containing all
  the resources but httpd;
- it cycles for 10 minutes, polling every minute the status of all the
  resources;

If any of these steps fails for some reason, then the overall test will be
considered failed and the exit status will not be 0.

### Recovery file contents

A typical recovery file, say recovery/recovery_keystone-constraint-removal,
will contain something like this:

```bash
# Recovery: Enable keystone via httpd and check for failed actions

echo "$(date) * Step 1: enable keystone resource via httpd"
play_on_resources "enable" "httpd-clone"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
```

Again:

- the first commented line, after "# Recovery: ", is read as the recovery title;
- using play_on_resources it enables the httpd resource;
- it checks for failed actions;
@ -1,80 +0,0 @@
#!/bin/bash

# Raoul Scarazzini (rasca@redhat.com)
# This script provides a testing suite for TripleO HA environments

# Define main workdir
WORKDIR=$(dirname $0)

# Source function library.
. $WORKDIR/include/functions

# Fixed parameters
# How long to wait, in seconds, for a resource to change status (i.e. from started to stopped)
RESOURCE_CHANGE_STATUS_TIMEOUT=600
# How long to wait, in seconds, before starting recovery
DEFAULT_RECOVERY_WAIT_TIME=10

# Command line parameters
if [ $# -gt 0 ]
then
 while :; do
  case $1 in
   -h|-\?|--help)
    usage
    exit
    ;;
   -t|--test)
    test_sequence="$2"
    shift
    ;;
   -r|--recover)
    recovery_sequence="$2"
    shift
    ;;
   --)
    shift
    break
    ;;
   -?*)
    usage
    exit 1
    ;;
   *)
    break
  esac
  shift
 done
else
 usage
 exit 1
fi

# Populating overcloud elements
echo -n "$(date) - Populating overcloud elements..."
OVERCLOUD_CORE_RESOURCES="galera redis rabbitmq"
OVERCLOUD_RESOURCES=$(sudo pcs resource show | egrep '^ (C|[a-Z])' | sed 's/.* \[\(.*\)\]/\1/g' | sed 's/ \(.*\)(.*):.*/\1/g' | sort)
OVERCLOUD_SYSTEMD_RESOURCES=$(sudo pcs config show | egrep "Resource:.*systemd" | grep -v "haproxy" | awk '{print $2}')
echo "OK"

if [ -f "$test_sequence" ]
then
 echo "$(date) - Test: $(grep '^#.*Test:' $test_sequence | sed 's/^#.*Test: //')"
 . $test_sequence
else
 echo "No test file passed or unable to read test file."
fi

if [ -f "$recovery_sequence" ]
then
 echo "$(date) - Waiting $DEFAULT_RECOVERY_WAIT_TIME seconds to recover environment"
 sleep $DEFAULT_RECOVERY_WAIT_TIME

 echo "$(date) - Recovery: $(grep '^#.*Recovery:' $recovery_sequence | sed 's/^#.*Recovery: //')"
 . $recovery_sequence
else
 echo "No recovery file passed or unable to read recovery file."
fi

echo "$(date) - End"
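
As an aside on the resource discovery above: OVERCLOUD_RESOURCES is built by
stripping the pcs output down to bare resource names. A minimal, self-contained
sketch of what the first sed expression keeps, run against a hypothetical line
in the "Clone Set: name-clone [name]" format the pipeline is written for:

```bash
# Hypothetical sample line mimicking "pcs resource show" output of that era.
sample=" Clone Set: rabbitmq-clone [rabbitmq]"
echo "$sample" | sed 's/.* \[\(.*\)\]/\1/g'
# -> rabbitmq
```
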
@ -1,151 +0,0 @@
# Raoul Scarazzini (rasca@redhat.com)
# This file provides the functions used by the testing suite for
# TripleO/Director OpenStack HA (i.e. Pacemaker based) environments.

function usage {
 echo "Usage: $0 -t <testfile> [-r <recoverfile>] [-u]
  -t, --test <testfile>        Specify which file contains the test to run
  -r, --recover <recoverfile>  Specify which file (if any) should be used for recovery
  -u, --undercloud             Test will be performed on undercloud
 "
}

function check_failed_actions {
 resource=$1

 sudo pcs status | grep "Failed Actions:" &> /dev/null
 if [ $? -eq 0 ]
 then
  if [ "x$resource" == "x" ]
  then
   echo "Cluster has failed actions:"
   sudo pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort | uniq
   exit 1
  else
   errors=$(sudo pcs status | sed -n -e '/Failed Actions:/,/^$/p' | grep -A1 $resource)
   if [ $? -eq 0 ]
   then
    echo "Resource $resource has failed actions:"
    echo $errors
    exit 1
   else
    echo "No failed actions for $resource."
    return 0
   fi
  fi
 else
  [ "x$resource" == "x" ] && echo "Cluster is OK." || echo "No failed actions for $resource."
  return 0
 fi
}

function check_resources_process_status {
 for resource in $OVERCLOUD_RESOURCES
 do
  echo -n "$resource -> "

  case $resource in
   ip-*) #ip_addr=$(pcs resource show $resource | grep Attributes | sed 's/.*ip=\(.*\) cidr.*/\1/g')
    ip_addr=$(echo $resource | sed 's/ip-//g')
    sudo ip a s | grep $ip_addr &> /dev/null
    ;;
   rabbitmq) sudo /usr/sbin/rabbitmqctl cluster_status &> /dev/null
    ;;
   redis) pidof /usr/bin/redis-server &> /dev/null
    ;;
   galera) pidof /usr/libexec/mysqld &> /dev/null
    ;;
   *cleanup*|delay) echo -n "no need to check if it's "
    ;;
   *) systemctl is-active $resource &> /dev/null
    ;;
  esac

  [ $? -eq 0 ] && echo "active" || echo "inactive"

 done
}

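The ip-* branch above relies on Pacemaker VIP resources being named after the
address they carry. A minimal sketch of that naming assumption (the address is
illustrative, from the documentation range):

```bash
# Hypothetical VIP resource name; stripping the "ip-" prefix leaves the address
# that should show up in "ip a s" output on the node holding the VIP.
resource="ip-192.0.2.10"
ip_addr=$(echo $resource | sed 's/ip-//g')
sudo ip a s | grep "$ip_addr" &> /dev/null && echo "active" || echo "inactive"
```
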
function wait_resource_status {
 resource=$1
 status=$2
 i=1

 while [ $i -lt $RESOURCE_CHANGE_STATUS_TIMEOUT ]
 do
  output=$(sudo pcs status resources | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [a-Z]/p" | head -n -1 | tail -n +2 | egrep -v "$status\:")
  if [ "x$output" == "x" ]
  then
   return 0
   break
  else
   echo -n "."
   sleep 1
   let "i++"
  fi
 done
 check_failed_actions
 exit 1
}

function check_resource_status {
 resource=$1
 status=$2

 output=$(sudo pcs status resources | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [a-Z]/p" | head -n -1 | tail -n +2 | egrep -v "$status\:")
 # Since we are checking a specific status, if we have output from above it
 # means that for some reason the resource is not in the state we are expecting
 [ "x$output" == "x" ] && return 0 || (check_failed_actions; exit 1)
}

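To make the parsing above concrete: for a clone resource that is fully started,
the block isolated by the sed/head/tail stages contains only "Started:" lines,
so filtering them out leaves nothing and the function returns 0. A minimal
sketch with a hypothetical, already-isolated block (node names are made up):

```bash
# Hypothetical block for the httpd clone, as it would look once isolated from
# "pcs status resources" output.
block="     Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]"
leftover=$(echo "$block" | egrep -v "Started:")
[ "x$leftover" == "x" ] && echo "httpd -> OK" || echo "httpd -> not fully Started"
```
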
function wait_cluster_start {
 i=1
 while true; do
  [ $i -eq $RESOURCE_CHANGE_STATUS_TIMEOUT ] && break

  # Check for failed actions
  sudo pcs status | egrep "Failed" &> /dev/null
  [ $? -eq 0 ] && break

  # If we have stopped resources let's wait
  sudo pcs status | egrep "Stopped" &> /dev/null
  if [ $? -eq 0 ]
  then
   echo -n "."
  else
   echo "All cluster resources are started."
   return 0
   break
  fi
  sleep 1
  let "i++"
 done

 # If we are here then we have problems: we hit the timeout or we still have
 # stopped resources
 echo "Problems found. There are stopped or failed resources!"
 check_failed_actions
 exit 1
}

function play_on_resources {
 action=$1
 resources=$2

 for resource in $resources
 do
  echo -n "$(date) - Performing action $action on resource $resource "
  # Do the action on the resource
  sudo pcs resource $action $resource --wait=$RESOURCE_CHANGE_STATUS_TIMEOUT
  if [ $? -ne 0 ]
  then
   echo "FAILURE!"
   check_failed_actions $resource
   exit 1
  else
   echo "OK"
  fi
 done
 return 0
}

@ -1,13 +0,0 @@
# Recovery: Enable all systemd and core resources, cleanup failed actions

echo "$(date) * Step 1: enable all the cluster resources"
play_on_resources "enable" "$OVERCLOUD_RESOURCES"

echo "$(date) * Step 2: Cleaning up failed resources"
sudo pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_TIMEOUT|not running' | awk '{print $2}' | cut -f1 -d_ | sort | uniq | while read RES; do echo "Cleaning $RES"; sudo pcs resource cleanup $RES; done

echo "$(date) * Step 3: Waiting all resources to start"
wait_cluster_start

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
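
The cleanup one-liner in Step 2 above is dense; as a readability aid, here is a
sketch of the same pipeline broken out stage by stage (intended to behave
identically, but treat it as illustrative rather than a drop-in replacement):

```bash
# 1. keep only the "Failed Actions:" block of pcs status
# 2. keep lines reporting OCF_TIMEOUT or "not running"
# 3. field 2 looks like "<resource>_<operation>_<interval>", so cutting at "_"
#    keeps just the resource name
# 4. de-duplicate and run a cleanup for each resource found
sudo pcs status |
  sed -n -e '/Failed Actions:/,/^$/p' |
  egrep 'OCF_TIMEOUT|not running' |
  awk '{print $2}' |
  cut -f1 -d_ |
  sort | uniq |
  while read RES; do
    echo "Cleaning $RES"
    sudo pcs resource cleanup "$RES"
  done
```
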
@ -1,7 +0,0 @@
# Recovery: Enable keystone via httpd and check for failed actions

echo "$(date) * Step 1: enable keystone resource via httpd"
play_on_resources "enable" "httpd-clone"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,10 +0,0 @@
# Recovery: Enable openstack-keystone and check for failed actions

echo "$(date) * Step 1: enable openstack-keystone resource"
play_on_resources "enable" "openstack-keystone-clone"

echo "$(date) - Checking for Stopped resources:"
wait_cluster_start

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,7 +0,0 @@
# Recovery: Enable master slave resources (galera and redis), all the resources should come up

echo "$(date) * Step 1: enable galera, redis and rabbitmq"
play_on_resources "enable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,7 +0,0 @@
# Recovery: Enable mongo and check for failed actions

echo "$(date) * Step 1: enable mongo"
play_on_resources "enable" "mongo"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,13 +0,0 @@
# Recovery: Enable all systemd and core resources, cleanup failed actions

echo "$(date) * Step 1: enable core resources"
play_on_resources "enable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) * Step 2: enable all the systemd resources"
play_on_resources "enable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) * Step 3: Waiting all resources to start"
wait_cluster_start

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,10 +0,0 @@
# Recovery: Start cluster again

echo "$(date) * Step 1: start the cluster"
sudo pcs cluster start --all

echo "$(date) * Step 2: Waiting all resources to start"
wait_cluster_start

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,3 +0,0 @@
# Test: Wait cluster start and look for failed actions

echo "$(date) - Waiting for cluster start and checking for failed resources:"
wait_cluster_start
@ -1,40 +0,0 @@
# Test: Stop keystone resource (by stopping httpd), check no other resource is stopped

echo "$(date) * Step 1: disable keystone resource via httpd stop"
play_on_resources "disable" "httpd"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: check resource status"
# Define resource list without httpd
OVERCLOUD_RESOURCES_NO_KEYSTONE="$(echo $OVERCLOUD_RESOURCES | sed 's/httpd/ /g')"
# Define number of minutes to look for status
MINUTES=10
# Cycling for $MINUTES minutes polling every minute the status of the resources
echo "$(date) - Cycling for 10 minutes polling every minute the status of the resources"
i=0
while [ $i -lt $MINUTES ]
do
 # Wait a minute
 sleep 60
 echo "$(date) - Polling..."
 for resource in $OVERCLOUD_RESOURCES_NO_KEYSTONE
 do
  echo -n "$resource -> "
  # If the resource is a multi state like galera or redis, do a different check
  case $resource in
   "galera") check_resource_status "$resource" "Masters"
    ;;
   "redis") check_resource_status "$resource" "(Masters|Slaves)"
    ;;
   *) check_resource_status "$resource" "Started"
    ;;
  esac
  [ $? -eq 0 ] && echo "OK" || (FAILURES=1; echo "Error!"; break)
 done
 let "i++"
done

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,7 +0,0 @@
# Test: Stop openstack-keystone and look for failed actions

echo "$(date) * Step 1: disable openstack-keystone resource"
play_on_resources "disable" "openstack-keystone-clone"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,7 +0,0 @@
# Test: Stop master slave resources (galera and redis), all the resources should come down

echo "$(date) * Step 1: disable galera, redis and rabbitmq"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,43 +0,0 @@
# Test: Stop mongo resource, check related systemd resources are fine

echo "$(date) * Step 1: disable mongo"
play_on_resources "disable" "mongo"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: check resource status"
# Define related resources
OVERCLOUD_RESOURCES="openstack-aodh-evaluator openstack-aodh-listener openstack-aodh-notifier openstack-ceilometer-central.service openstack-ceilometer-collector.service openstack-ceilometer-notification.service"
# Define number of minutes to look for status
MINUTES=10
# Cycling for $MINUTES minutes polling every minute the status of the resources
echo "$(date) - Cycling for 10 minutes polling every minute the status of the resources"
i=0
while [ $i -lt $MINUTES ]
do
 # Wait a minute
 sleep 60
 echo "$(date) - Polling..."
 for resource in $OVERCLOUD_RESOURCES
 do
  echo -n "$resource -> "
  # Check if the resource is active for the system
  systemctl is-active $resource
  if [ $? -ne 0 ]
  then
   # Show status of the resource
   echo "Error! Resource $resource is not active anymore."
   systemctl status $resource
   # Check in any case cluster's failed actions
   echo "$(date) - List of cluster's failed actions:"
   check_failed_actions
   # Now exit with an error
   exit 1
  fi
 done
 let "i++"
done

# If we are here, test was successful
echo "$(date) - Test was successful"
@ -1,19 +0,0 @@
# Test: Stop every systemd resource, stop Galera and Rabbitmq, Start every systemd resource

echo "$(date) * Step 1: disable all the systemd resources"
play_on_resources "disable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: disable core services"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 3: enable each resource one by one and check the status"
play_on_resources "enable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,19 +0,0 @@
# Test: Stop Galera and Rabbitmq, stop every systemd resource, Start every systemd resource

echo "$(date) * Step 1: disable core services"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: disable all the systemd resources"
play_on_resources "disable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 3: enable all the systemd resources"
play_on_resources "enable" "$OVERCLOUD_SYSTEMD_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,22 +0,0 @@
# Test: Stop Galera and Rabbitmq, wait 20 minutes to see if something fails

echo "$(date) * Step 1: disable core services"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: poll every minute for twenty minutes the status of the resources"
for i in $(seq 1 20)
do
 check_failed_actions
 if [ $? -ne 0 ]
 then
  echo "Errors found, test is over."
  break
 fi
 sleep 60
done

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
@ -1,10 +0,0 @@
# Test: Check active processes after cluster stop

echo "$(date) * Step 1: checking actual process status"
check_resources_process_status

echo "$(date) * Step 2: stopping cluster"
sudo pcs cluster stop --all

echo "$(date) * Step 3: checking actual process status"
check_resources_process_status