openstack-ansible/playbooks/healthcheck-infrastructure.yml
Dmitriy Rabotyagov bc5428b21d Remove usage of rsyslog roles
We've switched all services to store logs to journald by default and
rsyslog roles are not used except really small amount of usecases that
also hardly valid as of today. With that we deprecate repos and remove
their usega to reduce maintenance load.

Change-Id: Iefd4143f83f4df44b917180000a1aa57161b2811
2022-10-19 15:10:59 +02:00

365 lines
12 KiB
YAML

---
# Copyright 2017, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This playbook is meant to run after setup-infrastructure, and expects
# the infrastructure bits to have properly deployed to succeed.
# Test unbound-install.yml
# TO BE IMPLEMENTED
# Test repo-install.yml
- name: Ensure all repo-servers are built and are accessible by hosts.
hosts: all_containers[0]:physical_hosts[0]
gather_facts: yes
vars:
repo_requirements_file: "constraints/upper_constraints_cached.txt"
tasks:
- name: Check the repo sync file on each repo server
uri:
url: "http://{{ hostvars[item]['container_address'] }}:{{ repo_server_port }}/{{ repo_requirements_file }}"
with_inventory_hostnames: "{{ groups['repo_all'] }}"
when: install_method == 'source'
tags:
- healthcheck
- healthcheck-repo-install
# Test haproxy-install.yml
- name: Ensuring haproxy runs
hosts: haproxy
gather_facts: yes
tasks:
- name: Check if host can connect to external keepalived ping IP
command: "ping -c 2 {{ keepalived_external_ping_address }}"
changed_when: false
- name: Check if host can connect to internal keepalived ping IP
command: "ping -c 2 {{ keepalived_internal_ping_address }}"
changed_when: false
- name: Checking if keepalived is running
command: "pgrep keepalived"
changed_when: false
when: groups['haproxy'] | length > 1
- package:
name: "{% if ansible_facts['os_family'] | lower == 'redhat' %}nmap-ncat{% else %}netcat-openbsd{% endif %}"
state: present
# Fails if HAProxy is not running
- name: Recording haproxy stats as a way to ensure haproxy runs
shell: 'echo "show info;show stat" | nc -U /var/run/haproxy.stat'
changed_when: false
register: haproxy_stats
# Run this playbook with -v and you'll see your DOWN issues
- name: Printing the output of haproxy stats
debug:
var: haproxy_stats
verbosity: 1
tags:
- healthcheck
- healthcheck-haproxy-install
# Test repo-use.yml
- name: Ensure all the containers can connect to the repos
hosts: all_containers
gather_facts: yes
# By having serial, you ensure that the first three containers are hitting
# the load balancer at the same time, which would then cause hitting three
# different repos servers.
# When this is done, the rest can be done with all the nodes at the same time.
serial:
- 3
- 100%
vars_files:
- defaults/source_install.yml
tasks:
# Repo release path points to the internal LB vip
- name: Check the presence of upper constraints on your repos and check load balancing
uri:
url: "{{ openstack_repo_url }}/constraints/upper_constraints_cached.txt"
tags:
- healthcheck
- healthcheck-repo-use
# Test utility-install.yml
- name: Ensure the service setup host is ready to run openstack calls
hosts: "{{ openstack_service_setup_host | default('localhost') }}"
gather_facts: no
vars_files:
- "defaults/{{ install_method }}_install.yml"
vars:
ansible_python_interpreter: "{{ openstack_service_setup_host_python_interpreter | default(ansible_facts['python']['executable']) }}"
tasks:
- name: Get openstack client config
openstack.cloud.config:
- name: Show openstack client config
debug:
var: openstack.clouds
verbosity: 1
tags:
- healthcheck
- healthcheck-utility-install
# Test memcached-install.yml
- name: Check memcached for keystone
hosts: keystone_all
gather_facts: no
tasks:
- name: Set facts about memcached
setup:
delegate_to: "{{ item }}"
delegate_facts: true
with_items: "{{ groups['memcached'] }}"
- package:
name: "{% if ansible_facts['os_family'] | lower == 'redhat' %}nmap-ncat{% else %}netcat-openbsd{% endif %}"
state: present
- name: Connect to remote memcache servers (full mesh testing)
shell: "echo stats | nc -w 3 {{ hostvars[memcached_host]['container_address'] }} {{ memcached_port }}"
changed_when: false
register: memcache_stats
with_items: "{{ groups['memcached'] }}"
loop_control:
loop_var: memcached_host
- name: Output memcache stats if in verbose mode
debug:
var: memcache_stats
verbosity: 1
tags:
- healthcheck
- healthcheck-memcached-install
# Test galera-install.yml
- name: Sanity checks for all containers
hosts: all_containers:physical_hosts
gather_facts: no
tasks:
- name: Connect to galera port
wait_for:
port: 3306
host: "{{ internal_lb_vip_address }}"
state: started
tags:
- healthcheck
- healthcheck-galera-install
- name: Run functional tests
hosts: galera_all
user: root
gather_facts: true
tasks:
- name: Wait for cluster to be ready
block:
- name: Wait for cluster ready state
command: |
mysql -h {{ ansible_host }} \
-u "{{ galera_root_user | default('root') }}" \
-p"{{ galera_root_password }}" \
-e "show status like 'wsrep_incoming_addresses';" \
--silent \
--skip-column-names
register: mysql_instance_ready
retries: 20
delay: 5
changed_when: false
until: mysql_instance_ready is success and mysql_instance_ready.stdout.split()[-1].split(',') | length == groups['galera_all'] | length
rescue:
- name: Restarting weird maria instance
service:
name: mariadb
state: restarted
- name: Wait for cluster ready state
command: |
mysql -h {{ ansible_host }} \
-u "{{ galera_root_user | default('root') }}" \
-p"{{ galera_root_password }}" \
-e "show status like 'wsrep_incoming_addresses';" \
--silent \
--skip-column-names
register: mysql_instance_ready
retries: 20
delay: 5
changed_when: false
until: mysql_instance_ready is success and mysql_instance_ready.stdout.split()[-1].split(',') | length == groups['galera_all'] | length
- name: Check cluster local state
command: |
mysql -h {{ ansible_host }} \
-u "{{ galera_root_user | default('root') }}" \
-p"{{ galera_root_password }}" \
-e "show status like 'wsrep_local_state_comment';" \
--silent \
--skip-column-names
register: wsrep_local_state_comment
changed_when: false
tags:
- skip_ansible_lint
- name: Check cluster evs state
command: |
mysql -h {{ ansible_host }} \
-u "{{ galera_root_user | default('root') }}" \
-p"{{ galera_root_password }}" \
-e "show status like 'wsrep_evs_state';" \
--silent \
--skip-column-names
register: wsrep_evs_state
changed_when: false
tags:
- skip_ansible_lint
- name: Check contents
assert:
that:
- "'Synced' in wsrep_local_state_comment.stdout"
- "'OPERATIONAL' in wsrep_evs_state.stdout"
- name: Create DB for service on "{{ groups['galera_all'][0] }}"
community.mysql.mysql_db:
login_user: "{{ galera_root_user | default('root') }}"
login_password: "{{ galera_root_password }}"
login_host: "{{ ansible_host }}"
name: "OSA-test"
state: "present"
when: inventory_hostname == groups['galera_all'][0]
tags:
- skip_ansible_lint
- name: Grant access to the DB on "{{ groups['galera_all'][-1] }}"
community.mysql.mysql_user:
login_user: "{{ galera_root_user | default('root') }}"
login_password: "{{ galera_root_password }}" # noqa no-log-password
login_host: "{{ ansible_host }}"
name: "osa-tester"
password: "tester-secrete" # noqa no-log-password
host: "{{ item }}"
state: "present"
priv: "OSA-test.*:ALL"
with_items:
- "localhost"
- "%"
when: inventory_hostname == groups['galera_all'][-1]
- name: Try to login with user to DB
delegate_to: "{{ groups['utility_all'][0] }}"
command: |
mysql -h {{ internal_lb_vip_address }} \
-p"tester-secrete" \
-u osa-tester \
OSA-test \
-e "SHOW TABLES;"
when: inventory_hostname == groups['galera_all'][-1]
- name: Remove created user
community.mysql.mysql_user:
login_user: "{{ galera_root_user | default('root') }}"
login_password: "{{ galera_root_password }}" # noqa no-log-password
login_host: "{{ ansible_host }}"
name: "osa-tester"
state: "absent"
host: "{{ item }}"
with_items:
- "localhost"
- "%"
when: inventory_hostname == groups['galera_all'][-1]
- name: Remove created DB
community.mysql.mysql_db:
login_user: "{{ galera_root_user | default('root') }}"
login_password: "{{ galera_root_password }}" # noqa no-log-password
login_host: "{{ ansible_host }}"
name: "OSA-test"
state: "absent"
when: inventory_hostname == groups['galera_all'][0]
tags:
- skip_ansible_lint
# Test rabbitmq-install.yml
- name: Add a user for rabbitmq
hosts: rabbitmq_all[0]
gather_facts: no
tasks:
- name: Configure Rabbitmq vhost
community.rabbitmq.rabbitmq_vhost:
name: "/testvhost"
state: "present"
- name: Configure Rabbitmq user
community.rabbitmq.rabbitmq_user:
user: "testguest"
password: "secrete" # noqa no-log-password
vhost: "/testvhost"
configure_priv: ".*"
read_priv: ".*"
write_priv: ".*"
state: "present"
tags:
- healthcheck
- healthcheck-rabbitmq-install
- name: Ensure all the usual openstack containers can connect to rabbit
hosts: all_containers:!etcd_all:!galera_all:!memcached:!haproxy:!rabbitmq_all:!unbound:!repo_all
gather_facts: no
vars:
venv_path: /tmp/rabbitmqtest
vars_files:
- "defaults/{{ install_method }}_install.yml"
post_tasks:
- name: Generate venv for rabbitmq testing
include_role:
name: "python_venv_build"
vars:
venv_install_destination_path: "{{ venv_path }}"
venv_pip_packages:
- pika
- name: Copying test script
copy:
src: "../scripts/rabbitmq-test.py"
dest: "{{ venv_path }}/rabbitmq-test.py"
mode: 0755
- name: Connect to rabbitmq
command: "{{ venv_path }}/bin/python {{ venv_path }}/rabbitmq-test.py {{ hostvars[groups['rabbitmq_all'][0]]['container_address'] }}"
changed_when: false
tags:
- healthcheck
- healthcheck-rabbitmq-install
- name: Remove guest user for rabbitmq
hosts: rabbitmq_all[0]
gather_facts: no
tasks:
- name: Remove test user
community.rabbitmq.rabbitmq_user:
user: testguest
password: secrete
vhost: "/testvhost"
state: absent
no_log: true
- name: Remove test vhost
community.rabbitmq.rabbitmq_vhost:
name: "/testvhost"
state: "absent"
tags:
- healthcheck
- healthcheck-rabbitmq-install
- healthcheck-teardown
# TODO: Other playbook's tests.