Merge "Add more systematic healthchecks"

This commit is contained in:
Zuul 2018-08-06 12:44:51 +00:00 committed by Gerrit Code Review
commit 49f96c7b60
2 changed files with 118 additions and 94 deletions

View File

@ -16,22 +16,28 @@
# This playbook is meant to run after setup-hosts. # This playbook is meant to run after setup-hosts.
# To succeed, it expects the setup-hosts playbook to have run successfully. # To succeed, it expects the setup-hosts playbook to have run successfully.
# Ensuring the openstack hosts are well setup # Test if the openstack-hosts-setup play was a success.
# Ensure the containers are properly setup # TO BE IMPLEMENTED
# Test if security-hardening was a success.
# TO BE IMPLEMENTED
# Test if containers-deploy was a success.
# Ensure the lxc containers are properly setup
- name: Ensuring hosts good behavior - name: Ensuring hosts good behavior
hosts: hosts hosts: lxc_hosts
gather_facts: no gather_facts: no
tasks: tasks:
- name: Check the right role code was used
debug:
msg: "To be implemented"
verbosity: 1
# TODO(evrardjp): Add nspawn checking
- name: Looking for dnsmasq process - name: Looking for dnsmasq process
command: pgrep dnsmasq command: pgrep dnsmasq
changed_when: false changed_when: false
when: container_tech | default('lxc') == 'lxc'
# Placeholder healthcheck for the hosts in the nspawn_hosts group: no real
# check is implemented yet, so the play only prints a reminder message.
- name: Ensuring hosts good behavior
  hosts: nspawn_hosts
  gather_facts: false
  tasks:
    - name: Report that nspawn host checks are not implemented
      debug:
        msg: "To be implemented. Please help."
- name: Ensuring containers creation, connection and good behavior - name: Ensuring containers creation, connection and good behavior
hosts: all_containers hosts: all_containers
@ -48,6 +54,7 @@
url: https://git.openstack.org/cgit/openstack/openstack-ansible/plain/ansible-role-requirements.yml url: https://git.openstack.org/cgit/openstack/openstack-ansible/plain/ansible-role-requirements.yml
dest: /tmp/osa-master-requirements dest: /tmp/osa-master-requirements
# Test extra settings before setup-infrastructure
- name: Ensure settings are not wrong with the usual suspects issues before trying to deploy infra - name: Ensure settings are not wrong with the usual suspects issues before trying to deploy infra
hosts: haproxy hosts: haproxy
gather_facts: yes gather_facts: yes

View File

@ -16,13 +16,32 @@
# This playbook is meant to run after setup-infrastructure, and expects # This playbook is meant to run after setup-infrastructure, and expects
# the infrastructure bits to have properly deployed to succeed. # the infrastructure bits to have properly deployed to succeed.
# Ensuring Load Balancer behavior # Test unbound-install.yml
# TO BE IMPLEMENTED
# Test repo-install.yml
# Requests the requirements constraints file from every member of the
# repo_all group, addressing each one directly by its container_address.
# The play targets the first host of all_containers plus the first host of
# physical_hosts.
- name: Ensure all repo-servers are built and are accessible by hosts.
  hosts: all_containers[0]:physical_hosts[0]
  gather_facts: true
  vars:
    repo_requirements_file: "os-releases/{{ openstack_release }}/{{ os_distro_version }}/requirements_constraints.txt"
  tasks:
    - name: Check the upper constraint on each repo server
      uri:
        url: "http://{{ hostvars[item]['container_address'] }}:{{ repo_server_port }}/{{ repo_requirements_file }}"
      with_inventory_hostnames: "{{ groups['repo_all'] }}"
      # The check is skipped unless install_method is 'source'.
      when: install_method == 'source'
  # NOTE(review): tags are placed at play level; the flattened original does
  # not show whether they belonged to the play or to the task - confirm.
  tags:
    - healthcheck
    - healthcheck-repo-install
# Test haproxy-install.yml
- name: Ensuring haproxy runs - name: Ensuring haproxy runs
hosts: haproxy hosts: haproxy
gather_facts: no gather_facts: no
tasks: tasks:
- name: Check if host can connect to keepalived ping IP - name: Check if host can connect to keepalived ping IP
command: "ping {{ keepalived_ping_address }}" command: "ping -c 2 {{ keepalived_ping_address }}"
changed_when: false changed_when: false
- name: Checking if keepalived is running - name: Checking if keepalived is running
@ -35,33 +54,24 @@
shell: 'echo "show info;show stat" | nc -U /var/run/haproxy.stat' shell: 'echo "show info;show stat" | nc -U /var/run/haproxy.stat'
changed_when: false changed_when: false
register: haproxy_stats register: haproxy_stats
tags:
- haproxy
# Run this playbook with -v and you'll see your DOWN issues # Run this playbook with -v and you'll see your DOWN issues
- name: Printing the output of haproxy stats - name: Printing the output of haproxy stats
debug: debug:
var: haproxy_stats var: haproxy_stats
verbosity: 1 verbosity: 1
tags: tags:
- haproxy - healthcheck
- healthcheck-haproxy-install
# We are looking up from the first container.
- name: Ensure that all the repos have data
hosts: all_containers[0]
gather_facts: yes
vars:
repo_requirements_file: "os-releases/{{ openstack_release }}/{{ os_distro_version }}/requirements_constraints.txt"
tasks:
- name: Check the upper constraint on each repo server
uri:
url: "http://{{ hostvars[item]['container_address'] }}:{{ repo_server_port }}/{{ repo_requirements_file }}"
with_inventory_hostnames: "{{ groups['repo_all'] }}"
tags:
- repo
# Test repo-use.yml
- name: Ensure all the containers can connect to the repos - name: Ensure all the containers can connect to the repos
hosts: all_containers hosts: all_containers
gather_facts: yes gather_facts: yes
# The first serial batch contains three containers so that they hit the
# load balancer at the same time, which should spread their requests over
# three different repo servers.
# Once that batch is done, all remaining containers are processed at once.
serial: serial:
- 3 - 3
- 100% - 100%
@ -71,10 +81,11 @@
uri: uri:
url: "{{ repo_release_path }}/requirements_constraints.txt" url: "{{ repo_release_path }}/requirements_constraints.txt"
tags: tags:
- repo - healthcheck
- healthcheck-repo-use
- name: Sanity checks for all containers - name: Sanity checks for all containers
hosts: all_containers hosts: all_containers:physical_hosts
gather_facts: no gather_facts: no
tasks: tasks:
- name: Ensure everyone can reach apt proxy - name: Ensure everyone can reach apt proxy
@ -84,16 +95,23 @@
when: when:
- "ansible_pkg_mgr == 'apt'" - "ansible_pkg_mgr == 'apt'"
tags: tags:
- proxy - healthcheck
- name: Connect to galera port - healthcheck-repo-use
wait_for:
port: 3306
host: "{{ internal_lb_vip_address }}"
state: started
tags:
- galera
# Specific checks: Memcached # Test utility-install.yml
# Verifies that the `openstack` command can be found in the PATH on the
# hosts of the utility_all group.
- name: Ensure utility container has clients
  hosts: utility_all
  gather_facts: false
  tasks:
    - name: Ensure openstackclient is installed and in path
      command: which openstack
      # Read-only lookup: never report it as a change, like the other
      # command/shell based checks in these healthcheck playbooks.
      changed_when: false
      register: _openstackclient
  # NOTE(review): tags are placed at play level; the flattened original does
  # not show whether they belonged to the play or to the task - confirm.
  tags:
    - healthcheck
    - healthcheck-utility-install
# Test memcached-install.yml
- name: Check memcached for keystone - name: Check memcached for keystone
hosts: keystone_all hosts: keystone_all
gather_facts: no gather_facts: no
@ -103,13 +121,11 @@
delegate_to: "{{ item }}" delegate_to: "{{ item }}"
delegate_facts: true delegate_facts: true
with_items: "{{ groups['memcached'] }}" with_items: "{{ groups['memcached'] }}"
tags:
- memcached
- package: - package:
name: netcat name: netcat
state: present state: present
tags:
- memcached
- name: Connect to remote memcache servers (full mesh testing) - name: Connect to remote memcache servers (full mesh testing)
shell: "echo stats | nc {{ hostvars[memcached_host]['container_address'] }} {{ memcached_port }}" shell: "echo stats | nc {{ hostvars[memcached_host]['container_address'] }} {{ memcached_port }}"
changed_when: false changed_when: false
@ -117,78 +133,74 @@
with_items: "{{ groups['memcached'] }}" with_items: "{{ groups['memcached'] }}"
loop_control: loop_control:
loop_var: memcached_host loop_var: memcached_host
tags:
- memcached
- name: Output memcache stats if in verbose mode - name: Output memcache stats if in verbose mode
debug: debug:
var: memcache_stats var: memcache_stats
verbosity: 1 verbosity: 1
tags: tags:
- memcached - healthcheck
- healthcheck-memcached-install
# Specific checks: Rabbit # Test galera-install.yml
- name: Ask if rabbitmq test should run - name: Sanity checks for all containers
hosts: all_containers hosts: all_containers:physical_hosts
connection: local
gather_facts: no gather_facts: no
vars_prompt:
- name: "rabbit_test_prompt"
prompt: "Are you sure you want to run rabbit tests? It runs pip install on all your containers."
default: "no"
private: no
tasks: tasks:
- name: Mark the usage of rabbitmq tests. - name: Connect to galera port
set_fact: wait_for:
run_rabbit_tests: "{{ rabbit_test_prompt | bool }}" port: 3306
host: "{{ internal_lb_vip_address }}"
state: started
tags: tags:
- rabbitmq - healthcheck
- healthcheck-galera-install
# Test rabbitmq-install.yml
- name: Add a user for rabbitmq - name: Add a user for rabbitmq
hosts: rabbitmq_all[0] hosts: rabbitmq_all[0]
gather_facts: no gather_facts: no
tasks: tasks:
- name: Create credentials on vhost - name: Configure Rabbitmq vhost
include: common-tasks/rabbitmq-vhost-user.yml rabbitmq_vhost:
vars: name: "testvhost"
user: testguest state: "present"
password: secrete
vhost: "/test" - name: Configure Rabbitmq user
_rabbitmq_host_group: "rabbitmq_all" rabbitmq_user:
user: "testguest"
password: "secrete"
vhost: "testvhost"
configure_priv: ".*"
read_priv: ".*"
write_priv: ".*"
state: "present"
no_log: True
tags: tags:
- rabbitmq - healthcheck
when: run_rabbit_tests | default(false) - healthcheck-rabbitmq-install
- name: Ensure all the usual openstack containers can connect to rabbit - name: Ensure all the usual openstack containers can connect to rabbit
hosts: all_containers:!etcd_all:!galera_all:!memcached:!haproxy:!rabbitmq_all:!rsyslog:!unbound:!repo_all hosts: all_containers:!etcd_all:!galera_all:!memcached:!haproxy:!rabbitmq_all:!rsyslog:!unbound:!repo_all
gather_facts: no gather_facts: no
vars: vars:
venv_path: /tmp/rabbitmqtest venv_path: /tmp/rabbitmqtest
roles:
- role: pip_install
when: run_rabbit_tests | default(false)
tags:
- rabbitmq
post_tasks: post_tasks:
- name: Generate venv for rabbitmq testing - name: Generate venv for rabbitmq testing
pip: pip:
name: pika name: pika
virtualenv: "{{ venv_path }}" virtualenv: "{{ venv_path }}"
when: run_rabbit_tests | default(false)
tags:
- rabbitmq
- name: Copying test script - name: Copying test script
copy: copy:
src: "../scripts/rabbitmq-test.py" src: "../scripts/rabbitmq-test.py"
dest: "{{ venv_path }}/rabbitmq-test.py" dest: "{{ venv_path }}/rabbitmq-test.py"
mode: 0755 mode: 0755
when: run_rabbit_tests | default(false)
tags:
- rabbitmq
- name: Connect to rabbitmq - name: Connect to rabbitmq
command: "{{ venv_path }}/bin/python2 {{ venv_path }}/rabbitmq-test.py {{ hostvars[groups['rabbitmq_all'][0]]['container_address'] }}" command: "{{ venv_path }}/bin/python2 {{ venv_path }}/rabbitmq-test.py {{ hostvars[groups['rabbitmq_all'][0]]['container_address'] }}"
when: run_rabbit_tests | default(false)
tags: tags:
- rabbitmq - healthcheck
- healthcheck-rabbitmq-install
- name: Remove guest user for rabbitmq - name: Remove guest user for rabbitmq
hosts: rabbitmq_all[0] hosts: rabbitmq_all[0]
@ -198,11 +210,16 @@
rabbitmq_user: rabbitmq_user:
user: testguest user: testguest
password: secrete password: secrete
vhost: "/test" vhost: "/testvhost"
state: absent state: absent
no_log: true no_log: true
when: run_rabbit_tests | default(false) - name: Configure Rabbitmq vhost
rabbitmq_vhost:
name: "testvhost"
state: "absent"
tags: tags:
- rabbitmq - healthcheck
- healthcheck-rabbitmq-install
- healthcheck-teardown
# TODO(evrardjp): Specific checks: Etcd # TODO: Other playbooks' tests.