From f5354f55b139ac1a84be5f59aaf3cb03d548f22e Mon Sep 17 00:00:00 2001 From: "Michal (inc0) Jastrzebski" Date: Thu, 18 May 2017 08:20:14 -0700 Subject: [PATCH] Enable multinode gate This patch changes deploy_gate quite a bit so in reality all deployments will now assume multinode (even if it's single node). After that we will refactor it even further to enable easy addition of new scenarios. Change-Id: I1faada46e6a7aa026128b2f01d77eabb04759439 --- ansible/roles/baremetal/tasks/install.yml | 2 + tests/ansible_generate_config.yml | 40 +++++++ tests/ansible_generate_inventory.yml | 26 ++++ tests/ansible_get_logs.yml | 88 ++++++++++++++ tests/get_logs.sh | 43 +++++++ tests/templates/globals-default.j2 | 31 +++++ tests/templates/inventory.j2 | 6 + tests/templates/nova-compute-overrides.j2 | 4 + tools/deploy_aio.sh | 140 ---------------------- tools/playbook-setup-nodes.yml | 23 ---- tools/setup_gate.sh | 101 ++++++++++------ 11 files changed, 304 insertions(+), 200 deletions(-) create mode 100644 tests/ansible_generate_config.yml create mode 100644 tests/ansible_generate_inventory.yml create mode 100644 tests/ansible_get_logs.yml create mode 100644 tests/get_logs.sh create mode 100644 tests/templates/globals-default.j2 create mode 100644 tests/templates/inventory.j2 create mode 100644 tests/templates/nova-compute-overrides.j2 delete mode 100755 tools/deploy_aio.sh diff --git a/ansible/roles/baremetal/tasks/install.yml b/ansible/roles/baremetal/tasks/install.yml index d13188fda5..240a356ac6 100644 --- a/ansible/roles/baremetal/tasks/install.yml +++ b/ansible/roles/baremetal/tasks/install.yml @@ -4,10 +4,12 @@ become: True when: ansible_os_family == 'Debian' +# TODO(inc0): Gates don't seem to have ufw executable, check for it instead of ignore errors - name: Set firewall default policy become: True ufw: state=disabled policy=allow when: ansible_os_family == 'Debian' + ignore_errors: yes - name: Check if firewalld is installed command: rpm -q firewalld diff --git 
a/tests/ansible_generate_config.yml b/tests/ansible_generate_config.yml new file mode 100644 index 0000000000..61a543bcb1 --- /dev/null +++ b/tests/ansible_generate_config.yml @@ -0,0 +1,40 @@ +--- +- hosts: localhost + connection: local + become: True + tasks: + - name: Get api_interface name + set_fact: api_interface_address="{{ lookup('file', '/etc/nodepool/primary_node_private') }}" + + - shell: "ip a | grep {{ api_interface_address }}" + register: api_interface_name + + - set_fact: api_interface_name="{{ api_interface_name.stdout_lines[0].split(" ")[-1] }}" + + - set_fact: + is_multinode: "{{ lookup('file', '/etc/nodepool/sub_nodes') }}" + + - name: Ensure /etc/kolla dir + file: + path: /etc/kolla + state: "directory" + + - name: Setup globals.yml + template: + src: "templates/globals-default.j2" + dest: "/etc/kolla/globals.yml" + + - name: Copy passwords.yml + copy: + src: "../etc/kolla/passwords.yml" + dest: "/etc/kolla/passwords.yml" + + - name: Ensure /etc/kolla/config directory + file: + path: /etc/kolla/config/nova + state: "directory" + + - name: Setup overrides + template: + src: "templates/nova-compute-overrides.j2" + dest: "/etc/kolla/config/nova/nova-compute.conf" diff --git a/tests/ansible_generate_inventory.yml b/tests/ansible_generate_inventory.yml new file mode 100644 index 0000000000..9d93d9eeff --- /dev/null +++ b/tests/ansible_generate_inventory.yml @@ -0,0 +1,26 @@ +--- +- hosts: localhost + connection: local + tasks: + - name: Get node addresses + set_fact: + primary_node_address: "{{ lookup('file', '/etc/nodepool/primary_node_private') }}" + sub_node_addresses: "{{ lookup('file', '/etc/nodepool/sub_nodes_private').split('\n') }}" + + - set_fact: node_group="{{ lookup('template', 'templates/inventory.j2')}}" + + - name: Ensure /tmp/kolla exists + file: + path: "/tmp/kolla" + state: "directory" + + - name: Copy default ansible kolla-ansible inventory + copy: + src: ../ansible/inventory/all-in-one + dest: /tmp/kolla/raw_inventory + + - name: 
Replace localhost with IPs + replace: + path: /tmp/kolla/raw_inventory + regexp: "localhost.*$" + replace: "{{ node_group }}" diff --git a/tests/ansible_get_logs.yml b/tests/ansible_get_logs.yml new file mode 100644 index 0000000000..68d9e2a97e --- /dev/null +++ b/tests/ansible_get_logs.yml @@ -0,0 +1,88 @@ +--- +- hosts: all + tasks: + - name: Check node role + command: "cat /etc/nodepool/role" + register: node_role + + - set_fact: node_role="{{ node_role.stdout }}" + + - name: Ensure /tmp/logs dir + file: + path: "/tmp/logs" + state: "directory" + when: + - node_role == "sub" + + - name: Get /tmp/logs symlink target + command: "readlink -f /tmp/logs" + register: logs_target + + - name: Ensure primary node directories + file: + path: "{{ logs_target.stdout }}/{{ item }}" + state: "directory" + mode: 0777 + when: + - node_role == "primary" + with_items: + - "subnodes" + - "docker_logs" + - "kolla_configs" + - "system_logs" + - "kolla" + + - name: Ensure sub node directories + file: + path: "/tmp/logs/{{ item }}" + state: "directory" + mode: 0777 + when: + - node_role == "sub" + with_items: + - "docker_logs" + - "kolla_configs" + - "system_logs" + - "kolla" + + - name: Run diagnostics script + script: get_logs.sh + register: get_logs_result + failed_when: false + + - name: Print get_logs output + debug: + msg: "{{ get_logs_result.stdout }}" + + - name: Run dump_info script + script: ../tools/dump_info.sh + + - name: Download logs from all subnodes + synchronize: + src: "/tmp/logs" + dest: "{{ logs_target.stdout }}/subnodes/{{ ansible_hostname }}" + mode: "pull" + when: + - node_role == "sub" + + - name: Change permission of all log files + command: "chmod -R 777 {{ logs_target.stdout }}" + + - name: Fail if get_logs has failed + fail: + msg: "{{ get_logs_result.stdout }}" + when: get_logs_result.rc != 0 + + - name: Copy inventory file to logs + copy: + src: "/tmp/kolla/raw_inventory" + dest: "{{ logs_target.stdout }}/ansible/inventory" + when: + - node_role == 
"primary" + + - name: Copy hosts file to logs + copy: + src: "/etc/hosts" + dest: "{{ logs_target.stdout }}/system_logs/hosts" + when: + - node_role == "primary" diff --git a/tests/get_logs.sh b/tests/get_logs.sh new file mode 100644 index 0000000000..9ac82cda3b --- /dev/null +++ b/tests/get_logs.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +copy_logs() { + cp -rnL /var/lib/docker/volumes/kolla_logs/_data/* /tmp/logs/kolla/ + cp -rnL /etc/kolla/* /tmp/logs/kolla_configs/ + cp -rvnL /var/log/* /tmp/logs/system_logs/ + + + if [[ -x "$(command -v journalctl)" ]]; then + journalctl --no-pager -u docker.service > /tmp/logs/system_logs/docker.log + else + cp /var/log/upstart/docker.log /tmp/logs/system_logs/docker.log + fi +} + +check_failure() { + # Command failures after this point can be expected + set +o errexit + + docker images + docker ps -a + # All docker container's status are created, restarting, running, removing, + # paused, exited and dead. Containers without running status are treated as + # failure. removing is added in docker 1.13, just ignore it now. 
+ failed_containers=$(docker ps -a --format "{{.Names}}" \ + --filter status=created \ + --filter status=restarting \ + --filter status=paused \ + --filter status=exited \ + --filter status=dead) + + for failed in ${failed_containers}; do + docker logs --tail all ${failed} > /tmp/logs/docker_logs/${failed} + done + + copy_logs + + if [[ -n "$failed_containers" ]]; then + exit 1; + fi +} + +check_failure diff --git a/tests/templates/globals-default.j2 b/tests/templates/globals-default.j2 new file mode 100644 index 0000000000..accda1ead5 --- /dev/null +++ b/tests/templates/globals-default.j2 @@ -0,0 +1,31 @@ +--- +kolla_base_distro: "{{ base }}" +kolla_install_type: "{{ type }}" + +{% if is_multinode %} +enable_haproxy: "no" +kolla_internal_vip_address: "{{ api_interface_address }}" +{% else %} +kolla_internal_vip_address: "169.254.169.10" +{% endif %} + +network_interface: "{{ api_interface_name }}" +docker_restart_policy: "never" +# NOTE(Jeffrey4l): use different a docker namespace name in case it pull image from hub.docker.io when deplying +docker_namespace: "lokolla" +docker_registry: "{{ api_interface_address }}:4000" +neutron_external_interface: "fake_interface" +enable_horizon: "yes" +enable_heat: "no" +openstack_logging_debug: "True" +openstack_service_workers: "1" + +# enable port security in gate until this bug is fixed +# https://bugs.launchpad.net/neutron/+bug/1694420 +extension_drivers: + - name: "qos" + enabled: "{{ '{{' }} enable_neutron_qos | bool {{ '}}' }}" + - name: "port_security" + enabled: true + - name: "dns" + enabled: "{{ '{{' }} enable_designate | bool {{ '}}' }}" diff --git a/tests/templates/inventory.j2 b/tests/templates/inventory.j2 new file mode 100644 index 0000000000..0a1f2ecdc6 --- /dev/null +++ b/tests/templates/inventory.j2 @@ -0,0 +1,6 @@ +{{ primary_node_address }} ansible_become=true ansible_connection=local +{% for addr in sub_node_addresses %} +{% if addr %} +{{ addr }} ansible_user=jenkins ansible_become=true 
ansible_ssh_private_key_file=/etc/nodepool/id_rsa +{% endif %} +{% endfor %} diff --git a/tests/templates/nova-compute-overrides.j2 b/tests/templates/nova-compute-overrides.j2 new file mode 100644 index 0000000000..665435f938 --- /dev/null +++ b/tests/templates/nova-compute-overrides.j2 @@ -0,0 +1,4 @@ +[libvirt] +virt_type=qemu +# NOTE(Jeffrey4l): fix the gate in iax-ord nodes for libvirt 2.0. +cpu_mode=none diff --git a/tools/deploy_aio.sh b/tools/deploy_aio.sh deleted file mode 100755 index 9448e12ee2..0000000000 --- a/tools/deploy_aio.sh +++ /dev/null @@ -1,140 +0,0 @@ -#!/bin/bash - -set -o xtrace -set -o errexit - -export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - -export KOLLA_BASE=$1 -export KOLLA_TYPE=$2 -export KEEPALIVED_VIRTUAL_ROUTER_ID=$(shuf -i 1-255 -n 1) - -function copy_logs { - cp -rnL /var/lib/docker/volumes/kolla_logs/_data/* /tmp/logs/kolla/ - cp -rnL /etc/kolla/* /tmp/logs/kolla_configs/ - cp -rvnL /var/log/* /tmp/logs/system_logs/ - - - if [[ -x "$(command -v journalctl)" ]]; then - journalctl --no-pager -u docker.service > /tmp/logs/system_logs/docker.log - else - cp /var/log/upstart/docker.log /tmp/logs/system_logs/docker.log - fi - - # NOTE(SamYaple): Fix permissions for log extraction in gate - chmod -R 777 /tmp/logs/kolla /tmp/logs/kolla_configs /tmp/logs/system_logs - ara generate html /tmp/logs/playbook_reports/ - gzip --recursive --best /tmp/logs/playbook_reports/ -} - -function sanity_check { - # Wait for service ready - sleep 15 - . /etc/kolla/admin-openrc.sh - # TODO(Jeffrey4l): Restart the memcached container to cleanup all cache. 
- # Remove this after this bug is fixed - # https://bugs.launchpad.net/oslo.cache/+bug/1590779 - docker restart memcached - nova --debug service-list - neutron --debug agent-list - tools/init-runonce - nova --debug boot --poll --image $(openstack image list | awk '/cirros/ {print $2}') --nic net-id=$(openstack network list | awk '/demo-net/ {print $2}') --flavor 1 kolla_boot_test - nova --debug list - # If the status is not ACTIVE, print info and exit 1 - nova --debug show kolla_boot_test | awk '{buf=buf"\n"$0} $2=="status" && $4!="ACTIVE" {failed="yes"}; END {if (failed=="yes") {print buf; exit 1}}' -} - -function check_failure { - # Command failures after this point can be expected - set +o errexit - - docker images - docker ps -a - # All docker container's status are created, restarting, running, removing, - # paused, exited and dead. Containers without running status are treated as - # failure. removing is added in docker 1.13, just ignore it now. - failed_containers=$(docker ps -a --format "{{.Names}}" \ - --filter status=created \ - --filter status=restarting \ - --filter status=paused \ - --filter status=exited \ - --filter status=dead) - - for failed in ${failed_containers}; do - docker logs --tail all ${failed} - done - - copy_logs - - if [[ -n "$failed_containers" ]]; then - exit 1; - fi -} - -function write_configs { - mkdir -p /etc/kolla/config - - PRIVATE_ADDRESS=$(cat /etc/nodepool/node_private) - PRIVATE_INTERFACE=$(ip -4 --oneline address | awk -v pattern=${PRIVATE_ADDRESS} '$0 ~ pattern {print $2}') - cat << EOF > /etc/kolla/globals.yml ---- -kolla_base_distro: "${KOLLA_BASE}" -kolla_install_type: "${KOLLA_TYPE}" -kolla_internal_vip_address: "169.254.169.10" -keepalived_virtual_router_id: "${KEEPALIVED_VIRTUAL_ROUTER_ID}" -docker_restart_policy: "never" -# NOTE(Jeffrey4l): use different a docker namespace name in case it pull image from hub.docker.io when deplying -docker_namespace: "lokolla" -docker_registry: "${PRIVATE_ADDRESS}:4000" 
-network_interface: "${PRIVATE_INTERFACE}" -neutron_external_interface: "fake_interface" -enable_horizon: "yes" -enable_heat: "no" -openstack_logging_debug: "True" -openstack_service_workers: "1" - -# enable port security in gate until this bug is fixed -# https://bugs.launchpad.net/neutron/+bug/1694420 -extension_drivers: - - name: "qos" - enabled: "{{ enable_neutron_qos | bool }}" - - name: "port_security" - enabled: true - - name: "dns" - enabled: "{{ enable_designate | bool }}" -EOF - - mkdir /etc/kolla/config/nova - cat << EOF > /etc/kolla/config/nova/nova-compute.conf -[libvirt] -virt_type=qemu -# NOTE(Jeffrey4l): fix the gate in iax-ord nodes for libvirt 2.0. -cpu_mode=none -EOF -} - -trap check_failure EXIT - -write_configs - -# Create dummy interface for neutron -ip l a fake_interface type dummy - -# Actually do the deployment -tools/kolla-ansible -vvv prechecks -# TODO(jeffrey4l): add pull action when we have a local registry -# service in CI -tools/kolla-ansible -vvv deploy -tools/kolla-ansible -vvv post-deploy - -# Test OpenStack Environment -sanity_check - -# TODO(jeffrey4l): make some configure file change and -# trigger a real reconfigure -tools/kolla-ansible -vvv reconfigure -# TODO(jeffrey4l): need run a real upgrade -tools/kolla-ansible -vvv upgrade - -# run prechecks again -tools/kolla-ansible -vvv prechecks diff --git a/tools/playbook-setup-nodes.yml b/tools/playbook-setup-nodes.yml index a475aa8ee2..47864d335a 100644 --- a/tools/playbook-setup-nodes.yml +++ b/tools/playbook-setup-nodes.yml @@ -3,25 +3,10 @@ become: true tasks: - - name: Setup /etc/hosts - copy: - src: /etc/hosts - dest: /etc/hosts - - name: Ensure /etc/hostname is valid for SELinux command: restorecon -v /etc/hostname when: ansible_os_family == 'RedHat' - - name: Assign hostname - hostname: - name: "{{ inventory_hostname }}" - - - name: Copy setup script - copy: - src: setup_{{ ansible_os_family }}.sh - dest: /tmp/setup.sh - mode: 0755 - - name: Install wget package package: 
name=wget @@ -34,11 +19,3 @@ state: directory path: /tmp/{{ inventory_hostname }} become: false - - - name: Run node setup - shell: /tmp/setup.sh - - - name: Changing permissions of Docker socket to 666 - file: - path: /run/docker.sock - mode: 0666 diff --git a/tools/setup_gate.sh b/tools/setup_gate.sh index 6869925242..e6c905cc15 100755 --- a/tools/setup_gate.sh +++ b/tools/setup_gate.sh @@ -33,10 +33,8 @@ EOF } function setup_config { - sudo cp -r etc/kolla /etc/ - # Generate passwords - sudo tools/generate_passwords.py - + sudo mkdir /etc/kolla + sudo chmod -R 777 /etc/kolla # Use Infra provided pypi. # Wheel package mirror may be not compatible. So do not enable it. PIP_CONF=$(mktemp) @@ -102,39 +100,14 @@ function setup_workaround_broken_nodepool { } function setup_ssh { - # Generate a new keypair that Ansible will use - ssh-keygen -f /home/jenkins/.ssh/kolla -N '' - cat /home/jenkins/.ssh/kolla.pub >> /home/jenkins/.ssh/authorized_keys - - # Push the public key around to all of the nodes - for ip in $(cat /etc/nodepool/sub_nodes_private); do - scp /home/jenkins/.ssh/kolla.pub ${ip}:/home/jenkins/.ssh/authorized_keys - # TODO(SamYaple): Remove this root key pushing once Kolla doesn't - # require root anymore. - ssh ${ip} -i /home/jenkins/.ssh/kolla 'sudo mkdir -p /root/.ssh; sudo cp /home/jenkins/.ssh/* /root/.ssh/' - done - - # From now on use the new IdentityFile for connecting to other hosts - echo "IdentityFile /home/jenkins/.ssh/kolla" >> /home/jenkins/.ssh/config - chmod 600 /home/jenkins/.ssh/config + sudo chown jenkins /etc/nodepool/id_rsa + sudo chmod 600 /etc/nodepool/id_rsa } function setup_inventory { - local counter=0 echo -e "127.0.0.1\tlocalhost" > /tmp/hosts - for ip in $(cat /etc/nodepool/{node_private,sub_nodes_private}); do - : $((counter++)) - # FIXME(jeffrey4l): do not set two hostnames in oneline. this is a - # wordround fix for the rabbitmq failed when deploy on CentOS in the CI - # gate. 
the ideal fix should set the hostname in setup_gate.sh script. - # But it do not work as expect with unknown reason - ssh-keyscan "${ip}" >> ~/.ssh/known_hosts - echo -e "${ip}\tnode${counter}" >> /tmp/hosts - echo -e "${ip}\t$(ssh ${ip} hostname)" >> /tmp/hosts - echo "node${counter}" >> ${RAW_INVENTORY} - done - + ansible-playbook tests/ansible_generate_inventory.yml sudo chown root: /tmp/hosts sudo chmod 644 /tmp/hosts sudo mv /tmp/hosts /etc/hosts @@ -150,7 +123,6 @@ function setup_ansible { setup_inventory - sudo mkdir /etc/ansible sudo tee /etc/ansible/ansible.cfg< /tmp/logs/ansible/get-logs +} + setup_logging tools/dump_info.sh clone_repos setup_workaround_broken_nodepool setup_ssh setup_ansible -setup_node setup_config +setup_node + +ansible-playbook -e type=$INSTALL_TYPE -e base=$BASE_DISTRO tests/ansible_generate_config.yml > /tmp/logs/ansible/generate_config +tools/kolla-ansible -i ${RAW_INVENTORY} bootstrap-servers > /tmp/logs/ansible/bootstrap-servers +sudo tools/generate_passwords.py prepare_images -sudo tools/deploy_aio.sh "${BASE_DISTRO}" "${INSTALL_TYPE}" +trap get_logs EXIT -tools/dump_info.sh +# Create dummy interface for neutron +ansible -m shell -i ${RAW_INVENTORY} -a "ip l a fake_interface type dummy" all + +#TODO(inc0): Post-deploy complains that /etc/kolla is not writable. 
Probably we need to include become there +sudo chmod -R 777 /etc/kolla +# Actually do the deployment +tools/kolla-ansible -i ${RAW_INVENTORY} -vvv prechecks > /tmp/logs/ansible/prechecks1 +# TODO(jeffrey4l): add pull action when we have a local registry +# service in CI +tools/kolla-ansible -i ${RAW_INVENTORY} -vvv deploy > /tmp/logs/ansible/deploy +tools/kolla-ansible -i ${RAW_INVENTORY} -vvv post-deploy > /tmp/logs/ansible/post-deploy + +# Test OpenStack Environment +# TODO: use kolla-ansible check when it's ready +sanity_check + +# TODO(jeffrey4l): make some configure file change and +# trigger a real reconfigure +tools/kolla-ansible -i ${RAW_INVENTORY} -vvv reconfigure > /tmp/logs/ansible/reconfigure +# TODO(jeffrey4l): need to run a real upgrade +tools/kolla-ansible -i ${RAW_INVENTORY} -vvv upgrade > /tmp/logs/ansible/upgrade + +# run prechecks again +tools/kolla-ansible -i ${RAW_INVENTORY} -vvv prechecks > /tmp/logs/ansible/prechecks2 + +get_logs + +ara generate html /tmp/logs/playbook_reports/ +gzip --recursive --best /tmp/logs/playbook_reports/