kayobe/ansible/overcloud-hardware-inspect.yml
Mark Goddard 6c54ce4d3b Introduce max fail percentage to playbooks
This allows us to continue execution until a certain proportion of hosts
fail. This can be useful at scale, where failures are common, and
restarting a deployment is time-consuming.

The default max failure percentage is 100, keeping the default
behaviour. A global max failure percentage may be set via
kayobe_max_fail_percentage, and individual playbooks may define a max
failure percentage via <playbook>_max_fail_percentage.

Related Kolla Ansible patch:
https://review.opendev.org/c/openstack/kolla-ansible/+/805598

Change-Id: Ib81c72b63be5765cca664c38141ffc769640cf07
2024-06-03 16:24:29 +00:00

161 lines
6.2 KiB
YAML

---
# Use bifrost to inspect the overcloud nodes' hardware.
- name: Ensure the overcloud nodes' hardware is inspected
hosts: overcloud
max_fail_percentage: >-
{{ overcloud_hardware_inspect_max_fail_percentage |
default(kayobe_max_fail_percentage) |
default(100) }}
tags:
- hardware-inspect
vars:
# Set to False to avoid waiting for the nodes to become active.
wait_inspected: True
wait_inspected_timeout: "{{ kolla_bifrost_inspection_timeout }}"
wait_inspected_interval: 10
# List of states from which we can get to inspecting.
inspectable_states:
- enroll
- manageable
- available
- inspect failed
# List of valid states while a node is being inspected.
inspecting_states:
- inspecting
- inspect wait
# Retries to use when using Ironic API and hitting node locked errors.
ironic_retries: 6
ironic_retry_interval: 5
seed_host: "{{ groups['seed'][0] }}"
gather_facts: no
tasks:
- name: Check the ironic node's initial provision state
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node show {% raw %}{{ inventory_hostname }}{% endraw %} -f value -c provision_state"'
register: show_result
changed_when: False
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
- name: Set a fact containing the ironic node's initial provision state
set_fact:
initial_provision_state: "{{ show_result.stdout_lines[1] }}"
- name: Fail if the ironic node is in an unexpected provision state
fail:
msg: >
Ironic node for {{ inventory_hostname }} is in an unexpected
initial provision state: {{ initial_provision_state }}. Expected
states are: {{ inspectable_states | join(',') }}.
when: initial_provision_state not in inspectable_states
- name: Ensure the ironic node is manageable
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal -vvvv
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node manage {% raw %}{{ inventory_hostname }}{% endraw %}"'
register: manage_result
until: manage_result is successful or 'is locked by host' in manage_result.stdout
retries: "{{ ironic_retries }}"
delay: "{{ ironic_retry_interval }}"
when: initial_provision_state != 'manageable'
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
- name: Ensure the ironic node is inspected
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal -vvvv
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node inspect {% raw %}{{ inventory_hostname }}{% endraw %}"'
register: provide_result
until: provide_result is successful or 'is locked by host' in provide_result.stdout
retries: "{{ ironic_retries }}"
delay: "{{ ironic_retry_interval }}"
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
- name: Wait for the ironic node to be inspected
command: >
docker exec bifrost_deploy
bash -c '
export OS_CLOUD=bifrost &&
export BIFROST_INVENTORY_SOURCE=ironic &&
export BIFROST_NODE_NAMES="{{ inventory_hostname }}" &&
ansible baremetal
--connection local
--inventory /etc/bifrost/inventory/
-e @/etc/bifrost/bifrost.yml
-e @/etc/bifrost/dib.yml
--limit {{ inventory_hostname }}
-m command
-a "baremetal node show {% raw %}{{ inventory_hostname }}{% endraw %} -f value -c provision_state"'
register: show_result
# Wait until the node is no longer in one of the inspecting states.
until: not show_result.stdout_lines[1:] | intersect(inspecting_states)
retries: "{{ wait_inspected_timeout | int // wait_inspected_interval | int }}"
delay: "{{ wait_inspected_interval }}"
when: wait_inspected | bool
changed_when: False
delegate_to: "{{ seed_host }}"
vars:
# NOTE: Without this, the seed's ansible_host variable will not be
# respected when using delegate_to.
ansible_host: "{{ hostvars[seed_host].ansible_host | default(seed_host) }}"
- name: Set a fact containing the final provision state
set_fact:
final_provision_state: "{{ show_result.stdout_lines[1] }}"
when: wait_inspected | bool
- name: Fail if any of the nodes are not manageable
fail:
msg: >
Ironic node for {{ inventory_hostname }} is in an unexpected
provision state after inspecting. Ironic provision state:
{{ final_provision_state }}. Expected: manageable.
when:
- wait_inspected | bool
- final_provision_state != 'manageable'