diff --git a/ansible/roles/nova/defaults/main.yml b/ansible/roles/nova/defaults/main.yml index 0474bbc279..abcd4cfa84 100644 --- a/ansible/roles/nova/defaults/main.yml +++ b/ansible/roles/nova/defaults/main.yml @@ -394,6 +394,15 @@ nova_services_require_nova_conf: - nova-scheduler - nova-spicehtml5proxy +# After upgrading nova-compute, services will have an RPC version cap in place. +# We need to restart all services that communicate with nova-compute in order +# to allow them to use the latest RPC version. Ideally, there would be a way to +# check whether all nova services are using the latest version, but currently +# there is not. Instead, wait a short time for all nova compute services to +# update the version of their service in the database. This seems to take +# around 10 seconds, but the default is 30 seconds to allow room for slowness. +nova_compute_startup_delay: 30 + #################### # Notification #################### diff --git a/ansible/roles/nova/handlers/main.yml b/ansible/roles/nova/handlers/main.yml index ad73838e9a..faa2072fcf 100644 --- a/ansible/roles/nova/handlers/main.yml +++ b/ansible/roles/nova/handlers/main.yml @@ -319,3 +319,55 @@ - kolla_action != "config" - inventory_hostname in groups['compute'] - enable_nova_fake | bool + +# NOTE(mgoddard): After upgrading nova-compute, services will have an RPC +# version cap in place. We need to restart all services that communicate with +# nova-compute in order to allow them to use the latest RPC version. Ideally, +# there would be a way to check whether all nova services are using the latest +# version, but currently there is not. Instead, wait a short time for all nova +# compute services to update the version of their service in the database. +# This seems to take around 10 seconds, but nova_compute_startup_delay +# defaults to 30 seconds to allow room for slowness. 
+ +- name: Wait for nova-compute services to update service versions + pause: + seconds: "{{ nova_compute_startup_delay }}" + run_once: true + when: + - kolla_action == 'upgrade' + listen: + - Restart nova-compute container + - Restart nova-compute-ironic container + - Restart nova-compute-fake containers + +# NOTE(mgoddard): Currently (just prior to Stein release), sending SIGHUP to +# nova compute services leaves them in a broken state in which they cannot +# start new instances. The following error is seen in the logs: +# "In shutdown, no new events can be scheduled" +# To work around this we restart the nova-compute services. +# Speaking to the nova team, this seems to be an issue in oslo.service, +# with a fix proposed here: https://review.opendev.org/#/c/641907. +# This issue also seems to affect the proxy services, which exit non-zero in +# response to a SIGHUP, so restart those too. +# The issue actually affects all nova services, since they remain with RPC +# version pinned to the previous release: +# https://bugs.launchpad.net/kolla-ansible/+bug/1833069. +# TODO(mgoddard): Use SIGHUP when this bug has been fixed. 
+ +- name: Restart nova services to remove RPC version cap + become: true + kolla_docker: + action: restart_container + common_options: "{{ docker_common_options }}" + name: "{{ item.value.container_name }}" + when: + - kolla_action == 'upgrade' + - inventory_hostname in groups[item.value.group] + - item.value.enabled | bool + - item.key in nova_services_require_nova_conf + - item.key != 'placement-api' + with_dict: "{{ nova_services }}" + listen: + - Restart nova-compute container + - Restart nova-compute-ironic container + - Restart nova-compute-fake containers diff --git a/ansible/roles/nova/tasks/legacy_upgrade.yml b/ansible/roles/nova/tasks/legacy_upgrade.yml index bd931d5282..04c879e3bc 100644 --- a/ansible/roles/nova/tasks/legacy_upgrade.yml +++ b/ansible/roles/nova/tasks/legacy_upgrade.yml @@ -26,5 +26,3 @@ - name: Flush handlers meta: flush_handlers - -- include_tasks: reload.yml diff --git a/ansible/roles/nova/tasks/reload.yml b/ansible/roles/nova/tasks/reload.yml deleted file mode 100644 index c00965b438..0000000000 --- a/ansible/roles/nova/tasks/reload.yml +++ /dev/null @@ -1,38 +0,0 @@ ---- -# This play calls sighup on every service to refresh upgrade levels - -# NOTE(mgoddard): Currently (just prior to Stein release), sending SIGHUP to -# nova compute services leaves them in a broken state in which they cannot -# start new instances. The following error is seen in the logs: -# "In shutdown, no new events can be scheduled" -# To work around this we restart the nova-compute services. -# Speaking to the nova team, this seems to be an issue in oslo.service, -# with a fix proposed here: https://review.opendev.org/#/c/641907. -# This issue also seems to affect the proxy services, which exit non-zero in -# reponse to a SIGHUP, so restart those too. -# TODO(mgoddard): Remove this workaround when this bug has been fixed. 
- -- name: Send SIGHUP to nova services - become: true - command: docker exec -t {{ item.value.container_name }} kill -1 1 - when: - - inventory_hostname in groups[item.value.group] - - item.value.enabled | bool - - item.key in nova_services_require_nova_conf - - not item.key.startswith('nova-compute') - - not item.key.endswith('proxy') - with_dict: "{{ nova_services }}" - -- name: Restart nova compute and proxy services - become: true - kolla_docker: - action: restart_container - common_options: "{{ docker_common_options }}" - name: "{{ item.value.container_name }}" - when: - - inventory_hostname in groups[item.value.group] - - item.value.enabled | bool - - item.key in nova_services_require_nova_conf - - item.key.startswith('nova-compute') - or item.key.endswith('proxy') - with_dict: "{{ nova_services }}" diff --git a/ansible/roles/nova/tasks/rolling_upgrade.yml b/ansible/roles/nova/tasks/rolling_upgrade.yml index 76ed9c56cd..962321975b 100644 --- a/ansible/roles/nova/tasks/rolling_upgrade.yml +++ b/ansible/roles/nova/tasks/rolling_upgrade.yml @@ -25,8 +25,6 @@ - name: Flush handlers meta: flush_handlers -- include_tasks: reload.yml - - name: Migrate Nova database vars: nova_api: "{{ nova_services['nova-api'] }}"