From 4cef8b6b20c2abfc7fa23da3c6d2877693c45a6b Mon Sep 17 00:00:00 2001
From: Jesse Pretorius
Date: Mon, 3 Jul 2017 11:06:34 +0100
Subject: [PATCH] Implement rolling upgrades for nova

Based on [1], this patch implements changes to the playbook which
executes the nova deployment in a play per host group, serialised
to ensure that:

1. The services are changed in the right order.
2. The API services remain available at all times during an upgrade.
3. If services share a host/container, the role is not executed
   twice on the same host.

[1] https://docs.openstack.org/developer/nova/upgrade.html

Depends-On: I08e5a7f0ce526b11aa52c35ee29c458954a5f22d
Change-Id: I3173962a35b716fd9b0b2a526420fcc5d844befa
---
 group_vars/nova_all.yml                       |  15 +
 playbooks/common-playbooks/nova.yml           | 166 ++++++++++
 playbooks/common-tasks/restart-service.yml    |   2 +-
 playbooks/os-nova-install.yml                 | 313 +++++++++++-------
 ...ova-rolling-upgrades-5a3927330c6be5fd.yaml |   7 +
 .../templates/user_variables.aio.yml.j2       |   6 +
 6 files changed, 383 insertions(+), 126 deletions(-)
 create mode 100644 playbooks/common-playbooks/nova.yml
 create mode 100644 releasenotes/notes/nova-rolling-upgrades-5a3927330c6be5fd.yaml

diff --git a/group_vars/nova_all.yml b/group_vars/nova_all.yml
index d47a65b279..636f64226b 100644
--- a/group_vars/nova_all.yml
+++ b/group_vars/nova_all.yml
@@ -13,6 +13,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# The MySQL details for the nova service
+nova_galera_user: nova
+nova_galera_database: nova
+nova_galera_address: "{{ galera_address }}"
+nova_api_galera_user: nova_api
+nova_api_galera_database: nova_api
+nova_api_galera_address: "{{ galera_address }}"
+nova_placement_galera_user: nova_placement
+nova_placement_galera_database: nova_placement
+nova_placement_galera_address: "{{ galera_address }}"
+nova_cell0_database: nova_cell0
+
 nova_external_ssl: "{{ openstack_external_ssl }}"
 nova_ceph_client_uuid: '{{ cinder_ceph_client_uuid | default() }}'
 nova_dhcp_domain: "{{ dhcp_domain }}"
@@ -22,6 +34,9 @@ nova_glance_api_servers: "{{ glance_api_servers }}"
 # Ensure that the package state matches the global setting
 nova_package_state: "{{ package_state }}"
 
+# The system user for all nova services
+nova_system_user_name: nova
+
 # venv fetch configuration
 nova_venv_tag: "{{ venv_tag }}"
 nova_bin: "/openstack/venvs/nova-{{ nova_venv_tag }}/bin"
diff --git a/playbooks/common-playbooks/nova.yml b/playbooks/common-playbooks/nova.yml
new file mode 100644
index 0000000000..add3621ec9
--- /dev/null
+++ b/playbooks/common-playbooks/nova.yml
@@ -0,0 +1,166 @@
+---
+# Copyright 2014, Rackspace US, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+- name: Install nova services
+  hosts: "{{ nova_hosts }}"
+  serial: "{{ nova_serial }}"
+  gather_facts: "{{ gather_facts | default(True) }}"
+  user: root
+  environment: "{{ deployment_environment_variables | default({}) }}"
+  vars_files:
+    - ../defaults/repo_packages/nova_consoles.yml
+  tags:
+    - nova
+  pre_tasks:
+
+    # In order to ensure that any container, software or
+    # config file changes which causes a container/service
+    # restart do not cause an unexpected outage, we drain
+    # the load balancer back end for this container.
+    - include: ../common-tasks/haproxy-endpoint-manage.yml
+      vars:
+        haproxy_backend: "{{ item }}-back"
+        haproxy_state: disabled
+      when:
+        - "item in group_names"
+        - "groups[item] | length > 1"
+      with_items:
+        - "nova_api_metadata"
+        - "nova_api_os_compute"
+        - "nova_api_placement"
+        - "nova_console"
+
+    - name: Determine management bridge IP address
+      include: ../common-tasks/dynamic-address-fact.yml
+      vars:
+        network_address: "management_address"
+
+    - name: Configure container
+      include: ../common-tasks/os-lxc-container-setup.yml
+      vars:
+        extra_container_config_no_restart:
+          - "lxc.start.order=69"
+
+    - name: Configure log directories (on metal)
+      include: ../common-tasks/os-log-dir-setup.yml
+      vars:
+        log_dirs:
+          - src: "/openstack/log/{{ inventory_hostname }}-nova"
+            dest: "/var/log/nova"
+
+    - name: Configure package proxy cache
+      include: ../common-tasks/package-cache-proxy.yml
+
+    - name: Add nbd devices to the compute
+      shell: |
+        for i in /dev/nbd*;do
+          lxc-device -n {{ container_name }} add $i $i
+        done
+      failed_when: false
+      register: device_add
+      changed_when: >
+        'added' in device_add.stdout.lower()
+      delegate_to: "{{ physical_host }}"
+      when:
+        - "'nova_compute' in group_names"
+        - "not is_metal | bool"
+      tags:
+        - always
+
+    - name: Add net/tun device to the compute
+      command: |
+        lxc-device -n {{ container_name }} add /dev/net/tun /dev/net/tun
+      delegate_to: "{{ physical_host }}"
+      when:
+        - "'nova_compute' in group_names"
+        - "not is_metal | bool"
+      tags:
+        - always
+
+    - name: Check if kvm device exists
+      stat:
+        path: /dev/kvm
+      delegate_to: "{{ physical_host }}"
+      register: kvm_device
+      when:
+        - "'nova_compute' in group_names"
+        - "not is_metal | bool"
+      tags:
+        - always
+
+    - name: Add kvm device to the compute
+      command: |
+        lxc-device -n {{ container_name }} add /dev/kvm /dev/kvm
+      delegate_to: "{{ physical_host }}"
+      register: device_add
+      failed_when: false
+      changed_when: >
+        'added' in device_add.stdout.lower()
+      when:
+        - "'nova_compute' in group_names"
+        - "not is_metal | bool"
+        - "'ischr' in kvm_device.stat and kvm_device.stat.ischr | bool"
+      tags:
+        - always
+
+  roles:
+    - role: "os_nova"
+      nova_novncproxy_git_repo: "{{ openstack_repo_git_url }}/novnc"
+      nova_novncproxy_git_install_branch: "{{ novncproxy_git_install_branch }}"
+      nova_spicehtml5_git_repo: "{{ openstack_repo_git_url }}/spice-html5"
+      nova_spicehtml5_git_install_branch: "{{ spicehtml5_git_install_branch }}"
+      nova_management_address: "{{ management_address }}"
+
+    - role: "ceph_client"
+      openstack_service_system_user: "{{ nova_system_user_name }}"
+      openstack_service_venv_bin: "{{ nova_bin }}"
+      when:
+        - "'nova_compute' in group_names"
+        - "(nova_libvirt_images_rbd_pool is defined) or
+           (cinder_backends_rbd_inuse | default(false)) | bool"
+      tags:
+        - ceph
+
+    - role: "openstack_openrc"
+      tags:
+        - openrc
+
+    - role: "rsyslog_client"
+      rsyslog_client_log_rotate_file: nova_log_rotate
+      rsyslog_client_log_dir: "/var/log/nova"
+      rsyslog_client_config_name: "99-nova-rsyslog-client.conf"
+      tags:
+        - rsyslog
+
+    - role: "system_crontab_coordination"
+      tags:
+        - crontab
+
+  post_tasks:
+    # Now that container changes are done, we can set
+    # the load balancer back end for this container
+    # to available again.
+    - include: ../common-tasks/haproxy-endpoint-manage.yml
+      vars:
+        haproxy_backend: "{{ item }}-back"
+        haproxy_state: enabled
+      when:
+        - "item in group_names"
+        - "groups[item] | length > 1"
+      with_items:
+        - "nova_api_metadata"
+        - "nova_api_os_compute"
+        - "nova_api_placement"
+        - "nova_console"
diff --git a/playbooks/common-tasks/restart-service.yml b/playbooks/common-tasks/restart-service.yml
index 007e3cd326..808fb0990e 100644
--- a/playbooks/common-tasks/restart-service.yml
+++ b/playbooks/common-tasks/restart-service.yml
@@ -33,7 +33,7 @@
   service:
     name: "{{ service_file }}"
     state: "{{ service_action }}"
-  with_items: "{{ _enabled_services.stdout_lines }}"
+  with_items: "{{ (_enabled_services.stdout_lines | difference(service_negate | default([]))) | list }}"
   loop_control:
     loop_var: service_file
 
diff --git a/playbooks/os-nova-install.yml b/playbooks/os-nova-install.yml
index 931441480e..65792ecc8f 100644
--- a/playbooks/os-nova-install.yml
+++ b/playbooks/os-nova-install.yml
@@ -13,31 +13,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-- name: Installation and setup of Nova
-  hosts: nova_all
+- name: Prepare MQ/DB services
+  hosts: nova_conductor
   gather_facts: "{{ gather_facts | default(True) }}"
-  max_fail_percentage: 20
   user: root
-  pre_tasks:
-    - include: common-tasks/dynamic-address-fact.yml
-      vars:
-        network_address: "management_address"
-    - include: common-tasks/os-lxc-container-setup.yml
-      vars:
-        extra_container_config_no_restart:
-          - "lxc.start.order=69"
-    - include: common-tasks/rabbitmq-vhost-user.yml
-      static: no
+  environment: "{{ deployment_environment_variables | default({}) }}"
+  tags:
+    - nova
+  tasks:
+
+    - name: Configure rabbitmq vhost/user (nova)
+      include: common-tasks/rabbitmq-vhost-user.yml
       vars:
         user: "{{ nova_rabbitmq_userid }}"
        password: "{{ nova_rabbitmq_password }}"
         vhost: "{{ nova_rabbitmq_vhost }}"
         _rabbitmq_host_group: "{{ nova_rabbitmq_host_group }}"
       when:
-        - inventory_hostname == groups['nova_all'][0]
         - groups[nova_rabbitmq_host_group] | length > 0
-    - include: common-tasks/rabbitmq-vhost-user.yml
-      static: no
+      run_once: yes
+
+    - name: Configure rabbitmq vhost/user (nova/telemetry)
+      include: common-tasks/rabbitmq-vhost-user.yml
       vars:
         user: "{{ nova_rabbitmq_telemetry_userid }}"
         password: "{{ nova_rabbitmq_telemetry_password }}"
@@ -45,141 +42,207 @@
         _rabbitmq_host_group: "{{ nova_rabbitmq_telemetry_host_group }}"
       when:
         - nova_ceilometer_enabled | bool
-        - inventory_hostname == groups['nova_all'][0]
         - groups[nova_rabbitmq_telemetry_host_group] is defined
         - groups[nova_rabbitmq_telemetry_host_group] | length > 0
         - groups[nova_rabbitmq_telemetry_host_group] != groups[nova_rabbitmq_host_group]
-    - include: common-tasks/os-log-dir-setup.yml
-      vars:
-        log_dirs:
-          - src: "/openstack/log/{{ inventory_hostname }}-nova"
-            dest: "/var/log/nova"
-    - include: common-tasks/mysql-db-user.yml
-      static: no
+      run_once: yes
+
+    - name: Configure MySQL user (nova)
+      include: common-tasks/mysql-db-user.yml
       vars:
         user_name: "{{ nova_galera_user }}"
         password: "{{ nova_container_mysql_password }}"
         login_host: "{{ nova_galera_address }}"
         db_name: "{{ nova_galera_database }}"
-      when: inventory_hostname == groups['nova_all'][0]
-    - include: common-tasks/mysql-db-user.yml
-      static: no
+      run_once: yes
+
+    - name: Configure MySQL user (nova-api)
+      include: common-tasks/mysql-db-user.yml
       vars:
         user_name: "{{ nova_api_galera_user }}"
         password: "{{ nova_api_container_mysql_password }}"
         login_host: "{{ nova_api_galera_address }}"
         db_name: "{{ nova_api_galera_database }}"
-      when: inventory_hostname == groups['nova_all'][0]
-    - include: common-tasks/mysql-db-user.yml
-      static: no
+      run_once: yes
+
+    - name: Configure MySQL user (nova-placement)
+      include: common-tasks/mysql-db-user.yml
       vars:
         user_name: "{{ nova_placement_galera_user }}"
         password: "{{ nova_placement_container_mysql_password }}"
         login_host: "{{ nova_placement_galera_address }}"
         db_name: "{{ nova_placement_galera_database }}"
-      when: inventory_hostname == groups['nova_all'][0]
-    - include: common-tasks/mysql-db-user.yml
-      static: no
+      run_once: yes
+
+    - name: Configure MySQL user (nova-api cell0)
+      include: common-tasks/mysql-db-user.yml
       vars:
         user_name: "{{ nova_api_galera_user }}"
         password: "{{ nova_api_container_mysql_password }}"
         login_host: "{{ nova_api_galera_address }}"
         db_name: "{{ nova_cell0_database }}"
         db_append_privs: "yes"
-      when: inventory_hostname == groups['nova_all'][0]
-    - include: common-tasks/package-cache-proxy.yml
+      run_once: yes
 
-    - name: Add nbd devices to the compute
-      shell: |
-        for i in /dev/nbd*;do
-          lxc-device -n {{ container_name }} add $i $i
-        done
-      failed_when: false
-      register: device_add
-      changed_when: >
-        'added' in device_add.stdout.lower()
-      delegate_to: "{{ physical_host }}"
-      when:
-        - "inventory_hostname in groups['nova_compute']"
-        - "not is_metal | bool"
-      tags:
-        - always
-    - name: Add net/tun device to the compute
-      command: |
-        lxc-device -n {{ container_name }} add /dev/net/tun /dev/net/tun
-      delegate_to: "{{ physical_host }}"
-      when:
-        - "inventory_hostname in groups['nova_compute']"
-        - "not is_metal | bool"
-      tags:
-        - always
-    - name: Check if kvm device exists
-      stat:
-        path: /dev/kvm
-      delegate_to: "{{ physical_host }}"
-      register: kvm_device
-      when:
-        - "inventory_hostname in groups['nova_compute']"
-        - "not is_metal | bool"
-      tags:
-        - always
-    - name: Add kvm device to the compute
-      command: |
-        lxc-device -n {{ container_name }} add /dev/kvm /dev/kvm
-      delegate_to: "{{ physical_host }}"
-      register: device_add
-      failed_when: false
-      changed_when: >
-        'added' in device_add.stdout.lower()
-      when:
-        - "inventory_hostname in groups['nova_compute']"
-        - "not is_metal | bool"
-        - kvm_device.stat.ischr is defined and kvm_device.stat.ischr
-      tags:
-        - always
-  roles:
-    - role: "os_nova"
-      nova_novncproxy_git_repo: "{{ openstack_repo_git_url }}/novnc"
-      nova_novncproxy_git_install_branch: "{{ novncproxy_git_install_branch }}"
-      nova_spicehtml5_git_repo: "{{ openstack_repo_git_url }}/spice-html5"
-      nova_spicehtml5_git_install_branch: "{{ spicehtml5_git_install_branch }}"
-      nova_management_address: "{{ management_address }}"
-    - role: "ceph_client"
-      openstack_service_system_user: "{{ nova_system_user_name }}"
-      openstack_service_venv_bin: "{{ nova_bin }}"
-      when:
-        - inventory_hostname in groups['nova_compute']
-        - nova_libvirt_images_rbd_pool is defined or
-          cinder_backends_rbd_inuse | default(false) | bool
-      tags:
-        - ceph
-    - role: "openstack_openrc"
-      tags:
-        - openrc
-    - role: "rsyslog_client"
-      rsyslog_client_log_rotate_file: nova_log_rotate
-      rsyslog_client_log_dir: "/var/log/nova"
-      rsyslog_client_config_name: "99-nova-rsyslog-client.conf"
-      tags:
-        - rsyslog
-    - role: "system_crontab_coordination"
-      tags:
-        - crontab
-  vars_files:
-    - defaults/repo_packages/nova_consoles.yml
+
+
+- name: Install nova-conductor services
+  include: common-playbooks/nova.yml
   vars:
-    is_metal: "{{ properties.is_metal|default(false) }}"
-    nova_galera_user: nova
-    nova_galera_database: nova
-    nova_api_galera_user: nova_api
-    nova_api_galera_database: nova_api
-    nova_placement_galera_user: nova_placement
-    nova_placement_galera_database: nova_placement
-    nova_cell0_database: nova_cell0
-    nova_galera_address: "{{ galera_address }}"
-    nova_api_galera_address: "{{ galera_address }}"
-    nova_placement_galera_address: "{{ galera_address }}"
-    glance_host: "{{ internal_lb_vip_address }}"
+    nova_hosts: "nova_conductor"
+    nova_serial: "{{ nova_conductor_serial | default(['1', '100%']) }}"
+
+
+
+- name: Install nova-scheduler/nova-consoleauth services
+  include: common-playbooks/nova.yml
+  vars:
+    nova_hosts: "nova_scheduler:nova_consoleauth:!nova_conductor"
+    nova_serial: "{{ nova_scheduler_serial | default(['1', '100%']) }}"
+
+
+
+- name: Install nova API services
+  include: common-playbooks/nova.yml
+  vars:
+    nova_hosts: "nova_api_os_compute:nova_api_placement:!nova_conductor:!nova_scheduler:!nova_consoleauth"
+    nova_serial: "{{ nova_api_serial | default(['1', '100%']) }}"
+
+
+
+- name: Install nova console/metadata services
+  include: common-playbooks/nova.yml
+  vars:
+    nova_hosts: "nova_api_metadata:nova_console:!nova_conductor:!nova_scheduler:!nova_consoleauth:!nova_api_os_compute:!nova_api_placement"
+    nova_serial: "{{ nova_console_serial | default(['1', '100%']) }}"
+
+
+
+- name: Install nova compute
+  include: common-playbooks/nova.yml
+  vars:
+    nova_hosts: "nova_compute:!nova_conductor:!nova_scheduler:!nova_consoleauth:!nova_api_os_compute:!nova_api_placement:!nova_api_metadata:!nova_console"
+    nova_serial: "{{ nova_compute_serial | default('20%') }}"
+
+
+
+# These facts are set against the deployment host to ensure that
+# they are fast to access. This is done in preference to setting
+# them against each target as the hostvars extraction will take
+# a long time if executed against a large inventory.
+- name: Refresh local facts after all software changes are made
+  hosts: nova_all
+  max_fail_percentage: 20
+  user: root
   environment: "{{ deployment_environment_variables | default({}) }}"
   tags:
     - nova
+  tasks:
+    - name: refresh local facts
+      setup:
+        filter: ansible_local
+        gather_subset: "!all"
+
+    # This variable contains the values of the local fact set for the nova
+    # venv tag for all hosts in the 'nova_all' host group.
+    - name: Gather software version list
+      set_fact:
+        nova_all_software_versions: "{{ (groups['nova_all'] | map('extract', hostvars, ['ansible_local', 'openstack_ansible', 'nova', 'venv_tag'])) | list }}"
+      delegate_to: localhost
+      run_once: yes
+
+    # This variable outputs a boolean value which is True when
+    # nova_all_software_versions contains a list of defined
+    # values. If they are not defined, it means that not all
+    # hosts have their software deployed yet.
+    - name: Set software deployed fact
+      set_fact:
+        nova_all_software_deployed: "{{ (nova_all_software_versions | select('defined')) | list == nova_all_software_versions }}"
+      delegate_to: localhost
+      run_once: yes
+
+    # This variable outputs a boolean when all the values in
+    # nova_all_software_versions are the same and the software
+    # has been deployed to all hosts in the group.
+    - name: Set software updated fact
+      set_fact:
+        nova_all_software_updated: "{{ ((nova_all_software_versions | unique) | length == 1) and (nova_all_software_deployed | bool) }}"
+      delegate_to: localhost
+      run_once: yes
+
+
+# Note that the placement API service does not understand how to reload,
+# so it fails when you try to make it do so. We therefore skip the reload
+# for that service.
+- name: Reload all nova services to ensure new RPC object version is used
+  hosts: "nova_all:!nova_api_placement"
+  gather_facts: no
+  serial: "{{ nova_serial | default('100%') }}"
+  max_fail_percentage: 20
+  user: root
+  environment: "{{ deployment_environment_variables | default({}) }}"
+  tags:
+    - nova
+  tasks:
+    - name: Execute nova service reload
+      include: common-tasks/restart-service.yml
+      vars:
+        service_name: "nova"
+        service_action: "reloaded"
+        service_negate: "{{ ['nova-placement-api.service'] + nova_service_negate | default([]) }}"
+      when:
+        - "nova_all_software_updated | bool"
+        - "ansible_local['openstack_ansible']['nova']['need_service_restart'] | bool"
+
+
+
+# Note that the placement API service does not understand how to reload,
+# so it fails when you try to make it do so. We therefore restart it instead.
+- name: Restart the nova placement API service to ensure new RPC object version is used
+  hosts: "nova_api_placement"
+  gather_facts: no
+  serial: "{{ nova_api_serial | default(['1', '100%']) }}"
+  max_fail_percentage: 20
+  user: root
+  environment: "{{ deployment_environment_variables | default({}) }}"
+  tags:
+    - nova
+  tasks:
+    - name: Execute nova service restart
+      include: common-tasks/restart-service.yml
+      vars:
+        service_name: "nova-placement-api"
+        service_action: "restarted"
+      when:
+        - "nova_all_software_updated | bool"
+        - "ansible_local['openstack_ansible']['nova']['need_service_restart'] | bool"
+
+
+
+- name: Perform online database migrations
+  hosts: nova_conductor
+  gather_facts: no
+  user: root
+  environment: "{{ deployment_environment_variables | default({}) }}"
+  tags:
+    - nova
+  tasks:
+    - name: Perform online data migrations
+      command: "{{ nova_bin }}/nova-manage db online_data_migrations"
+      become: yes
+      become_user: "{{ nova_system_user_name }}"
+      when:
+        - "nova_all_software_updated | bool"
+        - "ansible_local['openstack_ansible']['nova']['need_online_data_migrations'] | bool"
+      changed_when: false
+      run_once: yes
+      register: data_migrations
+
+    - name: Disable the online migrations requirement
+      ini_file:
+        dest: "/etc/ansible/facts.d/openstack_ansible.fact"
+        section: nova
+        option: need_online_data_migrations
+        value: False
+      when:
+        - data_migrations | succeeded
diff --git a/releasenotes/notes/nova-rolling-upgrades-5a3927330c6be5fd.yaml b/releasenotes/notes/nova-rolling-upgrades-5a3927330c6be5fd.yaml
new file mode 100644
index 0000000000..59d56709c2
--- /dev/null
+++ b/releasenotes/notes/nova-rolling-upgrades-5a3927330c6be5fd.yaml
@@ -0,0 +1,7 @@
+---
+features:
+  - |
+    The ``os-nova-install.yml`` playbook will now execute a rolling
+    upgrade of nova, including database migrations, as per the procedure
+    described in the
+    `nova documentation <https://docs.openstack.org/developer/nova/upgrade.html>`_.
diff --git a/tests/roles/bootstrap-host/templates/user_variables.aio.yml.j2 b/tests/roles/bootstrap-host/templates/user_variables.aio.yml.j2
index 319861cf7e..6d40cfeda3 100644
--- a/tests/roles/bootstrap-host/templates/user_variables.aio.yml.j2
+++ b/tests/roles/bootstrap-host/templates/user_variables.aio.yml.j2
@@ -101,3 +101,9 @@ tempest_run: yes
 # Disable chronyd in OpenStack CI
 security_rhel7_enable_chrony: no
 {% endif %}
+
+# For testing purposes in public clouds, we need to ignore these
+# services when trying to do a reload of nova services.
+nova_service_negate:
+  - "nova-agent.service"
+  - "nova-resetnetwork.service"
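Deployers can tune the rollout batches introduced by this patch through the new serial variables (defaulting to ['1', '100%'] for the control-plane groups and '20%' for nova_compute) and can skip extra units during the reload play via nova_service_negate. The snippet below is a minimal, illustrative sketch of such overrides, assuming the usual /etc/openstack_deploy/user_variables.yml override file; the variable names come from this patch, while the values shown are examples only.

# Illustrative user_variables.yml overrides for the nova rolling upgrade
# (example values, not defaults from the patch except where noted).

# Upgrade one API host first, then the remainder in a single batch
# (this mirrors the patch default of ['1', '100%']).
nova_api_serial:
  - 1
  - "100%"

# Move compute nodes in smaller batches than the 20% default.
nova_compute_serial: "10%"

# Extra units to skip when the reload play runs, following the same
# pattern as the AIO test override added in this patch.
nova_service_negate:
  - "nova-agent.service"

These values are consumed by the "Install nova ..." plays and the service reload play in os-nova-install.yml shown above.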