diff --git a/.zuul.yaml b/.zuul.yaml index 39ba9813..9e0c55ff 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -3,7 +3,11 @@ nodes: - name: controller label: nested-virt-ubuntu-jammy - - name: compute + # NOTE(artom) We can't name the node 'compute' because that seems to + # take precedence over the 'compute' group in playbooks, so things we + # want to run on all hosts in the 'compute' group would only run on the + # subnode. + - name: compute-host label: nested-virt-ubuntu-jammy groups: # Node where tests are executed and test results collected @@ -14,11 +18,11 @@ - name: compute nodes: - controller - - compute + - compute-host # Nodes that are not the controller - name: subnode nodes: - - compute + - compute-host # Switch node for multinode networking setup - name: switch nodes: @@ -26,7 +30,7 @@ # Peer nodes for multinode networking setup - name: peers nodes: - - compute + - compute-host - job: name: whitebox-devstack-multinode @@ -49,6 +53,7 @@ # open source implementation of UEFI for VMs via the OVMF package. In # addition to test vTPM hosts need swtpm as well extra_packages: ovmf,swtpm-tools + tempest_exclude_regex: ^whitebox_tempest_plugin\.api\.compute\.test_hugepages devstack_localrc: MAX_COMPUTE_NODES: 2 NOVA_SERVICE_REPORT_INTERVAL: 10 @@ -86,7 +91,6 @@ swtpm_group: swtpm group-vars: subnode: - num_hugepages: 2048 devstack_localrc: LIBVIRT_TYPE: kvm NOVA_SERVICE_REPORT_INTERVAL: 10 @@ -109,11 +113,24 @@ swtpm_user: swtpm swtpm_group: swtpm tempest: - num_hugepages: 512 devstack_plugins: barbican: https://opendev.org/openstack/barbican.git whitebox-tempest-plugin: https://opendev.org/openstack/whitebox-tempest-plugin.git +- job: + name: whitebox-devstack-multinode-hugepages + parent: whitebox-devstack-multinode + description: | + Runs the hugepages tests on a deployment that has set up hugepages on the hosts. + vars: + tempest_test_regex: ^whitebox_tempest_plugin\.api\.compute\.test_hugepages + # NOTE(artom) The parent job's exclude regex excludes the hugepages + # tests, so we need to overwrite it here with a regex that matches + # *nothing*. 
+ tempest_exclude_regex: $^ + num_2M_pages: 512 + num_1G_pages: 1 + - job: name: whitebox-devstack-ceph-multinode parent: devstack-plugin-ceph-multinode-tempest-py3 @@ -174,3 +191,6 @@ - whitebox-devstack-multinode - whitebox-devstack-ceph-multinode - openstack-tox-pep8 + experimental: + jobs: + - whitebox-devstack-multinode-hugepages diff --git a/devstack/plugin.sh b/devstack/plugin.sh index 44a9e063..c8940541 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -19,6 +19,7 @@ function configure { iniset $TEMPEST_CONFIG whitebox default_video_model $WHITEBOX_DEFAULT_VIDEO_MODEL iniset $TEMPEST_CONFIG whitebox max_disk_devices_to_attach $WHITEBOX_MAX_DISK_DEVICES_TO_ATTACH iniset $TEMPEST_CONFIG whitebox nodes_yaml $WHITEBOX_NODES_YAML + iniset $TEMPEST_CONFIG whitebox hugepage_guest_ram_size $WHITEBOX_HUGEPAGE_GUEST_RAM_SIZE iniset $TEMPEST_CONFIG whitebox-database user $DATABASE_USER iniset $TEMPEST_CONFIG whitebox-database password $DATABASE_PASSWORD @@ -27,6 +28,7 @@ function configure { iniset $TEMPEST_CONFIG whitebox-hardware cpu_topology "$WHITEBOX_CPU_TOPOLOGY" iniset $TEMPEST_CONFIG whitebox-hardware dedicated_cpus_per_numa "$WHITEBOX_DEDICATED_CPUS_PER_NUMA" iniset $TEMPEST_CONFIG whitebox-hardware shared_cpus_per_numa "$WHITEBOX_SHARED_CPUS_PER_NUMA" + iniset $TEMPEST_CONFIG whitebox-hardware configured_hugepage_sizes "$WHITEBOX_CONFIGURED_HUGEPAGES" iniset $TEMPEST_CONFIG compute-feature-enabled virtio_rng "$COMPUTE_FEATURE_VIRTIO_RNG" iniset $TEMPEST_CONFIG compute-feature-enabled rbd_download "$COMPUTE_FEATURE_RBD_DOWNLOAD" @@ -39,6 +41,7 @@ function configure { # https://github.com/openstack/devstack/blob/6b0f055b4ed407f8a190f768d0e654235ac015dd/lib/nova#L46C36-L46C50 iniset $TEMPEST_CONFIG whitebox-nova-compute state_path $DATA_DIR/nova + iniset $NOVA_CONF filter_scheduler track_instance_changes True } if [[ "$1" == "stack" ]]; then diff --git a/devstack/settings b/devstack/settings index 23a946e8..17f3dc58 100644 --- a/devstack/settings +++ b/devstack/settings @@ -7,10 +7,12 @@ WHITEBOX_RX_QUEUE_SIZE=${WHITEBOX_RX_QUEUE_SIZE:-1024} WHITEBOX_DEFAULT_VIDEO_MODEL=${WHITEBOX_DEFAULT_VIDEO_MODEL:-'virtio'} WHITEBOX_MAX_DISK_DEVICES_TO_ATTACH=${WHITEBOX_MAX_DISK_DEVICES_TO_ATTACH:-7} WHITEBOX_NODES_YAML=${WHITEBOX_NODES_YAML:-'/home/zuul/compute_nodes.yaml'} +WHITEBOX_HUGEPAGE_GUEST_RAM_SIZE=${WHITEBOX_HUGEPAGE_GUEST_RAM_SIZE:-1024} WHITEBOX_CPU_TOPOLOGY=${WHITEBOX_CPU_TOPOLOGY:-''} WHITEBOX_DEDICATED_CPUS_PER_NUMA=${WHITEBOX_DEDICATED_CPUS_PER_NUMA:-4} WHITEBOX_SHARED_CPUS_PER_NUMA=${WHITEBOX_SHARED_CPUS_PER_NUMA:-2} +WHITEBOX_CONFIGURED_HUGEPAGES=${WHITEBOX_CONFIGURED_HUGEPAGES:-'2048,1048576'} COMPUTE_FEATURE_VIRTIO_RNG=${COMPUTE_FEATURE_VIRTIO_RNG:-'True'} COMPUTE_FEATURE_RBD_DOWNLOAD=${COMPUTE_FEATURE_RBD_DOWNLOAD:-'False'} diff --git a/playbooks/whitebox/pre.yaml b/playbooks/whitebox/pre.yaml index ba4dbe4b..42e2e819 100644 --- a/playbooks/whitebox/pre.yaml +++ b/playbooks/whitebox/pre.yaml @@ -44,4 +44,45 @@ shell: | cat /home/zuul/compute_nodes.yaml run_once: true - delegate_to: controller \ No newline at end of file + delegate_to: controller + +- hosts: compute + tasks: + - name: Create hugepages for computes + block: + + - name: Append to GRUB command line + lineinfile: + path: /etc/default/grub + state: present + backrefs: yes + regexp: GRUB_CMDLINE_LINUX="([^"]*)" + line: GRUB_CMDLINE_LINUX="\1 hugepagesz=2M hugepages={{ num_2M_pages }} hugepagesz=1G hugepages={{ num_1G_pages }} transparent_hugepage=never" + become: yes + + - name: Update grub.cfg + # 
NOTE(artom) This assumes an Ubuntu host + command: update-grub2 + become: yes + + - name: Reboot + reboot: + become: yes + + - name: (Re-)start the Zuul console streamer after the reboot + # NOTE(artom) The job will still work if we don't do this, but the + # console will get spammed with 'Waiting on logger' messages. See + # https://bugs.launchpad.net/openstack-gate/+bug/1806655 for more + # info. + import_role: + name: start-zuul-console + + - name: Add 1G hugetlbfs mount + # The 2M hugetlbfs is mounted automatically by the OS, but we need to + # manually add the 1G mount. + shell: | + mkdir /dev/hugepages1G + mount -t hugetlbfs -o pagesize=1G none /dev/hugepages1G + become: yes + + when: num_2M_pages is defined and num_1G_pages is defined diff --git a/whitebox_tempest_plugin/api/compute/base.py b/whitebox_tempest_plugin/api/compute/base.py index 84a0f4ff..9ba1c00f 100644 --- a/whitebox_tempest_plugin/api/compute/base.py +++ b/whitebox_tempest_plugin/api/compute/base.py @@ -435,3 +435,11 @@ class BaseWhiteboxComputeTest(base.BaseV2ComputeAdminTest): 'status = "%s"' % status) data = cursor.fetchall() return data[0]['COUNT(*)'] + + def _get_hugepage_xml_element(self, server_id): + """Gather and return all instances of the page element from XML element + 'memoryBacking/hugepages' in a given server's domain. + """ + root = self.get_server_xml(server_id) + huge_pages = root.findall('.memoryBacking/hugepages/page') + return huge_pages diff --git a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py index 732a5afc..d59bf30e 100644 --- a/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py +++ b/whitebox_tempest_plugin/api/compute/test_cpu_pinning.py @@ -612,14 +612,6 @@ class NUMALiveMigrationBase(BasePinningTest): cpuset = root.find('./vcpu').attrib.get('cpuset', None) return hardware.parse_cpu_spec(cpuset) - def _get_hugepage_xml_element(self, server_id): - """Gather and return all instances of the page element from XML element - 'memoryBacking/hugepages' in a given server's domain. - """ - root = self.get_server_xml(server_id) - huge_pages = root.findall('.memoryBacking/hugepages/page') - return huge_pages - def _validate_hugepage_elements(self, server_id, pagesize): """Analyze the hugepage xml element(s) from a provided instance. Expect to find only one hugepage element in the domain. Return boolean result diff --git a/whitebox_tempest_plugin/api/compute/test_hugepages.py b/whitebox_tempest_plugin/api/compute/test_hugepages.py new file mode 100644 index 00000000..85a148a6 --- /dev/null +++ b/whitebox_tempest_plugin/api/compute/test_hugepages.py @@ -0,0 +1,196 @@ +# Copyright 2022 Red Hat Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+
+from oslo_log import log as logging
+from tempest import config
+import testtools
+
+from whitebox_tempest_plugin.api.compute import base
+
+CONF = config.CONF
+LOG = logging.getLogger(__name__)
+
+
+class HugePageResize(base.BaseWhiteboxComputeTest):
+
+    @classmethod
+    def skip_checks(cls):
+        super(HugePageResize, cls).skip_checks()
+        if len(getattr(CONF.whitebox_hardware,
+                       'configured_hugepage_sizes')) == 0:
+            msg = "configured_hugepage_sizes in whitebox-hardware is not " \
+                  "present"
+            raise cls.skipException(msg)
+
+    def _get_xml_hugepage_size(self, server_id):
+        """Analyze the hugepage XML element(s) of a provided instance. Expect
+        to find exactly one hugepage element in the domain and return its
+        size attribute as an integer.
+        """
+        huge_pages_list = self._get_hugepage_xml_element(server_id)
+        self.assertEqual(1, len(huge_pages_list), "Expected to find 1 "
+                         "hugepage XML element on server %s but found %s"
+                         % (server_id, len(huge_pages_list)))
+        huge_page_xml = huge_pages_list[0]
+        return int(huge_page_xml.attrib['size'])
+
+    def test_hugepage_resize_large_to_small(self):
+        """Resize a guest with large hugepages to small hugepages and back
+
+        Create a guest using a flavor with hw:mem_page_size=large, resize it
+        to a flavor with hw:mem_page_size=small, and then resize it back to
+        the original flavor.
+        """
+        flavor_a = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': 'large'})
+
+        server = self.create_test_server(flavor=flavor_a['id'],
+                                          wait_until='ACTIVE')
+
+        # We cannot assume the exact page size of the guest; verify that the
+        # memory backing element is present on the guest and that the size
+        # found is greater than or equal to the smallest page size configured
+        # in the environment
+        large_page_size = self._get_xml_hugepage_size(server['id'])
+        minimum_pagesize_threshold = \
+            min(CONF.whitebox_hardware.configured_hugepage_sizes)
+        self.assertTrue(
+            large_page_size >= minimum_pagesize_threshold,
+            "Pagesize found %s should be greater than or equal to pagesize "
+            "of %s for server %s" %
+            (large_page_size, minimum_pagesize_threshold, server['id'])
+        )
+
+        # Resize the guest using a flavor with hw:mem_page_size=small. The
+        # memory backing element should no longer be present on the guest,
+        # so no XML verification is needed
+        flavor_b = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': 'small'})
+        self.resize_server(server['id'], flavor_b['id'])
+
+        # Resize the instance back to the starting flavor and repeat the XML
+        # check of the guest
+        self.resize_server(server['id'], flavor_a['id'])
+        large_page_size = self._get_xml_hugepage_size(server['id'])
+        self.assertTrue(
+            large_page_size >= minimum_pagesize_threshold,
+            "After resizing back to original flavor, pagesize found %s should "
+            "be greater than or equal to pagesize of %s for server %s" %
+            (large_page_size, minimum_pagesize_threshold, server['id'])
+        )
+
+    def test_hugepage_resize_size_to_small(self):
+        """Resize a guest with a specified hugepage size to small hugepages
+
+        Create a guest using a flavor with an explicit hugepage size, based
+        on what is configured in whitebox_hardware. Resize the guest to a
+        flavor with hw:mem_page_size=small, and then resize it back to the
+        original flavor. Repeat this process for every hugepage size
+        configured in whitebox_hardware.configured_hugepage_sizes.
+        """
+        flavor_small = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': 'small'})
+
+        # Create a flavor and launch an instance based on every configured
+        # hugepage size in the deployment.
+        for page_size in CONF.whitebox_hardware.configured_hugepage_sizes:
+            flavor_a = self.create_flavor(
+                ram=str(CONF.whitebox.hugepage_guest_ram_size),
+                extra_specs={'hw:mem_page_size': str(page_size)})
+
+            server = self.create_test_server(flavor=flavor_a['id'],
+                                              wait_until='ACTIVE')
+
+            size_found = self._get_xml_hugepage_size(server['id'])
+            self.assertTrue(
+                page_size == size_found,
+                "Expected pagesize of %s not found on server %s instead "
+                "found %s" % (page_size, server['id'], size_found)
+            )
+
+            # Resize the guest using a flavor with hw:mem_page_size=small.
+            # The memory backing element will no longer be present in the
+            # guest, so follow-up XML verification is not necessary
+            self.resize_server(server['id'], flavor_small['id'])
+
+            # Resize back to the original size and confirm the memory backing
+            # element is present and has the correct size
+            self.resize_server(server['id'], flavor_a['id'])
+            size_found = self._get_xml_hugepage_size(server['id'])
+            self.assertTrue(
+                page_size == size_found,
+                "Expected pagesize of %s not found on server %s after "
+                "resizing back to original flavor size, instead found %s" %
+                (page_size, server['id'], size_found)
+            )
+
+            self.delete_server(server['id'])
+
+    @testtools.skipUnless(
+        len(CONF.whitebox_hardware.configured_hugepage_sizes) > 1,
+        'Need at least 2 configured hugepage sizes to execute test')
+    def test_hugepage_resize_size_to_size(self):
+        """Resize a guest with a specified hugepage size to another size
+
+        Create two flavors based on the first two configured hugepage sizes.
+        The flavors created use explicit page sizes. Create a server using
+        the first flavor, resize the guest to the second flavor, and then
+        resize back to the original flavor.
+        """
+        start_size, target_size = \
+            CONF.whitebox_hardware.configured_hugepage_sizes[:2]
+
+        flavor_a = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': str(start_size)})
+
+        server = self.create_test_server(flavor=flavor_a['id'],
+                                          wait_until='ACTIVE')
+
+        size_found = self._get_xml_hugepage_size(server['id'])
+        self.assertTrue(
+            start_size == size_found,
+            "Expected pagesize of %s not found on server %s instead "
+            "found %s" % (start_size, server['id'], size_found)
+        )
+
+        flavor_b = self.create_flavor(
+            ram=str(CONF.whitebox.hugepage_guest_ram_size),
+            extra_specs={'hw:mem_page_size': str(target_size)})
+
+        # Resize to the target size and confirm the memory backing element is
+        # present and has the correct size
+        self.resize_server(server['id'], flavor_b['id'])
+        size_found = self._get_xml_hugepage_size(server['id'])
+        self.assertTrue(
+            target_size == size_found,
+            "Expected pagesize of %s not found on server %s after resize "
+            "instead found %s" % (target_size, server['id'], size_found)
+        )
+
+        # Resize back to the original size and confirm the memory backing
+        # element is present and has the correct size
+        self.resize_server(server['id'], flavor_a['id'])
+        size_found = self._get_xml_hugepage_size(server['id'])
+
+        self.assertTrue(
+            start_size == size_found,
+            "Expected pagesize of %s not found on server %s after resizing "
+            "back to original flavor size, instead found %s" %
+            (start_size, server['id'], size_found)
+        )
diff --git a/whitebox_tempest_plugin/config.py b/whitebox_tempest_plugin/config.py
index 8394f415..40fd9cbb 100644
--- a/whitebox_tempest_plugin/config.py
+++ b/whitebox_tempest_plugin/config.py
@@ -133,7 +133,12 @@ general_opts = [
         'libvirt_hw_machine_type',
         default='pc',
         choices=["pc", "q35"],
-        help='The machine type configured for the nova computes')
+        help='The machine type configured for the nova computes'),
+    cfg.IntOpt(
+        'hugepage_guest_ram_size',
+        default=64,
+        help="RAM size in MB to use when launching the guests backed "
+             "by hugepages."),
 ]
 
 nova_compute_group = cfg.OptGroup(
@@ -224,6 +229,12 @@ hardware_opts = [
              '. For example, if NUMA node 0 has '
              'CPUs 0 and 1, and NUMA node 1 has CPUs 2 and 3, the value to '
              'set would be `0: [0,1], 1: [2, 3]`.'),
+    cfg.Opt(
+        'configured_hugepage_sizes',
+        type=types.List(types.Integer()),
+        default=[],
+        help='List of hugepage sizes (in kB) configured in the '
+             'environment, e.g. 2048,1048576'),
     cfg.IntOpt(
         'dedicated_cpus_per_numa',
         default=0,
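
Reviewer note, outside the patch: the new assertions hinge on the page element that libvirt places under memoryBacking/hugepages in the instance's domain XML when a flavor requests hugepages. A minimal, self-contained sketch of that lookup follows; the sample XML and the helper name are illustrative assumptions, not the plugin's actual code, which goes through get_server_xml() and the _get_hugepage_xml_element() helper added to base.py above.

import xml.etree.ElementTree as ET

# Hypothetical domain XML for a guest backed by a single 1G hugepage; in the
# plugin the real XML is fetched from the hypervisor via get_server_xml().
SAMPLE_DOMAIN_XML = """
<domain type='kvm'>
  <memory unit='KiB'>1048576</memory>
  <memoryBacking>
    <hugepages>
      <page size='1048576' unit='KiB' nodeset='0'/>
    </hugepages>
  </memoryBacking>
</domain>
"""


def hugepage_size_from_domain(domain_xml):
    """Return the hugepage size in KiB declared in a domain, or None."""
    root = ET.fromstring(domain_xml)
    pages = root.findall('./memoryBacking/hugepages/page')
    if len(pages) != 1:
        # The tests expect exactly one page element per guest.
        return None
    return int(pages[0].attrib['size'])


print(hugepage_size_from_domain(SAMPLE_DOMAIN_XML))  # -> 1048576

The size attribute is expressed in KiB, which is why it can be compared directly against the kB values supplied in whitebox-hardware.configured_hugepage_sizes (e.g. 2048 for 2M pages, 1048576 for 1G pages).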