From 9b7cab9cf19dcd95690166061192b273814ee3c5 Mon Sep 17 00:00:00 2001 From: James Denton Date: Fri, 10 Aug 2018 15:44:14 +0000 Subject: [PATCH] Adds DPDK support for Open vSwitch This commit provides baseline changes to the os_neutron role to support DPDK-accelerated Open vSwitch. Change-Id: I08aba431d1546160e7c0311ad929762a018e0dca Implements: dpdk support for openvswitch Closes-Bug: #1784660 --- defaults/main.yml | 16 + doc/source/app-openvswitch-dpdk.rst | 461 ++++++++++++++++++ doc/source/index.rst | 1 + tasks/providers/ovs_config.yml | 6 + tasks/providers/setup_ovs_dpdk.yml | 127 +++++ templates/dpdk.conf.j2 | 59 +++ templates/dpdk_interfaces.j2 | 25 + .../plugins/ml2/openvswitch_agent.ini.j2 | 8 + vars/debian.yml | 6 + vars/distro_install.yml | 2 + vars/source_install.yml | 2 + vars/suse.yml | 3 + 12 files changed, 716 insertions(+) create mode 100644 doc/source/app-openvswitch-dpdk.rst create mode 100644 tasks/providers/setup_ovs_dpdk.yml create mode 100644 templates/dpdk.conf.j2 create mode 100644 templates/dpdk_interfaces.j2 diff --git a/defaults/main.yml b/defaults/main.yml index 292a9299..19ca7343 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -450,3 +450,19 @@ ovs_nsh_support: False # Set higher priority to mardim PPA when ovs_nsh_support is True ovs_nsh_apt_pinned_packages: [{ package: "*", release: "LP-PPA-mardim-mardim-ppa"}] + +### +### DPDK Configuration +### + +ovs_datapath: "netdev" +ovs_dpdk_pci_addresses: [] +ovs_dpdk_driver: vfio-pci +ovs_dpdk_support: False +ovs_dpdk_lcore_mask: 1 +ovs_dpdk_pmd_cpu_mask: 2 +ovs_dpdk_socket_mem: "1024" +ovs_dpdk_nr_1g_pages: 0 +ovs_dpdk_nr_2m_pages: 0 +# (NOTE) (or "libvirtd", depending on your version of libvirt) +vhost_socket_directory_group: "libvirt" diff --git a/doc/source/app-openvswitch-dpdk.rst b/doc/source/app-openvswitch-dpdk.rst new file mode 100644 index 00000000..a2ce009e --- /dev/null +++ b/doc/source/app-openvswitch-dpdk.rst @@ -0,0 +1,461 @@ +===================================== 
+Scenario - Using Open vSwitch w/ DPDK +===================================== + +Overview +~~~~~~~~ + +Operators can choose to utilize DPDK-accelerated Open vSwitch instead of +unaccelerated Open vSwitch or Linux Bridges for the Neutron virtual network +infrastructure. This architecture is best suited for NFV workloads and +requires careful consideration and planning before implementing. This +document outlines how to set it up in your environment. + +.. warning:: + + The current implementation of DPDK in OpenStack-Ansible is + experimental and not production ready. There is no guarantee of + upgradability or backwards compatibility between releases. + +Recommended reading +~~~~~~~~~~~~~~~~~~~ + +We recommend that you read the following documents before proceeding: + +* Neutron with Open vSwitch Scenario: + ``_ +* Open vSwitch with DPDK datapath: + ``_ +* Getting the best performance from DPDK: + ``_ +* OpenStack documentation on hugepages: + ``_ + +Prerequisites +~~~~~~~~~~~~~ + +To enable DPDK on a Linux platform, ensure that VT-d/VT-x are enabled for +Intel processors and AMD-V/AMD-Vi are enabled for AMD processors. Such +features are typically enabled in the BIOS. + +On an Intel platform, the following kernel parameters are required and can be +added to the GRUB configuration: + +.. code-block:: console + + GRUB_CMDLINE_LINUX="... iommu=pt intel_iommu=on" + +On an AMD platform, use these parameters instead: + +.. code-block:: console + + GRUB_CMDLINE_LINUX="... iommu=pt amd_iommu=on" + +Update GRUB and reboot the host(s). + +Hugepages are required for DPDK. Instances leveraging DPDK-accelerated +Open vSwitch must be configured to use hugepages by way of flavor +attributes. Those attributes and the configuration of hugepages are +described in this guide. + +CPU frequency should be set to maximum for optimal performance. Many +hardware vendors set the energy saving properties in the BIOS that +may need to be modified. 
Changing the CPU frequency using ``cpufreq`` +or similar utilities to ``performance`` from ``ondemand`` is recommended. + +.. note:: + + The playbooks currently only support a single NIC interface for DPDK. Multiple + ports per NIC are not yet supported but may be at a later time. This guide + assumes the NIC is bound to NUMA node0, but the instructions can be modified + for NICs bound to other NUMA nodes. + +NUMA topology +~~~~~~~~~~~~~ + +Non-uniform memory access (NUMA) is a computer memory design used in +multiprocessing. This guide cannot go into great depths about NUMA +architecture. However, there are some configurations to be made that +rely on the operator understanding NUMA characteristics of compute +nodes hosting workloads using DPDK-accelerated Open vSwitch. + +To view the NUMA topology of a particular system, use the ``numactl`` +command shown here: + +.. code-block:: console + + root@compute1:~# numactl --hardware + available: 2 nodes (0-1) + node 0 cpus: 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23 + node 0 size: 48329 MB + node 0 free: 31798 MB + node 1 cpus: 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31 + node 1 size: 48379 MB + node 1 free: 25995 MB + node distances: + node 0 1 + 0: 10 20 + 1: 20 10 + +The NUMA topology presented here corresponds to a host with 2x Intel Xeon 2450L +processors with 96 GB of total RAM. The RAM is evenly split between the two NUMA +nodes. Each CPU has 8 cores. With hyperthreading enabled, there are 16 threads +per CPU for a total of 32 threads or cores presented to the operating system. +It just so happens that this two-socket system has one NUMA node per socket; +however, that will not always be the case. Consult your system's documentation +for information unique to your system. + +The first eight cores/cpus in the list for a given NUMA node can be considered +physical cores in the CPU. For NUMA node0, this would be cores 0-7.
The other +eight cores, 16-23, are considered virtual sibling cores and are presented when +hyperthreading is enabled. The physical-to-virtual mapping can be determined +with the following commands: + +.. code-block:: console + + root@compute1:~# for cpu in {0..7}; do cat /sys/devices/system/cpu/"cpu"$cpu/topology/thread_siblings_list; done + 0,16 + 1,17 + 2,18 + 3,19 + 4,20 + 5,21 + 6,22 + 7,23 + + root@compute1:~# for cpu in {8..15}; do cat /sys/devices/system/cpu/"cpu"$cpu/topology/thread_siblings_list; done + 8,24 + 9,25 + 10,26 + 11,27 + 12,28 + 13,29 + 14,30 + 15,31 + +A PCI slot typically corresponds to a single NUMA node. For optimal +performance, a DPDK NIC and any instance utilizing the NIC should be +restricted to the same NUMA node and its respective memory. Ensuring +this behavior requires the use of flavors, host aggregates, and special +kernel parameters and Open vSwitch/DPDK configuration settings. + +In this example, a single 10G NIC installed in PCI slot 2 is bound to NUMA +node0. Ideally, any instances utilizing the NIC would be limited to cores and +memory associated with NUMA node0. This means cores 0-7 and 16-23, and up to +48GB of RAM. In reality, however, some cores and RAM from NUMA node0 will be +reserved and made unavailable to instances. In addition, cores 8-15 and 24-31 +associated with NUMA node1 should be made unavailable to instances. The +configuration to do just that will be covered later in this guide. + +It is considered good practice to reserve a single physical core and its +respective virtual sibling from each NUMA node for normal (non-DPDK) +operating system functions. In addition, at least one physical core +(and sibling) from each NUMA node should be reserved for DPDK poll mode +driver (PMD) functions, even when a NIC(s) is bound to a single NUMA node. +The remaining cores can be reserved for virtual machine instances. 
+ +In this example, the breakdown would resemble the following: + +| Reserved Cores | Purpose | node0 | node1 | +| ---------------------- | --------------------- | --------- | ----- | +| 0,8,16,24 | Host Operating System | 0,16 | 8,24 | +| 1,9,17,25 | DPDK PMDs | 1,17 | 9,25 | +| 2-7,18-23 | Virtual Machines | 2-7,18-23 | N/A | + +The variables and overrides used to define this configuration are discussed +in the following sections. + +Hugepage configuration +~~~~~~~~~~~~~~~~~~~~~~ + +DPDK requires the configuration of hugepages, which is a mechanism by which +the Linux kernel can partition and address larger amounts of memory beyond +the basic page unit (4096 bytes). Huge pages are blocks of contiguous memory +that commonly come in 2MB and 1G sizes. The page tables used by 2MB pages +are suitable for managing multiple gigabytes of memory, whereas the page tables +of 1GB pages are preferred for scaling to terabytes of memory. DPDK requires +the use of 1GB pages. + +A typical x86 system will have a Huge Page Size of 2048 kBytes (2MB). The +default huge page size may be found by looking at the output of /proc/meminfo: + +.. code-block:: console + + # cat /proc/meminfo | grep Hugepagesize + Hugepagesize: 2048 kB + +The number of Hugepages can be allocated at runtime by modifying +``/proc/sys/vm/nr_hugepages`` or by using the ``sysctl`` command. + +To view the current setting using the ``/proc`` entry: + +.. code-block:: console + + # cat /proc/sys/vm/nr_hugepages + 0 + +To view the current setting using the ``sysctl`` command: + +.. code-block:: console + + # sysctl vm.nr_hugepages + vm.nr_hugepages = 0 + +To set the number of huge pages using the ``/proc`` entry: + +.. code-block:: console + + # echo 5 > /proc/sys/vm/nr_hugepages + +To set the number of hugepages using sysctl: + +.. code-block:: console + + # sysctl -w vm.nr_hugepages=5 + vm.nr_hugepages = 5 + +It may be necessary to reboot to be able to allocate the number of hugepages +that is needed.
This is due to hugepages requiring large areas of contiguous +physical memory. + +When 1G hugepages are used, they must be configured at boot time. The amount +of 1G hugepages that should be created will vary based on a few factors, +including: + +* The total amount of RAM available in the system +* The amount of RAM required for the planned number of instances +* The number of NUMA nodes that will be used + +The NUMA topology presented here corresponds to a host with 2x Intel Xeon 2450L +processors with 96GB of total RAM. The RAM is evenly split between the two NUMA +nodes. A DPDK NIC will be associated with a single NUMA node, and for optimal +performance any instance utilizing the DPDK NIC should be limited to the same +cores and memory associated with the NUMA node. On this example system, +both DPDK and instances can only utilize *up to* the 48GB of RAM associated +with NUMA node0, though some of that RAM will be utilized by the OS and other +tasks. + +Of the 48GB of RAM available on NUMA node0, 32GB will be reserved for 1GB +hugepages to be consumed by DPDK PMDs and instances. Configuring hugepages +using kernel parameters results in the defined number of hugepages to be split +evenly across NUMA nodes. With the following kernel parameter, each NUMA node +will be assigned 32x 1G hugepages: + +.. code-block:: console + + GRUB_CMDLINE_LINUX="... hugepagesz=1G hugepages=64" + +Hugepages can be adjusted at runtime if necessary, but doing so is outside the +scope of this guide. + +OpenStack-Ansible variables and overrides +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ability to pin instances to certain cores is not new, and can be +accomplished using the ``vcpu_pin_set`` override seen here: + +.. code-block:: console + + nova_nova_conf_overrides: + DEFAULT: + vcpu_pin_set: 2-7,18-23 + +This change can be added to the ``user_overrides.yml`` file for global +implementation, or to individual nodes in the ``openstack_user_config.yml`` +file as shown here: + +.. 
code-block:: console + + compute_hosts: + compute01: + ip: 172.29.236.200 + container_vars: + ... + nova_nova_conf_overrides: + DEFAULT: + vcpu_pin_set: 2-7,18-23 + +Cores reserved for host operating system functions (non-DPDK) must be converted +to a hexadecimal mask and defined using the ``ovs_dpdk_lcore_mask`` override. +To convert to a hex mask you must first establish the binary mask of chosen +cores using the following table: + +| 31 | 30 | . | 24 | 23 | . | 17 | 16 | 15 | . | 9 | 8 | 7 | . | 1 | 0 | +| -- | -- | - | -- | -- | - | -- | -- | -- | - | -- | -- | -- | - | -- | -- | +| 0 | 0 | . | 1 | 0 | . | 0 | 1 | 0 | . | 0 | 1 | 0 | . | 0 | 1 | + +The ellipses represent cores not shown. The binary mask for cores 0,8,16,24 +can be determined in the following way: + +.. code-block:: console + + 00000001000000010000000100000001 + +The hexadecimal representation of that binary value is ``0x1010101``. Set +the ``ovs_dpdk_lcore_mask`` override accordingly in the ``user_variables.yml`` +file or ``openstack_user_config.yml``: + +.. code-block:: console + + ovs_dpdk_lcore_mask: 1010101 + +The mask for cores 1,9,17,25 reserved for DPDK PMDs can be determined in +a similar fashion. The table would resemble the following: + +| 31 | 30 | . | 25 | 24 | . | 17 | 16 | 15 | . | 9 | 8 | 7 | . | 1 | 0 | +| -- | -- | - | -- | -- | - | -- | -- | -- | - | -- | -- | -- | - | -- | -- | +| 0 | 0 | . | 1 | 0 | . | 1 | 0 | 0 | . | 1 | 0 | 0 | . | 1 | 0 | + +The ellipses represent cores not shown. The binary mask for cores 1,9,17,25 +can be determined in the following way: + +.. code-block:: console + + 00000010000000100000001000000010 + +The hexadecimal representation of that binary value is ``0x2020202``. Set +the ``ovs_dpdk_pmd_cpu_mask`` override accordingly in the +``user_variables.yml`` file or ``openstack_user_config.yml``: + +.. 
code-block:: console + + ovs_dpdk_pmd_cpu_mask: 2020202 + +Additional variables should be set, including: + +* ovs_dpdk_driver +* ovs_dpdk_pci_addresses +* ovs_dpdk_socket_mem + +The default value for ``ovs_dpdk_driver`` is ``vfio-pci``. Overrides can be +set globally or on a per-host basis. + +.. note:: + + Please consult the DPDK Network Interface Controller Driver `documentation + `_ for more information on + supported network drivers for DPDK. + +The value for ``ovs_dpdk_pci_addresses`` is the PCI bus address of the NIC +port(s) associated with the DPDK NIC. In this example, the DPDK NIC is +identified as address ``0000:03:00``. The individual interfaces are +``0000:03:00.0`` and ``0000:03:00.1``, respectively. The variable +``ovs_dpdk_pci_addresses`` is a list of quoted strings, and both values can be defined like so: + +.. code-block:: console + + ovs_dpdk_pci_addresses: + - "0000:03:00.0" + - "0000:03:00.1" + +The value for ``ovs_dpdk_socket_mem`` will vary based on the number of NUMA +nodes, number of NICs per NUMA node, and the MTU. The default value assumes +a single NUMA node and associates a single 1G hugepage to DPDK that can +handle a 1500 MTU. When multiple NUMA nodes are available, even with a single +NIC, the following should be set: + +.. code-block:: console + + ovs_dpdk_socket_mem: "1024,1024" + +For systems using a single NUMA node of a dual-NUMA system and a 9000 MTU, the +following can be set: + +.. code-block:: console + + ovs_dpdk_socket_mem: "3072,1024" + +Determining the socket memory required involves calculations that are out of the +scope of this guide. + +Flavor configuration +~~~~~~~~~~~~~~~~~~~~ + +Instances that connect to a DPDK-accelerated Open vSwitch must be configured to +utilize large (1G) hugepages by way of custom flavor attributes. + +The ``hw:mem_page_size`` property can be set on a new or existing flavor to +enable this functionality: + +.. 
code-block:: console + + openstack flavor set m1.small --property hw:mem_page_size=large + +NOTE: If small page size is used, or no page size is set, the interface may +appear in the instance but will not be functional. + +OpenStack-Ansible user variables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a group var file for your network hosts +``/etc/openstack_deploy/group_vars/network_hosts``. It has to include: + +.. code-block:: yaml + + # Ensure the openvswitch kernel module is loaded + openstack_host_specific_kernel_modules: + - name: "openvswitch" + pattern: "CONFIG_OPENVSWITCH" + +Specify provider network definitions in your +``/etc/openstack_deploy/openstack_user_config.yml`` that define one or more +Neutron provider bridges and related configuration: + +.. code-block:: yaml + + - network: + container_bridge: "br-provider" + container_type: "veth" + type: "vlan" + range: "101:200,301:400" + net_name: "physnet1" + group_binds: + - neutron_openvswitch_agent + +.. note:: + + A single DPDK interface can be connected to an OVS provider bridge, and + this must be done using the ``ovs-vsctl`` command as a post-installation step. + +Set the following user variables in your +``/etc/openstack_deploy/user_variables.yml`` to enable the Open vSwitch driver +and DPDK support: + +.. code-block:: yaml + + neutron_plugin_type: ml2.ovs + neutron_ml2_drivers_type: "vlan" + + # Enable DPDK support + ovs_dpdk_support: True + + # Add these overrides or set on per-host basis in openstack_user_config.yml + ovs_dpdk_pci_addresses: ["0000:03:00.0"] + ovs_dpdk_lcore_mask: 1010101 + ovs_dpdk_pmd_cpu_mask: 2020202 + ovs_dpdk_socket_mem: "1024,1024" + +.. note:: + + Overlay networks are not supported on DPDK-enabled nodes at this time. + +Post-installation +~~~~~~~~~~~~~~~~~ + +Once the playbooks have been run and OVS/DPDK has been configured, it will be +necessary to add a physical interface to the provider bridge before networking +can be fully established.
+ +On compute nodes, the following command can be used to attach a NIC port +``0000:03:00.0`` to the provider bridge ``br-provider``: + +.. code-block:: console + + ovs-vsctl add-port br-provider 0000:03:00.0 -- set Interface 0000:03:00.0 type=dpdk options:dpdk-devargs=0000:03:00.0 + +The command can be adjusted according to your configuration. + +.. warning:: + + Adding multiple ports to the bridge may result in bridging loops unless + bonding is configured. DPDK bonding is outside the scope of this guide. diff --git a/doc/source/index.rst b/doc/source/index.rst index 73d8bed2..83067c27 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -9,6 +9,7 @@ Neutron role for OpenStack-Ansible app-openvswitch.rst app-openvswitch-asap.rst app-openvswitch-dvr.rst + app-openvswitch-dpdk.rst app-openvswitch-sfc.rst app-ovn.rst app-nuage.rst diff --git a/tasks/providers/ovs_config.yml b/tasks/providers/ovs_config.yml index faecf6d3..f7d37066 100644 --- a/tasks/providers/ovs_config.yml +++ b/tasks/providers/ovs_config.yml @@ -25,6 +25,7 @@ - neutron_services['neutron-openvswitch-agent']['group'] in group_names - neutron_plugin_type in ['ml2.ovs', 'ml2.ovs.dvr'] - neutron_provider_networks.network_mappings is defined + - not ovs_dpdk_support - name: Add ports to Network Provider Bridges openvswitch_port: @@ -38,3 +39,8 @@ - neutron_services['neutron-openvswitch-agent']['group'] in group_names - neutron_plugin_type in ['ml2.ovs', 'ml2.ovs.dvr'] - neutron_provider_networks.network_interface_mappings is defined and (neutron_provider_networks.network_interface_mappings|length > 0) + - not ovs_dpdk_support + +- include: setup_ovs_dpdk.yml + when: + - ovs_dpdk_support diff --git a/tasks/providers/setup_ovs_dpdk.yml b/tasks/providers/setup_ovs_dpdk.yml new file mode 100644 index 00000000..f99a10d6 --- /dev/null +++ b/tasks/providers/setup_ovs_dpdk.yml @@ -0,0 +1,127 @@ +--- +# (c) 2019, James Denton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you 
may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +- name: Set alternative ovs-vswitchd service + alternatives: + name: ovs-vswitchd + path: /usr/lib/openvswitch-switch-dpdk/ovs-vswitchd-dpdk + when: + - ansible_pkg_mgr in ['apt'] + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - '"nova_compute" in group_names' + +- name: Configure DPDK interface to driver bindings + template: + src: dpdk_interfaces.j2 + dest: "/etc/dpdk/interfaces" + owner: "root" + group: "root" + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - '"nova_compute" in group_names' + +- name: Configure DPDK hugepage allocation + template: + src: dpdk.conf.j2 + dest: "/etc/dpdk/dpdk.conf" + owner: "root" + group: "root" + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - '"nova_compute" in group_names' + +- name: Ensure DPDK service is started and enabled + systemd: + name: "{{ dpdk_service_name }}" + state: restarted + enabled: true + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - '"nova_compute" in group_names' + +- name: Ensure Open vSwitch service is started and enabled + systemd: + name: "{{ neutron_ovs_service_name }}" + state: restarted + enabled: true + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + +- name: Set DPDK lcore mask + command: "ovs-vsctl --no-wait set Open_vSwitch . 
other_config:dpdk-lcore-mask={{ ovs_dpdk_lcore_mask }}" + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - '"nova_compute" in group_names' + +- name: Set DPDK PMD cpu mask + command: "ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask={{ ovs_dpdk_pmd_cpu_mask }}" + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - '"nova_compute" in group_names' + +- name: Set DPDK socket memory + command: "ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-socket-mem={{ ovs_dpdk_socket_mem }}" + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - '"nova_compute" in group_names' + +- name: Enable DPDK support for openvswitch + command: "ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true" + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - '"nova_compute" in group_names' + +# TODO(jamesdenton): replace the hard-coded /var/lib/vhost_socket path with a variable. The task is already limited to hosts in the nova_compute group. +- name: Create vhost_socket directory + file: + path: /var/lib/vhost_socket + state: directory + owner: libvirt-qemu + group: "{{ vhost_socket_directory_group }}" + mode: "0755" + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - '"nova_compute" in group_names' + +# NOTE: ovs_datapath (used as datapath_type for the provider bridges) needs to be 'netdev' for DPDK compute nodes and 'system' for network nodes. +# TODO: decide whether network nodes (non-DPDK) should get an override that defaults to 'system' rather than 'netdev'.
+- name: Setup Network Provider Bridges + openvswitch_bridge: + bridge: "{{ bridge_mapping.split(':')[1] }}" + set: "bridge {{ bridge_mapping.split(':')[1] }} datapath_type={{ ovs_datapath }}" + fail_mode: secure + state: present + with_items: "{{ neutron_provider_networks.network_mappings.split(',') }}" + loop_control: + loop_var: bridge_mapping + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - neutron_plugin_type in ['ml2.ovs', 'ml2.ovs.dvr'] + - neutron_provider_networks.network_mappings is defined + +# TODO: each mapping is split at the first colon into bridge and port, and the port is added as a type=dpdk interface using the port name as dpdk-devargs. Looping over several interfaces or building a bond from them is TBD. +- name: Add ports to Network Provider Bridges + openvswitch_port: + bridge: "{{ interface_mapping.split(':')[0] }}" + port: "{{ interface_mapping.split(':',1)[1] }}" + set: "Interface {{ interface_mapping.split(':',1)[1] }} type=dpdk options:dpdk-devargs='{{ interface_mapping.split(':',1)[1] }}'" + state: present + with_items: "{{ neutron_provider_networks.network_interface_mappings.split(',') }}" + loop_control: + loop_var: interface_mapping + when: + - neutron_services['neutron-openvswitch-agent']['group'] in group_names + - neutron_plugin_type in ['ml2.ovs', 'ml2.ovs.dvr'] + - neutron_provider_networks.network_interface_mappings is defined and (neutron_provider_networks.network_interface_mappings|length > 0) diff --git a/templates/dpdk.conf.j2 b/templates/dpdk.conf.j2 new file mode 100644 index 00000000..cd043975 --- /dev/null +++ b/templates/dpdk.conf.j2 @@ -0,0 +1,59 @@ +# {{ ansible_managed }} + +# +# The number of 2M hugepages to reserve on system boot +# +# Default is 0 +# To e.g. let it reserve 64x 2M Hugepages set: +# NR_2M_PAGES=64 + +# +# The number of 1G hugepages to reserve on system boot +# +# Default is 0 +# To e.g. 
let it reserve 2x 1G Hugepages set: +# NR_1G_PAGES=2 + +# +# Dropping slab and pagecache can help to successfully allocate hugepages, +# especially later in the lifecycle of a system.
+# This comes at the cost of loosing all slab and pagecache on (re)start +# of the dpdk service - therefore the default is off. +# +# Default is 0 +# Set to 1 to enable it +#DROPCACHE_BEFORE_HP_ALLOC=0 + +# The DPDK library will use the first mounted hugetlbfs. +# The init scripts try to ensure there is at least one default hugetlbfs +# mountpoint on start. +# If you have multiple hugetlbfs mountpoints for a complex (e.g. specific numa +# policies) setup it should be controlled by the admin instead of this init +# script. In that case specific mountpoints can be provided as parameters to +# the DPDK library. + +# Hardware may support other granularities of hugepages (like 4M). But the +# larger the hugepages the earlier those should be allocated. +# Note: the dpdk init scripts will report warnings, but not fail if they could +# not allocate the requested amount of hugepages. +# The more or the larger the hugepages to be allocated are, the more it is +# recommended to do the reservation as kernel commandline arguments. +# To do so edit /etc/default/grub: GRUB_CMDLINE_LINUX_DEFAULT +# and add [hugepagesz=xx] hugepages=yy ... +# +# Kernel commandline config: +# hugepagesz sets the size for the next hugepages reservation (default 2M) +# hugepages reserves the given number of hugepages of the size set before +# +# After modifying /etc/default/grub, the command "update-grub" has to be +# run in order to re-generate the grub config files. The new values will +# be used after next reboot. +# +# example: +# GRUB_CMDLINE_LINUX_DEFAULT="... hugepages=16 hugepagesz=1G hugepages=2" +# +# If the system supports it, this will reserve 16x 2M pages and 2x 1G pages. 
+# + +NR_2M_PAGES={{ ovs_dpdk_nr_2m_pages }} +NR_1G_PAGES={{ ovs_dpdk_nr_1g_pages }} diff --git a/templates/dpdk_interfaces.j2 b/templates/dpdk_interfaces.j2 new file mode 100644 index 00000000..8637c7ad --- /dev/null +++ b/templates/dpdk_interfaces.j2 @@ -0,0 +1,25 @@ +# {{ ansible_managed }} + +# +# Currently only "pci" is supported +# Device ID on the specified bus +# Driver to bind against (vfio-pci, uio_pci_generic, igb_uio or +# rte_kni) +# +# Be aware that the two dpdk compatible drivers uio_pci_generic and vfio-pci are +# part of linux-image-extra- package. +# This package is not always installed by default - for example in cloud-images. +# So please install it in case you run into missing module issues. +# +# For the module igb_uio, please install the dpdk-igb-uio-dkms package. +# For the module rte_kni, please install the dpdk-rte-kni-dkms package. +# +# +# pci 0000:04:00.0 vfio-pci +# pci 0000:04:00.1 uio_pci_generic +# pci 0000:05:00.0 igb_uio +# pci 0000:06:00.0 rte_kni + +{% for pci_address in ovs_dpdk_pci_addresses %} +pci {{ pci_address }} {{ ovs_dpdk_driver }} +{% endfor %} diff --git a/templates/plugins/ml2/openvswitch_agent.ini.j2 b/templates/plugins/ml2/openvswitch_agent.ini.j2 index 64873805..03a10e3e 100644 --- a/templates/plugins/ml2/openvswitch_agent.ini.j2 +++ b/templates/plugins/ml2/openvswitch_agent.ini.j2 @@ -10,6 +10,10 @@ bridge_mappings = {{ neutron_provider_networks.network_mappings }} {% if neutron_services['neutron-openvswitch-agent']['group'] in group_names and neutron_plugin_type == 'ml2.ovs' and neutron_provider_networks.network_mappings is defined and ((neutron_provider_networks.network_mappings | length) > 0) %} bridge_mappings = {{ neutron_provider_networks.network_mappings }} {% endif %} +{% if ovs_dpdk_support %} +datapath_type = {{ ovs_datapath }} +vhostuser_socket_dir = /var/lib/vhost_socket +{% endif %} [agent] l2_population = {{ neutron_plugins[neutron_plugin_type].l2_population }} @@ -26,6 +30,10 @@ extensions = {{ 
ovs_agent_extensions | join(',') }} # Security groups [securitygroup] +{% if ovs_dpdk_support %} +firewall_driver = openvswitch +{% else %} firewall_driver = {{ neutron_plugins[neutron_plugin_type].driver_firewall }} +{% endif %} enable_security_group = True enable_ipset = True diff --git a/vars/debian.yml b/vars/debian.yml index aafa4731..c9c9a6f5 100644 --- a/vars/debian.yml +++ b/vars/debian.yml @@ -30,6 +30,12 @@ neutron_ovs_nsh_required_packages: - openvswitch-switch - python-openvswitch +neutron_ovs_dpdk_required_packages: + - openvswitch-common + - openvswitch-switch-dpdk + +dpdk_service_name: dpdk + ## APT Cache options cache_timeout: 600 diff --git a/vars/distro_install.yml b/vars/distro_install.yml index 6ad1074b..0fc6300f 100644 --- a/vars/distro_install.yml +++ b/vars/distro_install.yml @@ -28,6 +28,8 @@ neutron_package_list: |- {% set _ = packages.extend(neutron_optional_ovs_distro_packages) %} {% if (ovs_nsh_support and ansible_pkg_mgr in ['apt', 'zypper']) %} {% set _ = packages.extend(neutron_ovs_nsh_required_packages) %} + {% elif (ovs_dpdk_support and ansible_pkg_mgr in ['apt', 'zypper']) %} + {% set _ = packages.extend(neutron_ovs_dpdk_required_packages) %} {% else %} {% set _ = packages.extend(neutron_ovs_distro_packages) %} {% endif %} diff --git a/vars/source_install.yml b/vars/source_install.yml index ed6eaa3e..11bc48dc 100644 --- a/vars/source_install.yml +++ b/vars/source_install.yml @@ -27,6 +27,8 @@ neutron_package_list: |- {% if neutron_needs_openvswitch | bool %} {% if (ovs_nsh_support and ansible_pkg_mgr in ['apt', 'zypper']) %} {% set _ = packages.extend(neutron_ovs_nsh_required_packages) %} + {% elif (ovs_dpdk_support and ansible_pkg_mgr in ['apt', 'zypper']) %} + {% set _ = packages.extend(neutron_ovs_dpdk_required_packages) %} {% else %} {% set _ = packages.extend(neutron_ovs_distro_packages) %} {% endif %} diff --git a/vars/suse.yml b/vars/suse.yml index 79b41de2..26497f4b 100644 --- a/vars/suse.yml +++ b/vars/suse.yml @@ -28,6 
+28,9 @@ neutron_ovs_socket_path: "/usr/local/var/run/openvswitch" neutron_ovs_nsh_required_packages: - openvswitch-switch +neutron_ovs_dpdk_required_packages: + - openvswitch-dpdk + neutron_repos: - repo: https://download.opensuse.org/repositories/home:/mosquetero/openSUSE_Leap_{{ ansible_distribution_version }}/ name: ovs-nsh