#!/bin/bash _XTRACE_OVS_DPDK_CTL=$(set +o | grep xtrace) if [[ "${OVS_DPDK_CTL_DEBUG}" == "True" ]]; then set -o xtrace fi FULL_PATH=$(realpath "${BASH_SOURCE[0]}") CONFIG_FILE=${CONFIG_FILE:-"/etc/default/ovs-dpdk.conf"} SERVICE_FILE="/etc/systemd/system/ovs-dpdkctl.service" BRIDGE_SERVICE_FILE="/etc/systemd/system/ovs-dpdk-bridge.service" function get_value { crudini --get $CONFIG_FILE $@ } function set_value { crudini --set $CONFIG_FILE $1 $2 "$3" } function del_value { crudini --del $CONFIG_FILE $@ } function is_set { output=$(crudini --get $CONFIG_FILE $* &> /dev/null ) && [ -n "$(crudini --get $CONFIG_FILE $*)" ]; echo $? } function show_config { cat $CONFIG_FILE } function get_config { echo $CONFIG_FILE } function del_config { rm -f $CONFIG_FILE } function is_redhat_family { [[ -e /etc/redhat-release ]] ; echo $? } function generate_pciwhitelist { _Whitelist='' for nic in $(list_dpdk_nics); do address="$(get_value $nic address)" if [ "$_Whitelist" == '' ]; then _Whitelist="-w $address" else _Whitelist="$_Whitelist -w $address" fi done echo $_Whitelist } function gen_port_mappings { OVS_BRIDGE_MAPPINGS=$(get_value ovs bridge_mappings) OVS_BRIDGES=${OVS_BRIDGE_MAPPINGS//,/ } OVS_DPDK_PORT_MAPPINGS="" ARRAY=( $OVS_BRIDGES ) for net in "${ARRAY[@]}"; do bridge="${net##*:}" nic=${bridge/br-/} if [[ -z "$OVS_DPDK_PORT_MAPPINGS" ]]; then OVS_DPDK_PORT_MAPPINGS="$nic:$bridge" else OVS_DPDK_PORT_MAPPINGS="$OVS_DPDK_PORT_MAPPINGS,$nic:$bridge" fi done echo "$OVS_DPDK_PORT_MAPPINGS" } function gen_config { del_config touch $CONFIG_FILE set_value ovs bridge_mappings ${bridge_mappings:-""} set_value ovs port_mappings ${port_mappings:-$(gen_port_mappings)} set_value ovs cidr_mappings ${cidr_mappings:-""} set_value ovs ovs_coremask ${ovs_coremask:-"0x1"} set_value ovs pmd_coremask ${pmd_coremask:-"0x2"} set_value ovs ovs_mem_channels ${ovs_mem_channels:-4} set_value ovs ovs_socket_mem ${ovs_socket_mem:-"512"} set_value ovs dpdk_interface_driver ${dpdk_interface_driver:-"uio_pci_generic"} set_value ovs hugepage_mountpoint ${hugepage_mountpoint:-"/dev/hugepages"} set_value ovs physical_port_policy ${ovs_physical_port_policy:-"named"} ls -al /sys/class/net/* | awk '$0 ~ /pci/ {n=split($NF,a,"/"); print "\n[" a[n] "]\naddress = " a[n-2] "\ndriver ="}' >> $CONFIG_FILE for nic in $(get_value | grep -v ovs); do set_value $nic driver $(get_driver_by_address $(get_value $nic address)) done for nic in $(list_dpdk_nics); do set_value $nic driver ${dpdk_interface_driver:-"uio_pci_generic"} done set_value ovs pci_whitelist "'${pci_whitelist:-$(generate_pciwhitelist)}'" } function bind_nic { echo $2 > /sys/bus/pci/devices/$1/driver_override echo $1 > /sys/bus/pci/drivers/$2/bind } function unbind_nic { echo $1 > /sys/bus/pci/drivers/$2/unbind echo > /sys/bus/pci/devices/$1/driver_override } function list_dpdk_nics { for nic in $(crudini --get $CONFIG_FILE ovs port_mappings | cut -d : -f 1); do echo $nic; done } function bind_nics { for nic in $(list_dpdk_nics); do device_address="$(get_value $nic address)" current_driver="$(get_driver_by_address $device_address)" target_driver="$(get_value $nic driver)" if [ "$current_driver" != "$target_driver" ]; then set_value $nic old_driver $current_driver unbind_nic $device_address $current_driver bind_nic $device_address $target_driver fi done } function unbind_nics { for nic in $(list_dpdk_nics); do if [ "$(is_set $nic old_driver)" == 0 ]; then device_address="$(get_value $nic address)" current_driver="$(get_driver_by_address $device_address)" target_driver="$(get_value $nic old_driver)" if [ "$current_driver" != "$target_driver" ]; then unbind_nic $device_address $current_driver bind_nic $device_address $target_driver del_value $nic old_driver fi fi done } function get_address_by_name { ls -al /sys/class/net/$1 | awk '$0 ~ /pci/ {n=split($NF,a,"/"); print a[n-2] }' } function get_driver_by_address { ls /sys/bus/pci/devices/$1/driver -al | awk '{n=split($NF,a,"/"); print a[n]}' } function get_port_bridge { for pair in $(get_value ovs port_mappings); do nic=`echo $pair | cut -f 1 -d ":"` if [[ "$nic" == "$1" ]]; then bridge=`echo $pair | cut -f 2 -d ":"` echo $bridge return 0 fi done return 1 } function init_ovs_db { ovs-vsctl init ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask="$(get_value ovs pmd_coremask)" \ other_config:dpdk-init=True other_config:dpdk-lcore-mask="$(get_value ovs ovs_coremask)" \ other_config:dpdk-mem-channels="$(get_value ovs ovs_mem_channels)" \ other_config:dpdk-socket-mem="$(get_value ovs ovs_socket_mem)" \ other_config:dpdk-hugepage-dir="$(get_value ovs hugepage_mountpoint)" \ other_config:dpdk-extra=" --proc-type primary $(get_value ovs pci_whitelist) " } function init_ovs_bridges { raw_bridge_mappings=$(get_value ovs bridge_mappings) bridge_mappings=( ${raw_bridge_mappings//,/ } ) for pair in "${bridge_mappings[@]}"; do bridge=`echo $pair | cut -f 2 -d ":"` sudo ovs-vsctl --no-wait -- --may-exist add-br $bridge -- set Bridge $bridge datapath_type=netdev done } function init_ovs_interfaces { pci_port_pairs='' for nic in $(list_dpdk_nics); do address="$(get_value $nic address)" if [ "$pci_port_pairs" == '' ]; then pci_port_pairs="$address,$nic" else pci_port_pairs="$pci_port_pairs $address,$nic" fi done pci_port_pairs="$(echo $pci_port_pairs | sort)" dpdk_port_number=0 for pair in $pci_port_pairs; do addr="$(echo $pair | cut -f 1 -d ",")" nic="$(echo $pair | cut -f 2 -d ",")" bridge="$(get_port_bridge $nic)" # ovs 2.6 and older requires dpdkX names, ovs 2.7+ requires dpdk-devargs instead. if [ "$(get_value ovs physical_port_policy)" == "indexed" ]; then ovs-vsctl --no-wait --may-exist add-port $bridge "dpdk${dpdk_port_number}" \ -- set Interface "dpdk${dpdk_port_number}" type=dpdk else ovs-vsctl --may-exist add-port $bridge $nic \ -- set Interface $nic type=dpdk options:dpdk-devargs=$addr fi dpdk_port_number=$((dpdk_port_number+1)) done } function init { init_ovs_db init_ovs_bridges init_ovs_interfaces } function install_network_manager_conf { pair=$(get_value ovs cidr_mappings) bridge=`echo $pair | cut -f 1 -d ":"` cidr=`echo $pair | cut -f 2 -d ":"` ip=`echo $cidr | cut -f 1 -d "/"` prefix=`echo $cidr | cut -f 2 -d "/"` mask="" full_octets=$(expr $prefix / 8) partial_octet=$(expr $prefix % 8) for octet in 0 1 2 3 ; do if [[ "$octet" < "$full_octets" ]]; then mask+=255 elif [[ "$octet" == "$full_octets" ]]; then mask+=$((256 - 2**(8-$partial_octet))) else mask+=0 fi [[ "$octet" < 3 ]] && mask+=. done if [[ $(is_redhat_family) == 0 ]]; then cat << EOF | tee "/etc/sysconfig/network-scripts/ifcfg-$bridge" DEVICE=$bridge BOOTPROTO=static IPADDR=$ip NETMASK=$mask HOTPLUG=yes ONBOOT=yes EOF install_redhat_bridge_service $bridge else cat << EOF | tee "/etc/network/interfaces.d/$bridge.cfg" auto $bridge iface $bridge inet static address $ip netmask $mask EOF fi } function uninstall_network_manager_conf { pair=$(get_value ovs cidr_mappings) bridge=`echo $pair | cut -f 1 -d ":"` if [[ $(is_redhat_family) == 0 ]]; then rm -f /etc/sysconfig/network-scripts/ifcfg-$bridge else rm -f /etc/network/interfaces.d/$bridge.cfg fi } function install_service { cat << EOF | tee "$SERVICE_FILE" [Unit] Description=configuration service for ovs-dpdk nics. Before=network-pre.target After=syslog.target [Service] # Uncomment to enable debug logging. # Environment=OVS_DPDK_CTL_DEBUG=True Environment=CONFIG_FILE=$CONFIG_FILE Type=oneshot RemainAfterExit=yes ExecStart=/bin/ovs-dpdkctl bind_nics ExecStop=/bin/ovs-dpdkctl unbind_nics [Install] WantedBy=multi-user.target EOF systemctl daemon-reload systemctl enable ovs-dpdkctl } function install_redhat_bridge_service { cat << EOF | tee "$BRIDGE_SERVICE_FILE" [Unit] Description=configuration service for ovs-dpdk bridge. After=docker.service After=network.target After=ovs-dpdkctl.service [Service] Type=simple RemainAfterExit=yes ExecStartPre=/bin/bash -c "[[ -e /sys/class/net/$1 ]]" ExecStart=/usr/sbin/ifup $1 ExecStop=/usr/sbin/ifdown $1 Restart=on-failure RestartSec=5 [Install] WantedBy=multi-user.target EOF systemctl daemon-reload systemctl enable ovs-dpdk-bridge } function uninstall_service { systemctl disable ovs-dpdkctl rm -f "$SERVICE_FILE" if [ -e "$BRIDGE_SERVICE_FILE" ]; then systemctl disable ovs-dpdk-bridge rm -f "$BRIDGE_SERVICE_FILE" fi systemctl daemon-reload } function configure_kernel_modules { driver="$(get_value ovs dpdk_interface_driver)" lsmod | grep -ws $driver > /dev/null || modprobe $driver if [[ $(is_redhat_family) == 0 ]]; then [[ ! -e /etc/modules-load.d/${driver}.conf ]] && echo $driver | tee /etc/modules-load.d/${driver}.conf else grep -ws $driver /etc/modules > /dev/null || echo $driver | tee -a /etc/modules fi } function unconfigure_kernel_modules { driver="$(get_value ovs dpdk_interface_driver)" lsmod | grep -ws $driver > /dev/null && rmmod $driver if [[ $(is_redhat_family) == 0 ]] ; then [[ -e /etc/modules-load.d/${driver}.conf ]] && rm -f /etc/modules-load.d/${driver}.conf else grep -ws $driver /etc/modules > /dev/null && sed -e "s/$driver//" -i /etc/modules fi } function install { configure_kernel_modules if [ ! -e "$SERVICE_FILE" ]; then install_service fi if [ ! -e /bin/ovs-dpdkctl ]; then cp "$FULL_PATH" /bin/ovs-dpdkctl chmod +x /bin/ovs-dpdkctl fi if [ ! -e "$CONFIG_FILE" ]; then gen_config fi systemctl start ovs-dpdkctl install_network_manager_conf if [[ $(is_redhat_family) == 0 ]]; then systemctl start ovs-dpdk-bridge fi } function uninstall { systemctl stop ovs-dpdkctl if [ -e "$SERVICE_FILE" ]; then uninstall_service fi uninstall_network_manager_conf unconfigure_kernel_modules if [ -e /bin/ovs-dpdkctl ]; then rm -f /bin/ovs-dpdkctl fi if [ -e "$CONFIG_FILE" ]; then rm -f "$CONFIG_FILE" fi } function usage { cat << "EOF" ovs-dpdkctl.sh: A tool to configure ovs with dpdk. - This tool automate the process of binding host insterfacesto a dpdk compaible driver (uio_pci_generic | vfio-pci) at boot. - This tool automate bootstraping ovs so that it can use the dpdk accelerated netdev datapath. commands: - install: - installs ovs-dpdkctl as a systemd service. - installs ovs-dpdkctl binary. - generates ovs-dpdkctl configuration file. - starts ovs-dpdkctl service. - uninstall: - stops ovs-dpdkctl service. - uninstalls ovs-dpdkctl systemd service. - uninstalls ovs-dpdkctl binary. - removes ovs-dpdkctl configuration file. - bind_nics: - iterates over all dpdk interfaces defined in ovs-dpdkctl config and binds the interface to the target driver specifed in the config if current driver does not equal target. - unbind_nics: - iterates over all dpdk interfaces defined in ovs-dpdkctl config and restores the interface to its original non dpdk driver. - init: - defines dpdk specific configuration paramater in the ovsdb. - creates bridges as spcified by ovs bridge_mappings in ovs-dpdkctl config. - creates dpdk ports as defined by ovs port_mappings in ovs-dpdkctl config. - usage: - prints this message options: - debuging: - To enable debuging export OVS_DPDK_CTL_DEBUG=True - install: - The varibles described below can be defined to customise installation of ovs-dpdkctl. = ovs-dpdkctl.sh install - bridge_mappings: - A comma separated list of physnet to bridge mappings. - Example: bridge_mappings=physnet1:br-ex1,physnet2:br-ex2 - Default: "" - port_mappings: - A comma separated list of port to bridge mappings. - Example: port_mappings=eth1:br-ex1,eth2:br-ex2 - Default: generated form bridge_mappings assuming bridge names are constructed by appending br- to port name. - cidr_mappings: - A comma separated list of bridge to cidr mappings. - Example: cidr_mappings=br-ex1:192.168.1.1/24,br-ex2:192.168.2.1/24 - Default: "" - ovs_coremask: - A hex encoded string container a bitmask of what cpu core to pin the non dataplane treads of the ovs-vswitchd to. - Node that only the first core of the bit mask is currently used by ovs. - Example: ovs_coremask=0x1 - Default: "0x1" - pmd_coremask: - A hex encoded string container a bitmask of what cpu cores to pin the dataplane pool mode driver treads of the ovs-vswitchd to. - Each bit set in the bitmask will result in the creating of a pmd. - For best performance it is recommended to allocate at least 1 pmd per numa node. On systems with HyperThreading enabled it is recommended to also allocate the HT sibling core in the pmd_coremask.cores allocated to ovs with dpdk via the pmd_coremask should be removed from the nova vcpu_pin_set and isolated from the kernel scheduler. - Note it is not recommended to isolate cores in the nova vcpu_pin_set unless the host will be dedicated for vms that request cpu pinning. - Example: pmd_coremask=0x4 - Default: "0x4" - ovs_mem_channels: - The number of memory channels supported by the plathforms. - Example: ovs_mem_channels=2 - Default: "4" - ovs_socket_mem: - A comma separated list of hugepage memory, specifed in MBs per numa node, allocated to the ovs-vswitchd to use for the dpdk dataplane. - For best performance memory should be allocated evenly across all numa node that will run a pmd. - Example: ovs_socket_mem=512,512 - Default: "512" - hugepage_mountpoint: - The hugepage mountpoint to use when allocating memory for dpdk. - Example: hugepage_mountpoint=/dev/my_custom_mountpoint - Default: "/dev/hugepages" - dpdk_interface_driver: - The dpdk compatible userspace driver to use when binding host interfaces. - Example: dpdk_interface_driver=vfio_pci - Default: "uio_pci_generic" - pci_whitelist: - A repeated space separated list of pci whitelist flags for allowed ovs-dpdk ports. - The pci_whitelist allows multiple dpdk primary process to utilise different pci devices without resulting in a conflict of ownership. - Example: pci_whitelist="-w -w " - Default: auto generated form port_mappings. EOF } if [ $# -ge 1 ]; then func=$1 shift else func="usage" fi #replace with switch later eval "$func $@" ${_XTRACE_OVS_DPDK_CTL}