Additional OVS-DPDK configs including bonding support

Enhance the Neutron charts to support configuration parameters for
the following additional configurations when deploying OVS with DPDK:

1. Bonding support
2. Jumbo Frame support
3. Number of Rx queues, and Rx and Tx queue sizes

Change-Id: I4ee7c8465825cf7d66d175446c4145a8a26b6381
This commit is contained in:
Deepak Tiwari 2019-05-15 12:19:13 -05:00
parent a4f84ad8fb
commit 21af1acde9
5 changed files with 390 additions and 65 deletions

View File

@ -65,7 +65,6 @@ function get_ip_address_from_interface {
local interface=$1 local interface=$1
local ip=$(ip -4 -o addr s "${interface}" | awk '{ print $4; exit }' | awk -F '/' '{print $1}') local ip=$(ip -4 -o addr s "${interface}" | awk '{ print $4; exit }' | awk -F '/' '{print $1}')
if [ -z "${ip}" ] ; then if [ -z "${ip}" ] ; then
echo "Interface ${interface} has no valid IP address."
exit 1 exit 1
fi fi
echo ${ip} echo ${ip}
@ -75,32 +74,34 @@ function get_ip_prefix_from_interface {
local interface=$1 local interface=$1
local prefix=$(ip -4 -o addr s "${interface}" | awk '{ print $4; exit }' | awk -F '/' '{print $2}') local prefix=$(ip -4 -o addr s "${interface}" | awk '{ print $4; exit }' | awk -F '/' '{print $2}')
if [ -z "${prefix}" ] ; then if [ -z "${prefix}" ] ; then
echo "Interface ${interface} has no valid IP address."
exit 1 exit 1
fi fi
echo ${prefix} echo ${prefix}
} }
function bind_dpdk_nics { function migrate_ip {
target_driver=$(get_dpdk_config_value ${DPDK_CONFIG} '.driver') pci_id=$1
bridge_name=$2
# loop over all nics
echo $DPDK_CONFIG | jq -r -c '.nics[]' | \
while IFS= read -r nic; do
local port_name=$(get_dpdk_config_value ${nic} '.name')
local pci_id=$(get_dpdk_config_value ${nic} '.pci_id')
local bridge=$(get_dpdk_config_value ${nic} '.bridge')
if [[ $(get_dpdk_config_value ${nic} '.migrate_ip') == true ]] ; then
local src_nic=$(get_name_by_pci_id ${pci_id}) local src_nic=$(get_name_by_pci_id ${pci_id})
if [ -n "${src_nic}" ] ; then if [ -n "${src_nic}" ] ; then
set +e
ip=$(get_ip_address_from_interface ${src_nic}) ip=$(get_ip_address_from_interface ${src_nic})
prefix=$(get_ip_prefix_from_interface ${src_nic}) prefix=$(get_ip_prefix_from_interface ${src_nic})
# Enabling explicit error handling: We must avoid to lose the IP # Enabling explicit error handling: We must avoid to lose the IP
# address in the migration process. Hence, on every error, we # address in the migration process. Hence, on every error, we
# attempt to assign the IP back to the original NIC and exit. # attempt to assign the IP back to the original NIC and exit.
set +e bridge_exists=$(ip a s "${bridge_name}" | grep "${bridge_name}" | cut -f2 -d':' 2> /dev/null)
if [ -z "${bridge_exists}" ] ; then
echo "Bridge "${bridge_name}" does not exist. Creating it on demand."
init_ovs_dpdk_bridge "${bridge_name}"
fi
bridge_ip=$(get_ip_address_from_interface "${bridge_name}")
bridge_prefix=$(get_ip_prefix_from_interface "${bridge_name}")
if [[ -n "${ip}" && -n "${prefix}" ]]; then
ip addr flush dev ${src_nic} ip addr flush dev ${src_nic}
if [ $? -ne 0 ] ; then if [ $? -ne 0 ] ; then
ip addr add ${ip}/${prefix} dev ${src_nic} ip addr add ${ip}/${prefix} dev ${src_nic}
@ -108,29 +109,86 @@ function bind_dpdk_nics {
exit 1 exit 1
fi fi
bridge_exists=$(ip a s ${bridge} 2> /dev/null) ip addr add ${ip}/${prefix} dev "${bridge_name}"
if [ -z "${bridge_exists}" ] ; then
echo "Bridge ${bridge} does not exist. Creating it on demand."
init_ovs_dpdk_bridge ${bridge}
fi
ip addr add ${ip}/${prefix} dev ${bridge}
if [ $? -ne 0 ] ; then if [ $? -ne 0 ] ; then
echo "Error assigning IP to bridge ${bridge}." echo "Error assigning IP to bridge "${bridge_name}"."
ip addr add ${ip}/${prefix} dev ${src_nic} ip addr add ${ip}/${prefix} dev ${src_nic}
exit 1 exit 1
fi fi
elif [[ -n "${bridge_ip}" && -n "${bridge_prefix}" ]]; then
echo "Bridge '${bridge_name}' already has IP assigned. Keeping the same:: IP:[${bridge_ip}]; Prefix:[${bridge_prefix}]..."
else
echo "Interface ${name} has invalid IP address. IP:[${ip}]; Prefix:[${prefix}]..."
exit 1
fi
set -e set -e
fi fi
fi }
current_driver="$(get_driver_by_address ${pci_id} )" function get_pf_or_vf_pci {
dpdk_pci_id=${1}
vf_index=${2}
if [ -n "$vf_index" ]
then
iface=$(get_name_by_pci_id "${dpdk_pci_id}")
sysfs_numvfs_path="/sys/class/net/${iface}/device/sriov_numvfs"
if [[ -f /sys/class/net/${iface}/device/sriov_numvfs &&
"$(cat /sys/class/net/${iface}/device/sriov_numvfs)" -ne "0" &&
-e /sys/class/net/${iface}/device/virtfn${vf_index} ]]
then
dpdk_pci_id=$(ls -la /sys/class/net/${iface}/device/virtfn${vf_index})
dpdk_pci_id=${dpdk_pci_id#*"../"}
else
echo "Error fetching the VF PCI for PF: ["${iface}", "${dpdk_pci_id}"] and VF-Index: ${vf_index}."
exit 1
fi
fi
}
function bind_dpdk_nic {
target_driver=${1}
pci_id=${2}
current_driver="$(get_driver_by_address "${pci_id}" )"
if [ "$current_driver" != "$target_driver" ]; then if [ "$current_driver" != "$target_driver" ]; then
if [ "$current_driver" != "" ]; then if [ "$current_driver" != "" ]; then
unbind_nic ${pci_id} ${current_driver} unbind_nic "${pci_id}" ${current_driver}
fi fi
bind_nic ${pci_id} ${target_driver} bind_nic "${pci_id}" ${target_driver}
fi fi
}
function process_dpdk_nics {
target_driver=$(get_dpdk_config_value ${DPDK_CONFIG} '.driver')
# loop over all nics
echo $DPDK_CONFIG | jq -r -c '.nics[]' | \
while IFS= read -r nic; do
local port_name=$(get_dpdk_config_value ${nic} '.name')
local pci_id=$(get_dpdk_config_value ${nic} '.pci_id')
local bridge=$(get_dpdk_config_value ${nic} '.bridge')
local vf_index=$(get_dpdk_config_value ${nic} '.vf_index')
if [[ $(get_dpdk_config_value ${nic} '.migrate_ip') == true ]] ; then
migrate_ip "${pci_id}" "${bridge}"
fi
iface=$(get_name_by_pci_id "${pci_id}")
if [ -n "${vf_index}" ]; then
vf_string="vf ${vf_index}"
fi
ip link set ${iface} promisc on
ip link set ${iface} ${vf_string} trust on
ip link set ${iface} ${vf_string} spoofchk off
# Fetch the PCI to be bound to DPDK driver.
# In case VF Index is configured then PCI of that particular VF
# is bound to DPDK, otherwise PF PCI is bound to DPDK.
get_pf_or_vf_pci "${pci_id}" "${vf_index}"
bind_dpdk_nic ${target_driver} "${dpdk_pci_id}"
ovs-vsctl --db=unix:${OVS_SOCKET} --if-exists del-port ${port_name} ovs-vsctl --db=unix:${OVS_SOCKET} --if-exists del-port ${port_name}
@ -147,6 +205,18 @@ function bind_dpdk_nics {
if [ -n "${pmd_rxq_affinity}" ]; then if [ -n "${pmd_rxq_affinity}" ]; then
dpdk_options+='other_config:pmd-rxq-affinity=${pmd_rxq_affinity} ' dpdk_options+='other_config:pmd-rxq-affinity=${pmd_rxq_affinity} '
fi fi
mtu=$(get_dpdk_config_value ${nic} '.mtu')
if [ -n "${mtu}" ]; then
dpdk_options+='mtu_request=${mtu} '
fi
n_rxq_size=$(get_dpdk_config_value ${nic} '.n_rxq_size')
if [ -n "${n_rxq_size}" ]; then
dpdk_options+='options:n_rxq_desc=${n_rxq_size} '
fi
n_txq_size=$(get_dpdk_config_value ${nic} '.n_txq_size')
if [ -n "${n_txq_size}" ]; then
dpdk_options+='options:n_txq_desc=${n_txq_size} '
fi
ovs-vsctl --db=unix:${OVS_SOCKET} --may-exist add-port ${bridge} ${port_name} \ ovs-vsctl --db=unix:${OVS_SOCKET} --may-exist add-port ${bridge} ${port_name} \
-- set Interface ${port_name} type=dpdk options:dpdk-devargs=${pci_id} ${dpdk_options} -- set Interface ${port_name} type=dpdk options:dpdk-devargs=${pci_id} ${dpdk_options}
@ -154,6 +224,90 @@ function bind_dpdk_nics {
done done
} }
function process_dpdk_bonds {
target_driver=$(get_dpdk_config_value ${DPDK_CONFIG} '.driver')
# loop over all bonds
echo $DPDK_CONFIG | jq -r -c '.bonds[]' > /tmp/bonds_array
while IFS= read -r bond; do
local bond_name=$(get_dpdk_config_value ${bond} '.name')
local dpdk_bridge=$(get_dpdk_config_value ${bond} '.bridge')
local migrate_ip=$(get_dpdk_config_value ${bond} '.migrate_ip')
local mtu=$(get_dpdk_config_value ${bond} '.mtu')
local n_rxq=$(get_dpdk_config_value ${bond} '.n_rxq')
local ofport_request=$(get_dpdk_config_value ${bond} '.ofport_request')
local n_rxq_size=$(get_dpdk_config_value ${bond} '.n_rxq_size')
local n_txq_size=$(get_dpdk_config_value ${bond} '.n_txq_size')
local ovs_options=$(get_dpdk_config_value ${bond} '.ovs_options')
local nic_name_str=""
local dev_args_str=""
local ip_migrated=false
echo $bond | jq -r -c '.nics[]' > /tmp/nics_array
while IFS= read -r nic; do
local pci_id=$(get_dpdk_config_value ${nic} '.pci_id')
local nic_name=$(get_dpdk_config_value ${nic} '.name')
local pmd_rxq_affinity=$(get_dpdk_config_value ${nic} '.pmd_rxq_affinity')
local vf_index=$(get_dpdk_config_value ${nic} '.vf_index')
local vf_string=""
if [[ ${migrate_ip} = "true" && ${ip_migrated} = "false" ]]; then
migrate_ip "${pci_id}" "${dpdk_bridge}"
ip_migrated=true
fi
iface=$(get_name_by_pci_id "${pci_id}")
if [ -n "${vf_index}" ]; then
vf_string="vf ${vf_index}"
fi
ip link set ${iface} promisc on
ip link set ${iface} ${vf_string} trust on
ip link set ${iface} ${vf_string} spoofchk off
# Fetch the PCI to be bound to DPDK driver.
# In case VF Index is configured then PCI of that particular VF
# is bound to DPDK, otherwise PF PCI is bound to DPDK.
get_pf_or_vf_pci "${pci_id}" "${vf_index}"
bind_dpdk_nic ${target_driver} "${dpdk_pci_id}"
nic_name_str+=" "${nic_name}""
dev_args_str+=" -- set Interface "${nic_name}" type=dpdk options:dpdk-devargs=""${dpdk_pci_id}"
if [[ -n ${mtu} ]]; then
dev_args_str+=" -- set Interface "${nic_name}" mtu_request=${mtu}"
fi
if [[ -n ${n_rxq} ]]; then
dev_args_str+=" -- set Interface "${nic_name}" options:n_rxq=${n_rxq}"
fi
if [[ -n ${ofport_request} ]]; then
dev_args_str+=" -- set Interface "${nic_name}" ofport_request=${ofport_request}"
fi
if [[ -n ${pmd_rxq_affinity} ]]; then
dev_args_str+=" -- set Interface "${nic_name}" other_config:pmd-rxq-affinity=${pmd_rxq_affinity}"
fi
if [[ -n ${n_rxq_size} ]]; then
dev_args_str+=" -- set Interface "${nic_name}" options:n_rxq_desc=${n_rxq_size}"
fi
if [[ -n ${n_txq_size} ]]; then
dev_args_str+=" -- set Interface "${nic_name}" options:n_txq_desc=${n_txq_size}"
fi
done < /tmp/nics_array
ovs-vsctl --db=unix:${OVS_SOCKET} --if-exists del-port "${bond_name}"
ovs-vsctl --db=unix:${OVS_SOCKET} --may-exist add-bond "${dpdk_bridge}" "${bond_name}" \
${nic_name_str} \
"${ovs_options}" ${dev_args_str}
done < "/tmp/bonds_array"
}
function get_driver_by_address { function get_driver_by_address {
if [[ -e /sys/bus/pci/devices/$1/driver ]]; then if [[ -e /sys/bus/pci/devices/$1/driver ]]; then
echo $(ls /sys/bus/pci/devices/$1/driver -al | awk '{n=split($NF,a,"/"); print a[n]}') echo $(ls /sys/bus/pci/devices/$1/driver -al | awk '{n=split($NF,a,"/"); print a[n]}')
@ -200,12 +354,16 @@ do
if [ -n "$iface" ] && [ "$iface" != "null" ] if [ -n "$iface" ] && [ "$iface" != "null" ]
then then
ovs-vsctl --no-wait --may-exist add-port $bridge $iface ovs-vsctl --no-wait --may-exist add-port $bridge $iface
if [[ $(get_dpdk_config_value ${DPDK_CONFIG} '.enabled') != "true" ]]; then
ip link set dev $iface up ip link set dev $iface up
fi fi
fi
done done
tunnel_interface="{{- .Values.network.interface.tunnel -}}" tunnel_types="{{- .Values.conf.plugins.openvswitch_agent.agent.tunnel_types -}}"
if [ -z "${tunnel_interface}" ] ; then if [[ -n "${tunnel_types}" ]] ; then
tunnel_interface="{{- .Values.network.interface.tunnel -}}"
if [ -z "${tunnel_interface}" ] ; then
# search for interface with tunnel network routing # search for interface with tunnel network routing
tunnel_network_cidr="{{- .Values.network.interface.tunnel_network_cidr -}}" tunnel_network_cidr="{{- .Values.network.interface.tunnel_network_cidr -}}"
if [ -z "${tunnel_network_cidr}" ] ; then if [ -z "${tunnel_network_cidr}" ] ; then
@ -214,14 +372,17 @@ if [ -z "${tunnel_interface}" ] ; then
# If there is not tunnel network gateway, exit # If there is not tunnel network gateway, exit
tunnel_interface=$(ip -4 route list ${tunnel_network_cidr} | awk -F 'dev' '{ print $2; exit }' \ tunnel_interface=$(ip -4 route list ${tunnel_network_cidr} | awk -F 'dev' '{ print $2; exit }' \
| awk '{ print $1 }') || exit 1 | awk '{ print $1 }') || exit 1
fi
fi fi
if [[ "${DPDK_ENABLED}" == "true" ]]; then if [[ "${DPDK_ENABLED}" == "true" ]]; then
init_ovs_dpdk_bridges init_ovs_dpdk_bridges
bind_dpdk_nics process_dpdk_nics
process_dpdk_bonds
fi fi
# determine local-ip dynamically based on interface provided but only if tunnel_types is not null # determine local-ip dynamically based on interface provided but only if tunnel_types is not null
if [[ -n "${tunnel_types}" ]] ; then
LOCAL_IP=$(get_ip_address_from_interface ${tunnel_interface}) LOCAL_IP=$(get_ip_address_from_interface ${tunnel_interface})
if [ -z "${LOCAL_IP}" ] ; then if [ -z "${LOCAL_IP}" ] ; then
echo "Var LOCAL_IP is empty" echo "Var LOCAL_IP is empty"
@ -232,4 +393,4 @@ tee > /tmp/pod-shared/ml2-local-ip.ini << EOF
[ovs] [ovs]
local_ip = "${LOCAL_IP}" local_ip = "${LOCAL_IP}"
EOF EOF
fi

View File

@ -20,8 +20,10 @@ set -ex
exec neutron-openvswitch-agent \ exec neutron-openvswitch-agent \
--config-file /etc/neutron/neutron.conf \ --config-file /etc/neutron/neutron.conf \
--config-file /etc/neutron/plugins/ml2/ml2_conf.ini \ --config-file /etc/neutron/plugins/ml2/ml2_conf.ini
--config-file /tmp/pod-shared/ml2-local-ip.ini \ {{- if .Values.conf.plugins.openvswitch_agent.agent.tunnel_types }} \
--config-file /tmp/pod-shared/ml2-local-ip.ini
{{- end }} \
--config-file /etc/neutron/plugins/ml2/openvswitch_agent.ini --config-file /etc/neutron/plugins/ml2/openvswitch_agent.ini
{{- if .Values.conf.plugins.taas.taas.enabled }} \ {{- if .Values.conf.plugins.taas.taas.enabled }} \
--config-file /etc/neutron/plugins/ml2/taas.ini --config-file /etc/neutron/plugins/ml2/taas.ini

View File

@ -1950,17 +1950,59 @@ conf:
# because additional parameters are needed # because additional parameters are needed
ovs_dpdk: ovs_dpdk:
enabled: false enabled: false
# driver: uio_pci_generic driver: uio_pci_generic
# nics: # In case bonds are configured, the nics which are part of those bonds
# - name: dpdk0 # must NOT be provided here.
# pci_id: '0000:05:00.0' nics:
# bridge: br-phy - name: dpdk0
pci_id: '0000:05:00.0'
# Set VF Index in case some particular VF(s) need to be
# used with ovs-dpdk.
# vf_index: 0
bridge: br-phy
migrate_ip: true
n_rxq: 2
pmd_rxq_affinity: "0:3,1:27"
ofport_request: 1
# optional parameters for tuning the OVS DPDK config
# in alignment with the available hardware resources
# mtu: 2000
# n_rxq_size: 1024
# n_txq_size: 1024
bridges:
- name: br-phy
# Optional parameter for configuring bonding in OVS-DPDK
# - name: br-phy-bond0
# bonds:
# - name: dpdkbond0
# bridge: br-phy-bond0
# # The IP from the first nic in nics list shall be used
# migrate_ip: true # migrate_ip: true
# mtu: 2000
# # Please note that n_rxq is set for each NIC individually
# # rather than denoting the total number of rx queues for
# # the bond as a whole. So setting n_rxq = 2 below for ex.
# # would be 4 rx queues in total for the bond.
# n_rxq: 2 # n_rxq: 2
# pmd_rxq_affinity: "0:3,1:27"
# ofport_request: 1 # ofport_request: 1
# bridges: # n_rxq_size: 1024
# - name: br-phy # n_txq_size: 1024
# ovs_options: "bond_mode=active-backup"
# nics:
# - name: dpdk_b0s0
# pci_id: '0000:06:00.0'
# pmd_rxq_affinity: "0:3,1:27"
# # Set VF Index in case some particular VF(s) need to be
# # used with ovs-dpdk. In which case pci_id of PF must be
# # provided above.
# # vf_index: 0
# - name: dpdk_b0s1
# pci_id: '0000:07:00.0'
# pmd_rxq_affinity: "0:3,1:27"
# # Set VF Index in case some particular VF(s) need to be
# # used with ovs-dpdk. In which case pci_id of PF must be
# # provided above.
# # vf_index: 0
# Names of secrets used by bootstrap and environmental checks # Names of secrets used by bootstrap and environmental checks
secrets: secrets:

View File

@ -0,0 +1,30 @@
network:
interface:
tunnel: br-phy-bond0
conf:
plugins:
openvswitch_agent:
agent:
tunnel_types: vxlan
ovs:
bridge_mappings: public:br-ex
datapath_type: netdev
vhostuser_socket_dir: /var/run/openvswitch/vhostuser
ovs_dpdk:
enabled: true
driver: uio_pci_generic
nics: []
bonds:
# CHANGE-ME: modify below parameters according to your hardware
- name: dpdkbond0
bridge: br-phy-bond0
# The IP from the first nic in nics list shall be used
migrate_ip: true
ovs_options: "bond_mode=active-backup"
nics:
- name: dpdk_b0s0
pci_id: '0000:00:05.0'
- name: dpdk_b0s1
pci_id: '0000:00:06.0'
bridges:
- name: br-phy-bond0

View File

@ -0,0 +1,90 @@
network:
interface:
sriov:
- device: enp3s0f0
num_vfs: 32
promisc: false
- device: enp66s0f1
num_vfs: 32
promisc: false
tunnel: br-phy-bond0
backend:
- openvswitch
- sriov
conf:
auto_bridge_add:
br-ex: null
neutron:
DEFAULT:
l3_ha: False
max_l3_agents_per_router: 1
l3_ha_network_type: vxlan
dhcp_agents_per_network: 1
service_plugins: router
plugins:
ml2_conf:
ml2:
mechanism_drivers: l2population,openvswitch,sriovnicswitch
type_drivers: vlan,flat,vxlan
tenant_network_types: vxlan
ml2_type_flat:
flat_networks: public
ml2_type_vlan:
network_vlan_ranges: ovsnet:2:4094,sriovnet1:100:4000,sriovnet2:100:4000
openvswitch_agent:
default:
ovs_vsctl_timeout: 30
agent:
tunnel_types: vxlan
securitygroup:
enable_security_group: False
firewall_driver: neutron.agent.firewall.NoopFirewallDriver
ovs:
bridge_mappings: public:br-ex,ovsnet:br-phy-bond0
datapath_type: netdev
vhostuser_socket_dir: /var/run/openvswitch/vhostuser
of_connect_timeout: 60
of_request_timeout: 30
sriov_agent:
securitygroup:
firewall_driver: neutron.agent.firewall.NoopFirewallDriver
sriov_nic:
physical_device_mappings: sriovnet1:enp3s0f0,sriovnet2:enp66s0f1
exclude_devices: enp3s0f0:0000:00:05.1,enp66s0f1:0000:00:06.1
ovs_dpdk:
enabled: true
driver: uio_pci_generic
nics: []
bonds:
# CHANGE-ME: modify below parameters according to your hardware
- name: dpdkbond0
bridge: br-phy-bond0
mtu: 9000
# The IP from the first nic in nics list shall be used
migrate_ip: true
n_rxq: 2
n_rxq_size: 1024
n_txq_size: 1024
ovs_options: "bond_mode=active-backup"
nics:
- name: dpdk_b0s0
pci_id: '0000:00:05.0'
vf_index: 0
- name: dpdk_b0s1
pci_id: '0000:00:06.0'
vf_index: 0
bridges:
- name: br-phy-bond0
# In case of shared profile (sriov + ovs-dpdk), sriov agent should finish
# first so as to let it configure the SRIOV VFs before ovs-agent tries to
# bind it with DPDK driver.
dependencies:
dynamic:
targeted:
openvswitch:
ovs_agent:
pod:
- requireSameNode: true
labels:
application: neutron
component: neutron-sriov-agent