diff --git a/neutron/templates/bin/_neutron-openvswitch-agent-init.sh.tpl b/neutron/templates/bin/_neutron-openvswitch-agent-init.sh.tpl index ce4734da67..bbc88321dd 100644 --- a/neutron/templates/bin/_neutron-openvswitch-agent-init.sh.tpl +++ b/neutron/templates/bin/_neutron-openvswitch-agent-init.sh.tpl @@ -65,7 +65,6 @@ function get_ip_address_from_interface { local interface=$1 local ip=$(ip -4 -o addr s "${interface}" | awk '{ print $4; exit }' | awk -F '/' '{print $1}') if [ -z "${ip}" ] ; then - echo "Interface ${interface} has no valid IP address." exit 1 fi echo ${ip} @@ -75,63 +74,122 @@ function get_ip_prefix_from_interface { local interface=$1 local prefix=$(ip -4 -o addr s "${interface}" | awk '{ print $4; exit }' | awk -F '/' '{print $2}') if [ -z "${prefix}" ] ; then - echo "Interface ${interface} has no valid IP address." exit 1 fi echo ${prefix} } -function bind_dpdk_nics { - target_driver=$(get_dpdk_config_value ${DPDK_CONFIG} '.driver') +function migrate_ip { + pci_id=$1 + bridge_name=$2 + local src_nic=$(get_name_by_pci_id ${pci_id}) + if [ -n "${src_nic}" ] ; then + set +e + ip=$(get_ip_address_from_interface ${src_nic}) + prefix=$(get_ip_prefix_from_interface ${src_nic}) + + # Enabling explicit error handling: We must avoid to lose the IP + # address in the migration process. Hence, on every error, we + # attempt to assign the IP back to the original NIC and exit. + bridge_exists=$(ip a s "${bridge_name}" | grep "${bridge_name}" | cut -f2 -d':' 2> /dev/null) + if [ -z "${bridge_exists}" ] ; then + echo "Bridge "${bridge_name}" does not exist. Creating it on demand." + init_ovs_dpdk_bridge "${bridge_name}" + fi + + bridge_ip=$(get_ip_address_from_interface "${bridge_name}") + bridge_prefix=$(get_ip_prefix_from_interface "${bridge_name}") + + if [[ -n "${ip}" && -n "${prefix}" ]]; then + ip addr flush dev ${src_nic} + if [ $? -ne 0 ] ; then + ip addr add ${ip}/${prefix} dev ${src_nic} + echo "Error while flushing IP from ${src_nic}." + exit 1 + fi + + ip addr add ${ip}/${prefix} dev "${bridge_name}" + if [ $? -ne 0 ] ; then + echo "Error assigning IP to bridge "${bridge_name}"." + ip addr add ${ip}/${prefix} dev ${src_nic} + exit 1 + fi + elif [[ -n "${bridge_ip}" && -n "${bridge_prefix}" ]]; then + echo "Bridge '${bridge_name}' already has IP assigned. Keeping the same:: IP:[${bridge_ip}]; Prefix:[${bridge_prefix}]..." + else + echo "Interface ${name} has invalid IP address. IP:[${ip}]; Prefix:[${prefix}]..." + exit 1 + fi + set -e + fi +} + +function get_pf_or_vf_pci { + dpdk_pci_id=${1} + vf_index=${2} + + if [ -n "$vf_index" ] + then + iface=$(get_name_by_pci_id "${dpdk_pci_id}") + sysfs_numvfs_path="/sys/class/net/${iface}/device/sriov_numvfs" + if [[ -f /sys/class/net/${iface}/device/sriov_numvfs && + "$(cat /sys/class/net/${iface}/device/sriov_numvfs)" -ne "0" && + -e /sys/class/net/${iface}/device/virtfn${vf_index} ]] + then + dpdk_pci_id=$(ls -la /sys/class/net/${iface}/device/virtfn${vf_index}) + dpdk_pci_id=${dpdk_pci_id#*"../"} + else + echo "Error fetching the VF PCI for PF: ["${iface}", "${dpdk_pci_id}"] and VF-Index: ${vf_index}." + exit 1 + fi + fi +} + +function bind_dpdk_nic { + target_driver=${1} + pci_id=${2} + + current_driver="$(get_driver_by_address "${pci_id}" )" + if [ "$current_driver" != "$target_driver" ]; then + if [ "$current_driver" != "" ]; then + unbind_nic "${pci_id}" ${current_driver} + fi + bind_nic "${pci_id}" ${target_driver} + fi +} + +function process_dpdk_nics { + target_driver=$(get_dpdk_config_value ${DPDK_CONFIG} '.driver') # loop over all nics echo $DPDK_CONFIG | jq -r -c '.nics[]' | \ while IFS= read -r nic; do local port_name=$(get_dpdk_config_value ${nic} '.name') local pci_id=$(get_dpdk_config_value ${nic} '.pci_id') local bridge=$(get_dpdk_config_value ${nic} '.bridge') + local vf_index=$(get_dpdk_config_value ${nic} '.vf_index') if [[ $(get_dpdk_config_value ${nic} '.migrate_ip') == true ]] ; then - local src_nic=$(get_name_by_pci_id ${pci_id}) - if [ -n "${src_nic}" ] ; then - ip=$(get_ip_address_from_interface ${src_nic}) - prefix=$(get_ip_prefix_from_interface ${src_nic}) - - # Enabling explicit error handling: We must avoid to lose the IP - # address in the migration process. Hence, on every error, we - # attempt to assign the IP back to the original NIC and exit. - set +e - ip addr flush dev ${src_nic} - if [ $? -ne 0 ] ; then - ip addr add ${ip}/${prefix} dev ${src_nic} - echo "Error while flushing IP from ${src_nic}." - exit 1 - fi - - bridge_exists=$(ip a s ${bridge} 2> /dev/null) - if [ -z "${bridge_exists}" ] ; then - echo "Bridge ${bridge} does not exist. Creating it on demand." - init_ovs_dpdk_bridge ${bridge} - fi - - ip addr add ${ip}/${prefix} dev ${bridge} - if [ $? -ne 0 ] ; then - echo "Error assigning IP to bridge ${bridge}." - ip addr add ${ip}/${prefix} dev ${src_nic} - exit 1 - fi - set -e - fi + migrate_ip "${pci_id}" "${bridge}" fi - current_driver="$(get_driver_by_address ${pci_id} )" - if [ "$current_driver" != "$target_driver" ]; then - if [ "$current_driver" != "" ]; then - unbind_nic ${pci_id} ${current_driver} - fi - bind_nic ${pci_id} ${target_driver} + iface=$(get_name_by_pci_id "${pci_id}") + + if [ -n "${vf_index}" ]; then + vf_string="vf ${vf_index}" fi + ip link set ${iface} promisc on + ip link set ${iface} ${vf_string} trust on + ip link set ${iface} ${vf_string} spoofchk off + + # Fetch the PCI to be bound to DPDK driver. + # In case VF Index is configured then PCI of that particular VF + # is bound to DPDK, otherwise PF PCI is bound to DPDK. + get_pf_or_vf_pci "${pci_id}" "${vf_index}" + + bind_dpdk_nic ${target_driver} "${dpdk_pci_id}" + ovs-vsctl --db=unix:${OVS_SOCKET} --if-exists del-port ${port_name} dpdk_options="" @@ -147,6 +205,18 @@ function bind_dpdk_nics { if [ -n "${pmd_rxq_affinity}" ]; then dpdk_options+='other_config:pmd-rxq-affinity=${pmd_rxq_affinity} ' fi + mtu=$(get_dpdk_config_value ${nic} '.mtu') + if [ -n "${mtu}" ]; then + dpdk_options+='mtu_request=${mtu} ' + fi + n_rxq_size=$(get_dpdk_config_value ${nic} '.n_rxq_size') + if [ -n "${n_rxq_size}" ]; then + dpdk_options+='options:n_rxq_desc=${n_rxq_size} ' + fi + n_txq_size=$(get_dpdk_config_value ${nic} '.n_txq_size') + if [ -n "${n_txq_size}" ]; then + dpdk_options+='options:n_txq_desc=${n_txq_size} ' + fi ovs-vsctl --db=unix:${OVS_SOCKET} --may-exist add-port ${bridge} ${port_name} \ -- set Interface ${port_name} type=dpdk options:dpdk-devargs=${pci_id} ${dpdk_options} @@ -154,6 +224,90 @@ function bind_dpdk_nics { done } +function process_dpdk_bonds { + target_driver=$(get_dpdk_config_value ${DPDK_CONFIG} '.driver') + # loop over all bonds + echo $DPDK_CONFIG | jq -r -c '.bonds[]' > /tmp/bonds_array + while IFS= read -r bond; do + local bond_name=$(get_dpdk_config_value ${bond} '.name') + local dpdk_bridge=$(get_dpdk_config_value ${bond} '.bridge') + local migrate_ip=$(get_dpdk_config_value ${bond} '.migrate_ip') + local mtu=$(get_dpdk_config_value ${bond} '.mtu') + local n_rxq=$(get_dpdk_config_value ${bond} '.n_rxq') + local ofport_request=$(get_dpdk_config_value ${bond} '.ofport_request') + local n_rxq_size=$(get_dpdk_config_value ${bond} '.n_rxq_size') + local n_txq_size=$(get_dpdk_config_value ${bond} '.n_txq_size') + local ovs_options=$(get_dpdk_config_value ${bond} '.ovs_options') + + local nic_name_str="" + local dev_args_str="" + local ip_migrated=false + + echo $bond | jq -r -c '.nics[]' > /tmp/nics_array + while IFS= read -r nic; do + local pci_id=$(get_dpdk_config_value ${nic} '.pci_id') + local nic_name=$(get_dpdk_config_value ${nic} '.name') + local pmd_rxq_affinity=$(get_dpdk_config_value ${nic} '.pmd_rxq_affinity') + local vf_index=$(get_dpdk_config_value ${nic} '.vf_index') + local vf_string="" + + if [[ ${migrate_ip} = "true" && ${ip_migrated} = "false" ]]; then + migrate_ip "${pci_id}" "${dpdk_bridge}" + ip_migrated=true + fi + + iface=$(get_name_by_pci_id "${pci_id}") + + if [ -n "${vf_index}" ]; then + vf_string="vf ${vf_index}" + fi + + ip link set ${iface} promisc on + ip link set ${iface} ${vf_string} trust on + ip link set ${iface} ${vf_string} spoofchk off + + # Fetch the PCI to be bound to DPDK driver. + # In case VF Index is configured then PCI of that particular VF + # is bound to DPDK, otherwise PF PCI is bound to DPDK. + get_pf_or_vf_pci "${pci_id}" "${vf_index}" + + bind_dpdk_nic ${target_driver} "${dpdk_pci_id}" + + nic_name_str+=" "${nic_name}"" + dev_args_str+=" -- set Interface "${nic_name}" type=dpdk options:dpdk-devargs=""${dpdk_pci_id}" + + if [[ -n ${mtu} ]]; then + dev_args_str+=" -- set Interface "${nic_name}" mtu_request=${mtu}" + fi + + if [[ -n ${n_rxq} ]]; then + dev_args_str+=" -- set Interface "${nic_name}" options:n_rxq=${n_rxq}" + fi + + if [[ -n ${ofport_request} ]]; then + dev_args_str+=" -- set Interface "${nic_name}" ofport_request=${ofport_request}" + fi + + if [[ -n ${pmd_rxq_affinity} ]]; then + dev_args_str+=" -- set Interface "${nic_name}" other_config:pmd-rxq-affinity=${pmd_rxq_affinity}" + fi + + if [[ -n ${n_rxq_size} ]]; then + dev_args_str+=" -- set Interface "${nic_name}" options:n_rxq_desc=${n_rxq_size}" + fi + + if [[ -n ${n_txq_size} ]]; then + dev_args_str+=" -- set Interface "${nic_name}" options:n_txq_desc=${n_txq_size}" + fi + done < /tmp/nics_array + + ovs-vsctl --db=unix:${OVS_SOCKET} --if-exists del-port "${bond_name}" + ovs-vsctl --db=unix:${OVS_SOCKET} --may-exist add-bond "${dpdk_bridge}" "${bond_name}" \ + ${nic_name_str} \ + "${ovs_options}" ${dev_args_str} + done < "/tmp/bonds_array" +} + function get_driver_by_address { if [[ -e /sys/bus/pci/devices/$1/driver ]]; then echo $(ls /sys/bus/pci/devices/$1/driver -al | awk '{n=split($NF,a,"/"); print a[n]}') @@ -200,28 +354,35 @@ do if [ -n "$iface" ] && [ "$iface" != "null" ] then ovs-vsctl --no-wait --may-exist add-port $bridge $iface - ip link set dev $iface up + if [[ $(get_dpdk_config_value ${DPDK_CONFIG} '.enabled') != "true" ]]; then + ip link set dev $iface up + fi fi done -tunnel_interface="{{- .Values.network.interface.tunnel -}}" -if [ -z "${tunnel_interface}" ] ; then - # search for interface with tunnel network routing - tunnel_network_cidr="{{- .Values.network.interface.tunnel_network_cidr -}}" - if [ -z "${tunnel_network_cidr}" ] ; then - tunnel_network_cidr="0/0" +tunnel_types="{{- .Values.conf.plugins.openvswitch_agent.agent.tunnel_types -}}" +if [[ -n "${tunnel_types}" ]] ; then + tunnel_interface="{{- .Values.network.interface.tunnel -}}" + if [ -z "${tunnel_interface}" ] ; then + # search for interface with tunnel network routing + tunnel_network_cidr="{{- .Values.network.interface.tunnel_network_cidr -}}" + if [ -z "${tunnel_network_cidr}" ] ; then + tunnel_network_cidr="0/0" + fi + # If there is not tunnel network gateway, exit + tunnel_interface=$(ip -4 route list ${tunnel_network_cidr} | awk -F 'dev' '{ print $2; exit }' \ + | awk '{ print $1 }') || exit 1 fi - # If there is not tunnel network gateway, exit - tunnel_interface=$(ip -4 route list ${tunnel_network_cidr} | awk -F 'dev' '{ print $2; exit }' \ - | awk '{ print $1 }') || exit 1 fi if [[ "${DPDK_ENABLED}" == "true" ]]; then init_ovs_dpdk_bridges - bind_dpdk_nics + process_dpdk_nics + process_dpdk_bonds fi # determine local-ip dynamically based on interface provided but only if tunnel_types is not null +if [[ -n "${tunnel_types}" ]] ; then LOCAL_IP=$(get_ip_address_from_interface ${tunnel_interface}) if [ -z "${LOCAL_IP}" ] ; then echo "Var LOCAL_IP is empty" @@ -232,4 +393,4 @@ tee > /tmp/pod-shared/ml2-local-ip.ini << EOF [ovs] local_ip = "${LOCAL_IP}" EOF - +fi diff --git a/neutron/templates/bin/_neutron-openvswitch-agent.sh.tpl b/neutron/templates/bin/_neutron-openvswitch-agent.sh.tpl index a9b90d4316..10cd19dc36 100644 --- a/neutron/templates/bin/_neutron-openvswitch-agent.sh.tpl +++ b/neutron/templates/bin/_neutron-openvswitch-agent.sh.tpl @@ -20,8 +20,10 @@ set -ex exec neutron-openvswitch-agent \ --config-file /etc/neutron/neutron.conf \ - --config-file /etc/neutron/plugins/ml2/ml2_conf.ini \ - --config-file /tmp/pod-shared/ml2-local-ip.ini \ + --config-file /etc/neutron/plugins/ml2/ml2_conf.ini +{{- if .Values.conf.plugins.openvswitch_agent.agent.tunnel_types }} \ + --config-file /tmp/pod-shared/ml2-local-ip.ini +{{- end }} \ --config-file /etc/neutron/plugins/ml2/openvswitch_agent.ini {{- if .Values.conf.plugins.taas.taas.enabled }} \ --config-file /etc/neutron/plugins/ml2/taas.ini diff --git a/neutron/values.yaml b/neutron/values.yaml index 14b03f0a29..ae5eb1b411 100644 --- a/neutron/values.yaml +++ b/neutron/values.yaml @@ -1950,17 +1950,59 @@ conf: # because additional parameters are needed ovs_dpdk: enabled: false - # driver: uio_pci_generic - # nics: - # - name: dpdk0 - # pci_id: '0000:05:00.0' - # bridge: br-phy - # migrate_ip: true - # n_rxq: 2 - # pmd_rxq_affinity: "0:3,1:27" - # ofport_request: 1 - # bridges: - # - name: br-phy + driver: uio_pci_generic + # In case bonds are configured, the nics which are part of those bonds + # must NOT be provided here. + nics: + - name: dpdk0 + pci_id: '0000:05:00.0' + # Set VF Index in case some particular VF(s) need to be + # used with ovs-dpdk. + # vf_index: 0 + bridge: br-phy + migrate_ip: true + n_rxq: 2 + pmd_rxq_affinity: "0:3,1:27" + ofport_request: 1 + # optional parameters for tuning the OVS DPDK config + # in alignment with the available hardware resources + # mtu: 2000 + # n_rxq_size: 1024 + # n_txq_size: 1024 + bridges: + - name: br-phy + # Optional parameter for configuring bonding in OVS-DPDK + # - name: br-phy-bond0 + # bonds: + # - name: dpdkbond0 + # bridge: br-phy-bond0 + # # The IP from the first nic in nics list shall be used + # migrate_ip: true + # mtu: 2000 + # # Please note that n_rxq is set for each NIC individually + # # rather than denoting the total number of rx queues for + # # the bond as a whole. So setting n_rxq = 2 below for ex. + # # would be 4 rx queues in total for the bond. + # n_rxq: 2 + # ofport_request: 1 + # n_rxq_size: 1024 + # n_txq_size: 1024 + # ovs_options: "bond_mode=active-backup" + # nics: + # - name: dpdk_b0s0 + # pci_id: '0000:06:00.0' + # pmd_rxq_affinity: "0:3,1:27" + # # Set VF Index in case some particular VF(s) need to be + # # used with ovs-dpdk. In which case pci_id of PF must be + # # provided above. + # # vf_index: 0 + # - name: dpdk_b0s1 + # pci_id: '0000:07:00.0' + # pmd_rxq_affinity: "0:3,1:27" + # # Set VF Index in case some particular VF(s) need to be + # # used with ovs-dpdk. In which case pci_id of PF must be + # # provided above. + # # vf_index: 0 # Names of secrets used by bootstrap and environmental checks secrets: diff --git a/neutron/values_overrides/dpdk-bond.yaml b/neutron/values_overrides/dpdk-bond.yaml new file mode 100644 index 0000000000..c188783fe6 --- /dev/null +++ b/neutron/values_overrides/dpdk-bond.yaml @@ -0,0 +1,30 @@ +network: + interface: + tunnel: br-phy-bond0 +conf: + plugins: + openvswitch_agent: + agent: + tunnel_types: vxlan + ovs: + bridge_mappings: public:br-ex + datapath_type: netdev + vhostuser_socket_dir: /var/run/openvswitch/vhostuser + ovs_dpdk: + enabled: true + driver: uio_pci_generic + nics: [] + bonds: + # CHANGE-ME: modify below parameters according to your hardware + - name: dpdkbond0 + bridge: br-phy-bond0 + # The IP from the first nic in nics list shall be used + migrate_ip: true + ovs_options: "bond_mode=active-backup" + nics: + - name: dpdk_b0s0 + pci_id: '0000:00:05.0' + - name: dpdk_b0s1 + pci_id: '0000:00:06.0' + bridges: + - name: br-phy-bond0 diff --git a/neutron/values_overrides/shared-sriov-ovs-dpdk-bond.yaml b/neutron/values_overrides/shared-sriov-ovs-dpdk-bond.yaml new file mode 100644 index 0000000000..30d6a2fd53 --- /dev/null +++ b/neutron/values_overrides/shared-sriov-ovs-dpdk-bond.yaml @@ -0,0 +1,90 @@ +network: + interface: + sriov: + - device: enp3s0f0 + num_vfs: 32 + promisc: false + - device: enp66s0f1 + num_vfs: 32 + promisc: false + tunnel: br-phy-bond0 + backend: + - openvswitch + - sriov +conf: + auto_bridge_add: + br-ex: null + neutron: + DEFAULT: + l3_ha: False + max_l3_agents_per_router: 1 + l3_ha_network_type: vxlan + dhcp_agents_per_network: 1 + service_plugins: router + plugins: + ml2_conf: + ml2: + mechanism_drivers: l2population,openvswitch,sriovnicswitch + type_drivers: vlan,flat,vxlan + tenant_network_types: vxlan + ml2_type_flat: + flat_networks: public + ml2_type_vlan: + network_vlan_ranges: ovsnet:2:4094,sriovnet1:100:4000,sriovnet2:100:4000 + openvswitch_agent: + default: + ovs_vsctl_timeout: 30 + agent: + tunnel_types: vxlan + securitygroup: + enable_security_group: False + firewall_driver: neutron.agent.firewall.NoopFirewallDriver + ovs: + bridge_mappings: public:br-ex,ovsnet:br-phy-bond0 + datapath_type: netdev + vhostuser_socket_dir: /var/run/openvswitch/vhostuser + of_connect_timeout: 60 + of_request_timeout: 30 + sriov_agent: + securitygroup: + firewall_driver: neutron.agent.firewall.NoopFirewallDriver + sriov_nic: + physical_device_mappings: sriovnet1:enp3s0f0,sriovnet2:enp66s0f1 + exclude_devices: enp3s0f0:0000:00:05.1,enp66s0f1:0000:00:06.1 + ovs_dpdk: + enabled: true + driver: uio_pci_generic + nics: [] + bonds: + # CHANGE-ME: modify below parameters according to your hardware + - name: dpdkbond0 + bridge: br-phy-bond0 + mtu: 9000 + # The IP from the first nic in nics list shall be used + migrate_ip: true + n_rxq: 2 + n_rxq_size: 1024 + n_txq_size: 1024 + ovs_options: "bond_mode=active-backup" + nics: + - name: dpdk_b0s0 + pci_id: '0000:00:05.0' + vf_index: 0 + - name: dpdk_b0s1 + pci_id: '0000:00:06.0' + vf_index: 0 + bridges: + - name: br-phy-bond0 +# In case of shared profile (sriov + ovs-dpdk), sriov agent should finish +# first so as to let it configure the SRIOV VFs before ovs-agent tries to +# bind it with DPDK driver. +dependencies: + dynamic: + targeted: + openvswitch: + ovs_agent: + pod: + - requireSameNode: true + labels: + application: neutron + component: neutron-sriov-agent \ No newline at end of file