Traffic control fixes and refresh
This commit fixes a couple of issues surrounding the main StarlingX traffic control script. The following has been changed: - The name of the script has been changed to remove the 'cgcs_' prefix - Obsolete code involving the (old) infrastructure network and related consolidation with the management interface has been removed. - There is no more class/qdisc for live-migration, since that traffic now runs over the cluster-host network. It should be prioritized via a kubernetes network policy instead. (not in this commit) - The code has been cleaned up to define the handles and qdisc parameters globally so they can be changed once instead of in each tc command. - SM heartbeats are treated as high priority traffic. Previously, we were only treating a tos of 0x10 (low delay) as high priority traffic, while SM uses a DSCP value of 0xc0. - The script now handles IPv6 Closes-Bug: #1839386 Change-Id: I32cd58842865d8c8efe9444b23b05a96f1df168a Signed-off-by: Steven Webster <steven.webster@windriver.com>
This commit is contained in:
parent
bca5ef87d4
commit
fc33582e4f
@ -40,13 +40,13 @@ for i in $(ip link | grep mtu | grep eth |awk '{print $2}' | sed 's#:##g'); do
|
||||
done
|
||||
|
||||
###############################################################################
|
||||
# TC Configuration Script (/usr/local/bin/cgcs_tc_setup.sh)
|
||||
# TC Configuration Script (/usr/local/bin/tc_setup.sh)
|
||||
###############################################################################
|
||||
delimiter ${LOGFILE} "cat /usr/local/bin/cgcs_tc_setup.sh"
|
||||
if [ -f /usr/local/bin/cgcs_tc_setup.sh ]; then
|
||||
cat /usr/local/bin/cgcs_tc_setup.sh >> ${LOGFILE} 2>>${COLLECT_ERROR_LOG}
|
||||
delimiter ${LOGFILE} "cat /usr/local/bin/tc_setup.sh"
|
||||
if [ -f /usr/local/bin/tc_setup.sh ]; then
|
||||
cat /usr/local/bin/tc_setup.sh >> ${LOGFILE} 2>>${COLLECT_ERROR_LOG}
|
||||
else
|
||||
echo "/usr/local/bin/cgcs_tc_setup.sh NOT FOUND" >> ${LOGFILE}
|
||||
echo "/usr/local/bin/tc_setup.sh NOT FOUND" >> ${LOGFILE}
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
|
@ -57,7 +57,7 @@ mkdir -p $RPM_BUILD_ROOT/wheels
|
||||
install -m 644 dist/*.whl $RPM_BUILD_ROOT/wheels/
|
||||
|
||||
install -d %{buildroot}%{local_bindir}
|
||||
install %{_buildsubdir}/scripts/cgcs_tc_setup.sh %{buildroot}%{local_bindir}
|
||||
install %{_buildsubdir}/scripts/tc_setup.sh %{buildroot}%{local_bindir}
|
||||
install %{_buildsubdir}/scripts/remotelogging_tc_setup.sh %{buildroot}%{local_bindir}
|
||||
install %{_buildsubdir}/scripts/connectivity_test %{buildroot}%{local_bindir}
|
||||
install -m 555 %{_buildsubdir}/scripts/update-iso.sh %{buildroot}%{local_bindir}
|
||||
@ -88,7 +88,7 @@ systemctl enable opt-platform.service
|
||||
%license LICENSE
|
||||
%defattr(-,root,root,-)
|
||||
/usr/bin/verify-license
|
||||
%{local_bindir}/cgcs_tc_setup.sh
|
||||
%{local_bindir}/tc_setup.sh
|
||||
%{local_bindir}/remotelogging_tc_setup.sh
|
||||
%{local_bindir}/connectivity_test
|
||||
%{local_sbindir}/patch-restart-mtce
|
||||
|
@ -1,518 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
#
|
||||
# Copyright (c) 2017-2018 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# $1 - interface
|
||||
# $2 - interface type [mgmt, infra]
|
||||
# $3 - link capacity
|
||||
# $4 - dummy used to determine if we're backgrounded or not
|
||||
|
||||
DEV=$1
|
||||
NETWORKTYPE=$2
|
||||
NETWORKSPEED=$3
|
||||
|
||||
if [ ${NETWORKTYPE} != "mgmt" -a ${NETWORKTYPE} != "infra" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# We want to be able to wait some time (typically <10 sec) for the
|
||||
# network link to autonegotiate link speed. Re-run the script in
|
||||
# the background so the parent can return right away and init can
|
||||
# continue.
|
||||
if [ $# -eq 3 ]; then
|
||||
$0 $DEV $NETWORKTYPE $NETWORKSPEED dummy &
|
||||
disown
|
||||
exit 0
|
||||
fi
|
||||
|
||||
function test_valid_speed {
|
||||
# After the link is enabled but before the autonegotiation is complete
|
||||
# the link speed may be read as either -1 or as 4294967295 (which is
|
||||
# uint(-1) in twos-complement) depending on the kernel. Neither one is valid.
|
||||
if (( $1 > 0 )) && (( $1 != 4294967295 ))
|
||||
then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
function log {
|
||||
# It seems that syslog isn't yet running, so append directly to the syslog file
|
||||
FILE=/var/log/platform.log
|
||||
echo `date +%FT%T.%3N` `hostname` CGCS_TC_SETUP: $@ >> $FILE
|
||||
}
|
||||
|
||||
function infra_exists {
|
||||
if [ -z "$infrastructure_interface" ]; then
|
||||
return 1
|
||||
else
|
||||
return 0
|
||||
fi
|
||||
}
|
||||
|
||||
function is_consolidated {
|
||||
if ! infra_exists
|
||||
then
|
||||
return 1
|
||||
fi
|
||||
|
||||
local INFRA=$infrastructure_interface
|
||||
local MGMT=$management_interface
|
||||
|
||||
# determine whether the management interface is a parent of the
|
||||
# infrastructure interface based on name.
|
||||
# eg. this matches enp0s8 to enp0s8.10 but not enp0s88
|
||||
if [[ $INFRA =~ $MGMT[\.][0-9]+$ ]]; then
|
||||
return 0
|
||||
fi
|
||||
return 1
|
||||
}
|
||||
|
||||
function is_vlan {
|
||||
if [ -f /proc/net/vlan/$DEV ]; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
function is_loopback {
|
||||
# (from include/uapi/linux/if.h)
|
||||
# IFF_LOOPBACK = 1<<3 = 8. Using a left shifted syntax can confuse bashate.
|
||||
IFF_LOOPBACK=8
|
||||
|
||||
# get the interface flags
|
||||
FLAGS=`cat /sys/class/net/$DEV/flags`
|
||||
|
||||
if ((($IFF_LOOPBACK & $FLAGS) == 0))
|
||||
then
|
||||
return 1
|
||||
else
|
||||
return 0
|
||||
fi
|
||||
}
|
||||
|
||||
function get_tc_filter_ethertype {
|
||||
local ETHERTYPE=$DEFAULT_ETHERTYPE
|
||||
|
||||
if is_consolidated
|
||||
then
|
||||
if ! is_vlan
|
||||
then
|
||||
# If we have a consolidated VLAN interface, we must set the
|
||||
# protocol to '802.1q' for the underlying Ethernet interface
|
||||
# to be able to match on IP packets coming from the VLAN
|
||||
# interface.
|
||||
ETHERTYPE=802.1q
|
||||
fi
|
||||
fi
|
||||
echo $ETHERTYPE
|
||||
return 0
|
||||
}
|
||||
|
||||
function setup_tc_port_filter {
|
||||
local PORT=$1
|
||||
local PORTMASK=$2
|
||||
local FLOWID=$3
|
||||
local PROTOCOL=$4
|
||||
local PRIORITY=$DEFAULT_PRIORITY
|
||||
local ETHERTYPE=$DEFAULT_ETHERTYPE
|
||||
|
||||
ETHERTYPE=$(get_tc_filter_ethertype)
|
||||
|
||||
if [ -z $PROTOCOL ]; then
|
||||
# Apply to TCP and UDP
|
||||
tc filter add dev $DEV protocol $ETHERTYPE parent 1:0 prio $PRIORITY \
|
||||
u32 match ip dport $PORT $PORTMASK flowid $FLOWID
|
||||
tc filter add dev $DEV protocol $ETHERTYPE parent 1:0 prio $PRIORITY \
|
||||
u32 match ip sport $PORT $PORTMASK flowid $FLOWID
|
||||
else
|
||||
# Apply to specific protocol only
|
||||
tc filter add dev $DEV protocol $ETHERTYPE parent 1:0 prio $PRIORITY \
|
||||
u32 match ip protocol $PROTOCOL 0xff match \
|
||||
ip dport $PORT $PORTMASK flowid $FLOWID
|
||||
tc filter add dev $DEV protocol $ETHERTYPE parent 1:0 prio $PRIORITY \
|
||||
u32 match ip protocol $PROTOCOL 0xff match \
|
||||
ip sport $PORT $PORTMASK flowid $FLOWID
|
||||
fi
|
||||
}
|
||||
|
||||
function setup_tc_tos_filter {
|
||||
local TOS=$1
|
||||
local TOSMASK=$2
|
||||
local FLOWID=$3
|
||||
local ETHERTYPE=$4
|
||||
local PRIORITY=$5
|
||||
|
||||
if [ -z $ETHERTYPE ]; then
|
||||
ETHERTYPE=$DEFAULT_ETHERTYPE
|
||||
fi
|
||||
|
||||
if [ -z $PRIORITY ]; then
|
||||
PRIORITY=$DEFAULT_PRIORITY
|
||||
fi
|
||||
|
||||
tc filter add dev $DEV protocol $ETHERTYPE parent 1:0 prio $PRIORITY \
|
||||
u32 match ip tos $TOS $TOSMASK flowid $FLOWID
|
||||
}
|
||||
|
||||
function setup_root_tc {
|
||||
# create new qdiscs, classes and queues
|
||||
tc qdisc add dev $DEV root handle 1: htb default 40
|
||||
tc class add dev $DEV parent 1: classid 1:1 htb rate ${SPEED}mbit \
|
||||
burst 15k quantum 60000
|
||||
}
|
||||
|
||||
function setup_default_tc {
|
||||
local RATE=$1
|
||||
local CEIL=$2
|
||||
|
||||
local FLOWQ=40
|
||||
local CLASSID=1:$FLOWQ
|
||||
local FLOWID=$CLASSID
|
||||
|
||||
# create default qdiscs, classes
|
||||
$AC $CLASSID htb rate $((${RATE}*${SPEED}/100))mbit burst 15k \
|
||||
ceil $((${CEIL}*${SPEED}/100))mbit prio 4 quantum 60000
|
||||
tc qdisc add dev $DEV parent $CLASSID handle $FLOWQ: sfq perturb 10
|
||||
}
|
||||
|
||||
function setup_hiprio_tc {
|
||||
local RATE=$1
|
||||
local CEIL=$2
|
||||
|
||||
local FLOWQ=10
|
||||
local CLASSID=1:$FLOWQ
|
||||
local FLOWID=$CLASSID
|
||||
local ETHERTYPE=$DEFAULT_ETHERTYPE
|
||||
ETHERTYPE=$(get_tc_filter_ethertype)
|
||||
|
||||
# create high priority qdiscs, classes, and queues
|
||||
$AC $CLASSID htb rate $((${RATE}*${SPEED}/100))mbit burst 15k \
|
||||
ceil $((${CEIL}*${SPEED}/100))mbit prio 3 quantum 60000
|
||||
tc qdisc add dev $DEV parent $CLASSID handle $FLOWQ: sfq perturb 10
|
||||
|
||||
# filter for high priority traffic
|
||||
setup_tc_tos_filter 0x10 0xf8 $FLOWID $ETHERTYPE
|
||||
|
||||
if [ "$ETHERTYPE" != "$DEFAULT_ETHERTYPE" ]; then
|
||||
# For the 'hiprio' class, a second filter at a different priority is
|
||||
# needed in this case to match traffic with the default ethertype.
|
||||
# (ie. high priority management traffic).
|
||||
local PRIORITY
|
||||
PRIORITY=$(($DEFAULT_PRIORITY + 1))
|
||||
setup_tc_tos_filter 0x10 0xf8 $FLOWID $DEFAULT_ETHERTYPE $PRIORITY
|
||||
fi
|
||||
}
|
||||
|
||||
function setup_migration_tc {
|
||||
local RATE=$1
|
||||
local CEIL=$2
|
||||
|
||||
local FLOWQ=30
|
||||
local CLASSID=1:$FLOWQ
|
||||
local FLOWID=$CLASSID
|
||||
|
||||
# create migration qdiscs, classes, and queues
|
||||
$AC $CLASSID htb rate $((${RATE}*${SPEED}/100))mbit burst 15k \
|
||||
ceil $((${CEIL}*${SPEED}/100))mbit prio 2 quantum 60000
|
||||
tc qdisc add dev $DEV parent $CLASSID handle $FLOWQ: sfq perturb 10
|
||||
|
||||
# Migration (TCP, ports 49152-49215)
|
||||
setup_tc_port_filter 49152 0xffc0 $FLOWID $TCP
|
||||
|
||||
# Migration via libvirt tunnel (TCP, port 16509)
|
||||
setup_tc_port_filter 16509 0xffff $FLOWID $TCP
|
||||
}
|
||||
|
||||
function setup_storage_tc {
|
||||
local RATE=$1
|
||||
local CEIL=$2
|
||||
|
||||
local FLOWQ=20
|
||||
local CLASSID=1:$FLOWQ
|
||||
local FLOWID=$CLASSID
|
||||
|
||||
# create storage qdiscs, classes, and queues
|
||||
$AC $CLASSID htb rate $((${RATE}*${SPEED}/100))mbit burst 15k \
|
||||
ceil $((${CEIL}*${SPEED}/100))mbit prio 1 quantum 60000
|
||||
tc qdisc add dev $DEV parent $CLASSID handle $FLOWQ: sfq perturb 10
|
||||
|
||||
# Storage, NFS (UDP/TCP, port 2049)
|
||||
setup_tc_port_filter 2049 0xffff $FLOWID
|
||||
|
||||
# Storage, iSCSI (UDP/TCP, port 3260)
|
||||
setup_tc_port_filter 3260 0xffff $FLOWID
|
||||
|
||||
# Storage, CEPH (TCP, ports 6789,6800-7100)
|
||||
PORTS=( 6789 6800 6816 6912 7040 7072 7088 )
|
||||
PORTMASKS=( 0xffff 0xfff0 0xffa0 0xff80 0xffa0 0xfff0 0xfffa )
|
||||
for idx in "${!PORTS[@]}"; do
|
||||
PORT=${PORTS[$idx]}
|
||||
MASK=${PORTMASKS[$idx]}
|
||||
setup_tc_port_filter $PORT $MASK $FLOWID $TCP
|
||||
done
|
||||
}
|
||||
|
||||
function setup_drbd_tc {
|
||||
local RATE=$1
|
||||
local CEIL=$2
|
||||
|
||||
local FLOWQ=50
|
||||
local CLASSID=1:$FLOWQ
|
||||
local FLOWID=$CLASSID
|
||||
|
||||
# create DRBD qdiscs, classes and queues
|
||||
$AC $CLASSID htb rate $((${RATE}*${SPEED}/100))mbit burst 15k \
|
||||
ceil $((${CEIL}*${SPEED}/100))mbit quantum 60000
|
||||
|
||||
tc qdisc add dev $DEV parent $CLASSID handle $FLOWQ: sfq perturb 10
|
||||
|
||||
# DRDB (TCP, ports 7789,7790,7791,7799)
|
||||
# port 7793 is used with drdb-extension
|
||||
PORTS=( 7789 7790 7791 7792 7799 7793 )
|
||||
PORTMASKS=( 0xffff 0xffff 0xffff 0xffff 0xffff 0xffff)
|
||||
for idx in "${!PORTS[@]}"; do
|
||||
PORT=${PORTS[$idx]}
|
||||
MASK=${PORTMASKS[$idx]}
|
||||
setup_tc_port_filter $PORT $MASK $FLOWID $TCP
|
||||
done
|
||||
}
|
||||
|
||||
function setup_mgmt_tc_individual {
|
||||
# Configure high priority and default traffic classes.
|
||||
|
||||
setup_root_tc
|
||||
|
||||
# bandwidth percentages
|
||||
local HIPRIO_BW=10
|
||||
local DEFAULT_BW=10
|
||||
|
||||
# bandwidth ceiling percentages, for borrowing bandwidth.
|
||||
# the management interface is not consolidated, so set the ceiling to the
|
||||
# maximum rate.
|
||||
local HIPRIO_CBW=100
|
||||
local DEFAULT_CBW=100
|
||||
|
||||
setup_hiprio_tc $HIPRIO_BW $HIPRIO_CBW
|
||||
setup_default_tc $DEFAULT_BW $DEFAULT_CBW
|
||||
}
|
||||
|
||||
|
||||
function setup_mgmt_tc_vlan {
|
||||
# Configure high priority and default traffic classes.
|
||||
|
||||
setup_root_tc
|
||||
|
||||
# bandwidth percentages
|
||||
local HIPRIO_BW=10
|
||||
local DEFAULT_BW=10
|
||||
|
||||
# bandwidth ceiling percentages, for borrowing bandwidth.
|
||||
# The management interface is a vlan, so reserve bandwidth
|
||||
# for sibling infra vlan interfaces.
|
||||
local HIPRIO_CBW=20
|
||||
local DEFAULT_CBW=20
|
||||
|
||||
setup_hiprio_tc $HIPRIO_BW $HIPRIO_CBW
|
||||
setup_default_tc $DEFAULT_BW $DEFAULT_CBW
|
||||
}
|
||||
|
||||
function setup_mgmt_tc_consolidated {
|
||||
# Configure management classes.
|
||||
# All traffic coming from the infra will get treated again by the
|
||||
# management traffic classes. We need to apply the same TCs as the
|
||||
# infra to prevent a management application from starving the
|
||||
# upper interface.
|
||||
setup_root_tc
|
||||
setup_tc_all
|
||||
}
|
||||
|
||||
function setup_mgmt_tc_infra_exists {
|
||||
if is_consolidated
|
||||
then
|
||||
# Infra over mgmt. In this case we want to reserve
|
||||
# a small portion of the link for management.
|
||||
setup_mgmt_tc_consolidated
|
||||
else
|
||||
# Only setup hiprio and default classes.
|
||||
# The infra will handle storage, migration, DRBD.
|
||||
if is_vlan
|
||||
then
|
||||
setup_mgmt_tc_vlan
|
||||
else
|
||||
setup_mgmt_tc_individual
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
function setup_mgmt_tc_no_infra {
|
||||
# Configure traffic classes for a management interface when
|
||||
# no infrastructure interface exists. Configure the full
|
||||
# set of TCs.
|
||||
|
||||
setup_root_tc
|
||||
setup_tc_all
|
||||
}
|
||||
|
||||
function setup_infra_tc_consolidated {
|
||||
# Configure the full set of traffic classes, but leave a small
|
||||
# portion of bandwidth for the management interface.
|
||||
|
||||
# reserve 1% BW for management
|
||||
local RESERVED
|
||||
RESERVED=$((1*${SPEED}/100))
|
||||
SPEED=$((${SPEED}-${RESERVED}))
|
||||
|
||||
setup_root_tc
|
||||
setup_tc_all
|
||||
}
|
||||
|
||||
function setup_infra_tc_individual {
|
||||
# Configure the full set of traffic classes.
|
||||
|
||||
setup_root_tc
|
||||
if is_vlan
|
||||
then
|
||||
# reserve 1% BW for sibling vlan interfaces
|
||||
local RESERVED
|
||||
RESERVED=$((1*${SPEED}/100))
|
||||
SPEED=$((${SPEED}-${RESERVED}))
|
||||
fi
|
||||
setup_tc_all
|
||||
}
|
||||
|
||||
function setup_tc_all {
|
||||
# bandwidth percentages, in case of over-percentage, bandwidth is divided based
|
||||
# on bandwidth ratios
|
||||
local MIG_BW=30
|
||||
local STOR_BW=50
|
||||
local DRBD_BW=80
|
||||
local HIPRIO_BW=10
|
||||
local DEFAULT_BW=10
|
||||
|
||||
# bandwidth ceiling percentages, for borrowing bandwidth
|
||||
local MIG_CBW=100
|
||||
local STOR_CBW=100
|
||||
local DRBD_CBW=100
|
||||
local HIPRIO_CBW=20
|
||||
local DEFAULT_CBW=20
|
||||
|
||||
setup_hiprio_tc $HIPRIO_BW $HIPRIO_CBW
|
||||
setup_storage_tc $STOR_BW $STOR_CBW
|
||||
setup_migration_tc $MIG_BW $MIG_CBW
|
||||
setup_default_tc $DEFAULT_BW $DEFAULT_CBW
|
||||
if [ $nodetype == "controller" ]; then
|
||||
setup_drbd_tc $DRBD_BW $DRBD_CBW
|
||||
fi
|
||||
}
|
||||
|
||||
function get_dev_speed {
|
||||
# If the link doesn't come up we won't go enabled, so here we can
|
||||
# afford to wait forever for the link.
|
||||
while true; do
|
||||
if [ -e /sys/class/net/$1/bonding ]; then
|
||||
for VAL in `cat /sys/class/net/$1/lower_*/speed`; do
|
||||
if test_valid_speed $VAL; then
|
||||
log slave for bond link $1 reported speed $VAL
|
||||
echo $VAL
|
||||
return 0
|
||||
else
|
||||
log slave for bond link $1 reported invalid speed $VAL
|
||||
fi
|
||||
done
|
||||
log all slaves for bond link $1 reported invalid speeds, \
|
||||
will sleep 30 sec and try again
|
||||
else
|
||||
VAL=`cat /sys/class/net/$1/speed`
|
||||
if test_valid_speed $VAL; then
|
||||
log link $1 reported speed $VAL
|
||||
echo $VAL
|
||||
return 0
|
||||
else
|
||||
log link $1 returned invalid speed $VAL, \
|
||||
will sleep 30 sec and try again
|
||||
fi
|
||||
fi
|
||||
sleep 30
|
||||
done
|
||||
}
|
||||
|
||||
function get_speed {
|
||||
local dev=$1
|
||||
local networktype=$2
|
||||
local net_speed=$NETWORKSPEED
|
||||
local dev_speed
|
||||
dev_speed=$(get_dev_speed $DEV)
|
||||
local speed=$dev_speed
|
||||
if [ $net_speed != $dev_speed ]; then
|
||||
log WARNING: $dev has a different operational speed [$dev_speed] \
|
||||
than configured speed [$net_speed] for network type $networktype
|
||||
if test_valid_speed $net_speed; then
|
||||
# Use greater of configured net speed / recorded dev speed
|
||||
if [ $net_speed -gt $dev_speed ]; then
|
||||
speed=$net_speed
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
log using speed $speed for tc filtering on $dev
|
||||
echo $speed
|
||||
}
|
||||
|
||||
|
||||
if is_loopback
|
||||
then
|
||||
# mgmt/infra uses the loopback for CPE simplex
|
||||
exit 0
|
||||
fi
|
||||
|
||||
log running tc setup script for $DEV $NETWORKTYPE in background
|
||||
|
||||
if [ -f /etc/platform/platform.conf ]; then
|
||||
source /etc/platform/platform.conf
|
||||
fi
|
||||
|
||||
SPEED=$(get_speed $DEV $NETWORKTYPE)
|
||||
|
||||
# 1:10 = high priority class
|
||||
# 1:20 = storage class
|
||||
# 1:30 = migration class
|
||||
# 1:40 = default class
|
||||
# 1:50 = DRBD class
|
||||
|
||||
# generic class add preamble
|
||||
AC="tc class add dev $DEV parent 1:1 classid"
|
||||
|
||||
# protocol numbers
|
||||
TCP=6
|
||||
UDP=17
|
||||
|
||||
# default ethertype for filters
|
||||
DEFAULT_ETHERTYPE=ip
|
||||
|
||||
# default priority for filters
|
||||
DEFAULT_PRIORITY=1
|
||||
|
||||
# delete existing qdiscs
|
||||
tc qdisc del dev $DEV root > /dev/null 2>&1
|
||||
|
||||
if [ ${NETWORKTYPE} = "mgmt" ]; then
|
||||
if infra_exists
|
||||
then
|
||||
setup_mgmt_tc_infra_exists
|
||||
else
|
||||
setup_mgmt_tc_no_infra
|
||||
fi
|
||||
else
|
||||
if is_consolidated
|
||||
then
|
||||
setup_infra_tc_consolidated
|
||||
else
|
||||
setup_infra_tc_individual
|
||||
fi
|
||||
fi
|
@ -30,7 +30,7 @@ function is_loopback {
|
||||
function log {
|
||||
# It seems that syslog isn't yet running, so append directly to the syslog file
|
||||
local FILE=/var/log/platform.log
|
||||
echo `date +%FT%T.%3N` `hostname` CGCS_TC_SETUP: $@ >> $FILE
|
||||
echo `date +%FT%T.%3N` `hostname` TC_SETUP: $@ >> $FILE
|
||||
}
|
||||
|
||||
function test_valid_speed {
|
||||
@ -91,7 +91,7 @@ LOG_BW=9
|
||||
DEFAULT_CBW=20
|
||||
LOG_CBW=20
|
||||
|
||||
# 1:40 = default class from cgcs_tc_setup.sh
|
||||
# 1:40 = default class from tc_setup.sh
|
||||
# 1:60 = LOG class
|
||||
|
||||
if [ $nodetype == "controller" ]; then
|
||||
|
503
utilities/platform-util/scripts/tc_setup.sh
Executable file
503
utilities/platform-util/scripts/tc_setup.sh
Executable file
@ -0,0 +1,503 @@
|
||||
#!/bin/sh
|
||||
|
||||
#
|
||||
# Copyright (c) 2017-2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# $1 - interface
|
||||
# $2 - interface type [mgmt]
|
||||
# $3 - link capacity
|
||||
# $4 - dummy used to determine if we're backgrounded or not
|
||||
|
||||
DEV=$1
|
||||
NETWORKTYPE=$2
|
||||
NETWORKSPEED=$3
|
||||
|
||||
# log file
|
||||
LOG_FILE=/var/log/platform.log
|
||||
|
||||
# default ethertype for filters
|
||||
DEFAULT_FILTER_ETHERTYPE=ip
|
||||
|
||||
# default match protocol for filters
|
||||
DEFAULT_FILTER_MATCH_PROTOCOL=ip
|
||||
|
||||
# default priority for filters
|
||||
DEFAULT_FILTER_PRIORITY=1
|
||||
|
||||
# default HTB class quantum (borrowing amount) in bytes
|
||||
DEFAULT_HTB_QUANTUM=60000
|
||||
|
||||
# default HTB class burst (amount of bytes that can be burst at ceil speed)
|
||||
DEFAULT_HTB_BURST=15k
|
||||
|
||||
# default SFQ algorithm pertubation in seconds (recommended 10)
|
||||
DEFAULT_SFQ_PERTUBATION=10
|
||||
|
||||
# major handle for the root qdisc / class.
|
||||
# All objects in the same traffic control structure must share a major handle
|
||||
# number. Conventionally, numbering schemes start at 1 for objects attached
|
||||
# directly to the root qdisc.
|
||||
ROOT_HANDLE_MAJOR=1
|
||||
|
||||
# minor handle for the root class.
|
||||
ROOT_HANDLE_MINOR=1
|
||||
|
||||
# root qdisc id. The minor number namespace is left available for classes
|
||||
ROOT_QDISC_ID="${ROOT_HANDLE_MAJOR}:"
|
||||
|
||||
# root class id. The root class id is typically 1:1
|
||||
ROOT_CLASS_ID="${ROOT_HANDLE_MAJOR}:${ROOT_HANDLE_MINOR}"
|
||||
|
||||
# minor handle for a qdisc. Unambiguously identifies an object as a qdisc
|
||||
QDISC_HANDLE_MINOR=0
|
||||
|
||||
# RFC 2474 class selector codepoints (DCSP)
|
||||
IPTOS_CLASS_CS0=0x00
|
||||
IPTOS_CLASS_CS1=0x20
|
||||
IPTOS_CLASS_CS2=0x40
|
||||
IPTOS_CLASS_CS3=0x60
|
||||
IPTOS_CLASS_CS4=0x80
|
||||
IPTOS_CLASS_CS5=0xa0
|
||||
IPTOS_CLASS_CS6=0xc0
|
||||
IPTOS_CLASS_CS7=0xe0
|
||||
|
||||
# protocol numbers
|
||||
IPPROTO_TCP=6
|
||||
IPPROTO_UDP=17
|
||||
|
||||
# relative guaranteed bandwidth percentages for traffic classes
|
||||
# in case of over-percentage, bandwidth is divided based on bandwidth ratios
|
||||
BW_PCT_DEFAULT=90
|
||||
BW_PCT_HIPRIO=10
|
||||
|
||||
# ceiling percentages for traffic classes
|
||||
CEIL_PCT_DEFAULT=100
|
||||
CEIL_PCT_HIPRIO=20
|
||||
|
||||
# class priority for traffic classes (lower = higher priority)
|
||||
CLASS_PRIORITY_DEFAULT=4
|
||||
CLASS_PRIORITY_HIPRIO=${DEFAULT_FILTER_PRIORITY}
|
||||
|
||||
# arbitrary flow id for traffic classes
|
||||
FLOWID_DEFAULT=40
|
||||
FLOWID_HIPRIO=10
|
||||
|
||||
# Network types to apply traffic controls on
|
||||
VALID_NETWORKTYPES=("mgmt")
|
||||
|
||||
function log {
|
||||
echo `date +%FT%T.%3N` `hostname` TC_SETUP: $@ >> ${LOG_FILE}
|
||||
}
|
||||
|
||||
## sanity check the given speed
|
||||
function test_valid_speed {
|
||||
# After the link is enabled but before the autonegotiation is complete
|
||||
# the link speed may be read as either -1 or as 4294967295 (which is
|
||||
# uint(-1) in twos-complement) depending on the kernel. Neither one is
|
||||
# valid.
|
||||
if (( $1 > 0 )) && (( $1 != 4294967295 ))
|
||||
then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
## determine the negotiated speed for a given interface
|
||||
function get_dev_speed {
|
||||
# If the link doesn't come up we won't go enabled, so here we can
|
||||
# afford to wait forever for the link.
|
||||
while true; do
|
||||
if [ -e /sys/class/net/$1/bonding ]; then
|
||||
for VAL in `cat /sys/class/net/$1/lower_*/speed`; do
|
||||
if test_valid_speed ${VAL}; then
|
||||
log slave for bond link $1 reported speed ${VAL}
|
||||
echo ${VAL}
|
||||
return 0
|
||||
else
|
||||
log slave for bond link $1 reported invalid speed ${VAL}
|
||||
fi
|
||||
done
|
||||
log all slaves for bond link $1 reported invalid speeds, \
|
||||
will sleep 30 sec and try again
|
||||
else
|
||||
VAL=`cat /sys/class/net/$1/speed`
|
||||
if test_valid_speed ${VAL}; then
|
||||
log link $1 reported speed ${VAL}
|
||||
echo ${VAL}
|
||||
return 0
|
||||
else
|
||||
log link $1 returned invalid speed ${VAL}, \
|
||||
will sleep 30 sec and try again
|
||||
fi
|
||||
fi
|
||||
sleep 30
|
||||
done
|
||||
}
|
||||
|
||||
## Determines the maximum speed (Mbps) that should be used in traffic control
|
||||
## rate / ceiling calculations
|
||||
function get_speed {
|
||||
local dev=$1
|
||||
local networktype=$2
|
||||
local net_speed=${NETWORKSPEED}
|
||||
local dev_speed=$(get_dev_speed ${dev})
|
||||
local speed=${dev_speed}
|
||||
|
||||
if [ ${net_speed} != ${dev_speed} ]; then
|
||||
log WARNING: ${dev} has a different operational speed [${dev_speed}] \
|
||||
than configured speed [${net_speed}] for network type ${networktype}
|
||||
if test_valid_speed ${net_speed}; then
|
||||
# Use greater of configured net speed / recorded dev speed
|
||||
if [ ${net_speed} -gt ${dev_speed} ]; then
|
||||
speed=${net_speed}
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
log using speed ${speed} for tc filtering on ${dev}
|
||||
echo ${speed}
|
||||
}
|
||||
|
||||
## Determines whether a device is a loopback interface
|
||||
function is_loopback {
|
||||
local DEVICE=$1
|
||||
|
||||
# (from include/uapi/linux/if.h)
|
||||
# IFF_LOOPBACK = 1<<3 = 8. Using a left shifted syntax can confuse bashate.
|
||||
IFF_LOOPBACK=8
|
||||
|
||||
# get the interface flags
|
||||
FLAGS=`cat /sys/class/net/${DEVICE}/flags`
|
||||
|
||||
if (((${IFF_LOOPBACK} & ${FLAGS}) == 0))
|
||||
then
|
||||
return 1
|
||||
else
|
||||
return 0
|
||||
fi
|
||||
}
|
||||
|
||||
## Determines whether the network type requires traffic controls
|
||||
function is_valid_networktype {
|
||||
local NETTYPE=$1
|
||||
|
||||
for nt in ${VALID_NETWORKTYPES}; do
|
||||
if [ "${NETTYPE}" == ${nt} ]; then
|
||||
return 0
|
||||
fi
|
||||
done
|
||||
return 1
|
||||
}
|
||||
|
||||
## Determines whether the given device is a vlan interface
|
||||
function is_vlan {
|
||||
local DEVICE=$1
|
||||
if [ -f /proc/net/vlan/${DEVICE} ]; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
## Delete existing classes, qdiscs, and filters
|
||||
function delete_tcs {
|
||||
local DEVICE=$1
|
||||
|
||||
# Deleting the root qdisc will also delete all underlying
|
||||
# classes, qdiscs and filters
|
||||
tc qdisc del dev ${DEVICE} root > /dev/null 2>&1
|
||||
}
|
||||
|
||||
## Create the root qdisc and class
|
||||
function setup_root_tc {
|
||||
local DEVICE=$1
|
||||
local RATE=$2
|
||||
|
||||
local QDISC_TYPE="htb"
|
||||
local CLASS_TYPE="htb"
|
||||
|
||||
tc qdisc add dev ${DEVICE} root handle ${ROOT_QDISC_ID} ${QDISC_TYPE} \
|
||||
default ${FLOWID_DEFAULT}
|
||||
|
||||
tc class add dev ${DEVICE} parent ${ROOT_QDISC_ID} \
|
||||
classid ${ROOT_CLASS_ID} \
|
||||
${CLASS_TYPE} \
|
||||
rate ${RATE}mbit \
|
||||
burst ${DEFAULT_HTB_BURST} \
|
||||
quantum ${DEFAULT_HTB_QUANTUM}
|
||||
}
|
||||
|
||||
## Create classes and qdiscs for default (unfiltered) traffic
|
||||
function setup_default_tc {
|
||||
local DEVICE=$1
|
||||
local MAXSPEED=$2
|
||||
local MIN_BW_PCT=$3
|
||||
local MAX_BW_PCT=$4
|
||||
local RATE=$((${MIN_BW_PCT}*${MAXSPEED}/100))
|
||||
local CEIL=$((${MAX_BW_PCT}*${MAXSPEED}/100))
|
||||
local CLASS_TYPE="htb"
|
||||
local QDISC_TYPE="sfq"
|
||||
local QDISC_ID="${FLOWID_DEFAULT}:"
|
||||
|
||||
# associate the objects with the root qdisc/class
|
||||
local CLASS_ID=${ROOT_HANDLE_MAJOR}:${FLOWID_DEFAULT}
|
||||
local QDISC_PARENT=${ROOT_CLASS_ID}
|
||||
|
||||
tc class add dev ${DEVICE} parent ${QDISC_PARENT} classid ${CLASS_ID} \
|
||||
${CLASS_TYPE} \
|
||||
rate ${RATE}mbit \
|
||||
burst ${DEFAULT_HTB_BURST} \
|
||||
ceil ${CEIL}mbit \
|
||||
prio ${CLASS_PRIORITY_DEFAULT} \
|
||||
quantum ${DEFAULT_HTB_QUANTUM}
|
||||
|
||||
tc qdisc add dev ${DEVICE} parent ${CLASS_ID} handle ${QDISC_ID} \
|
||||
${QDISC_TYPE} \
|
||||
perturb ${DEFAULT_SFQ_PERTUBATION}
|
||||
}
|
||||
|
||||
## Get the match parameters to filter on TOS/DSCP
|
||||
function get_tc_tos_match {
|
||||
local IP_VERSION=$1
|
||||
local TOS=$2
|
||||
|
||||
# 6 bits DSCP
|
||||
local TOSMASK=0xfc
|
||||
|
||||
if [ ${IP_VERSION} == 6 ]; then
|
||||
L3PROTO="ip6"
|
||||
TOS_FIELD="priority"
|
||||
else
|
||||
L3PROTO="ip"
|
||||
TOS_FIELD="tos"
|
||||
fi
|
||||
|
||||
echo "match ${L3PROTO} ${TOS_FIELD} ${TOS} ${TOSMASK}"
|
||||
}
|
||||
|
||||
## Get the match parameters to filter on a L4 protocol
|
||||
function get_tc_l4_protocol_match {
|
||||
local IP_VERSION=$1
|
||||
local L4PROTOCOL=$2
|
||||
|
||||
# 8 bits protocol
|
||||
local PROTOCOLMASK=0xff
|
||||
|
||||
if [ ${IP_VERSION} == 6 ]; then
|
||||
L3PROTO="ip6"
|
||||
else
|
||||
L3PROTO="ip"
|
||||
fi
|
||||
|
||||
echo "match ${L3PROTO} protocol ${L4PROTOCOL} ${PROTOCOLMASK}"
|
||||
}
|
||||
|
||||
## Get the match parameters to filter on a port range
|
||||
function get_tc_port_match {
|
||||
local IP_VERSION=$1
|
||||
local PORT=$2
|
||||
local PORTMASK=$3
|
||||
local TYPE=${4:-"src"}
|
||||
|
||||
if [ ${TYPE} == "src" ]; then
|
||||
TYPE="sport"
|
||||
else
|
||||
TYPE="dport"
|
||||
fi
|
||||
|
||||
if [ ${IP_VERSION} == 6 ]; then
|
||||
L3PROTO="ip6"
|
||||
else
|
||||
L3PROTO="ip"
|
||||
fi
|
||||
|
||||
echo "match ${L3PROTO} ${TYPE} ${PORT} ${PORTMASK}"
|
||||
}
|
||||
|
||||
## Get the l2/l3 protocol
|
||||
function get_tc_protocol {
|
||||
local IP_VERSION=$1
|
||||
local ETHERTYPE=$2
|
||||
local PROTOCOL=${ETHERTYPE}
|
||||
|
||||
if [ -z ${PROTOCOL} ]; then
|
||||
# If the ethertype was not explicitly specified, infer it
|
||||
# from the IP version
|
||||
if [ ${IP_VERSION} == 6 ]; then
|
||||
PROTOCOL="ipv6"
|
||||
else
|
||||
PROTOCOL="ip"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "${PROTOCOL}"
|
||||
}
|
||||
|
||||
## Create a filter to deliver system maintenance heartbeats to the high
|
||||
## priority class
|
||||
function setup_tc_sm_filter {
|
||||
local DEVICE=$1
|
||||
local FLOWID=$2
|
||||
local ETHERTYPE=$3
|
||||
local PRIORITY=${DEFAULT_FILTER_PRIORITY}
|
||||
|
||||
# Setup filters for both IPv4 and IPv6
|
||||
local IP_VERSIONS=(4 6)
|
||||
|
||||
# SM uses UDP over ports 2222-2223 with a TOS of 12
|
||||
local SM_PORT=2222
|
||||
local SM_PORTMASK=0xfffe
|
||||
local SM_PORTTYPE="dst"
|
||||
local SM_TOS=${IPTOS_CLASS_CS6}
|
||||
local SM_PROTO=${IPPROTO_UDP}
|
||||
|
||||
# specifies attaching the filter to the root qdisc
|
||||
local QDISC_ID=${ROOT_HANDLE_MAJOR}:${QDISC_HANDLE_MINOR}
|
||||
|
||||
for idx in "${!IP_VERSIONS[@]}"; do
|
||||
IP_VERSION=${IP_VERSIONS[$idx]}
|
||||
|
||||
local PROTOCOL=$(get_tc_protocol ${IP_VERSION} ${ETHERTYPE})
|
||||
local TOS_MATCH=$(get_tc_tos_match ${IP_VERSION} ${SM_TOS})
|
||||
local PROTO_MATCH=$(get_tc_l4_protocol_match ${IP_VERSION} ${SM_PROTO})
|
||||
local PORT_MATCH=$(get_tc_port_match \
|
||||
${IP_VERSION} ${SM_PORT} ${SM_PORTMASK} ${SM_PORTTYPE})
|
||||
local MATCH_PARAMS="${TOS_MATCH} ${PROTO_MATCH} ${PORT_MATCH}"
|
||||
|
||||
tc filter add dev ${DEVICE} protocol ${PROTOCOL} parent ${QDISC_ID} \
|
||||
prio ${PRIORITY} u32 ${MATCH_PARAMS} flowid ${FLOWID}
|
||||
|
||||
PRIORITY=$(($PRIORITY+1))
|
||||
done
|
||||
}
|
||||
|
||||
function setup_tc_port_filter {
|
||||
local DEVICE=$1
|
||||
local FLOWID=$2
|
||||
local ETHERTYPE=$3
|
||||
local PORT=$4
|
||||
local PORTMASK=$5
|
||||
local L4PROTOCOL=$6
|
||||
local PRIORITY=${DEFAULT_FILTER_PRIORITY}
|
||||
|
||||
# Setup filters for both IPv4 and IPv6
|
||||
local IP_VERSIONS=(4 6)
|
||||
|
||||
# Setup filters for both sport and dport
|
||||
local PORT_TYPES=("src" "dst")
|
||||
|
||||
# specifies attaching the filter to the root qdisc
|
||||
local QDISC_ID=${ROOT_HANDLE_MAJOR}:${QDISC_HANDLE_MINOR}
|
||||
|
||||
for i in "${!IP_VERSIONS[@]}"; do
|
||||
local IP_VERSION=${IP_VERSIONS[$i]}
|
||||
local PROTOCOL=$(get_tc_protocol ${IP_VERSION} ${ETHERTYPE})
|
||||
|
||||
for j in "${!PORT_TYPES[@]}"; do
|
||||
|
||||
local PORT_MATCH=$(get_tc_port_match \
|
||||
${IP_VERSION} ${PORT} ${PORTMASK} ${PORT_TYPES[$j]})
|
||||
|
||||
if [ -z $L4PROTOCOL ]; then
|
||||
# Apply to TCP and UDP
|
||||
tc filter add dev ${DEVICE} protocol ${PROTOCOL} \
|
||||
parent ${QDISC_ID} prio ${PRIORITY} u32 ${PORT_MATCH} \
|
||||
flowid ${FLOWID}
|
||||
else
|
||||
# Apply to specific protocol only
|
||||
local PROTO_MATCH=$(get_tc_l4_protocol_match \
|
||||
${IP_VERSION} ${L4PROTOCOL})
|
||||
tc filter add dev ${DEVICE} protocol ${PROTOCOL} \
|
||||
parent ${QDISC_ID} prio ${PRIORITY} u32 ${PROTO_MATCH} \
|
||||
${PORT_MATCH} flowid ${FLOWID}
|
||||
fi
|
||||
done
|
||||
PRIORITY=$(($PRIORITY+1))
|
||||
done
|
||||
}
|
||||
|
||||
## Create classes, qdiscs, and filters for high priority traffic
|
||||
function setup_hiprio_tc {
|
||||
local DEVICE=$1
|
||||
local MAXSPEED=$2
|
||||
local MIN_BW_PCT=$3
|
||||
local MAX_BW_PCT=$4
|
||||
local ETHERTYPE=$5
|
||||
local MATCH_PROTOCOL=$6
|
||||
local RATE=$((${MIN_BW_PCT}*${MAXSPEED}/100))
|
||||
local CEIL=$((${MAX_BW_PCT}*${MAXSPEED}/100))
|
||||
local CLASS_TYPE="htb"
|
||||
local QDISC_ID="${FLOWID_HIPRIO}:"
|
||||
local QDISC_TYPE="sfq"
|
||||
|
||||
# associate the objects with the root qdisc/class
|
||||
local CLASS_ID=${ROOT_HANDLE_MAJOR}:${FLOWID_HIPRIO}
|
||||
local QDISC_PARENT=${ROOT_CLASS_ID}
|
||||
local FLOWID=${CLASS_ID}
|
||||
|
||||
tc class add dev ${DEVICE} parent ${QDISC_PARENT} classid ${CLASS_ID} \
|
||||
${CLASS_TYPE} \
|
||||
rate ${RATE}mbit \
|
||||
burst ${DEFAULT_HTB_BURST} \
|
||||
ceil ${CEIL}mbit \
|
||||
prio ${CLASS_PRIORITY_HIPRIO} \
|
||||
quantum ${DEFAULT_HTB_QUANTUM}
|
||||
|
||||
tc qdisc add dev ${DEVICE} parent ${CLASS_ID} handle ${QDISC_ID} \
|
||||
${QDISC_TYPE} \
|
||||
perturb ${DEFAULT_SFQ_PERTUBATION}
|
||||
|
||||
# Treat system maintenance heartbeats as high priority traffic
|
||||
setup_tc_sm_filter "${DEVICE}" "${FLOWID}" "${ETHERTYPE}"
|
||||
}
|
||||
|
||||
if ! is_valid_networktype $NETWORKTYPE; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if is_loopback $DEV; then
|
||||
# Don't setup traffic classes for a loopback device (ie simplex system)
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# We want to be able to wait some time (typically <10 sec) for the
|
||||
# network link to autonegotiate link speed. Re-run the script in
|
||||
# the background so the parent can return right away and init can
|
||||
# continue.
|
||||
if [ $# -eq 3 ]; then
|
||||
$0 ${DEV} ${NETWORKTYPE} ${NETWORKSPEED} dummy &
|
||||
disown
|
||||
exit 0
|
||||
fi
|
||||
|
||||
log running tc setup script for ${DEV} ${NETWORKTYPE} in background
|
||||
|
||||
if [ -f /etc/platform/platform.conf ]; then
|
||||
source /etc/platform/platform.conf
|
||||
fi
|
||||
|
||||
SPEED=$(get_speed ${DEV} ${NETWORKTYPE})
|
||||
|
||||
delete_tcs ${DEV}
|
||||
setup_root_tc ${DEV} ${SPEED}
|
||||
setup_default_tc ${DEV} ${SPEED} ${BW_PCT_DEFAULT} ${CEIL_PCT_DEFAULT}
|
||||
setup_hiprio_tc ${DEV} ${SPEED} ${BW_PCT_HIPRIO} ${CEIL_PCT_HIPRIO}
|
||||
|
||||
if is_vlan ${DEV}; then
|
||||
if [ -e /sys/class/net/${DEV}/lower_* ]; then
|
||||
for LOWER in `basename $(readlink /sys/class/net/${DEV}/lower_*)`; do
|
||||
# In the case of a vlan interface, reserve bandwidth for high
|
||||
# priority traffic on the underlying interface.
|
||||
delete_tcs ${LOWER}
|
||||
setup_root_tc ${LOWER} ${SPEED}
|
||||
setup_default_tc ${LOWER} ${SPEED} $((100-${BW_PCT_HIPRIO})) 100
|
||||
setup_hiprio_tc ${LOWER} ${SPEED} ${BW_PCT_HIPRIO} \
|
||||
${CEIL_PCT_HIPRIO} "802.1q"
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user