Add staged kubernetes version 1.20.9
Multiple versions of kubernetes are required to support upgrade. This adds staged version of kubernetes 1.20.9, built with a specific version of golang. All subpackage versions are included in the iso image without collisions. The following patches are ported to specific kubernetes version: kubelet-cpumanager-disable-CFS-quota-throttling-for-.patch kubelet-cpumanager-keep-normal-containers-off-reserv.patch kubelet-cpumanager-infrastructure-pods-use-system-re.patch kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch kubeadm-create-platform-pods-with-zero-CPU-resources.patch enable-support-for-kubernetes-to-ignore-isolcpus.patch Story: 2008972 Task: 43056 Signed-off-by: Jim Gauld <james.gauld@windriver.com> Change-Id: Ie19612f1980690be073ab2236afbb9ccefe504e5
This commit is contained in:
parent
c4898b0bd9
commit
94517398e4
@ -162,6 +162,9 @@ kubernetes-1.18.1-client
|
|||||||
kubernetes-1.19.13-node
|
kubernetes-1.19.13-node
|
||||||
kubernetes-1.19.13-kubeadm
|
kubernetes-1.19.13-kubeadm
|
||||||
kubernetes-1.19.13-client
|
kubernetes-1.19.13-client
|
||||||
|
kubernetes-1.20.9-node
|
||||||
|
kubernetes-1.20.9-kubeadm
|
||||||
|
kubernetes-1.20.9-client
|
||||||
containerd
|
containerd
|
||||||
k8s-pod-recovery
|
k8s-pod-recovery
|
||||||
containernetworking-plugins
|
containernetworking-plugins
|
||||||
|
@ -56,6 +56,7 @@ kubernetes/containerd
|
|||||||
kubernetes/cni/plugins
|
kubernetes/cni/plugins
|
||||||
kubernetes/kubernetes-1.18.1
|
kubernetes/kubernetes-1.18.1
|
||||||
kubernetes/kubernetes-1.19.13
|
kubernetes/kubernetes-1.19.13
|
||||||
|
kubernetes/kubernetes-1.20.9
|
||||||
kubernetes/kubernetes-unversioned
|
kubernetes/kubernetes-unversioned
|
||||||
kubernetes/docker-distribution
|
kubernetes/docker-distribution
|
||||||
kubernetes/etcd
|
kubernetes/etcd
|
||||||
|
@ -33,6 +33,7 @@ keycodemapdb-16e5b07.tar.gz#keycodemapdb#https://github.com/CendioOssman/keycode
|
|||||||
kubernetes-contrib-v1.18.1.tar.gz#kubernetes-contrib-1.18.1#https://github.com/kubernetes-retired/contrib/tarball/89f6948e24578fed2a90a87871b2263729f90ac3#http##
|
kubernetes-contrib-v1.18.1.tar.gz#kubernetes-contrib-1.18.1#https://github.com/kubernetes-retired/contrib/tarball/89f6948e24578fed2a90a87871b2263729f90ac3#http##
|
||||||
kubernetes-v1.18.1.tar.gz#kubernetes-1.18.1#https://github.com/kubernetes/kubernetes/archive/7879fc12a63337efff607952a323df90cdc7a335.tar.gz#http##
|
kubernetes-v1.18.1.tar.gz#kubernetes-1.18.1#https://github.com/kubernetes/kubernetes/archive/7879fc12a63337efff607952a323df90cdc7a335.tar.gz#http##
|
||||||
kubernetes-v1.19.13.tar.gz#kubernetes-1.19.13#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.19.13.tar.gz#http##
|
kubernetes-v1.19.13.tar.gz#kubernetes-1.19.13#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.19.13.tar.gz#http##
|
||||||
|
kubernetes-v1.20.9.tar.gz#kubernetes-1.20.9#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.20.9.tar.gz#http##
|
||||||
kvm-unit-tests.git-4ea7633.tar.bz2#kvm-unit-tests#https://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git/snapshot/kvm-unit-tests-20171020.tar.gz#http##
|
kvm-unit-tests.git-4ea7633.tar.bz2#kvm-unit-tests#https://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git/snapshot/kvm-unit-tests-20171020.tar.gz#http##
|
||||||
ldapscripts-2.0.8.tgz#ldapscripts-2.0.8#https://sourceforge.net/projects/ldapscripts/files/ldapscripts/ldapscripts-2.0.8/ldapscripts-2.0.8.tgz/download#http##
|
ldapscripts-2.0.8.tgz#ldapscripts-2.0.8#https://sourceforge.net/projects/ldapscripts/files/ldapscripts/ldapscripts-2.0.8/ldapscripts-2.0.8.tgz/download#http##
|
||||||
libtpms-0.6.0-4f0d59d.tar.gz#libtpms-0.6.0#https://github.com/stefanberger/libtpms/tarball/c421ca0f4d00c0caceeda8d62c1efb2b7e47db04#http##
|
libtpms-0.6.0-4f0d59d.tar.gz#libtpms-0.6.0#https://github.com/stefanberger/libtpms/tarball/c421ca0f4d00c0caceeda8d62c1efb2b7e47db04#http##
|
||||||
|
9
kubernetes/kubernetes-1.20.9/centos/Readme.rst
Normal file
9
kubernetes/kubernetes-1.20.9/centos/Readme.rst
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
The spec file used here was from the kubernetes 1.10.0 src rpm.
|
||||||
|
The orig file is included to help show modifications made to that
|
||||||
|
spec file, to help understand which changes were needed and to
|
||||||
|
assist with future upversioning.
|
||||||
|
|
||||||
|
The contrib tarball does not have the same versioning as kubernetes and
|
||||||
|
there is little activity in that repo.
|
||||||
|
|
||||||
|
The version for the contrib tarball is arbitrary.
|
5
kubernetes/kubernetes-1.20.9/centos/build_srpm.data
Normal file
5
kubernetes/kubernetes-1.20.9/centos/build_srpm.data
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
VERSION=1.20.9
|
||||||
|
TAR_NAME=kubernetes
|
||||||
|
TAR="$TAR_NAME-v$VERSION.tar.gz"
|
||||||
|
COPY_LIST="${CGCS_BASE}/downloads/$TAR $FILES_BASE/*"
|
||||||
|
TIS_PATCH_VER=PKG_GITREVCOUNT
|
@ -0,0 +1,79 @@
|
|||||||
|
From 80fc45845ac260819108a6a6dabb9da7c0fd111f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chris Friesen <chris.friesen@windriver.com>
|
||||||
|
Date: Fri, 23 Oct 2020 17:46:10 -0600
|
||||||
|
Subject: [PATCH 6/6] enable support for kubernetes to ignore isolcpus
|
||||||
|
|
||||||
|
The normal mechanisms for allocating isolated CPUs do not allow
|
||||||
|
a mix of isolated and exclusive CPUs in the same container. In
|
||||||
|
order to allow this in *very* limited cases where the pod spec
|
||||||
|
is known in advance we will add the ability to disable the normal
|
||||||
|
isolcpus behaviour.
|
||||||
|
|
||||||
|
If the file "/etc/kubernetes/ignore_isolcpus" exists, then kubelet
|
||||||
|
will basically forget everything it knows about isolcpus and just
|
||||||
|
treat them like regular CPUs.
|
||||||
|
|
||||||
|
The admin user can then rely on the fact that CPU allocation is
|
||||||
|
deterministic to ensure that the isolcpus they configure end up being
|
||||||
|
allocated to the correct pods.
|
||||||
|
---
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager.go | 9 +++++++++
|
||||||
|
pkg/kubelet/cm/cpumanager/policy_static.go | 8 ++++++++
|
||||||
|
2 files changed, 17 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
index 8470431c07c..fd0bdeeee07 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
@@ -19,6 +19,7 @@ package cpumanager
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
+ "os"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
"strings"
|
||||||
|
@@ -56,6 +57,14 @@ const cpuManagerStateFileName = "cpu_manager_state"
|
||||||
|
|
||||||
|
// get the system-level isolated CPUs
|
||||||
|
func getIsolcpus() cpuset.CPUSet {
|
||||||
|
+
|
||||||
|
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||||
|
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||||
|
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||||
|
+ klog.Infof("[cpumanager] turning off isolcpus awareness")
|
||||||
|
+ return cpuset.NewCPUSet()
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
index 4acd5609748..78c5f0f2576 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
@@ -18,6 +18,7 @@ package cpumanager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
+ "os"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
v1 "k8s.io/api/core/v1"
|
||||||
|
@@ -614,6 +615,13 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||||
|
|
||||||
|
// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||||
|
func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||||
|
+
|
||||||
|
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||||
|
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||||
|
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||||
|
+ return cpuset.NewCPUSet()
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
// NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||||
|
// not create UID. We also need a way to properly stub devicemanager.
|
||||||
|
if len(string(pod.UID)) == 0 {
|
||||||
|
--
|
||||||
|
2.17.1
|
||||||
|
|
84
kubernetes/kubernetes-1.20.9/centos/files/genmanpages.sh
Normal file
84
kubernetes/kubernetes-1.20.9/centos/files/genmanpages.sh
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
MDSFORMANPAGES="kube-apiserver.md kube-controller-manager.md kube-proxy.md kube-scheduler.md kubelet.md"
|
||||||
|
|
||||||
|
# remove comments from man pages
|
||||||
|
for manpage in ${MDSFORMANPAGES}; do
    # Line number of the marker that closes the auto-generated warning banner;
    # empty when the page does not contain the marker.
    pos=$(grep -n "<\!-- END MUNGE: UNVERSIONED_WARNING -->" "${manpage}" | cut -d':' -f1)
    # ${pos} must be quoted: unquoted, an empty value collapses the test to
    # `[ -n ]`, which is always true, and sed would then be handed the
    # invalid address range "1,".
    if [ -n "${pos}" ]; then
        # Delete everything from the first line through the marker line.
        sed -i "1,${pos}{/.*/d}" "${manpage}"
    fi
done
|
||||||
|
|
||||||
|
# for each man page add NAME and SYNOPSIS section
#
# Every stanza below does the same two things for one component:
#   1. sed rewrites the page's "## <component>" heading in place into a
#      "# NAME" section (component name plus one-line summary) followed by a
#      "# SYNOPSIS" section.
#   2. The heredoc appends an "# EXAMPLES" section to the end of the page.
#      The delimiter is quoted ('EOF'), so the heredoc text is appended
#      verbatim with no shell expansion.

# kube-apiserver
sed -i -s "s/## kube-apiserver/# NAME\nkube-apiserver \- Provides the API for kubernetes orchestration.\n\n# SYNOPSIS\n**kube-apiserver** [OPTIONS]\n/" kube-apiserver.md

cat << 'EOF' >> kube-apiserver.md
# EXAMPLES
```
/usr/bin/kube-apiserver --logtostderr=true --v=0 --etcd_servers=http://127.0.0.1:4001 --insecure_bind_address=127.0.0.1 --insecure_port=8080 --kubelet_port=10250 --service-cluster-ip-range=10.1.1.0/24 --allow_privileged=false
```
EOF
# kube-controller-manager
sed -i -s "s/## kube-controller-manager/# NAME\nkube-controller-manager \- Enforces kubernetes services.\n\n# SYNOPSIS\n**kube-controller-manager** [OPTIONS]\n/" kube-controller-manager.md

cat << 'EOF' >> kube-controller-manager.md
# EXAMPLES
```
/usr/bin/kube-controller-manager --logtostderr=true --v=0 --master=127.0.0.1:8080
```
EOF
# kube-proxy
sed -i -s "s/## kube-proxy/# NAME\nkube-proxy \- Provides network proxy services.\n\n# SYNOPSIS\n**kube-proxy** [OPTIONS]\n/" kube-proxy.md

cat << 'EOF' >> kube-proxy.md
# EXAMPLES
```
/usr/bin/kube-proxy --logtostderr=true --v=0 --master=http://127.0.0.1:8080
```
EOF
# kube-scheduler
sed -i -s "s/## kube-scheduler/# NAME\nkube-scheduler \- Schedules containers on hosts.\n\n# SYNOPSIS\n**kube-scheduler** [OPTIONS]\n/" kube-scheduler.md

cat << 'EOF' >> kube-scheduler.md
# EXAMPLES
```
/usr/bin/kube-scheduler --logtostderr=true --v=0 --master=127.0.0.1:8080
```
EOF
# kubelet
sed -i -s "s/## kubelet/# NAME\nkubelet \- Processes a container manifest so the containers are launched according to how they are described.\n\n# SYNOPSIS\n**kubelet** [OPTIONS]\n/" kubelet.md

cat << 'EOF' >> kubelet.md
# EXAMPLES
```
/usr/bin/kubelet --logtostderr=true --v=0 --api_servers=http://127.0.0.1:8080 --address=127.0.0.1 --port=10250 --hostname_override=127.0.0.1 --allow-privileged=false
```
EOF
|
||||||
|
|
||||||
|
# for all man-pages
|
||||||
|
for page in $MDSFORMANPAGES; do
    # Map the generated markdown headings onto the conventional man sections.
    sed -i -s "s/### Synopsis/# DESCRIPTION/" "$page"
    sed -i -s "s/### Options/# OPTIONS/" "$page"

    # Insert the man page title line ahead of the NAME section.
    sed -i "s/# NAME/% KUBERNETES(1) kubernetes User Manuals\n# NAME/" "$page"

    # Reformat the option list: each "<flag>: <description>" line becomes a
    # bold flag followed by a tab-indented description on the next line.
    # The passes are kept as separate sed runs because each one inserts
    # newlines that the following pass sees as distinct lines.
    #   long options without a quoted default value
    sed -i -r 's/(^ )(-[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' "$page"
    #   long options with a quoted default value
    sed -i -r 's/(^ )(-[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' "$page"
    #   "-s, --long" style options, without and with a quoted default value
    sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' "$page"
    sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' "$page"

    # Strip markdown code fences.
    sed -i 's/```//' "$page"
    # Blank out every line that starts with ######.
    sed -i 's/^######.*//' "$page"
    # Blank out the analytics badge in the footer.
    sed -i -r "s/^\[!\[Analytics\].*//" "$page"

    # The markdown carries no section number, so every page goes to section 1.
    stem="${page%.md}"
    go-md2man -in "$page" -out "man/man1/$stem.1"
done
|
||||||
|
|
||||||
|
|
@ -0,0 +1,108 @@
|
|||||||
|
From 62575aa6d34c52dffb02535a526f6361cdedb300 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chris Friesen <chris.friesen@windriver.com>
|
||||||
|
Date: Fri, 3 Sep 2021 18:05:15 -0400
|
||||||
|
Subject: [PATCH 5/6] kubeadm: create platform pods with zero CPU resources
|
||||||
|
|
||||||
|
We want to specify zero CPU resources when creating the manifests
|
||||||
|
for the static platform pods, as a workaround for the lack of
|
||||||
|
separate resource tracking for platform resources.
|
||||||
|
|
||||||
|
We also specify zero CPU resources for the coredns deployment.
|
||||||
|
manifests.go appears to be the main file for this, not sure if the
|
||||||
|
others are used but I changed them just in case.
|
||||||
|
|
||||||
|
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
---
|
||||||
|
cluster/addons/dns/coredns/coredns.yaml.base | 2 +-
|
||||||
|
cluster/addons/dns/coredns/coredns.yaml.in | 2 +-
|
||||||
|
cluster/addons/dns/coredns/coredns.yaml.sed | 2 +-
|
||||||
|
cmd/kubeadm/app/phases/addons/dns/manifests.go | 2 +-
|
||||||
|
cmd/kubeadm/app/phases/controlplane/manifests.go | 6 +++---
|
||||||
|
5 files changed, 7 insertions(+), 7 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/cluster/addons/dns/coredns/coredns.yaml.base b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||||
|
index 460db6317db..30873a81f18 100644
|
||||||
|
--- a/cluster/addons/dns/coredns/coredns.yaml.base
|
||||||
|
+++ b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||||
|
@@ -138,7 +138,7 @@ spec:
|
||||||
|
limits:
|
||||||
|
memory: __DNS__MEMORY__LIMIT__
|
||||||
|
requests:
|
||||||
|
- cpu: 100m
|
||||||
|
+ cpu: 0
|
||||||
|
memory: 70Mi
|
||||||
|
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||||
|
volumeMounts:
|
||||||
|
diff --git a/cluster/addons/dns/coredns/coredns.yaml.in b/cluster/addons/dns/coredns/coredns.yaml.in
|
||||||
|
index 35fd52f15cd..51d963282ea 100644
|
||||||
|
--- a/cluster/addons/dns/coredns/coredns.yaml.in
|
||||||
|
+++ b/cluster/addons/dns/coredns/coredns.yaml.in
|
||||||
|
@@ -138,7 +138,7 @@ spec:
|
||||||
|
limits:
|
||||||
|
memory: 'dns_memory_limit'
|
||||||
|
requests:
|
||||||
|
- cpu: 100m
|
||||||
|
+ cpu: 0
|
||||||
|
memory: 70Mi
|
||||||
|
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||||
|
volumeMounts:
|
||||||
|
diff --git a/cluster/addons/dns/coredns/coredns.yaml.sed b/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||||
|
index ebe0c7182e8..dab87353509 100644
|
||||||
|
--- a/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||||
|
+++ b/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||||
|
@@ -138,7 +138,7 @@ spec:
|
||||||
|
limits:
|
||||||
|
memory: $DNS_MEMORY_LIMIT
|
||||||
|
requests:
|
||||||
|
- cpu: 100m
|
||||||
|
+ cpu: 0
|
||||||
|
memory: 70Mi
|
||||||
|
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||||
|
volumeMounts:
|
||||||
|
diff --git a/cmd/kubeadm/app/phases/addons/dns/manifests.go b/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||||
|
index 014cbd773c2..18ce45d1e85 100644
|
||||||
|
--- a/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||||
|
+++ b/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||||
|
@@ -254,7 +254,7 @@ spec:
|
||||||
|
limits:
|
||||||
|
memory: 170Mi
|
||||||
|
requests:
|
||||||
|
- cpu: 100m
|
||||||
|
+ cpu: 0
|
||||||
|
memory: 70Mi
|
||||||
|
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||||
|
volumeMounts:
|
||||||
|
diff --git a/cmd/kubeadm/app/phases/controlplane/manifests.go b/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||||
|
index 8181bea63a4..4c4b4448dd4 100644
|
||||||
|
--- a/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||||
|
+++ b/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||||
|
@@ -60,7 +60,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||||
|
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS),
|
||||||
|
ReadinessProbe: staticpodutil.ReadinessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/readyz", int(endpoint.BindPort), v1.URISchemeHTTPS),
|
||||||
|
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||||
|
- Resources: staticpodutil.ComponentResources("250m"),
|
||||||
|
+ Resources: staticpodutil.ComponentResources("0"),
|
||||||
|
Env: kubeadmutil.GetProxyEnvVars(),
|
||||||
|
}, mounts.GetVolumes(kubeadmconstants.KubeAPIServer),
|
||||||
|
map[string]string{kubeadmconstants.KubeAPIServerAdvertiseAddressEndpointAnnotationKey: endpoint.String()}),
|
||||||
|
@@ -72,7 +72,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||||
|
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)),
|
||||||
|
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS),
|
||||||
|
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||||
|
- Resources: staticpodutil.ComponentResources("200m"),
|
||||||
|
+ Resources: staticpodutil.ComponentResources("0"),
|
||||||
|
Env: kubeadmutil.GetProxyEnvVars(),
|
||||||
|
}, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil),
|
||||||
|
kubeadmconstants.KubeScheduler: staticpodutil.ComponentPod(v1.Container{
|
||||||
|
@@ -83,7 +83,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||||
|
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)),
|
||||||
|
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS),
|
||||||
|
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||||
|
- Resources: staticpodutil.ComponentResources("100m"),
|
||||||
|
+ Resources: staticpodutil.ComponentResources("0"),
|
||||||
|
Env: kubeadmutil.GetProxyEnvVars(),
|
||||||
|
}, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil),
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.17.1
|
||||||
|
|
17
kubernetes/kubernetes-1.20.9/centos/files/kubeadm.conf
Normal file
17
kubernetes/kubernetes-1.20.9/centos/files/kubeadm.conf
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Note: This dropin only works with kubeadm and kubelet v1.11+
|
||||||
|
[Service]
|
||||||
|
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
|
||||||
|
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
|
||||||
|
# This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
|
||||||
|
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
|
||||||
|
# This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
|
||||||
|
# the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
|
||||||
|
EnvironmentFile=-/etc/sysconfig/kubelet
|
||||||
|
ExecStart=
|
||||||
|
ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
|
||||||
|
ExecStartPre=-/usr/bin/kubelet-cgroup-setup.sh
|
||||||
|
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/kubelet.pid;'
|
||||||
|
ExecStopPost=/bin/rm -f /var/run/kubelet.pid
|
||||||
|
Restart=always
|
||||||
|
StartLimitInterval=0
|
||||||
|
RestartSec=10
|
@ -0,0 +1,132 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
#
|
||||||
|
# This script does minimal cgroup setup for kubelet. This creates k8s-infra
|
||||||
|
# cgroup for a minimal set of resource controllers, and configures cpuset
|
||||||
|
# attributes to span all online cpus and nodes. This will do nothing if
|
||||||
|
# the k8s-infra cgroup already exists (i.e., assume already configured).
|
||||||
|
# NOTE: The creation of directories under /sys/fs/cgroup is volatile, and
|
||||||
|
# does not persist across reboots. The cpuset.mems and cpuset.cpus are later updated
|
||||||
|
# by puppet kubernetes.pp manifest.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Define minimal path
|
||||||
|
PATH=/bin:/usr/bin:/usr/local/bin
|
||||||
|
|
||||||
|
# Log info message to /var/log/daemon.log
|
||||||
|
function LOG {
    # Send an informational message to syslog (facility daemon, priority
    # info), prefixed with the script name and PID.
    # "$*" joins all arguments into a single word; "$@" embedded in a longer
    # quoted string would split the message into several logger arguments.
    logger -p daemon.info "$0($$): $*"
}
|
||||||
|
|
||||||
|
# Log error message to /var/log/daemon.log
|
||||||
|
function ERROR {
    # Send an error message to syslog (facility daemon, priority error),
    # prefixed with the script name and PID; -s also copies it to stderr.
    # "$*" joins all arguments into a single word; "$@" embedded in a longer
    # quoted string would split the message into several logger arguments.
    logger -s -p daemon.error "$0($$): ERROR: $*"
}
|
||||||
|
|
||||||
|
# Create minimal cgroup directories and configure cpuset attributes if required
|
||||||
|
function create_cgroup {
    # Arguments:
    #   $1 - cgroup name; a directory of this name is created under each
    #        controller listed below
    #   $2 - memory node list written to cpuset.mems
    #   $3 - cpu list written to cpuset.cpus
    #
    # Any failure terminates the whole script (exit, not return) with the
    # failing command's status. The script also exits with status 0 as soon
    # as one of the preserved controller directories is found to exist.
    #
    # All path and value expansions are quoted so that they are never subject
    # to word splitting or pathname expansion.
    local cg_name=$1
    local cg_nodeset=$2
    local cg_cpuset=$3

    local CGROUP=/sys/fs/cgroup
    local CONTROLLERS_AUTO_DELETED=("pids" "hugetlb")
    local CONTROLLERS_PRESERVED=("cpuset" "memory" "cpu,cpuacct" "systemd")
    local cnt=''
    local CGDIR=''
    local RC=0

    # Ensure that these cgroups are created every time as they are auto deleted
    for cnt in "${CONTROLLERS_AUTO_DELETED[@]}"; do
        CGDIR=${CGROUP}/${cnt}/${cg_name}
        if [ -d "${CGDIR}" ]; then
            LOG "Nothing to do, already configured: ${CGDIR}."
            continue
        fi
        LOG "Creating: ${CGDIR}"
        mkdir -p "${CGDIR}"
        RC=$?
        if [ ${RC} -ne 0 ]; then
            ERROR "Creating: ${CGDIR}, rc=${RC}"
            exit ${RC}
        fi
    done

    # These cgroups are preserved so if any of these are encountered additional
    # cgroup setup is not required
    for cnt in "${CONTROLLERS_PRESERVED[@]}"; do
        CGDIR=${CGROUP}/${cnt}/${cg_name}
        if [ -d "${CGDIR}" ]; then
            LOG "Nothing to do, already configured: ${CGDIR}."
            # RC is 0 here: every earlier non-zero mkdir status has already
            # terminated the script.
            exit ${RC}
        fi
        LOG "Creating: ${CGDIR}"
        mkdir -p "${CGDIR}"
        RC=$?
        if [ ${RC} -ne 0 ]; then
            ERROR "Creating: ${CGDIR}, rc=${RC}"
            exit ${RC}
        fi
    done

    # Customize cpuset attributes
    LOG "Configuring cgroup: ${cg_name}, nodeset: ${cg_nodeset}, cpuset: ${cg_cpuset}"
    CGDIR=${CGROUP}/cpuset/${cg_name}
    local CGMEMS=${CGDIR}/cpuset.mems
    local CGCPUS=${CGDIR}/cpuset.cpus
    local CGTASKS=${CGDIR}/tasks

    # Assign cgroup memory nodeset
    LOG "Assign nodeset ${cg_nodeset} to ${CGMEMS}"
    /bin/echo "${cg_nodeset}" > "${CGMEMS}"
    RC=$?
    if [ ${RC} -ne 0 ]; then
        ERROR "Unable to write to: ${CGMEMS}, rc=${RC}"
        exit ${RC}
    fi

    # Assign cgroup cpus
    LOG "Assign cpuset ${cg_cpuset} to ${CGCPUS}"
    /bin/echo "${cg_cpuset}" > "${CGCPUS}"
    RC=$?
    if [ ${RC} -ne 0 ]; then
        ERROR "Assigning: ${cg_cpuset} to ${CGCPUS}, rc=${RC}"
        exit ${RC}
    fi

    # Set file ownership
    chown root:root "${CGMEMS}" "${CGCPUS}" "${CGTASKS}"
    RC=$?
    if [ ${RC} -ne 0 ]; then
        ERROR "Setting owner for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
        exit ${RC}
    fi

    # Set file mode permissions
    chmod 644 "${CGMEMS}" "${CGCPUS}" "${CGTASKS}"
    RC=$?
    if [ ${RC} -ne 0 ]; then
        ERROR "Setting mode for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
        exit ${RC}
    fi

    return ${RC}
}
|
||||||
|
|
||||||
|
# Writing under /sys/fs/cgroup needs root, so refuse to run otherwise.
if [ "$UID" -ne 0 ]; then
    ERROR "Require sudo/root."
    exit 1
fi

# Configure default kubepods cpuset to span all online cpus and nodes.
ONLINE_NODESET=$(/bin/cat /sys/devices/system/node/online)
ONLINE_CPUSET=$(/bin/cat /sys/devices/system/cpu/online)

# Configure kubelet cgroup to match cgroupRoot.
# The arguments are quoted so each value stays in its own position; unquoted,
# an empty node list would vanish and the cpu list would be passed as the
# nodeset.
create_cgroup 'k8s-infra' "${ONLINE_NODESET}" "${ONLINE_CPUSET}"

exit $?
|
||||||
|
|
@ -0,0 +1,111 @@
|
|||||||
|
From 019172946c0146eca91d611595866c70a8ed3ddb Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
Date: Fri, 3 Sep 2021 14:10:46 -0400
|
||||||
|
Subject: [PATCH 1/6] kubelet cpumanager disable CFS quota throttling for
|
||||||
|
Guaranteed pods
|
||||||
|
|
||||||
|
This disables CFS CPU quota to avoid performance degradation due to
|
||||||
|
Linux kernel CFS quota implementation. Note that 4.18 kernel attempts
|
||||||
|
to solve the CFS throttling problem, but there are reports that it is
|
||||||
|
not completely effective.
|
||||||
|
|
||||||
|
This disables CFS quota throttling for Guaranteed pods for both
|
||||||
|
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
|
||||||
|
Disabling has a dramatic latency improvement for HTTP response times.
|
||||||
|
|
||||||
|
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
---
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager.go | 22 ++++++++++++++++++++++
|
||||||
|
pkg/kubelet/cm/helpers_linux.go | 5 +++++
|
||||||
|
pkg/kubelet/cm/helpers_linux_test.go | 8 ++++----
|
||||||
|
3 files changed, 31 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
index 44368efc441..88cfbc1fa83 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
@@ -36,6 +36,7 @@ import (
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||||
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||||
|
+ v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ActivePodsFunc is a function that returns a list of pods to reconcile.
|
||||||
|
@@ -242,6 +243,14 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e
|
||||||
|
// Get the CPUs assigned to the container during Allocate()
|
||||||
|
// (or fall back to the default CPUSet if none were assigned).
|
||||||
|
cpus := m.state.GetCPUSetOrDefault(string(p.UID), c.Name)
|
||||||
|
+
|
||||||
|
+ // Guaranteed PODs should not have CFS quota throttle
|
||||||
|
+ if m.policy.Name() == string(PolicyStatic) && v1qos.GetPodQOS(p) == v1.PodQOSGuaranteed {
|
||||||
|
+ err := m.disableContainerCPUQuota(containerID)
|
||||||
|
+ if err != nil {
|
||||||
|
+ klog.Errorf("[cpumanager] AddContainer disable CPU Quota error: %v", err)
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
m.Unlock()
|
||||||
|
|
||||||
|
if !cpus.IsEmpty() {
|
||||||
|
@@ -489,3 +498,16 @@ func (m *manager) GetCPUs(podUID, containerName string) []int64 {
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+func (m *manager) disableContainerCPUQuota(containerID string) error {
|
||||||
|
+ // Disable CFS CPU quota to avoid performance degradation due to
|
||||||
|
+ // Linux kernel CFS throttle implementation.
|
||||||
|
+ // NOTE: 4.18 kernel attempts to solve CFS throttling problem,
|
||||||
|
+ // but there are reports that it is not completely effective.
|
||||||
|
+ return m.containerRuntime.UpdateContainerResources(
|
||||||
|
+ containerID,
|
||||||
|
+ &runtimeapi.LinuxContainerResources{
|
||||||
|
+ CpuPeriod: 100000,
|
||||||
|
+ CpuQuota: -1,
|
||||||
|
+ })
|
||||||
|
+}
|
||||||
|
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
|
||||||
|
index 9b115ab5380..d3185e1e958 100644
|
||||||
|
--- a/pkg/kubelet/cm/helpers_linux.go
|
||||||
|
+++ b/pkg/kubelet/cm/helpers_linux.go
|
||||||
|
@@ -166,6 +166,11 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64)
|
||||||
|
// determine the qos class
|
||||||
|
qosClass := v1qos.GetPodQOS(pod)
|
||||||
|
|
||||||
|
+ // disable cfs quota for guaranteed pods
|
||||||
|
+ if qosClass == v1.PodQOSGuaranteed {
|
||||||
|
+ cpuQuota = int64(-1)
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
// build the result
|
||||||
|
result := &ResourceConfig{}
|
||||||
|
if qosClass == v1.PodQOSGuaranteed {
|
||||||
|
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
|
||||||
|
index 56d765fbc22..0c43afe5875 100644
|
||||||
|
--- a/pkg/kubelet/cm/helpers_linux_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/helpers_linux_test.go
|
||||||
|
@@ -63,8 +63,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||||
|
burstablePartialShares := MilliCPUToShares(200)
|
||||||
|
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||||
|
guaranteedShares := MilliCPUToShares(100)
|
||||||
|
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||||
|
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||||
|
+ guaranteedQuota := int64(-1)
|
||||||
|
+ guaranteedTunedQuota := int64(-1)
|
||||||
|
memoryQuantity = resource.MustParse("100Mi")
|
||||||
|
cpuNoLimit := int64(-1)
|
||||||
|
guaranteedMemory := memoryQuantity.Value()
|
||||||
|
@@ -283,8 +283,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||||
|
burstablePartialShares := MilliCPUToShares(200)
|
||||||
|
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||||
|
guaranteedShares := MilliCPUToShares(100)
|
||||||
|
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||||
|
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||||
|
+ guaranteedQuota := int64(-1)
|
||||||
|
+ guaranteedTunedQuota := int64(-1)
|
||||||
|
memoryQuantity = resource.MustParse("100Mi")
|
||||||
|
cpuNoLimit := int64(-1)
|
||||||
|
guaranteedMemory := memoryQuantity.Value()
|
||||||
|
--
|
||||||
|
2.17.1
|
||||||
|
|
@ -0,0 +1,139 @@
|
|||||||
|
From 5471fc2f03d1d14ceb250bf98f400cee9feb6983 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
Date: Fri, 3 Sep 2021 15:57:58 -0400
|
||||||
|
Subject: [PATCH 3/6] kubelet cpumanager infrastructure pods use system
|
||||||
|
reserved CPUs
|
||||||
|
|
||||||
|
This assigns system infrastructure pods to the "reserved" cpuset
|
||||||
|
to isolate them from the shared pool of CPUs.
|
||||||
|
|
||||||
|
Infrastructure pods include any pods that belong to the kube-system,
|
||||||
|
armada, cert-manager, vault, platform-deployment-manager, portieris,
|
||||||
|
or notification namespaces.
|
||||||
|
|
||||||
|
The implementation is a bit simplistic: it is assumed that the
|
||||||
|
"reserved" cpuset is large enough to handle all infrastructure pods
|
||||||
|
CPU allocations.
|
||||||
|
|
||||||
|
This also prevents infrastructure pods from using Guaranteed resources.
|
||||||
|
|
||||||
|
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
---
|
||||||
|
pkg/kubelet/cm/cpumanager/policy_static.go | 44 +++++++++++++++++++
|
||||||
|
.../cm/cpumanager/policy_static_test.go | 19 +++++++-
|
||||||
|
2 files changed, 62 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
index e892d63641b..ab3206c5dc4 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
@@ -33,6 +33,11 @@ import (
|
||||||
|
// PolicyStatic is the name of the static policy
|
||||||
|
const PolicyStatic policyName = "static"
|
||||||
|
|
||||||
|
+// Define namespaces used by platform infrastructure pods
|
||||||
|
+var infraNamespaces = [...]string{
|
||||||
|
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification",
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
// staticPolicy is a CPU manager policy that does not change CPU
|
||||||
|
// assignments for exclusively pinned guaranteed containers after the main
|
||||||
|
// container process starts.
|
||||||
|
@@ -233,6 +238,31 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
|
||||||
|
+ // Process infra pods before guaranteed pods
|
||||||
|
+ if isKubeInfra(pod) {
|
||||||
|
+ // Container belongs in reserved pool.
|
||||||
|
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
|
||||||
|
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||||
|
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping " +
|
||||||
|
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||||
|
+ return nil
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ cpuset := p.reserved
|
||||||
|
+ if cpuset.IsEmpty() {
|
||||||
|
+ // If this happens then someone messed up.
|
||||||
|
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
|
||||||
|
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||||
|
+ }
|
||||||
|
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||||
|
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
|
||||||
|
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||||
|
+ return nil
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
|
||||||
|
klog.Infof("[cpumanager] static policy: Allocate (pod: %s, container: %s)", format.Pod(pod), container.Name)
|
||||||
|
// container belongs in an exclusively allocated pool
|
||||||
|
@@ -322,6 +352,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
|
||||||
|
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
|
||||||
|
+ if isKubeInfra(pod) {
|
||||||
|
+ return 0
|
||||||
|
+ }
|
||||||
|
// Safe downcast to do for all systems with < 2.1 billion CPUs.
|
||||||
|
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
|
||||||
|
// https://golang.org/ref/spec#Numeric_types
|
||||||
|
@@ -524,3 +558,13 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
|
||||||
|
|
||||||
|
return hints
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+// check if a given pod is in a platform infrastructure namespace
|
||||||
|
+func isKubeInfra(pod *v1.Pod) bool {
|
||||||
|
+ for _, namespace := range infraNamespaces {
|
||||||
|
+ if namespace == pod.Namespace {
|
||||||
|
+ return true
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return false
|
||||||
|
+}
|
||||||
|
\ No newline at end of file
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
index 9c7e4f146ff..5cfd9a8e24e 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
@@ -747,7 +747,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
-
|
||||||
|
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
|
||||||
|
+ infraPod.Namespace = "kube-system"
|
||||||
|
testCases := []staticPolicyTestWithResvList{
|
||||||
|
{
|
||||||
|
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||||
|
@@ -789,6 +790,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
expCPUAlloc: true,
|
||||||
|
expCSet: cpuset.NewCPUSet(4, 5),
|
||||||
|
},
|
||||||
|
+ {
|
||||||
|
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
|
||||||
|
+ topo: topoSingleSocketHT,
|
||||||
|
+ numReservedCPUs: 2,
|
||||||
|
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||||
|
+ stAssignments: state.ContainerCPUAssignments{
|
||||||
|
+ "fakePod": map[string]cpuset.CPUSet{
|
||||||
|
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||||
|
+ },
|
||||||
|
+ },
|
||||||
|
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||||
|
+ pod: infraPod,
|
||||||
|
+ expErr: nil,
|
||||||
|
+ expCPUAlloc: true,
|
||||||
|
+ expCSet: cpuset.NewCPUSet(0, 1),
|
||||||
|
+ },
|
||||||
|
}
|
||||||
|
|
||||||
|
testExcl := true
|
||||||
|
--
|
||||||
|
2.17.1
|
||||||
|
|
@ -0,0 +1,526 @@
|
|||||||
|
From 05db95e27509e60022a62c1001be2191ba42d2a3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
Date: Fri, 3 Sep 2021 17:30:31 -0400
|
||||||
|
Subject: [PATCH 4/6] kubelet cpumanager introduce concept of isolated CPUs
|
||||||
|
|
||||||
|
This introduces the concept of "isolated CPUs", which are CPUs that
|
||||||
|
have been isolated at the kernel level via the "isolcpus" kernel boot
|
||||||
|
parameter.
|
||||||
|
|
||||||
|
When starting the kubelet process, two separate sets of reserved CPUs
|
||||||
|
may be specified. With this change CPUs reserved via
|
||||||
|
'--system-reserved=cpu' will be used for infrastructure pods while the
|
||||||
|
isolated CPUs should be reserved via '--kube-reserved=cpu' to cause
|
||||||
|
kubelet to skip over them for "normal" CPU resource tracking. The
|
||||||
|
kubelet code will double-check that the specified isolated CPUs match
|
||||||
|
what the kernel exposes in "/sys/devices/system/cpu/isolated".
|
||||||
|
|
||||||
|
A plugin (outside the scope of this commit) will expose the isolated
|
||||||
|
CPUs to kubelet via the device plugin API.
|
||||||
|
|
||||||
|
If a pod specifies some number of "isolcpus" resources, the device
|
||||||
|
manager will allocate them. In this code we check whether such
|
||||||
|
resources have been allocated, and if so we set the container cpuset to
|
||||||
|
the isolated CPUs. This does mean that it really only makes sense to
|
||||||
|
specify "isolcpus" resources for best-effort or burstable pods, not for
|
||||||
|
guaranteed ones since that would throw off the accounting code. In
|
||||||
|
order to ensure the accounting still works as designed, if "isolcpus"
|
||||||
|
are specified for guaranteed pods, the affinity will be set to the
|
||||||
|
non-isolated CPUs.
|
||||||
|
|
||||||
|
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
Co-authored-by: Chris Friesen <chris.friesen@windriver.com>
|
||||||
|
---
|
||||||
|
pkg/kubelet/cm/container_manager_linux.go | 1 +
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager.go | 31 ++++++-
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 13 ++-
|
||||||
|
pkg/kubelet/cm/cpumanager/policy_static.go | 84 +++++++++++++++++--
|
||||||
|
.../cm/cpumanager/policy_static_test.go | 44 ++++++++--
|
||||||
|
5 files changed, 155 insertions(+), 18 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
|
||||||
|
index eeea6a8b7e4..4f250b2a6ca 100644
|
||||||
|
--- a/pkg/kubelet/cm/container_manager_linux.go
|
||||||
|
+++ b/pkg/kubelet/cm/container_manager_linux.go
|
||||||
|
@@ -333,6 +333,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
|
||||||
|
cm.GetNodeAllocatableReservation(),
|
||||||
|
nodeConfig.KubeletRootDir,
|
||||||
|
cm.topologyManager,
|
||||||
|
+ cm.deviceManager,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
klog.Errorf("failed to initialize cpu manager: %v", err)
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
index a0586c7b860..8470431c07c 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
@@ -21,6 +21,8 @@ import (
|
||||||
|
"math"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
+ "strings"
|
||||||
|
+ "io/ioutil"
|
||||||
|
|
||||||
|
cadvisorapi "github.com/google/cadvisor/info/v1"
|
||||||
|
v1 "k8s.io/api/core/v1"
|
||||||
|
@@ -34,6 +36,7 @@ import (
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||||
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||||
|
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||||
|
@@ -51,6 +54,25 @@ type policyName string
|
||||||
|
// cpuManagerStateFileName is the file name where cpu manager stores its state
|
||||||
|
const cpuManagerStateFileName = "cpu_manager_state"
|
||||||
|
|
||||||
|
+// get the system-level isolated CPUs
|
||||||
|
+func getIsolcpus() cpuset.CPUSet {
|
||||||
|
+ dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||||
|
+ if err != nil {
|
||||||
|
+ klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||||
|
+ return cpuset.NewCPUSet()
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // The isolated cpus string ends in a newline
|
||||||
|
+ cpustring := strings.TrimSuffix(string(dat), "\n")
|
||||||
|
+ cset, err := cpuset.Parse(cpustring)
|
||||||
|
+ if err != nil {
|
||||||
|
+ klog.Errorf("[cpumanager] unable to parse sysfs isolcpus string to cpuset")
|
||||||
|
+ return cpuset.NewCPUSet()
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return cset
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
// Manager interface provides methods for Kubelet to manage pod cpus.
|
||||||
|
type Manager interface {
|
||||||
|
// Start is called during Kubelet initialization.
|
||||||
|
@@ -136,7 +158,7 @@ func (s *sourcesReadyStub) AddSource(source string) {}
|
||||||
|
func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||||
|
|
||||||
|
// NewManager creates new cpu manager based on provided policy
|
||||||
|
-func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
|
||||||
|
+func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store, deviceManager devicemanager.Manager) (Manager, error) {
|
||||||
|
var topo *topology.CPUTopology
|
||||||
|
var policy Policy
|
||||||
|
|
||||||
|
@@ -173,8 +195,11 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
|
||||||
|
// NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||||
|
// This variable is primarily to make testing easier.
|
||||||
|
excludeReserved := true
|
||||||
|
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
|
||||||
|
-
|
||||||
|
+ // isolCPUs is the set of kernel-isolated CPUs. They should be a subset of specificCPUs or
|
||||||
|
+ // of the CPUs that NewStaticPolicy() will pick if numReservedCPUs is set. It's only in the
|
||||||
|
+ // argument list here for ease of testing, it's really internal to the policy.
|
||||||
|
+ isolCPUs := getIsolcpus()
|
||||||
|
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, isolCPUs, affinity, deviceManager, excludeReserved)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("new static policy error: %v", err)
|
||||||
|
}
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
index a155791e75f..7a6ea90b3c5 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
@@ -38,6 +38,7 @@ import (
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||||
|
)
|
||||||
|
|
||||||
|
type mockState struct {
|
||||||
|
@@ -211,6 +212,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCPUManagerAdd(t *testing.T) {
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testExcl := false
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
@@ -226,7 +228,9 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||||
|
},
|
||||||
|
0,
|
||||||
|
cpuset.NewCPUSet(),
|
||||||
|
+ cpuset.NewCPUSet(),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
+ testDM,
|
||||||
|
testExcl)
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
@@ -483,8 +487,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
testExcl := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||||
|
|
||||||
|
state := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -636,7 +641,8 @@ func TestCPUManagerGenerate(t *testing.T) {
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(sDir)
|
||||||
|
|
||||||
|
- mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
|
||||||
|
+ testDM, err := devicemanager.NewManagerStub()
|
||||||
|
+ mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
|
||||||
|
if testCase.expectedError != nil {
|
||||||
|
if !strings.Contains(err.Error(), testCase.expectedError.Error()) {
|
||||||
|
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error())
|
||||||
|
@@ -991,6 +997,7 @@ func TestReconcileState(t *testing.T) {
|
||||||
|
// the following tests are with --reserved-cpus configured
|
||||||
|
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||||
|
testExcl := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
NumCPUs: 4,
|
||||||
|
@@ -1005,7 +1012,9 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||||
|
},
|
||||||
|
1,
|
||||||
|
cpuset.NewCPUSet(0),
|
||||||
|
+ cpuset.NewCPUSet(),
|
||||||
|
topologymanager.NewFakeManager(),
|
||||||
|
+ testDM,
|
||||||
|
testExcl)
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
index ab3206c5dc4..4acd5609748 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
@@ -18,6 +18,7 @@ package cpumanager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
+ "strconv"
|
||||||
|
|
||||||
|
v1 "k8s.io/api/core/v1"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
|
@@ -28,6 +29,7 @@ import (
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/util/format"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PolicyStatic is the name of the static policy
|
||||||
|
@@ -81,6 +83,10 @@ type staticPolicy struct {
|
||||||
|
topology *topology.CPUTopology
|
||||||
|
// set of CPUs that is not available for exclusive assignment
|
||||||
|
reserved cpuset.CPUSet
|
||||||
|
+ // subset of reserved CPUs with isolcpus attribute
|
||||||
|
+ isolcpus cpuset.CPUSet
|
||||||
|
+ // parent containerManager, used to get device list
|
||||||
|
+ deviceManager devicemanager.Manager
|
||||||
|
// If true, default CPUSet should exclude reserved CPUs
|
||||||
|
excludeReserved bool
|
||||||
|
// topology manager reference to get container Topology affinity
|
||||||
|
@@ -95,7 +101,7 @@ var _ Policy = &staticPolicy{}
|
||||||
|
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||||
|
// assignments for exclusively pinned guaranteed containers after the main
|
||||||
|
// container process starts.
|
||||||
|
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
|
||||||
|
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, isolCPUs cpuset.CPUSet, affinity topologymanager.Store, deviceManager devicemanager.Manager, excludeReserved bool) (Policy, error) {
|
||||||
|
allCPUs := topology.CPUDetails.CPUs()
|
||||||
|
var reserved cpuset.CPUSet
|
||||||
|
if reservedCPUs.Size() > 0 {
|
||||||
|
@@ -116,9 +122,17 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||||
|
|
||||||
|
klog.Infof("[cpumanager] reserved %d CPUs (\"%s\") not available for exclusive assignment", reserved.Size(), reserved)
|
||||||
|
|
||||||
|
+ if !isolCPUs.IsSubsetOf(reserved) {
|
||||||
|
+ klog.Errorf("[cpumanager] isolCPUs %v is not a subset of reserved %v", isolCPUs, reserved)
|
||||||
|
+ reserved = reserved.Union(isolCPUs)
|
||||||
|
+ klog.Warningf("[cpumanager] mismatch isolCPUs %v, force reserved %v", isolCPUs, reserved)
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return &staticPolicy{
|
||||||
|
topology: topology,
|
||||||
|
reserved: reserved,
|
||||||
|
+ isolcpus: isolCPUs,
|
||||||
|
+ deviceManager: deviceManager,
|
||||||
|
excludeReserved: excludeReserved,
|
||||||
|
affinity: affinity,
|
||||||
|
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||||
|
@@ -155,8 +169,8 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||||
|
} else {
|
||||||
|
s.SetDefaultCPUSet(allCPUs)
|
||||||
|
}
|
||||||
|
- klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||||
|
- allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||||
|
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, isolcpus:%v, default:%v\n",
|
||||||
|
+ allCPUs, p.reserved, p.isolcpus, s.GetDefaultCPUSet())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -249,12 +263,12 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
- cpuset := p.reserved
|
||||||
|
+ cpuset := p.reserved.Clone().Difference(p.isolcpus)
|
||||||
|
if cpuset.IsEmpty() {
|
||||||
|
// If this happens then someone messed up.
|
||||||
|
return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
|
||||||
|
- "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
|
||||||
|
- pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||||
|
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v, isolcpus:%v",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved, p.isolcpus)
|
||||||
|
}
|
||||||
|
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||||
|
klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
|
||||||
|
@@ -285,8 +299,37 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||||
|
}
|
||||||
|
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||||
|
p.updateCPUsToReuse(pod, container, cpuset)
|
||||||
|
+ klog.Infof("[cpumanager] guaranteed: AddContainer " +
|
||||||
|
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); numCPUS=%d, cpuset=%v",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, numCPUs, cpuset)
|
||||||
|
+ return nil
|
||||||
|
+ }
|
||||||
|
|
||||||
|
+ if isolcpus := p.podIsolCPUs(pod, container); isolcpus.Size() > 0 {
|
||||||
|
+ // container has requested isolated CPUs
|
||||||
|
+ if set, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||||
|
+ if set.Equals(isolcpus) {
|
||||||
|
+ klog.Infof("[cpumanager] isolcpus container already present in state, skipping " +
|
||||||
|
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||||
|
+ return nil
|
||||||
|
+ } else {
|
||||||
|
+ klog.Infof("[cpumanager] isolcpus container state has cpus %v, should be %v" +
|
||||||
|
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||||
|
+ isolcpus, set, pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ // Note that we do not do anything about init containers here.
|
||||||
|
+ // It looks like devices are allocated per-pod based on effective requests/limits
|
||||||
|
+ // and extra devices from initContainers are not freed up when the regular containers start.
|
||||||
|
+ // TODO: confirm this is still true for 1.20
|
||||||
|
+ s.SetCPUSet(string(pod.UID), container.Name, isolcpus)
|
||||||
|
+ klog.Infof("[cpumanager] isolcpus: AddContainer " +
|
||||||
|
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||||
|
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, isolcpus)
|
||||||
|
+ return nil
|
||||||
|
}
|
||||||
|
+
|
||||||
|
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
@@ -567,4 +610,33 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||||
|
+func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||||
|
+ // NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||||
|
+ // not create UID. We also need a way to properly stub devicemanager.
|
||||||
|
+ if len(string(pod.UID)) == 0 {
|
||||||
|
+ return cpuset.NewCPUSet()
|
||||||
|
+ }
|
||||||
|
+ devices := p.deviceManager.GetDevices(string(pod.UID), container.Name)
|
||||||
|
+ for _, dev := range devices {
|
||||||
|
+ // this resource name needs to match the isolcpus device plugin
|
||||||
|
+ if dev.ResourceName == "windriver.com/isolcpus" {
|
||||||
|
+ cpuStrList := dev.DeviceIds
|
||||||
|
+ if len(cpuStrList) > 0 {
|
||||||
|
+ cpuSet := cpuset.NewCPUSet()
|
||||||
|
+ // loop over the list of strings, convert each one to int, add to cpuset
|
||||||
|
+ for _, cpuStr := range cpuStrList {
|
||||||
|
+ cpu, err := strconv.Atoi(cpuStr)
|
||||||
|
+ if err != nil {
|
||||||
|
+ panic(err)
|
||||||
|
+ }
|
||||||
|
+ cpuSet = cpuSet.Union(cpuset.NewCPUSet(cpu))
|
||||||
|
+ }
|
||||||
|
+ return cpuSet
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return cpuset.NewCPUSet()
|
||||||
|
}
|
||||||
|
\ No newline at end of file
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
index 5cfd9a8e24e..8307aa1e3f0 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
@@ -27,6 +27,7 @@ import (
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||||
|
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||||
|
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||||
|
)
|
||||||
|
|
||||||
|
type staticPolicyTest struct {
|
||||||
|
@@ -45,8 +46,9 @@ type staticPolicyTest struct {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyName(t *testing.T) {
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testExcl := false
|
||||||
|
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||||
|
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||||
|
|
||||||
|
policyName := policy.Name()
|
||||||
|
if policyName != "static" {
|
||||||
|
@@ -56,6 +58,7 @@ func TestStaticPolicyName(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyStart(t *testing.T) {
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testCases := []staticPolicyTest{
|
||||||
|
{
|
||||||
|
description: "non-corrupted state",
|
||||||
|
@@ -131,7 +134,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||||
|
}
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
t.Run(testCase.description, func(t *testing.T) {
|
||||||
|
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||||
|
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
|
||||||
|
policy := p.(*staticPolicy)
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -179,6 +182,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||||
|
largeTopoSock0CPUSet := largeTopoSock0Builder.Result()
|
||||||
|
largeTopoSock1CPUSet := largeTopoSock1Builder.Result()
|
||||||
|
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
testCases := []staticPolicyTest{
|
||||||
|
{
|
||||||
|
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||||
|
@@ -447,7 +451,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -490,6 +494,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
excludeReserved := false
|
||||||
|
testCases := []staticPolicyTest{
|
||||||
|
{
|
||||||
|
@@ -549,7 +554,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -571,6 +576,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
excludeReserved := false
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
@@ -640,7 +646,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range testCases {
|
||||||
|
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||||
|
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
|
||||||
|
policy := p.(*staticPolicy)
|
||||||
|
st := &mockState{
|
||||||
|
assignments: tc.stAssignments,
|
||||||
|
@@ -673,6 +679,7 @@ type staticPolicyTestWithResvList struct {
|
||||||
|
topo *topology.CPUTopology
|
||||||
|
numReservedCPUs int
|
||||||
|
reserved cpuset.CPUSet
|
||||||
|
+ isolcpus cpuset.CPUSet
|
||||||
|
stAssignments state.ContainerCPUAssignments
|
||||||
|
stDefaultCPUSet cpuset.CPUSet
|
||||||
|
pod *v1.Pod
|
||||||
|
@@ -713,9 +720,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||||
|
},
|
||||||
|
}
|
||||||
|
testExcl := false
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
t.Run(testCase.description, func(t *testing.T) {
|
||||||
|
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||||
|
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
|
||||||
|
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||||
|
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||||
|
testCase.description, testCase.expNewErr, err)
|
||||||
|
@@ -755,6 +763,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
topo: topoSingleSocketHT,
|
||||||
|
numReservedCPUs: 1,
|
||||||
|
reserved: cpuset.NewCPUSet(0),
|
||||||
|
+ isolcpus: cpuset.NewCPUSet(),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||||
|
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||||
|
@@ -767,6 +776,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
topo: topoSingleSocketHT,
|
||||||
|
numReservedCPUs: 2,
|
||||||
|
reserved: cpuset.NewCPUSet(0, 1),
|
||||||
|
+ isolcpus: cpuset.NewCPUSet(),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||||
|
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||||
|
@@ -779,6 +789,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
topo: topoSingleSocketHT,
|
||||||
|
numReservedCPUs: 2,
|
||||||
|
reserved: cpuset.NewCPUSet(0, 1),
|
||||||
|
+ isolcpus: cpuset.NewCPUSet(),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{
|
||||||
|
"fakePod": map[string]cpuset.CPUSet{
|
||||||
|
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||||
|
@@ -795,6 +806,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
topo: topoSingleSocketHT,
|
||||||
|
numReservedCPUs: 2,
|
||||||
|
reserved: cpuset.NewCPUSet(0, 1),
|
||||||
|
+ isolcpus: cpuset.NewCPUSet(),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{
|
||||||
|
"fakePod": map[string]cpuset.CPUSet{
|
||||||
|
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||||
|
@@ -806,11 +818,29 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
expCPUAlloc: true,
|
||||||
|
expCSet: cpuset.NewCPUSet(0, 1),
|
||||||
|
},
|
||||||
|
+ {
|
||||||
|
+ description: "InfraPod, SingleSocketHT, Isolcpus, ExpectAllocReserved",
|
||||||
|
+ topo: topoSingleSocketHT,
|
||||||
|
+ numReservedCPUs: 2,
|
||||||
|
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||||
|
+ isolcpus: cpuset.NewCPUSet(1),
|
||||||
|
+ stAssignments: state.ContainerCPUAssignments{
|
||||||
|
+ "fakePod": map[string]cpuset.CPUSet{
|
||||||
|
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||||
|
+ },
|
||||||
|
+ },
|
||||||
|
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||||
|
+ pod: infraPod,
|
||||||
|
+ expErr: nil,
|
||||||
|
+ expCPUAlloc: true,
|
||||||
|
+ expCSet: cpuset.NewCPUSet(0),
|
||||||
|
+ },
|
||||||
|
}
|
||||||
|
|
||||||
|
testExcl := true
|
||||||
|
+ testDM, _ := devicemanager.NewManagerStub()
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, testCase.isolcpus, topologymanager.NewFakeManager(), testDM, testExcl)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
--
|
||||||
|
2.17.1
|
||||||
|
|
@ -0,0 +1,313 @@
|
|||||||
|
From c85d0d1a42fc5989f2e989daf46fdedeebf486a4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
Date: Fri, 3 Sep 2021 15:31:31 -0400
|
||||||
|
Subject: [PATCH 2/6] kubelet cpumanager keep normal containers off reserved
|
||||||
|
CPUs
|
||||||
|
|
||||||
|
When starting the kubelet process, two separate sets of reserved CPUs
|
||||||
|
may be specified. With this change CPUs reserved via '--system-reserved=cpu'
|
||||||
|
or '--kube-reserved=cpu' will be ignored by kubernetes itself. A small
|
||||||
|
tweak to the default CPU affinity ensures that "normal" Kubernetes
|
||||||
|
pods won't run on the reserved CPUs.
|
||||||
|
|
||||||
|
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
|
||||||
|
---
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 +++-
|
||||||
|
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 11 ++++--
|
||||||
|
pkg/kubelet/cm/cpumanager/policy_static.go | 29 ++++++++++++---
|
||||||
|
.../cm/cpumanager/policy_static_test.go | 35 +++++++++++++------
|
||||||
|
4 files changed, 62 insertions(+), 19 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
index 88cfbc1fa83..a0586c7b860 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||||
|
@@ -170,7 +170,11 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
|
||||||
|
// exclusively allocated.
|
||||||
|
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
|
||||||
|
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
|
||||||
|
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity)
|
||||||
|
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||||
|
+ // This variable is primarily to make testing easier.
|
||||||
|
+ excludeReserved := true
|
||||||
|
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
|
||||||
|
+
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("new static policy error: %v", err)
|
||||||
|
}
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
index 34b170be234..a155791e75f 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||||
|
@@ -211,6 +211,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCPUManagerAdd(t *testing.T) {
|
||||||
|
+ testExcl := false
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
NumCPUs: 4,
|
||||||
|
@@ -225,7 +226,8 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||||
|
},
|
||||||
|
0,
|
||||||
|
cpuset.NewCPUSet(),
|
||||||
|
- topologymanager.NewFakeManager())
|
||||||
|
+ topologymanager.NewFakeManager(),
|
||||||
|
+ testExcl)
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
updateErr error
|
||||||
|
@@ -480,8 +482,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
+ testExcl := false
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||||
|
|
||||||
|
state := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -987,6 +990,7 @@ func TestReconcileState(t *testing.T) {
|
||||||
|
// above test cases are without kubelet --reserved-cpus cmd option
|
||||||
|
// the following tests are with --reserved-cpus configured
|
||||||
|
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||||
|
+ testExcl := false
|
||||||
|
testPolicy, _ := NewStaticPolicy(
|
||||||
|
&topology.CPUTopology{
|
||||||
|
NumCPUs: 4,
|
||||||
|
@@ -1001,7 +1005,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||||
|
},
|
||||||
|
1,
|
||||||
|
cpuset.NewCPUSet(0),
|
||||||
|
- topologymanager.NewFakeManager())
|
||||||
|
+ topologymanager.NewFakeManager(),
|
||||||
|
+ testExcl)
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
updateErr error
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
index c3309ef7280..e892d63641b 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||||
|
@@ -76,6 +76,8 @@ type staticPolicy struct {
|
||||||
|
topology *topology.CPUTopology
|
||||||
|
// set of CPUs that is not available for exclusive assignment
|
||||||
|
reserved cpuset.CPUSet
|
||||||
|
+ // If true, default CPUSet should exclude reserved CPUs
|
||||||
|
+ excludeReserved bool
|
||||||
|
// topology manager reference to get container Topology affinity
|
||||||
|
affinity topologymanager.Store
|
||||||
|
// set of CPUs to reuse across allocations in a pod
|
||||||
|
@@ -88,7 +90,7 @@ var _ Policy = &staticPolicy{}
|
||||||
|
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||||
|
// assignments for exclusively pinned guaranteed containers after the main
|
||||||
|
// container process starts.
|
||||||
|
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store) (Policy, error) {
|
||||||
|
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
|
||||||
|
allCPUs := topology.CPUDetails.CPUs()
|
||||||
|
var reserved cpuset.CPUSet
|
||||||
|
if reservedCPUs.Size() > 0 {
|
||||||
|
@@ -112,6 +114,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||||
|
return &staticPolicy{
|
||||||
|
topology: topology,
|
||||||
|
reserved: reserved,
|
||||||
|
+ excludeReserved: excludeReserved,
|
||||||
|
affinity: affinity,
|
||||||
|
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||||
|
}, nil
|
||||||
|
@@ -140,7 +143,15 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||||
|
}
|
||||||
|
// state is empty initialize
|
||||||
|
allCPUs := p.topology.CPUDetails.CPUs()
|
||||||
|
- s.SetDefaultCPUSet(allCPUs)
|
||||||
|
+ if p.excludeReserved {
|
||||||
|
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
|
||||||
|
+ // unless explicitly affined.
|
||||||
|
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reserved))
|
||||||
|
+ } else {
|
||||||
|
+ s.SetDefaultCPUSet(allCPUs)
|
||||||
|
+ }
|
||||||
|
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||||
|
+ allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -148,9 +159,11 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||||
|
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
||||||
|
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
||||||
|
// - user tampered with file
|
||||||
|
- if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||||
|
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||||
|
- p.reserved.String(), tmpDefaultCPUset.String())
|
||||||
|
+ if !p.excludeReserved {
|
||||||
|
+ if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||||
|
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||||
|
+ p.reserved.String(), tmpDefaultCPUset.String())
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Check if state for static policy is consistent
|
||||||
|
@@ -179,6 +192,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
totalKnownCPUs = totalKnownCPUs.UnionAll(tmpCPUSets)
|
||||||
|
+ if p.excludeReserved {
|
||||||
|
+ totalKnownCPUs = totalKnownCPUs.Union(p.reserved)
|
||||||
|
+ }
|
||||||
|
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
|
||||||
|
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
||||||
|
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
|
||||||
|
@@ -249,6 +265,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
||||||
|
klog.Infof("[cpumanager] static policy: RemoveContainer (pod: %s, container: %s)", podUID, containerName)
|
||||||
|
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
|
||||||
|
s.Delete(podUID, containerName)
|
||||||
|
+ if p.excludeReserved {
|
||||||
|
+ toRelease = toRelease.Difference(p.reserved)
|
||||||
|
+ }
|
||||||
|
// Mutate the shared pool, adding released cpus.
|
||||||
|
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||||
|
}
|
||||||
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
index b4b46c68c17..9c7e4f146ff 100644
|
||||||
|
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||||
|
@@ -33,6 +33,7 @@ type staticPolicyTest struct {
|
||||||
|
description string
|
||||||
|
topo *topology.CPUTopology
|
||||||
|
numReservedCPUs int
|
||||||
|
+ excludeReserved bool
|
||||||
|
podUID string
|
||||||
|
containerName string
|
||||||
|
stAssignments state.ContainerCPUAssignments
|
||||||
|
@@ -44,7 +45,8 @@ type staticPolicyTest struct {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyName(t *testing.T) {
|
||||||
|
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||||
|
+ testExcl := false
|
||||||
|
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
|
||||||
|
|
||||||
|
policyName := policy.Name()
|
||||||
|
if policyName != "static" {
|
||||||
|
@@ -74,6 +76,15 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||||
|
stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||||
|
expCSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||||
|
},
|
||||||
|
+ {
|
||||||
|
+ description: "empty cpuset exclude reserved",
|
||||||
|
+ topo: topoDualSocketHT,
|
||||||
|
+ numReservedCPUs: 2,
|
||||||
|
+ excludeReserved: true,
|
||||||
|
+ stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
+ stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||||
|
+ expCSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
|
||||||
|
+ },
|
||||||
|
{
|
||||||
|
description: "reserved cores 0 & 6 are not present in available cpuset",
|
||||||
|
topo: topoDualSocketHT,
|
||||||
|
@@ -120,7 +131,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||||
|
}
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
t.Run(testCase.description, func(t *testing.T) {
|
||||||
|
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||||
|
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||||
|
policy := p.(*staticPolicy)
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -436,7 +447,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -479,6 +490,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
+ excludeReserved := false
|
||||||
|
testCases := []staticPolicyTest{
|
||||||
|
{
|
||||||
|
description: "SingleSocketHT, DeAllocOneContainer",
|
||||||
|
@@ -537,7 +549,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
@@ -559,6 +571,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||||
|
+ excludeReserved := false
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
topo *topology.CPUTopology
|
||||||
|
@@ -627,7 +640,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range testCases {
|
||||||
|
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
|
||||||
|
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
|
||||||
|
policy := p.(*staticPolicy)
|
||||||
|
st := &mockState{
|
||||||
|
assignments: tc.stAssignments,
|
||||||
|
@@ -699,9 +712,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||||
|
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
+ testExcl := false
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
t.Run(testCase.description, func(t *testing.T) {
|
||||||
|
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
|
||||||
|
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||||
|
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||||
|
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||||
|
testCase.description, testCase.expNewErr, err)
|
||||||
|
@@ -741,7 +755,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
numReservedCPUs: 1,
|
||||||
|
reserved: cpuset.NewCPUSet(0),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||||
|
+ stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||||
|
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||||
|
expErr: fmt.Errorf("not enough cpus available to satisfy request"),
|
||||||
|
expCPUAlloc: false,
|
||||||
|
@@ -753,7 +767,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
numReservedCPUs: 2,
|
||||||
|
reserved: cpuset.NewCPUSet(0, 1),
|
||||||
|
stAssignments: state.ContainerCPUAssignments{},
|
||||||
|
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||||
|
+ stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||||
|
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||||
|
expErr: nil,
|
||||||
|
expCPUAlloc: true,
|
||||||
|
@@ -769,7 +783,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 4, 5),
|
||||||
|
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||||
|
pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"),
|
||||||
|
expErr: nil,
|
||||||
|
expCPUAlloc: true,
|
||||||
|
@@ -777,8 +791,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
+ testExcl := true
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
|
||||||
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
|
||||||
|
|
||||||
|
st := &mockState{
|
||||||
|
assignments: testCase.stAssignments,
|
||||||
|
--
|
||||||
|
2.17.1
|
||||||
|
|
2253
kubernetes/kubernetes-1.20.9/centos/kubernetes.spec
Normal file
2253
kubernetes/kubernetes-1.20.9/centos/kubernetes.spec
Normal file
File diff suppressed because it is too large
Load Diff
2282
kubernetes/kubernetes-1.20.9/centos/kubernetes.spec.orig
Normal file
2282
kubernetes/kubernetes-1.20.9/centos/kubernetes.spec.orig
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user