96d5a7a4dd
This change ports the following kubernetes 1.27.5 patches which were refactored slightly to allow for upstream changes The following patches were applied cleanly: kubeadm-create-platform-pods-with-zero-CPU-resources.patch kubelet-sort-isolcpus-allocation-when-SMT-enabled.patch The following patches were refactored: Revert-use-subpath-for-coredns-only-for-default-repo.patch kubernetes-make-isolcpus-allocation-SMT-aware.patch kubelet-cpumanager-disable-CFS-quota-throttling.patch kubelet-cpumanager-keep-normal-containers-off-reserv.patch kubelet-cpumanager-infra-pods-use-system-reserved-CP.patch kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch enable-support-for-kubernetes-to-ignore-isolcpus.patch kubelet-CFS-quota-throttling-for-non-integer-cpulimit.patch Test Plan: PASS: Kubernetes package 1.27.5 builds properly. PASS: Run all Kubelet, kubeadm, kubectl make tests for affected code. Story: 2010878 Task: 48740 Depends-On: https://review.opendev.org/c/starlingx/integ/+/892988 Change-Id: I130d41d2b906826e5cb8186ec754e0e74a7d891a Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
358 lines
15 KiB
Diff
358 lines
15 KiB
Diff
From 1d1addb2c0a6bac1513a83431998f17aec76cd20 Mon Sep 17 00:00:00 2001
|
|
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
|
Date: Fri, 1 Sep 2023 07:15:25 -0400
|
|
Subject: [PATCH] kubelet cpumanager keep normal containers off reserved CPUs
|
|
|
|
When starting the kubelet process, two separate sets of reserved CPUs
|
|
may be specified. With this change CPUs reserved via
|
|
'--system-reserved=cpu'
|
|
or '--kube-reserved=cpu' will be ignored by kubernetes itself. A small
|
|
tweak to the default CPU affinity ensures that "normal" Kubernetes
|
|
pods won't run on the reserved CPUs.
|
|
|
|
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
|
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
|
|
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
|
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
|
---
|
|
pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 ++-
|
|
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 19 +++++++---
|
|
pkg/kubelet/cm/cpumanager/policy_static.go | 30 ++++++++++++---
|
|
.../cm/cpumanager/policy_static_test.go | 38 ++++++++++++++-----
|
|
4 files changed, 71 insertions(+), 22 deletions(-)
|
|
|
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
|
index 9e2dce60501..e2c89efeb2e 100644
|
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
|
@@ -192,7 +192,11 @@ func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconc
|
|
// exclusively allocated.
|
|
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
|
|
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
|
|
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions)
|
|
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
|
+ // This variable is primarily to make testing easier.
|
|
+ excludeReserved := true
|
|
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions, excludeReserved)
|
|
+
|
|
if err != nil {
|
|
return nil, fmt.Errorf("new static policy error: %w", err)
|
|
}
|
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
|
index 250f1eb014a..bb69b0ac084 100644
|
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
|
@@ -215,6 +215,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
|
}
|
|
|
|
func TestCPUManagerAdd(t *testing.T) {
|
|
+ testExcl := false
|
|
testPolicy, _ := NewStaticPolicy(
|
|
&topology.CPUTopology{
|
|
NumCPUs: 4,
|
|
@@ -230,7 +231,8 @@ func TestCPUManagerAdd(t *testing.T) {
|
|
0,
|
|
cpuset.New(),
|
|
topologymanager.NewFakeManager(),
|
|
- nil)
|
|
+ nil,
|
|
+ testExcl)
|
|
testCases := []struct {
|
|
description string
|
|
updateErr error
|
|
@@ -479,8 +481,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
|
},
|
|
}
|
|
|
|
+ testExcl := false
|
|
for _, testCase := range testCases {
|
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
|
|
|
mockState := &mockState{
|
|
assignments: testCase.stAssignments,
|
|
@@ -705,6 +708,7 @@ func TestCPUManagerRemove(t *testing.T) {
|
|
}
|
|
|
|
func TestReconcileState(t *testing.T) {
|
|
+ testExcl := false
|
|
testPolicy, _ := NewStaticPolicy(
|
|
&topology.CPUTopology{
|
|
NumCPUs: 8,
|
|
@@ -724,7 +728,8 @@ func TestReconcileState(t *testing.T) {
|
|
0,
|
|
cpuset.New(),
|
|
topologymanager.NewFakeManager(),
|
|
- nil)
|
|
+ nil,
|
|
+ testExcl)
|
|
|
|
testCases := []struct {
|
|
description string
|
|
@@ -1228,6 +1233,7 @@ func TestReconcileState(t *testing.T) {
|
|
// above test cases are without kubelet --reserved-cpus cmd option
|
|
// the following tests are with --reserved-cpus configured
|
|
func TestCPUManagerAddWithResvList(t *testing.T) {
|
|
+ testExcl := false
|
|
testPolicy, _ := NewStaticPolicy(
|
|
&topology.CPUTopology{
|
|
NumCPUs: 4,
|
|
@@ -1243,7 +1249,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
|
1,
|
|
cpuset.New(0),
|
|
topologymanager.NewFakeManager(),
|
|
- nil)
|
|
+ nil,
|
|
+ testExcl)
|
|
testCases := []struct {
|
|
description string
|
|
updateErr error
|
|
@@ -1368,6 +1375,7 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) {
|
|
}
|
|
|
|
func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
|
+ testExcl := false
|
|
nonePolicy, _ := NewNonePolicy(nil)
|
|
staticPolicy, _ := NewStaticPolicy(
|
|
&topology.CPUTopology{
|
|
@@ -1384,7 +1392,8 @@ func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
|
1,
|
|
cpuset.New(0),
|
|
topologymanager.NewFakeManager(),
|
|
- nil)
|
|
+ nil,
|
|
+ testExcl)
|
|
|
|
testCases := []struct {
|
|
description string
|
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
|
index 7a82de03da8..d25b2482537 100644
|
|
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
|
@@ -105,6 +105,8 @@ type staticPolicy struct {
|
|
topology *topology.CPUTopology
|
|
// set of CPUs that is not available for exclusive assignment
|
|
reservedCPUs cpuset.CPUSet
|
|
+ // If true, default CPUSet should exclude reserved CPUs
|
|
+ excludeReserved bool
|
|
// Superset of reservedCPUs. It includes not just the reservedCPUs themselves,
|
|
// but also any siblings of those reservedCPUs on the same physical die.
|
|
// NOTE: If the reserved set includes full physical CPUs from the beginning
|
|
@@ -125,7 +127,7 @@ var _ Policy = &staticPolicy{}
|
|
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
|
// assignments for exclusively pinned guaranteed containers after the main
|
|
// container process starts.
|
|
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string) (Policy, error) {
|
|
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, excludeReserved bool) (Policy, error) {
|
|
opts, err := NewStaticPolicyOptions(cpuPolicyOptions)
|
|
if err != nil {
|
|
return nil, err
|
|
@@ -140,6 +142,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
|
policy := &staticPolicy{
|
|
topology: topology,
|
|
affinity: affinity,
|
|
+ excludeReserved: excludeReserved,
|
|
cpusToReuse: make(map[string]cpuset.CPUSet),
|
|
options: opts,
|
|
}
|
|
@@ -201,7 +204,15 @@ func (p *staticPolicy) validateState(s state.State) error {
|
|
}
|
|
// state is empty initialize
|
|
allCPUs := p.topology.CPUDetails.CPUs()
|
|
- s.SetDefaultCPUSet(allCPUs)
|
|
+ if p.excludeReserved {
|
|
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
|
|
+ // unless explicitly affined.
|
|
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reservedCPUs))
|
|
+ } else {
|
|
+ s.SetDefaultCPUSet(allCPUs)
|
|
+ }
|
|
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
|
+ allCPUs, p.reservedCPUs, s.GetDefaultCPUSet())
|
|
return nil
|
|
}
|
|
|
|
@@ -209,11 +220,12 @@ func (p *staticPolicy) validateState(s state.State) error {
|
|
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
|
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
|
// - user tampered with file
|
|
- if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
|
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
|
- p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
|
+ if !p.excludeReserved {
|
|
+ if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
|
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
|
+ p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
|
+ }
|
|
}
|
|
-
|
|
// 2. Check if state for static policy is consistent
|
|
for pod := range tmpAssignments {
|
|
for container, cset := range tmpAssignments[pod] {
|
|
@@ -240,6 +252,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
|
}
|
|
}
|
|
totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...)
|
|
+ if p.excludeReserved {
|
|
+ totalKnownCPUs = totalKnownCPUs.Union(p.reservedCPUs)
|
|
+ }
|
|
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
|
|
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
|
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
|
|
@@ -374,6 +389,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
|
cpusInUse := getAssignedCPUsOfSiblings(s, podUID, containerName)
|
|
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
|
|
s.Delete(podUID, containerName)
|
|
+ if p.excludeReserved {
|
|
+ toRelease = toRelease.Difference(p.reservedCPUs)
|
|
+ }
|
|
// Mutate the shared pool, adding released cpus.
|
|
toRelease = toRelease.Difference(cpusInUse)
|
|
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
|
index 2c88dee0ba5..c4675394a93 100644
|
|
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
|
@@ -36,6 +36,7 @@ type staticPolicyTest struct {
|
|
description string
|
|
topo *topology.CPUTopology
|
|
numReservedCPUs int
|
|
+ excludeReserved bool
|
|
reservedCPUs *cpuset.CPUSet
|
|
podUID string
|
|
options map[string]string
|
|
@@ -69,7 +70,8 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest {
|
|
}
|
|
|
|
func TestStaticPolicyName(t *testing.T) {
|
|
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
|
+ testExcl := false
|
|
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
|
|
|
policyName := policy.Name()
|
|
if policyName != "static" {
|
|
@@ -99,6 +101,15 @@ func TestStaticPolicyStart(t *testing.T) {
|
|
stDefaultCPUSet: cpuset.New(),
|
|
expCSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
|
},
|
|
+ {
|
|
+ description: "empty cpuset exclude reserved",
|
|
+ topo: topoDualSocketHT,
|
|
+ numReservedCPUs: 2,
|
|
+ excludeReserved: true,
|
|
+ stAssignments: state.ContainerCPUAssignments{},
|
|
+ stDefaultCPUSet: cpuset.New(),
|
|
+ expCSet: cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
|
|
+ },
|
|
{
|
|
description: "reserved cores 0 & 6 are not present in available cpuset",
|
|
topo: topoDualSocketHT,
|
|
@@ -145,7 +156,8 @@ func TestStaticPolicyStart(t *testing.T) {
|
|
}
|
|
for _, testCase := range testCases {
|
|
t.Run(testCase.description, func(t *testing.T) {
|
|
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
|
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
|
+
|
|
policy := p.(*staticPolicy)
|
|
st := &mockState{
|
|
assignments: testCase.stAssignments,
|
|
@@ -614,7 +626,8 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
|
|
if testCase.reservedCPUs != nil {
|
|
cpus = testCase.reservedCPUs.Clone()
|
|
}
|
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, tm, testCase.options)
|
|
+ testExcl := false
|
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, tm, testCase.options, testExcl)
|
|
|
|
st := &mockState{
|
|
assignments: testCase.stAssignments,
|
|
@@ -685,7 +698,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
|
|
}
|
|
|
|
for _, testCase := range testCases {
|
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
|
|
|
st := &mockState{
|
|
assignments: testCase.stAssignments,
|
|
@@ -717,6 +730,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
|
|
}
|
|
|
|
func TestStaticPolicyRemove(t *testing.T) {
|
|
+ excludeReserved := false
|
|
testCases := []staticPolicyTest{
|
|
{
|
|
description: "SingleSocketHT, DeAllocOneContainer",
|
|
@@ -775,7 +789,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
|
}
|
|
|
|
for _, testCase := range testCases {
|
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
|
|
|
st := &mockState{
|
|
assignments: testCase.stAssignments,
|
|
@@ -797,6 +811,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
|
}
|
|
|
|
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
|
+ excludeReserved := false
|
|
testCases := []struct {
|
|
description string
|
|
topo *topology.CPUTopology
|
|
@@ -865,7 +880,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
|
},
|
|
}
|
|
for _, tc := range testCases {
|
|
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
|
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
|
policy := p.(*staticPolicy)
|
|
st := &mockState{
|
|
assignments: tc.stAssignments,
|
|
@@ -937,9 +952,11 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
|
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
|
},
|
|
}
|
|
+ testExcl := false
|
|
for _, testCase := range testCases {
|
|
t.Run(testCase.description, func(t *testing.T) {
|
|
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
|
|
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
|
+
|
|
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
|
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
|
testCase.description, testCase.expNewErr, err)
|
|
@@ -979,7 +996,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
|
numReservedCPUs: 1,
|
|
reserved: cpuset.New(0),
|
|
stAssignments: state.ContainerCPUAssignments{},
|
|
- stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
|
|
+ stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7),
|
|
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
|
expErr: fmt.Errorf("not enough cpus available to satisfy request"),
|
|
expCPUAlloc: false,
|
|
@@ -991,7 +1008,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
|
numReservedCPUs: 2,
|
|
reserved: cpuset.New(0, 1),
|
|
stAssignments: state.ContainerCPUAssignments{},
|
|
- stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
|
|
+ stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7),
|
|
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
|
expErr: nil,
|
|
expCPUAlloc: true,
|
|
@@ -1015,8 +1032,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
|
},
|
|
}
|
|
|
|
+ testExcl := true
|
|
for _, testCase := range testCases {
|
|
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
|
|
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
|
|
|
st := &mockState{
|
|
assignments: testCase.stAssignments,
|
|
--
|
|
2.25.1
|
|
|