integ/kubernetes/kubernetes-1.24.4/debian/deb_folder/patches/kubelet-cpumanager-keep-normal-containers-off-reserv.patch
Sachin Gopala Krishna d0e346c423 Debian: Add kubernetes 1.24.4 remaining patches
This ports the remaining kubernetes 1.24.4 patches.

The following patches were refactored slightly to allow for
upstream changes:
kubelet-cpumanager-disable-CFS-quota-throttling-for-.patch
kubelet-cpumanager-keep-normal-containers-off-reserv.patch

The following patch was added to get tests working again:
cpumanager-policy-static-test-refactor.patch

Test-plan:

1. Revert-use-subpath-for-coredns-only-for-default-repo:
   Kubeadm commands worked as expected

2. enable-support-for-kubernetes-to-ignore-isolcpus:
   Set kube-cpu-mgr-policy to static and allocated some isolcpus.
   i.  Deployed a pod with dedicated CPUs and verified that it is not
       affined to isolcpus.
   ii. With kube-ignore-isol-cpus enabled, deployed a pod with
       dedicated CPUs and verified that it is allocated to isolated
       CPUs.

3. kubeadm-create-platform-pods-with-zero-CPU-resources:
   Verified that the CPU resource requests are zero for the coredns,
   kube-controller-manager, kube-scheduler, and kube-apiserver pods in
   the kube-system namespace.

4. kubelet-cpumanager-disable-CFS-quota-throttling-for- :
   Verified that pods in the "Guaranteed" QoS class, on hosts where
   kube-cpu-mgr-policy=static, have cpu.cfs_quota_us set to -1 (see
   the cfsQuota sketch after this list).

5. kubelet-cpumanager-infra-pods-use-system-reserved-CP:
   Verified that platform pods are affined to platform CPUs.

6. kubelet-cpumanager-introduce-concept-of-isolated-CPU:
   Verified pods can allocate isolated CPUs and are affined to them.
   Verified pods allocating application CPUs don't get isolated CPUs.
   Verified that pods allocating dedicated and isolated CPUs are affined
   to the dedicated CPUs.
   Verified that pods allocating non-dedicated and isolated CPUs are
   affined to the isolated CPUs (see the affinityPool sketch after
   this list).

7. kubelet-cpumanager-keep-normal-containers-off-reserv:
   Verified that pods which are not in a platform namespace are affined
   to application or application-isolated CPUs.

8. kubelet-sort-isolcpus-allocation-when-SMT-enabled:
   Verified, after enabling SMT, that isolated CPUs are allocated
   lowest-numbered SMT siblings first, then higher-numbered SMT
   siblings, then any single thread.

9. kubernetes-make-isolcpus-allocation-SMT-aware:
   Verified, after enabling SMT, that isolated CPUs are allocated as
   pairs of SMT siblings first, then already-lone single SMT siblings,
   and one CPU of a free sibling pair only as a last resort (see the
   pickIsolated sketch after this list).
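
As a rough illustration of the behaviour checked in item 4 (a hedged
sketch with invented names, not the kubelet's actual code): with the
static CPU manager policy, Guaranteed pods keep their exclusive CPUs
unthrottled, which shows up as cpu.cfs_quota_us being -1 in the pod's
cgroup.

    package main

    import "fmt"

    // cfsQuotaUnlimited is the cpu.cfs_quota_us value meaning "no quota".
    const cfsQuotaUnlimited int64 = -1

    // cfsQuota mimics the decision: static policy plus Guaranteed QoS
    // disables CFS throttling; everything else keeps the computed quota.
    func cfsQuota(qosClass string, staticPolicy bool, computed int64) int64 {
        if staticPolicy && qosClass == "Guaranteed" {
            return cfsQuotaUnlimited
        }
        return computed
    }

    func main() {
        fmt.Println(cfsQuota("Guaranteed", true, 400000)) // -1
        fmt.Println(cfsQuota("Burstable", true, 400000))  // 400000
    }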
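
The allocation rules verified in item 6 reduce to a small precedence
decision. A hedged sketch (invented names, not the isolated-CPU
patch's real data structures):

    package main

    import "fmt"

    // affinityPool picks which CPU pool a container is affined to:
    // dedicated CPUs win over isolated ones, and isolated ones win
    // over the shared application pool.
    func affinityPool(hasDedicated, hasIsolated bool) string {
        switch {
        case hasDedicated:
            return "dedicated"
        case hasIsolated:
            return "isolated"
        default:
            return "shared (application)"
        }
    }

    func main() {
        fmt.Println(affinityPool(true, true))   // dedicated
        fmt.Println(affinityPool(false, true))  // isolated
        fmt.Println(affinityPool(false, false)) // shared (application)
    }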
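
The SMT-aware ordering verified in items 8 and 9 can be sketched as
follows (illustrative only; the real code lives in the two isolcpus
patches and operates on kubelet topology types):

    package main

    import (
        "fmt"
        "sort"
    )

    // pickIsolated allocates n isolated CPUs: whole pairs of free SMT
    // siblings first, then CPUs whose sibling is already taken, and it
    // splits a free pair only as a last resort. siblings maps a core ID
    // to its thread IDs; free marks the currently unallocated threads.
    func pickIsolated(siblings map[int][]int, free map[int]bool, n int) []int {
        var pairs [][]int // cores whose threads are all free
        var singles []int // free threads whose sibling is taken
        for _, threads := range siblings {
            var avail []int
            for _, t := range threads {
                if free[t] {
                    avail = append(avail, t)
                }
            }
            if len(avail) == len(threads) && len(avail) > 1 {
                pairs = append(pairs, avail)
            } else {
                singles = append(singles, avail...)
            }
        }
        sort.Slice(pairs, func(i, j int) bool { return pairs[i][0] < pairs[j][0] })
        sort.Ints(singles)

        var picked []int
        for len(pairs) > 0 && n-len(picked) >= len(pairs[0]) {
            picked = append(picked, pairs[0]...) // 1. whole sibling pairs
            pairs = pairs[1:]
        }
        for _, t := range singles { // 2. already-lone siblings
            if len(picked) < n {
                picked = append(picked, t)
            }
        }
        for _, pair := range pairs { // 3. last resort: split a free pair
            for _, t := range pair {
                if len(picked) < n {
                    picked = append(picked, t)
                }
            }
        }
        return picked
    }

    func main() {
        siblings := map[int][]int{0: {0, 4}, 1: {1, 5}, 2: {2, 6}}
        free := map[int]bool{0: true, 4: true, 1: true, 2: true, 6: true}
        fmt.Println(pickIsolated(siblings, free, 3)) // [0 4 1]: pair 0/4, then lone 1
    }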

Story: 2010301
Task: 46315

Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
Change-Id: Ic8f3d53f58f09ae13f9c299fb31e5f91a0a5bc9f
2022-10-24 13:21:18 -06:00

From c827ea3b075774c9c72c33d38c973d05276cb7ac Mon Sep 17 00:00:00 2001
From: Sachin Gopala Krishna <saching.krishna@windriver.com>
Date: Mon, 3 Oct 2022 19:22:14 -0400
Subject: [PATCH] kubelet cpumanager keep normal containers off reserved CPUs

When starting the kubelet process, two separate sets of reserved CPUs
may be specified. With this change, CPUs reserved via
'--system-reserved=cpu' or '--kube-reserved=cpu' are excluded from the
CPU manager's default cpuset. A small tweak to the default CPU
affinity ensures that "normal" Kubernetes pods won't run on the
reserved CPUs.
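
As a minimal sketch of the mechanism (illustrative types only, not the
kubelet's cpuset package): the default cpuset handed to ordinary
containers becomes all online CPUs minus the reserved set, mirroring
the allCPUs.Difference(p.reserved) call in the diff below.

    package main

    import "fmt"

    // cpuSet is a toy stand-in for the kubelet's cpuset type.
    type cpuSet map[int]bool

    // difference returns the CPUs in a that are not in b.
    func difference(a, b cpuSet) cpuSet {
        out := cpuSet{}
        for c := range a {
            if !b[c] {
                out[c] = true
            }
        }
        return out
    }

    func main() {
        all := cpuSet{0: true, 1: true, 2: true, 3: true}
        reserved := cpuSet{0: true} // e.g. CPU 0 via --system-reserved=cpu
        fmt.Println(difference(all, reserved)) // map[1:true 2:true 3:true]
    }
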
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
---
pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 +++-
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 11 ++++--
pkg/kubelet/cm/cpumanager/policy_static.go | 29 +++++++++++----
.../cm/cpumanager/policy_static_test.go | 36 ++++++++++++++-----
4 files changed, 63 insertions(+), 19 deletions(-)
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index df431b06601..884c7323a79 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -191,7 +191,11 @@ func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconc
// exclusively allocated.
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions)
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
+ // This variable is primarily to make testing easier.
+ excludeReserved := true
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions, excludeReserved)
+
if err != nil {
return nil, fmt.Errorf("new static policy error: %w", err)
}
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
index 9b3e24fc3b2..2c8349662c4 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
@@ -215,6 +215,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
}
func TestCPUManagerAdd(t *testing.T) {
+ testExcl := false
testPolicy, _ := NewStaticPolicy(
&topology.CPUTopology{
NumCPUs: 4,
@@ -230,7 +231,8 @@ func TestCPUManagerAdd(t *testing.T) {
0,
cpuset.NewCPUSet(),
topologymanager.NewFakeManager(),
- nil)
+ nil,
+ testExcl)
testCases := []struct {
description string
updateErr error
@@ -479,8 +481,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
},
}
+ testExcl := false
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testExcl)
mockState := &mockState{
assignments: testCase.stAssignments,
@@ -1228,6 +1231,7 @@ func TestReconcileState(t *testing.T) {
// above test cases are without kubelet --reserved-cpus cmd option
// the following tests are with --reserved-cpus configured
func TestCPUManagerAddWithResvList(t *testing.T) {
+ testExcl := false
testPolicy, _ := NewStaticPolicy(
&topology.CPUTopology{
NumCPUs: 4,
@@ -1243,7 +1247,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
1,
cpuset.NewCPUSet(0),
topologymanager.NewFakeManager(),
- nil)
+ nil,
+ testExcl)
testCases := []struct {
description string
updateErr error
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index a872b389c46..09e0fc0ea0e 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -96,6 +96,8 @@ type staticPolicy struct {
topology *topology.CPUTopology
// set of CPUs that is not available for exclusive assignment
reserved cpuset.CPUSet
+ // If true, default CPUSet should exclude reserved CPUs
+ excludeReserved bool
// topology manager reference to get container Topology affinity
affinity topologymanager.Store
// set of CPUs to reuse across allocations in a pod
@@ -110,7 +112,7 @@ var _ Policy = &staticPolicy{}
// NewStaticPolicy returns a CPU manager policy that does not change CPU
// assignments for exclusively pinned guaranteed containers after the main
// container process starts.
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string) (Policy, error) {
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, excludeReserved bool) (Policy, error) {
opts, err := NewStaticPolicyOptions(cpuPolicyOptions)
if err != nil {
return nil, err
@@ -172,7 +174,15 @@ func (p *staticPolicy) validateState(s state.State) error {
}
// state is empty initialize
allCPUs := p.topology.CPUDetails.CPUs()
- s.SetDefaultCPUSet(allCPUs)
+ if p.excludeReserved {
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
+ // unless explicitly affined.
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reserved))
+ } else {
+ s.SetDefaultCPUSet(allCPUs)
+ }
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
+ allCPUs, p.reserved, s.GetDefaultCPUSet())
return nil
}
@@ -180,11 +190,12 @@ func (p *staticPolicy) validateState(s state.State) error {
// 1. Check if the reserved cpuset is not part of default cpuset because:
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
// - user tampered with file
- if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
- p.reserved.String(), tmpDefaultCPUset.String())
+ if !p.excludeReserved {
+ if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
+ p.reserved.String(), tmpDefaultCPUset.String())
+ }
}
-
// 2. Check if state for static policy is consistent
for pod := range tmpAssignments {
for container, cset := range tmpAssignments[pod] {
@@ -211,6 +222,9 @@ func (p *staticPolicy) validateState(s state.State) error {
}
}
totalKnownCPUs = totalKnownCPUs.UnionAll(tmpCPUSets)
+ if p.excludeReserved {
+ totalKnownCPUs = totalKnownCPUs.Union(p.reserved)
+ }
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
@@ -310,6 +324,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
cpusInUse := getAssignedCPUsOfSiblings(s, podUID, containerName)
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
s.Delete(podUID, containerName)
+ if p.excludeReserved {
+ toRelease = toRelease.Difference(p.reserved)
+ }
// Mutate the shared pool, adding released cpus.
toRelease = toRelease.Difference(cpusInUse)
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
index 4e3255fff01..edfb40d880e 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
@@ -33,6 +33,7 @@ type staticPolicyTest struct {
description string
topo *topology.CPUTopology
numReservedCPUs int
+ excludeReserved bool
podUID string
options map[string]string
containerName string
@@ -64,7 +65,8 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest {
}
func TestStaticPolicyName(t *testing.T) {
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
+ testExcl := false
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testExcl)
policyName := policy.Name()
if policyName != "static" {
@@ -94,6 +96,15 @@ func TestStaticPolicyStart(t *testing.T) {
stDefaultCPUSet: cpuset.NewCPUSet(),
expCSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
},
+ {
+ description: "empty cpuset exclude reserved",
+ topo: topoDualSocketHT,
+ numReservedCPUs: 2,
+ excludeReserved: true,
+ stAssignments: state.ContainerCPUAssignments{},
+ stDefaultCPUSet: cpuset.NewCPUSet(),
+ expCSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
+ },
{
description: "reserved cores 0 & 6 are not present in available cpuset",
topo: topoDualSocketHT,
@@ -140,7 +151,8 @@ func TestStaticPolicyStart(t *testing.T) {
}
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
+
policy := p.(*staticPolicy)
st := &mockState{
assignments: testCase.stAssignments,
@@ -517,7 +529,8 @@ func TestStaticPolicyAdd(t *testing.T) {
}
func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.options)
+ testExcl := false
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.options, testExcl)
st := &mockState{
assignments: testCase.stAssignments,
@@ -583,7 +596,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
}
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
st := &mockState{
assignments: testCase.stAssignments,
@@ -615,6 +628,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
}
func TestStaticPolicyRemove(t *testing.T) {
+ excludeReserved := false
testCases := []staticPolicyTest{
{
description: "SingleSocketHT, DeAllocOneContainer",
@@ -695,6 +709,7 @@ func TestStaticPolicyRemove(t *testing.T) {
}
func TestTopologyAwareAllocateCPUs(t *testing.T) {
+ excludeReserved := false
testCases := []struct {
description string
topo *topology.CPUTopology
@@ -763,7 +778,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
},
}
for _, tc := range testCases {
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, excludeReserved)
policy := p.(*staticPolicy)
st := &mockState{
assignments: tc.stAssignments,
@@ -835,9 +850,11 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
},
}
+ testExcl := false
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
+
if !reflect.DeepEqual(err, testCase.expNewErr) {
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
testCase.description, testCase.expNewErr, err)
@@ -877,7 +894,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
numReservedCPUs: 1,
reserved: cpuset.NewCPUSet(0),
stAssignments: state.ContainerCPUAssignments{},
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
+ stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
expErr: fmt.Errorf("not enough cpus available to satisfy request"),
expCPUAlloc: false,
@@ -889,7 +906,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
numReservedCPUs: 2,
reserved: cpuset.NewCPUSet(0, 1),
stAssignments: state.ContainerCPUAssignments{},
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
+ stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
expErr: nil,
expCPUAlloc: true,
@@ -913,8 +930,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
},
}
+ testExcl := true
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
st := &mockState{
assignments: testCase.stAssignments,
--
2.25.1