integ/kubernetes/kubernetes-1.24.4/debian/deb_folder/patches/kubelet-cpumanager-infra-pods-use-system-reserved-CP.patch
Sachin Gopala Krishna d0e346c423 Debian: Add kubernetes 1.24.4 remaining patches
This ports the remaining kubernetes 1.24.4 patches.

The following patches were refactored slightly to allow for
upstream changes:
kubelet-cpumanager-disable-CFS-quota-throttling-for-.patch
kubelet-cpumanager-keep-normal-containers-off-reserv.patch

The following patch was added to get tests working again:
cpumanager-policy-static-test-refactor.patch

Test-plan:

1. Revert-use-subpath-for-coredns-only-for-default-repo:
   Kubeadm commands worked as expected

2. enable-support-for-kubernetes-to-ignore-isolcpus:
   Set kube-cpu-mgr-policy as static and allocated some isolcpus.
   i.  Deployed a pod with dedicated CPU and verified that it is not
       affined to isolcpus.
   ii. With kube-ignore-isol-cpus set to enabled, deployed a pod with a
       dedicated CPU and verified that it is allocated to an isolated CPU.

3. kubeadm-create-platform-pods-with-zero-CPU-resources:
   Verified the usage of CPUs is 0 in coredns, kube-controller-manager,
   kube-scheduler, and kube-apiserver pods of kube-system namespace.

4. kubelet-cpumanager-disable-CFS-quota-throttling-for- :
   Verified that pods in the "Guaranteed" QoS class, on hosts that
   have "kube-cpu-mgr-policy=static", have cpu.cfs_quota_us set to -1.

5. kubelet-cpumanager-infra-pods-use-system-reserved-CP:
   Verified that platform pods are affined to platform CPUs

6. kubelet-cpumanager-introduce-concept-of-isolated-CPU:
   Verified pods can allocate isolated CPUs and are affined to them.
   Verified pods allocating application CPUs don't get isolated CPUs.
   Verified that pods allocating dedicated and isolated CPUs are affined
   to the dedicated CPUs.
   Verified that pods allocating non-dedicated and isolated CPUs are
   affined to the isolated CPUs.

7. kubelet-cpumanager-keep-normal-containers-off-reserv:
   Verified that pods which are not in a platform namespace are affined to
   application or application isolated CPUs

8. kubelet-sort-isolcpus-allocation-when-SMT-enabled:
   Verified after enabling SMT multithreading that isolated CPUs are
   allocated as lowest-numbered SMT siblings first and then
   higher-numbered SMT siblings, then any single thread.

9. kubernetes-make-isolcpus-allocation-SMT-aware:
   Verified after enabling SMT multithreading that isolated CPUs are
   allocated as pairs of SMT siblings first, then already-existing
   single SMT siblings, then we allocate one of a pair of SMT siblings
   as a last resort.

Story: 2010301
Task: 46315

Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
Change-Id: Ic8f3d53f58f09ae13f9c299fb31e5f91a0a5bc9f
2022-10-24 13:21:18 -06:00

151 lines
6.1 KiB
Diff

From 70399c6eebe5216332e77b0f56ace7028168c726 Mon Sep 17 00:00:00 2001
From: Gleb Aronsky <gleb.aronsky@windriver.com>
Date: Mon, 21 Mar 2022 17:25:07 -0300
Subject: [PATCH] kubelet cpumanager infra pods use system reserved CPUs
This assigns system infrastructure pods to the "reserved" cpuset
to isolate them from the shared pool of CPUs.
Infrastructure pods include any pods that belong to the kube-system,
armada, cert-manager, vault, platform-deployment-manager, portieris,
notification, flux-helm or metrics-server namespaces.
The implementation is a bit simplistic: it is assumed that the
"reserved" cpuset is large enough to handle all infrastructure pods
CPU allocations.
This also prevents infrastructure pods from using Guaranteed resources.
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
Signed-off-by: Gleb Aronsky <gleb.aronsky@windriver.com>
Signed-off-by: Thiago Miranda <ThiagoOliveira.Miranda@windriver.com>
Signed-off-by: Kaustubh Dhokte <kaustubh.dhokte@windriver.com>
---
pkg/kubelet/cm/cpumanager/policy_static.go | 47 +++++++++++++++++--
.../cm/cpumanager/policy_static_test.go | 19 +++++++-
2 files changed, 61 insertions(+), 5 deletions(-)
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index 09e0fc0ea0e..a3c93a896df 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -53,6 +53,11 @@ func (e SMTAlignmentError) Type() string {
return ErrorSMTAlignment
}
+// Define namespaces used by platform infrastructure pods
+var infraNamespaces = [...]string{
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification", "flux-helm", "metrics-server",
+}
+
// staticPolicy is a CPU manager policy that does not change CPU
// assignments for exclusively pinned guaranteed containers after the main
// container process starts.
@@ -121,10 +126,11 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
klog.InfoS("Static policy created with configuration", "options", opts)
policy := &staticPolicy{
- topology: topology,
- affinity: affinity,
- cpusToReuse: make(map[string]cpuset.CPUSet),
- options: opts,
+ topology: topology,
+ affinity: affinity,
+ excludeReserved: excludeReserved,
+ cpusToReuse: make(map[string]cpuset.CPUSet),
+ options: opts,
}
allCPUs := topology.CPUDetails.CPUs()
@@ -263,6 +269,25 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
}
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
+ // Process infra pods before guaranteed pods
+ if isKubeInfra(pod) {
+ // Container belongs in reserved pool.
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)", pod.Namespace, string(pod.UID), pod.Name, container.Name)
+ return nil
+ }
+
+ cpuset := p.reserved
+ if cpuset.IsEmpty() {
+ // If this happens then someone messed up.
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
+ }
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
+ return nil
+ }
+
if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
klog.InfoS("Static policy: Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
// container belongs in an exclusively allocated pool
@@ -382,6 +407,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
return 0
}
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
+ if isKubeInfra(pod) {
+ return 0
+ }
// Safe downcast to do for all systems with < 2.1 billion CPUs.
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
// https://golang.org/ref/spec#Numeric_types
@@ -595,3 +624,13 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
return hints
}
+
+// check if a given pod is in a platform infrastructure namespace
+func isKubeInfra(pod *v1.Pod) bool {
+ for _, namespace := range infraNamespaces {
+ if namespace == pod.Namespace {
+ return true
+ }
+ }
+ return false
+}
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
index 81251e576fd..d4b4b790210 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
@@ -886,7 +886,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
}
func TestStaticPolicyAddWithResvList(t *testing.T) {
-
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
+ infraPod.Namespace = "kube-system"
testCases := []staticPolicyTestWithResvList{
{
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
@@ -928,6 +929,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
expCPUAlloc: true,
expCSet: cpuset.NewCPUSet(4, 5),
},
+ {
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
+ topo: topoSingleSocketHT,
+ numReservedCPUs: 2,
+ reserved: cpuset.NewCPUSet(0, 1),
+ stAssignments: state.ContainerCPUAssignments{
+ "fakePod": map[string]cpuset.CPUSet{
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
+ },
+ },
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
+ pod: infraPod,
+ expErr: nil,
+ expCPUAlloc: true,
+ expCSet: cpuset.NewCPUSet(0, 1),
+ },
}
testExcl := true
--
2.25.1