integ/kubernetes/kubernetes-1.28.4/debian/deb_folder/patches/kubelet-cpumanager-infra-pods-use-system-reserved-CP.patch
Commit bc28897dc5 by Saba Touheed Mujawar (2023-12-18): Add kubernetes 1.28.4 patches
This change ports the following patches to kubernetes 1.28.4; some were
refactored slightly to accommodate upstream changes.

The following patches were applied cleanly:
kubelet-sort-isolcpus-allocation-when-SMT-enabled.patch
kubelet-cpumanager-disable-CFS-quota-throttling.patch
kubelet-cpumanager-keep-normal-containers-off-reserv.patch
kubelet-cpumanager-infra-pods-use-system-reserved-CP.patch
Affinity-of-guaranteed-pod-to-non-isolated-CPUs.patch
kubelet-CFS-quota-throttling-for-non-integer-cpulimit.patch

The following patches were refactored:
kubeadm-create-platform-pods-with-zero-CPU-resources.patch
kubernetes-make-isolcpus-allocation-SMT-aware.patch
kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch
enable-support-for-kubernetes-to-ignore-isolcpus.patch

Note: Revert-use-subpath-for-coredns-only-for-default-repo.patch
      is removed because the change that updates the dns
      imageRepository is now handled in the ansible playbook:
      https://review.opendev.org/c/starlingx/ansible-playbooks/+/903499/1/playbookconfig/src/playbooks/roles/common/files/kubeadm.yaml.j2

Test Plan:
PASS: Kubernetes package 1.28.4 builds properly.
PASS: Run all Kubelet, kubeadm, kubectl make tests for affected code.
PASS: build-iso successful with multiple kubernetes versions.
PASS: Install iso with k8s 1.28 default and test all patches.

Story: 2010878
Task: 49209

Change-Id: I7693ad2fcc93d146eeae882d44f83b60589565db
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>

From 000e637a3298cf488f3d9dc144ab835ce7e93068 Mon Sep 17 00:00:00 2001
From: Boovan Rajendran <boovan.rajendran@windriver.com>
Date: Tue, 5 Sep 2023 06:27:39 -0400
Subject: [PATCH] kubelet cpumanager infra pods use system reserved CPUs

This assigns system infrastructure pods to the "reserved" cpuset
to isolate them from the shared pool of CPUs.

Infrastructure pods include any pods that belong to the kube-system,
armada, cert-manager, vault, platform-deployment-manager, portieris,
notification, flux-helm or metrics-server namespaces.

The implementation is a bit simplistic; it is assumed that the
"reserved" cpuset is large enough to handle all infrastructure pods'
CPU allocations.

This also prevents infrastructure pods from using Guaranteed
resources.
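
For illustration, a minimal standalone sketch of the namespace check
described above; it mirrors the isKubeInfra() helper added in the diff
below, with a plain struct standing in for *v1.Pod so the snippet runs
outside the kubelet tree:

    // sketch only: pod is a stand-in for *v1.Pod
    package main

    import "fmt"

    // Namespaces treated as platform infrastructure (as listed above).
    var infraNamespaces = [...]string{
        "kube-system", "armada", "cert-manager", "platform-deployment-manager",
        "portieris", "vault", "notification", "flux-helm", "metrics-server",
    }

    type pod struct {
        Name      string
        Namespace string
    }

    // isKubeInfra reports whether the pod lives in one of the infra
    // namespaces; the static policy then pins it to the "reserved"
    // cpuset instead of granting it exclusive CPUs.
    func isKubeInfra(p *pod) bool {
        for _, namespace := range infraNamespaces {
            if namespace == p.Namespace {
                return true
            }
        }
        return false
    }

    func main() {
        fmt.Println(isKubeInfra(&pod{Name: "coredns", Namespace: "kube-system"})) // true: reserved cpuset
        fmt.Println(isKubeInfra(&pod{Name: "nginx", Namespace: "default"}))       // false: normal CPU manager path
    }
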
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
Signed-off-by: Gleb Aronsky <gleb.aronsky@windriver.com>
Signed-off-by: Thiago Miranda <ThiagoOliveira.Miranda@windriver.com>
Signed-off-by: Kaustubh Dhokte <kaustubh.dhokte@windriver.com>
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
---
pkg/kubelet/cm/cpumanager/policy_static.go | 50 ++++++++++++++++---
.../cm/cpumanager/policy_static_test.go | 19 ++++++-
2 files changed, 62 insertions(+), 7 deletions(-)
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index 895c707600d..9b7545c2207 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -62,6 +62,11 @@ func (e SMTAlignmentError) Type() string {
return ErrorSMTAlignment
}
+// Define namespaces used by platform infrastructure pods
+var infraNamespaces = [...]string{
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification", "flux-helm", "metrics-server",
+}
+
// staticPolicy is a CPU manager policy that does not change CPU
// assignments for exclusively pinned guaranteed containers after the main
// container process starts.
@@ -140,11 +145,11 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
klog.InfoS("Static policy created with configuration", "options", opts)
policy := &staticPolicy{
- topology: topology,
- affinity: affinity,
+ topology: topology,
+ affinity: affinity,
excludeReserved: excludeReserved,
- cpusToReuse: make(map[string]cpuset.CPUSet),
- options: opts,
+ cpusToReuse: make(map[string]cpuset.CPUSet),
+ options: opts,
}
allCPUs := topology.CPUDetails.CPUs()
@@ -222,8 +227,8 @@ func (p *staticPolicy) validateState(s state.State) error {
// - user tampered with file
if !p.excludeReserved {
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
- p.reservedCPUs.String(), tmpDefaultCPUset.String())
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
+ p.reservedCPUs.String(), tmpDefaultCPUset.String())
}
}
// 2. Check if state for static policy is consistent
@@ -302,6 +307,25 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
}
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
+ // Process infra pods before guaranteed pods
+ if isKubeInfra(pod) {
+ // Container belongs in reserved pool.
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)", pod.Namespace, string(pod.UID), pod.Name, container.Name)
+ return nil
+ }
+
+ cpuset := p.reservedCPUs
+ if cpuset.IsEmpty() {
+ // If this happens then someone messed up.
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs)
+ }
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
+ return nil
+ }
+
numCPUs := p.guaranteedCPUs(pod, container)
if numCPUs == 0 {
// container belongs in the shared pool (nothing to do; use default cpuset)
@@ -453,6 +477,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
return 0
}
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
+ if isKubeInfra(pod) {
+ return 0
+ }
// Safe downcast to do for all systems with < 2.1 billion CPUs.
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
// https://golang.org/ref/spec#Numeric_types
@@ -671,6 +699,16 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
return hints
}
+// check if a given pod is in a platform infrastructure namespace
+func isKubeInfra(pod *v1.Pod) bool {
+ for _, namespace := range infraNamespaces {
+ if namespace == pod.Namespace {
+ return true
+ }
+ }
+ return false
+}
+
// isHintSocketAligned function return true if numa nodes in hint are socket aligned.
func (p *staticPolicy) isHintSocketAligned(hint topologymanager.TopologyHint, minAffinitySize int) bool {
numaNodesBitMask := hint.NUMANodeAffinity.GetBits()
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
index deb2f4c7982..b864c6c57c6 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
@@ -988,7 +988,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
}
func TestStaticPolicyAddWithResvList(t *testing.T) {
-
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
+ infraPod.Namespace = "kube-system"
testCases := []staticPolicyTestWithResvList{
{
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
@@ -1030,6 +1031,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
expCPUAlloc: true,
expCSet: cpuset.New(4, 5),
},
+ {
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
+ topo: topoSingleSocketHT,
+ numReservedCPUs: 2,
+ reserved: cpuset.New(0, 1),
+ stAssignments: state.ContainerCPUAssignments{
+ "fakePod": map[string]cpuset.CPUSet{
+ "fakeContainer100": cpuset.New(2, 3, 6, 7),
+ },
+ },
+ stDefaultCPUSet: cpuset.New(4, 5),
+ pod: infraPod,
+ expErr: nil,
+ expCPUAlloc: true,
+ expCSet: cpuset.New(0, 1),
+ },
}
testExcl := true
--
2.25.1