bc28897dc5
This change ports the following Kubernetes 1.28.4 patches, some of which were refactored slightly to allow for upstream changes. The following patches were applied cleanly: kubelet-sort-isolcpus-allocation-when-SMT-enabled.patch, kubelet-cpumanager-disable-CFS-quota-throttling.patch, kubelet-cpumanager-keep-normal-containers-off-reserv.patch, kubelet-cpumanager-infra-pods-use-system-reserved-CP.patch, Affinity-of-guaranteed-pod-to-non-isolated-CPUs.patch, kubelet-CFS-quota-throttling-for-non-integer-cpulimit.patch. The following patches were refactored: kubeadm-create-platform-pods-with-zero-CPU-resources.patch, kubernetes-make-isolcpus-allocation-SMT-aware.patch, kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch, enable-support-for-kubernetes-to-ignore-isolcpus.patch. Note: Revert-use-subpath-for-coredns-only-for-default-repo.patch is removed because the change that updates the DNS imageRepository is now taken care of in the ansible playbook: https://review.opendev.org/c/starlingx/ansible-playbooks/+/903499/1/playbookconfig/src/playbooks/roles/common/files/kubeadm.yaml.j2 Test Plan: PASS: Kubernetes package 1.28.4 builds properly. PASS: Run all Kubelet, kubeadm, kubectl make tests for affected code. PASS: build-iso successful with multiple kubernetes versions. PASS: Install iso with k8s 1.28 default and test all patches. Story: 2010878 Task: 49209 Change-Id: I7693ad2fcc93d146eeae882d44f83b60589565db Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
168 lines
7.1 KiB
Diff
168 lines
7.1 KiB
Diff
From 000e637a3298cf488f3d9dc144ab835ce7e93068 Mon Sep 17 00:00:00 2001
|
|
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
|
Date: Tue, 5 Sep 2023 06:27:39 -0400
|
|
Subject: [PATCH] kubelet cpumanager infra pods use system reserved CPUs
|
|
|
|
This assigns system infrastructure pods to the "reserved" cpuset
|
|
to isolate them from the shared pool of CPUs.
|
|
|
|
Infrastructure pods include any pods that belong to the kube-system,
|
|
armada, cert-manager, vault, platform-deployment-manager, portieris,
|
|
notification, flux-helm or metrics-server namespaces.
|
|
|
|
The implementation is a bit simplistic: it is assumed that the
|
|
"reserved" cpuset is large enough to handle all infrastructure pods
|
|
CPU allocations.
|
|
|
|
This also prevents infrastructure pods from using Guaranteed resources.
|
|
|
|
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
|
Signed-off-by: Gleb Aronsky <gleb.aronsky@windriver.com>
|
|
Signed-off-by: Thiago Miranda <ThiagoOliveira.Miranda@windriver.com>
|
|
Signed-off-by: Kaustubh Dhokte <kaustubh.dhokte@windriver.com>
|
|
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
|
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
|
|
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
|
---
|
|
pkg/kubelet/cm/cpumanager/policy_static.go | 50 ++++++++++++++++---
|
|
.../cm/cpumanager/policy_static_test.go | 19 ++++++-
|
|
2 files changed, 62 insertions(+), 7 deletions(-)
|
|
|
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
|
index 895c707600d..9b7545c2207 100644
|
|
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
|
@@ -62,6 +62,11 @@ func (e SMTAlignmentError) Type() string {
|
|
return ErrorSMTAlignment
|
|
}
|
|
|
|
+// Define namespaces used by platform infrastructure pods
|
|
+var infraNamespaces = [...]string{
|
|
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification", "flux-helm", "metrics-server",
|
|
+}
|
|
+
|
|
// staticPolicy is a CPU manager policy that does not change CPU
|
|
// assignments for exclusively pinned guaranteed containers after the main
|
|
// container process starts.
|
|
@@ -140,11 +145,11 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
|
klog.InfoS("Static policy created with configuration", "options", opts)
|
|
|
|
policy := &staticPolicy{
|
|
- topology: topology,
|
|
- affinity: affinity,
|
|
+ topology: topology,
|
|
+ affinity: affinity,
|
|
excludeReserved: excludeReserved,
|
|
- cpusToReuse: make(map[string]cpuset.CPUSet),
|
|
- options: opts,
|
|
+ cpusToReuse: make(map[string]cpuset.CPUSet),
|
|
+ options: opts,
|
|
}
|
|
|
|
allCPUs := topology.CPUDetails.CPUs()
|
|
@@ -222,8 +227,8 @@ func (p *staticPolicy) validateState(s state.State) error {
|
|
// - user tampered with file
|
|
if !p.excludeReserved {
|
|
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
|
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
|
- p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
|
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
|
+ p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
|
}
|
|
}
|
|
// 2. Check if state for static policy is consistent
|
|
@@ -302,6 +307,25 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
|
|
}
|
|
|
|
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
|
|
+ // Process infra pods before guaranteed pods
|
|
+ if isKubeInfra(pod) {
|
|
+ // Container belongs in reserved pool.
|
|
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
|
|
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
|
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)", pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
|
+ return nil
|
|
+ }
|
|
+
|
|
+ cpuset := p.reservedCPUs
|
|
+ if cpuset.IsEmpty() {
|
|
+ // If this happens then someone messed up.
|
|
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs)
|
|
+ }
|
|
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
|
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
|
+ return nil
|
|
+ }
|
|
+
|
|
numCPUs := p.guaranteedCPUs(pod, container)
|
|
if numCPUs == 0 {
|
|
// container belongs in the shared pool (nothing to do; use default cpuset)
|
|
@@ -453,6 +477,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
|
|
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
|
|
return 0
|
|
}
|
|
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
|
|
+ if isKubeInfra(pod) {
|
|
+ return 0
|
|
+ }
|
|
// Safe downcast to do for all systems with < 2.1 billion CPUs.
|
|
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
|
|
// https://golang.org/ref/spec#Numeric_types
|
|
@@ -671,6 +699,16 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
|
|
return hints
|
|
}
|
|
|
|
+// check if a given pod is in a platform infrastructure namespace
|
|
+func isKubeInfra(pod *v1.Pod) bool {
|
|
+ for _, namespace := range infraNamespaces {
|
|
+ if namespace == pod.Namespace {
|
|
+ return true
|
|
+ }
|
|
+ }
|
|
+ return false
|
|
+}
|
|
+
|
|
// isHintSocketAligned function return true if numa nodes in hint are socket aligned.
|
|
func (p *staticPolicy) isHintSocketAligned(hint topologymanager.TopologyHint, minAffinitySize int) bool {
|
|
numaNodesBitMask := hint.NUMANodeAffinity.GetBits()
|
|
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
|
index deb2f4c7982..b864c6c57c6 100644
|
|
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
|
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
|
@@ -988,7 +988,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
|
}
|
|
|
|
func TestStaticPolicyAddWithResvList(t *testing.T) {
|
|
-
|
|
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
|
|
+ infraPod.Namespace = "kube-system"
|
|
testCases := []staticPolicyTestWithResvList{
|
|
{
|
|
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
|
@@ -1030,6 +1031,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
|
expCPUAlloc: true,
|
|
expCSet: cpuset.New(4, 5),
|
|
},
|
|
+ {
|
|
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
|
|
+ topo: topoSingleSocketHT,
|
|
+ numReservedCPUs: 2,
|
|
+ reserved: cpuset.New(0, 1),
|
|
+ stAssignments: state.ContainerCPUAssignments{
|
|
+ "fakePod": map[string]cpuset.CPUSet{
|
|
+ "fakeContainer100": cpuset.New(2, 3, 6, 7),
|
|
+ },
|
|
+ },
|
|
+ stDefaultCPUSet: cpuset.New(4, 5),
|
|
+ pod: infraPod,
|
|
+ expErr: nil,
|
|
+ expCPUAlloc: true,
|
|
+ expCSet: cpuset.New(0, 1),
|
|
+ },
|
|
}
|
|
|
|
testExcl := true
|
|
--
|
|
2.25.1
|
|
|