integ/kubernetes/kubernetes-1.29.2/debian/deb_folder/patches/Identify-platform-pods-based-on-pod-or-namespace-labels.patch
Boovan Rajendran 9ec39a1da0 Identify platform pods based on pod/namespace labels
Pods with namespace 'kube-system', or labeled with
'app.starlingx.io/component=platform' are identified as
'platform'. These have isolated cpu affinity cpuset when
kubelet cpu-manager 'static' policy is configured. This
decouples performance of application pods from the platform.

The new patch Identify-platform-pods-based-on-pod-or-namespace-labels
patch is used to identify platform pods using above criteria for
Kubernetes 1.25 and later. The hard-coded list of platform namespaces
is no longer required. The original hard-coded list is still kept
for Kubernetes 1.24 so we can phase in this change.

This will also fix the Kubernetes make test fail issue after merging
the review https://review.opendev.org/c/starlingx/integ/+/907637

This change also include the missed out patch
kubeadm-reduce-UpgradeManifestTimeout.patch for k8s 1.29.

Test Plan:
PASS: Run all Kubelet, kubeadm, kubectl make tests for affected code.
PASS: All affected versions of kubernetes package build successfully.
PASS: Create a pod with the platform label. Pod is classified as
      a platform pod.
PASS: Create a pod without the platform label but in a namespace with
      the platform label. Pod is classified as a platform pod.
PASS: Create a pod without the platform label and in a namespace
      without the platform label. Pod is not classified as a platform
      pod.
PASS: Verify correct cpuset affinity of platform vs application pods
      after host lock/unlock
PASS: Verify kube-system namespace pods are correctly identified
      as Platform

Closes-Bug: 2058042

Story: 2010612
Task: 49910

Change-Id: I0cc05c2e9057b64f9191317e72a1bf4d7b8d0771
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
2024-04-26 02:13:49 -04:00

436 lines
16 KiB
Diff

From df2fa4aa3893cbd78b81ee8ccb07456aaae10d82 Mon Sep 17 00:00:00 2001
From: Boovan Rajendran <boovan.rajendran@windriver.com>
Date: Mon, 1 Apr 2024 06:44:45 -0400
Subject: [PATCH] Identify platform pods based on pod or namespace labels
Pods with namespace 'kube-system', or labeled with
'app.starlingx.io/component=platform' are identified as 'platform'.
These have isolated cpu affinity cpuset when cpu-manager 'static'
policy is configured.
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
---
pkg/kubelet/cm/cpumanager/policy_static.go | 96 ++++++++++--
.../cm/cpumanager/policy_static_test.go | 148 +++++++++++++++++-
.../cm/cpumanager/topology_hints_test.go | 4 +
3 files changed, 235 insertions(+), 13 deletions(-)
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index ee0afd378cf..639fc8bd5fb 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -17,14 +17,20 @@ limitations under the License.
package cpumanager
import (
+ "context"
"fmt"
"strconv"
+ k8sclient "k8s.io/client-go/kubernetes"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ restclient "k8s.io/client-go/rest"
v1 "k8s.io/api/core/v1"
+ "k8s.io/client-go/tools/clientcmd"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
+ "k8s.io/kubernetes/cmd/kubeadm/app/constants"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
@@ -46,6 +52,23 @@ const (
ErrorSMTAlignment = "SMTAlignmentError"
)
+// Declared as variables so that they can easily more
+// overridden during testing
+type getPodNamespace func(string) (*v1.Namespace, error)
+type buildFromConfigFlag func(masterUrl string, kubeconfigPath string) (*restclient.Config, error)
+type isKubeInfraFunc func(pod *v1.Pod) bool
+
+var varGetNamespaceObject getPodNamespace
+var varBuildConfigFromFlags buildFromConfigFlag
+var varIsKubeInfra isKubeInfraFunc
+
+func init() {
+ varIsKubeInfra = isKubeInfra
+ varGetNamespaceObject = getPodNamespaceObject
+ varBuildConfigFromFlags = clientcmd.BuildConfigFromFlags
+}
+
+
// SMTAlignmentError represents an error due to SMT alignment
type SMTAlignmentError struct {
RequestedCPUs int
@@ -65,11 +88,6 @@ func (e SMTAlignmentError) Type() string {
return ErrorSMTAlignment
}
-// Define namespaces used by platform infrastructure pods
-var infraNamespaces = [...]string{
- "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification", "flux-helm", "metrics-server",
-}
-
// staticPolicy is a CPU manager policy that does not change CPU
// assignments for exclusively pinned guaranteed containers after the main
// container process starts.
@@ -331,7 +349,7 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
// Process infra pods before guaranteed pods
- if isKubeInfra(pod) {
+ if varIsKubeInfra(pod) {
// Container belongs in reserved pool.
// We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
@@ -529,7 +547,7 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
return 0
}
// Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
- if isKubeInfra(pod) {
+ if varIsKubeInfra(pod) {
return 0
}
// Safe downcast to do for all systems with < 2.1 billion CPUs.
@@ -757,14 +775,68 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
return hints
}
-// check if a given pod is in a platform infrastructure namespace
+func getPodNamespaceObject(podNamespaceName string) (*v1.Namespace, error) {
+
+ kubeConfigPath := constants.GetKubeletKubeConfigPath()
+ cfg, err := varBuildConfigFromFlags("", kubeConfigPath)
+ if err != nil {
+ klog.Error("Failed to build client config from ", kubeConfigPath, err.Error())
+ return nil, err
+ }
+
+ clientset, err := k8sclient.NewForConfig(cfg)
+ if err != nil {
+ klog.Error("Failed to get clientset for KUBECONFIG ", kubeConfigPath, err.Error())
+ return nil, err
+ }
+
+ namespaceObj, err := clientset.CoreV1().Namespaces().Get(context.TODO(), podNamespaceName, metav1.GetOptions{})
+ if err != nil {
+ klog.Error("Error getting namespace object:", err.Error())
+ return nil, err
+ }
+
+ return namespaceObj, nil
+
+}
+
+// check if a given pod is labelled as platform pod or
+// is in a namespace labelled as a platform namespace
func isKubeInfra(pod *v1.Pod) bool {
- for _, namespace := range infraNamespaces {
- if namespace == pod.Namespace {
- return true
- }
+
+ podName := pod.GetName()
+ podNamespaceName := pod.GetNamespace()
+
+ if podNamespaceName == "kube-system" {
+ klog.Infof("Pod %s has %s namespace. Treating as platform pod.", podName , podNamespaceName)
+ return true
+ }
+
+ klog.InfoS("Checking pod ", podName , " for label 'app.starlingx.io/component=platform'.")
+ podLabels := pod.GetLabels()
+ val, ok := podLabels["app.starlingx.io/component"]
+ if (ok && val == "platform") {
+ klog.InfoS("Pod ", podName, " has 'app.starlingx.io/component=platform' label. Treating as platform pod.")
+ return true
}
+
+ klog.V(4).InfoS("Pod ", pod.GetName(), " does not have 'app.starlingx.io/component=platform' label. Checking its namespace information...")
+
+ namespaceObj, err := varGetNamespaceObject(podNamespaceName)
+ if err != nil {
+ return false
+ }
+
+ namespaceLabels := namespaceObj.GetLabels()
+ val, ok = namespaceLabels["app.starlingx.io/component"]
+ if ok && val == "platform" {
+ klog.InfoS("For pod: ", podName, ", its Namespace ", podNamespaceName, " has 'app.starlingx.io/component=platform' label. Treating as platform pod.")
+ return true
+ }
+
+ klog.InfoS("Neither pod ", podName, " nor its namespace ", podNamespaceName, " has 'app.starlingx.io/component=platform' label. Not treating as platform pod.")
return false
+
}
// get the isolated CPUs (if any) from the devices associated with a specific container
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
index 0e926c1b1e6..05c90376bfc 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
@@ -17,10 +17,13 @@ limitations under the License.
package cpumanager
import (
+ "errors"
"fmt"
"reflect"
"testing"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ restclient "k8s.io/client-go/rest"
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
@@ -973,6 +976,7 @@ type staticPolicyTestWithResvList struct {
stAssignments state.ContainerCPUAssignments
stDefaultCPUSet cpuset.CPUSet
pod *v1.Pod
+ isKubeInfraPodfunc isKubeInfraFunc
expErr error
expNewErr error
expCPUAlloc bool
@@ -1045,6 +1049,14 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
}
}
+func fakeIsKubeInfraTrue(pod *v1.Pod) bool {
+ return true
+}
+
+func fakeIsKubeInfraFalse(pod *v1.Pod) bool {
+ return false
+}
+
func TestStaticPolicyAddWithResvList(t *testing.T) {
infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
infraPod.Namespace = "kube-system"
@@ -1058,6 +1070,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
stAssignments: state.ContainerCPUAssignments{},
stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7),
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
+ isKubeInfraPodfunc: fakeIsKubeInfraFalse,
expErr: fmt.Errorf("not enough cpus available to satisfy request: requested=8, available=7"),
expCPUAlloc: false,
expCSet: cpuset.New(),
@@ -1071,6 +1084,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
stAssignments: state.ContainerCPUAssignments{},
stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7),
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
+ isKubeInfraPodfunc: fakeIsKubeInfraFalse,
expErr: nil,
expCPUAlloc: true,
expCSet: cpuset.New(4), // expect sibling of partial core
@@ -1088,6 +1102,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
},
stDefaultCPUSet: cpuset.New(0, 1, 4, 5),
pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"),
+ isKubeInfraPodfunc: fakeIsKubeInfraFalse,
expErr: nil,
expCPUAlloc: true,
expCSet: cpuset.New(4, 5),
@@ -1105,6 +1120,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
},
stDefaultCPUSet: cpuset.New(4, 5),
pod: infraPod,
+ isKubeInfraPodfunc: fakeIsKubeInfraTrue,
expErr: nil,
expCPUAlloc: true,
expCSet: cpuset.New(0, 1),
@@ -1122,6 +1138,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
},
stDefaultCPUSet: cpuset.New(4, 5),
pod: infraPod,
+ isKubeInfraPodfunc: fakeIsKubeInfraTrue,
expErr: nil,
expCPUAlloc: true,
expCSet: cpuset.New(0),
@@ -1137,7 +1154,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
assignments: testCase.stAssignments,
defaultCPUSet: testCase.stDefaultCPUSet,
}
-
+ varIsKubeInfra = testCase.isKubeInfraPodfunc
container := &testCase.pod.Spec.Containers[0]
err := policy.Allocate(st, testCase.pod, container)
if !reflect.DeepEqual(err, testCase.expErr) {
@@ -1258,6 +1275,135 @@ func TestStaticPolicyOptions(t *testing.T) {
}
}
+func makePodWithLabels(podLabels map[string]string) *v1.Pod {
+ return &v1.Pod{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pod",
+ Namespace: "test-namespace",
+ Labels: podLabels,
+ },
+ }
+}
+
+func fakeBuildConfigFromFlags(masterUrl string, kubeconfigPath string) (*restclient.Config, error) {
+
+ return &restclient.Config{}, nil
+}
+
+func fakeBuildConfigFromFlagsError(masterUrl string, kubeconfigPath string) (*restclient.Config, error) {
+
+ errString := fmt.Sprintf("%s file not found", kubeconfigPath)
+ return nil, errors.New(errString)
+
+}
+
+func getFakeInfraPodNamespace(_ string) (*v1.Namespace, error) {
+
+ return &v1.Namespace{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-namespace",
+ Labels: map[string]string{
+ "app.starlingx.io/component": "platform",
+ },
+ }}, nil
+}
+
+func getFakeNonInfraPodNamespace(_ string) (*v1.Namespace, error) {
+
+ return &v1.Namespace{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-namespace",
+ Labels: map[string]string{
+ "fake": "label",
+ }}}, nil
+
+}
+
+type kubeInfraPodTestCase struct {
+ description string
+ pod *v1.Pod
+ namespaceFunc getPodNamespace
+ expectedValue bool
+}
+
+func TestKubeInfraPod(t *testing.T) {
+ testCases := []kubeInfraPodTestCase{
+ {
+ description: "Pod with platform label and namespace with platform label",
+ pod: makePodWithLabels(map[string]string{
+ "app.starlingx.io/component": "platform",
+ }),
+ namespaceFunc: getFakeInfraPodNamespace,
+ expectedValue: true,
+ },
+ {
+ description: "Pod with platform label and namespace without platform label",
+ pod: makePodWithLabels(map[string]string{
+ "app.starlingx.io/component": "platform",
+ }),
+ namespaceFunc: getFakeNonInfraPodNamespace,
+ expectedValue: true,
+
+ },
+ {
+ description: "Pod without platform label and namespace with platform label",
+ pod: makePodWithLabels(map[string]string{
+ "test": "label",
+ }),
+ namespaceFunc: getFakeInfraPodNamespace,
+ expectedValue: true,
+ },
+ {
+ description: "Pod without platform label and namespace without platform label",
+ pod: makePodWithLabels(map[string]string{
+ "test": "namespace",
+ }),
+ namespaceFunc: getFakeNonInfraPodNamespace,
+ expectedValue: false,
+ },
+
+ }
+
+ for _, testCase := range testCases {
+ t.Run(testCase.description, func(t *testing.T) {
+
+ varGetNamespaceObject = testCase.namespaceFunc
+ varBuildConfigFromFlags = fakeBuildConfigFromFlags
+ gotValue := isKubeInfra(testCase.pod)
+
+ if gotValue != testCase.expectedValue {
+ t.Errorf("StaticPolicy isKubeInfraPod() error %v. expected value %v actual value %v",
+ testCase.description, testCase.expectedValue, gotValue)
+ } else {
+ fmt.Printf("StaticPolicy isKubeInfraPod() test successful. : %v ", testCase.description)
+ }
+
+ })
+ }
+
+ test := kubeInfraPodTestCase{
+ description: "Failure reading kubeconfig file",
+ pod: makePodWithLabels(map[string]string{
+ "test": "namespace",
+ }),
+ namespaceFunc: getFakeNonInfraPodNamespace,
+ expectedValue: false,
+ }
+
+ varGetNamespaceObject = getPodNamespaceObject
+ varBuildConfigFromFlags = fakeBuildConfigFromFlagsError
+
+ gotValue := isKubeInfra(test.pod)
+
+ if gotValue != test.expectedValue {
+ t.Errorf("StaticPolicy isKubeInfraPod() error %v. expected value %v actual value %v",
+ test.description, test.expectedValue, gotValue)
+ } else {
+ fmt.Printf("StaticPolicy isKubeInfraPod() test successful. : %v ", test.description)
+ }
+
+}
+
func newCPUSetPtr(cpus ...int) *cpuset.CPUSet {
ret := cpuset.New(cpus...)
return &ret
diff --git a/pkg/kubelet/cm/cpumanager/topology_hints_test.go b/pkg/kubelet/cm/cpumanager/topology_hints_test.go
index 53738b613c2..ad9c0f17602 100644
--- a/pkg/kubelet/cm/cpumanager/topology_hints_test.go
+++ b/pkg/kubelet/cm/cpumanager/topology_hints_test.go
@@ -197,6 +197,7 @@ func TestPodGuaranteedCPUs(t *testing.T) {
expectedCPU: 210,
},
}
+ varIsKubeInfra = fakeIsKubeInfraFalse
for _, tc := range tcases {
t.Run(tc.name, func(t *testing.T) {
requestedCPU := p.podGuaranteedCPUs(tc.pod)
@@ -241,6 +242,7 @@ func TestGetTopologyHints(t *testing.T) {
sourcesReady: &sourcesReadyStub{},
}
+ varIsKubeInfra = fakeIsKubeInfraFalse
hints := m.GetTopologyHints(&tc.pod, &tc.container)[string(v1.ResourceCPU)]
if len(tc.expectedHints) == 0 && len(hints) == 0 {
continue
@@ -294,6 +296,7 @@ func TestGetPodTopologyHints(t *testing.T) {
sourcesReady: &sourcesReadyStub{},
}
+ varIsKubeInfra = fakeIsKubeInfraFalse
podHints := m.GetPodTopologyHints(&tc.pod)[string(v1.ResourceCPU)]
if len(tc.expectedHints) == 0 && len(podHints) == 0 {
continue
@@ -477,6 +480,7 @@ func TestGetPodTopologyHintsWithPolicyOptions(t *testing.T) {
sourcesReady: &sourcesReadyStub{},
}
+ varIsKubeInfra = fakeIsKubeInfraFalse
podHints := m.GetPodTopologyHints(&testCase.pod)[string(v1.ResourceCPU)]
sort.SliceStable(podHints, func(i, j int) bool {
return podHints[i].LessThan(podHints[j])
--
2.25.1