bc28897dc5
This change ports the following Kubernetes 1.28.4 patches; some were
refactored slightly to accommodate upstream changes.

The following patches were applied cleanly:
kubelet-sort-isolcpus-allocation-when-SMT-enabled.patch
kubelet-cpumanager-disable-CFS-quota-throttling.patch
kubelet-cpumanager-keep-normal-containers-off-reserv.patch
kubelet-cpumanager-infra-pods-use-system-reserved-CP.patch
Affinity-of-guaranteed-pod-to-non-isolated-CPUs.patch
kubelet-CFS-quota-throttling-for-non-integer-cpulimit.patch

The following patches were refactored:
kubeadm-create-platform-pods-with-zero-CPU-resources.patch
kubernetes-make-isolcpus-allocation-SMT-aware.patch
kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch
enable-support-for-kubernetes-to-ignore-isolcpus.patch

Note: Revert-use-subpath-for-coredns-only-for-default-repo.patch is
removed because the dns imageRepository update is now handled in the
ansible playbook:
https://review.opendev.org/c/starlingx/ansible-playbooks/+/903499/1/playbookconfig/src/playbooks/roles/common/files/kubeadm.yaml.j2

Test Plan:
PASS: Kubernetes package 1.28.4 builds properly.
PASS: Run all kubelet, kubeadm, kubectl make tests for affected code.
PASS: build-iso successful with multiple Kubernetes versions.
PASS: Install ISO with k8s 1.28 default and test all patches.

Story: 2010878
Task: 49209

Change-Id: I7693ad2fcc93d146eeae882d44f83b60589565db
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>
From 752e3e88d162aada55282aea7544e458e610c947 Mon Sep 17 00:00:00 2001
From: Boovan Rajendran <boovan.rajendran@windriver.com>
Date: Wed, 30 Aug 2023 06:01:30 -0400
Subject: [PATCH] kubelet cpumanager disable CFS quota throttling

This disables CFS CPU quota to avoid performance degradation due to
the Linux kernel CFS quota implementation. Note that the 4.18 kernel
attempts to solve the CFS throttling problem, but there are reports
that it is not completely effective.

This disables CFS quota throttling for Guaranteed pods for both
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
Disabling has a dramatic latency improvement for HTTP response times.

This patch was refactored in 1.22.5 due to the new
internal_container_lifecycle framework. We leverage the same mechanism
the cpu manager uses to set Linux resources: specifying the container
CPU set during creation.

Co-authored-by: Jim Gauld <james.gauld@windriver.com>
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
---
 pkg/kubelet/cm/cpumanager/cpu_manager.go      |  7 +++
 pkg/kubelet/cm/cpumanager/fake_cpu_manager.go | 10 ++++-
 pkg/kubelet/cm/helpers_linux.go               | 10 +++++
 pkg/kubelet/cm/helpers_linux_test.go          | 43 ++++++++++---------
 .../cm/internal_container_lifecycle_linux.go  |  9 ++++
 5 files changed, 57 insertions(+), 22 deletions(-)
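For context (not part of the patch): CFS derives the throttling quota
from the CPU limit and the enforcement period, and a quota of -1 means
unthrottled. A simplified sketch of the arithmetic this patch bypasses
for Guaranteed pods; the real kubelet helper, MilliCPUToQuota, also
clamps small results to a minimum quota:

  // Simplified sketch; kubelet's MilliCPUToQuota additionally
  // enforces a minimum quota for very small CPU limits.
  func milliCPUToQuota(milliCPU, period int64) int64 {
          if milliCPU == 0 {
                  return 0
          }
          return milliCPU * period / 1000 // e.g. 500m, 100000us -> 50000us
  }
  // With this patch, Guaranteed pods skip this arithmetic and get
  // quota -1 (unthrottled) with the default 100000us period.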
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index 8b5049d7d74..1d8901f4b36 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -73,6 +73,9 @@ type Manager interface {
 	// State returns a read-only interface to the internal CPU manager state.
 	State() state.Reader

+	// GetCPUPolicy returns the assigned CPU manager policy
+	GetCPUPolicy() string
+
 	// GetTopologyHints implements the topologymanager.HintProvider Interface
 	// and is consulted to achieve NUMA aware resource alignment among this
 	// and other resource controllers.
@@ -315,6 +318,10 @@ func (m *manager) State() state.Reader {
 	return m.state
 }

+func (m *manager) GetCPUPolicy() string {
+	return m.policy.Name()
+}
+
 func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
 	// The pod is during the admission phase. We need to save the pod to avoid it
 	// being cleaned before the admission ended
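A minimal sketch (not part of the patch) of how the new accessor is
meant to be consulted; it assumes the v1, v1qos, and cpumanager imports
used elsewhere in this patch, and mirrors the real call site in
internal_container_lifecycle_linux.go further below:

  // Sketch only: only the static policy grants exclusive CPUs,
  // and only Guaranteed pods are eligible for them.
  func shouldDisableCFSQuota(m cpumanager.Manager, pod *v1.Pod) bool {
          return m.GetCPUPolicy() == "static" &&
                  v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed
  }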
diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
index 4a03f3dd23f..b36fb0da3b4 100644
--- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
@@ -28,7 +28,8 @@ import (
 )

 type fakeManager struct {
-	state state.State
+	policy Policy
+	state  state.State
 }

 func (m *fakeManager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
@@ -70,6 +71,10 @@ func (m *fakeManager) State() state.Reader {
 	return m.state
 }

+func (m *fakeManager) GetCPUPolicy() string {
+	return m.policy.Name()
+}
+
 func (m *fakeManager) GetExclusiveCPUs(podUID, containerName string) cpuset.CPUSet {
 	klog.InfoS("GetExclusiveCPUs", "podUID", podUID, "containerName", containerName)
 	return cpuset.CPUSet{}
@@ -88,6 +93,7 @@ func (m *fakeManager) GetCPUAffinity(podUID, containerName string) cpuset.CPUSet
 // NewFakeManager creates empty/fake cpu manager
 func NewFakeManager() Manager {
 	return &fakeManager{
-		state: state.NewMemoryState(),
+		policy: &nonePolicy{},
+		state:  state.NewMemoryState(),
 	}
 }
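Because the fake now carries a nonePolicy, test code that calls
GetCPUPolicy() gets a real policy name back instead of a nil
dereference. A minimal sketch, assuming the standard testing import:

  // Sketch only; "none" is the name reported by nonePolicy.
  func TestFakeManagerCPUPolicy(t *testing.T) {
          m := cpumanager.NewFakeManager()
          if got := m.GetCPUPolicy(); got != "none" {
                  t.Errorf("GetCPUPolicy() = %q, want %q", got, "none")
          }
  }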
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
index 8a144e7a73c..008b955ee98 100644
--- a/pkg/kubelet/cm/helpers_linux.go
+++ b/pkg/kubelet/cm/helpers_linux.go
@@ -170,6 +170,16 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64,
 	// build the result
 	result := &ResourceConfig{}
 	if qosClass == v1.PodQOSGuaranteed {
+		// Disable CFS CPU quota to avoid performance degradation due to
+		// Linux kernel CFS throttle implementation.
+		// NOTE: 4.18 kernel attempts to solve CFS throttling problem,
+		// but there are reports that it is not completely effective.
+		// This will configure cgroup CFS parameters at pod level:
+		// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_quota_us
+		// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_period_us
+		cpuQuota = int64(-1)
+		cpuPeriod = uint64(100000)
+
 		result.CPUShares = &cpuShares
 		result.CPUQuota = &cpuQuota
 		result.CPUPeriod = &cpuPeriod
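The observable effect: a Guaranteed pod (requests equal to limits) now
always yields CPUQuota -1 with a 100000us period, regardless of its CPU
limit. A sketch, assuming ResourceConfigForPod keeps the upstream 1.28
four-argument form (the last parameter being enforceMemoryQoS):

  // Sketch only; res makes the pod Guaranteed (requests == limits).
  res := v1.ResourceList{
          v1.ResourceCPU:    resource.MustParse("250m"),
          v1.ResourceMemory: resource.MustParse("100Mi"),
  }
  pod := &v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{
          Name:      "c0",
          Resources: v1.ResourceRequirements{Requests: res, Limits: res},
  }}}}
  cfg := ResourceConfigForPod(pod, true, 100000, false)
  // After this patch: *cfg.CPUQuota == -1, *cfg.CPUPeriod == 100000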
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
index fba41fd49be..60609394659 100644
--- a/pkg/kubelet/cm/helpers_linux_test.go
+++ b/pkg/kubelet/cm/helpers_linux_test.go
@@ -64,8 +64,9 @@ func TestResourceConfigForPod(t *testing.T) {
 	burstablePartialShares := MilliCPUToShares(200)
 	burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
 	guaranteedShares := MilliCPUToShares(100)
-	guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
-	guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
+	guaranteedQuotaPeriod := uint64(100000)
+	guaranteedQuota := int64(-1)
+	guaranteedTunedQuota := int64(-1)
 	memoryQuantity = resource.MustParse("100Mi")
 	cpuNoLimit := int64(-1)
 	guaranteedMemory := memoryQuantity.Value()
@@ -204,8 +205,8 @@ func TestResourceConfigForPod(t *testing.T) {
 				},
 			},
 			enforceCPULimits: true,
-			quotaPeriod:      defaultQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-no-cpu-enforcement": {
 			pod: &v1.Pod{
@@ -218,8 +219,8 @@ func TestResourceConfigForPod(t *testing.T) {
 				},
 			},
 			enforceCPULimits: false,
-			quotaPeriod:      defaultQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-with-tuned-quota": {
 			pod: &v1.Pod{
@@ -232,8 +233,8 @@ func TestResourceConfigForPod(t *testing.T) {
 				},
 			},
 			enforceCPULimits: true,
-			quotaPeriod:      tunedQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-no-cpu-enforcement-with-tuned-quota": {
 			pod: &v1.Pod{
@@ -246,8 +247,8 @@ func TestResourceConfigForPod(t *testing.T) {
 				},
 			},
 			enforceCPULimits: false,
-			quotaPeriod:      tunedQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"burstable-partial-limits-with-init-containers": {
 			pod: &v1.Pod{
@@ -309,8 +310,10 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 	burstablePartialShares := MilliCPUToShares(200)
 	burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
 	guaranteedShares := MilliCPUToShares(100)
-	guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
-	guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
+	guaranteedQuotaPeriod := uint64(100000)
+	guaranteedQuota := int64(-1)
+	guaranteedTunedQuota := int64(-1)
+
 	memoryQuantity = resource.MustParse("100Mi")
 	cpuNoLimit := int64(-1)
 	guaranteedMemory := memoryQuantity.Value()
@@ -449,8 +452,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 				},
 			},
 			enforceCPULimits: true,
-			quotaPeriod:      defaultQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-no-cpu-enforcement": {
 			pod: &v1.Pod{
@@ -463,8 +466,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 				},
 			},
 			enforceCPULimits: false,
-			quotaPeriod:      defaultQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-with-tuned-quota": {
 			pod: &v1.Pod{
@@ -477,8 +480,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 				},
 			},
 			enforceCPULimits: true,
-			quotaPeriod:      tunedQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 		"guaranteed-no-cpu-enforcement-with-tuned-quota": {
 			pod: &v1.Pod{
@@ -491,8 +494,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
 				},
 			},
 			enforceCPULimits: false,
-			quotaPeriod:      tunedQuotaPeriod,
-			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+			quotaPeriod:      guaranteedQuotaPeriod,
+			expected:         &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
 		},
 	}
diff --git a/pkg/kubelet/cm/internal_container_lifecycle_linux.go b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
index cb7c0cfa543..a99d01f8884 100644
--- a/pkg/kubelet/cm/internal_container_lifecycle_linux.go
+++ b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
@@ -25,6 +25,7 @@ import (

 	"k8s.io/api/core/v1"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
+	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
 )

 func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, container *v1.Container, containerConfig *runtimeapi.ContainerConfig) error {
@@ -35,6 +36,14 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
 		}
 	}

+	// Disable cgroup CFS throttle at the container level.
+	// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_quota_us
+	// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_period_us
+	if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed {
+		containerConfig.Linux.Resources.CpuPeriod = int64(100000)
+		containerConfig.Linux.Resources.CpuQuota = int64(-1)
+	}
+
 	if i.memoryManager != nil {
 		numaNodes := i.memoryManager.GetMemoryNUMANodes(pod, container)
 		if numaNodes.Len() > 0 {
--
2.25.1
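One way to spot-check the result on a running node (not part of the
patch) is to read the pod-level cgroup file directly. The path below is
an assumption based on the k8s-infra hierarchy named in the patch
comments, and <pod-cgroup> stands for the pod's actual cgroup
directory; it varies with the node's cgroup driver and hierarchy:

  // Sketch only; needs the fmt, log, os, and strings imports.
  podCgroup := "/sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod-cgroup>"
  b, err := os.ReadFile(podCgroup + "/cpu.cfs_quota_us")
  if err != nil {
          log.Fatal(err)
  }
  fmt.Println(strings.TrimSpace(string(b))) // "-1" once this patch applies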