From 6c2a5206bffa8481f601fd7351808bda17245371 Mon Sep 17 00:00:00 2001 From: Boovan Rajendran Date: Tue, 5 Mar 2024 06:20:41 -0500 Subject: [PATCH] kubelet cpumanager introduce concept of isolated CPUs This introduces the concept of "isolated CPUs", which are CPUs that have been isolated at the kernel level via the "isolcpus" kernel boot parameter. When starting the kubelet process, two separate sets of reserved CPUs may be specified. With this change CPUs reserved via '--system-reserved=cpu' will be used for infrastructure pods while the isolated CPUs should be reserved via '--kube-reserved=cpu' to cause kubelet to skip over them for "normal" CPU resource tracking. The kubelet code will double-check that the specified isolated CPUs match what the kernel exposes in "/sys/devices/system/cpu/isolated". A plugin (outside the scope of this commit) will expose the isolated CPUs to kubelet via the device plugin API. If a pod specifies some number of "isolcpus" resources, the device manager will allocate them. In this code we check whether such resources have been allocated, and if so we set the container cpuset to the isolated CPUs. This does mean that it really only makes sense to specify "isolcpus" resources for best-effort or burstable pods, not for guaranteed ones since that would throw off the accounting code. In order to ensure the accounting still works as designed, if "isolcpus" are specified for guaranteed pods, the affinity will be set to the non-isolated CPUs. This patch was refactored in 1.21.3 due to upstream API change node: podresources: make GetDevices() consistent (commit ad68f9588c72d6477b5a290c548a9031063ac659). The routine podIsolCPUs() was refactored in 1.21.3 since the API p.deviceManager.GetDevices() is returning multiple devices with a device per cpu. The resultant cpuset needs to be the aggregate. The routine NewStaticPolicy was refactored in 1.22.5, adding a new argument in its signature: cpuPolicyOptions map[string]string. This change is implies shifting the new arguments(deviceManager, excludeReserved) with one position to the right. Co-authored-by: Jim Gauld Co-authored-by: Chris Friesen Signed-off-by: Gleb Aronsky Signed-off-by: Ramesh Kumar Sivanandam Signed-off-by: Sachin Gopala Krishna Signed-off-by: Boovan Rajendran Signed-off-by: Saba Touheed Mujawar Signed-off-by: Boovan Rajendran --- pkg/kubelet/cm/container_manager_linux.go | 1 + pkg/kubelet/cm/cpumanager/cpu_manager.go | 35 +++++- pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 23 +++- pkg/kubelet/cm/cpumanager/policy_static.go | 83 ++++++++++++- .../cm/cpumanager/policy_static_test.go | 57 +++++++-- pkg/kubelet/cm/devicemanager/manager_stub.go | 110 ++++++++++++++++++ 6 files changed, 287 insertions(+), 22 deletions(-) create mode 100644 pkg/kubelet/cm/devicemanager/manager_stub.go diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 7dbfd4cc664..e1000e27be5 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -324,6 +324,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I cm.GetNodeAllocatableReservation(), nodeConfig.KubeletRootDir, cm.topologyManager, + cm.deviceManager, ) if err != nil { klog.ErrorS(err, "Failed to initialize cpu manager") diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index e0a359932b7..d3a94b66ed0 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -19,7 +19,9 @@ package cpumanager import ( "context" "fmt" + "os" "math" + "strings" "sync" "time" @@ -32,6 +34,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" + "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/config" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -51,6 +54,25 @@ type policyName string // cpuManagerStateFileName is the file name where cpu manager stores its state const cpuManagerStateFileName = "cpu_manager_state" +// get the system-level isolated CPUs +func getIsolcpus() cpuset.CPUSet { + dat, err := os.ReadFile("/sys/devices/system/cpu/isolated") + if err != nil { + klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir") + return cpuset.New() + } + + // The isolated cpus string ends in a newline + cpustring := strings.TrimSuffix(string(dat), "\n") + cset, err := cpuset.Parse(cpustring) + if err != nil { + klog.Errorf("[cpumanager] unable to parse sysfs isolcpus string to cpuset") + return cpuset.New() + } + + return cset +} + // Manager interface provides methods for Kubelet to manage pod cpus. type Manager interface { // Start is called during Kubelet initialization. @@ -154,7 +176,8 @@ func (s *sourcesReadyStub) AddSource(source string) {} func (s *sourcesReadyStub) AllReady() bool { return true } // NewManager creates new cpu manager based on provided policy -func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) { +func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store, deviceManager devicemanager.Manager) (Manager, error) { + var topo *topology.CPUTopology var policy Policy var err error @@ -195,7 +218,15 @@ func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconc // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset. // This variable is primarily to make testing easier. excludeReserved := true - policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions, excludeReserved) + + // isolCPUs is the set of kernel-isolated CPUs. They should be a subset of specificCPUs or + // of the CPUs that NewStaticPolicy() will pick if numReservedCPUs is set. It's only in the + // argument list here for ease of testing, it's really internal to the policy. + isolCPUs := getIsolcpus() + policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, isolCPUs, affinity, cpuPolicyOptions, deviceManager, excludeReserved) + if err != nil { + return nil, fmt.Errorf("new static policy error: %v", err) + } if err != nil { return nil, fmt.Errorf("new static policy error: %w", err) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go index e5f5d07a2ad..702a4395811 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go @@ -36,6 +36,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/containermap" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" + "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/utils/cpuset" ) @@ -263,6 +264,7 @@ func makeMultiContainerPodWithOptions(initCPUs, appCPUs []*containerOptions) *v1 } func TestCPUManagerAdd(t *testing.T) { + testDM, _ := devicemanager.NewManagerStub() testExcl := false testPolicy, _ := NewStaticPolicy( &topology.CPUTopology{ @@ -278,8 +280,10 @@ func TestCPUManagerAdd(t *testing.T) { }, 0, cpuset.New(), + cpuset.New(), topologymanager.NewFakeManager(), nil, + testDM, testExcl) testCases := []struct { description string @@ -530,8 +534,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) { } testExcl := false + testDM, _ := devicemanager.NewManagerStub() for _, testCase := range testCases { - policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl) + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testExcl) mockState := &mockState{ assignments: testCase.stAssignments, @@ -686,7 +691,9 @@ func TestCPUManagerGenerate(t *testing.T) { } defer os.RemoveAll(sDir) - mgr, err := NewManager(testCase.cpuPolicyName, nil, 5*time.Second, machineInfo, cpuset.New(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager()) + testDM, err := devicemanager.NewManagerStub() + mgr, err := NewManager(testCase.cpuPolicyName, nil, 5*time.Second, machineInfo, cpuset.New(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM) + if testCase.expectedError != nil { if !strings.Contains(err.Error(), testCase.expectedError.Error()) { t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error()) @@ -757,6 +764,7 @@ func TestCPUManagerRemove(t *testing.T) { func TestReconcileState(t *testing.T) { testExcl := false + testDM, _ := devicemanager.NewManagerStub() testPolicy, _ := NewStaticPolicy( &topology.CPUTopology{ NumCPUs: 8, @@ -775,8 +783,10 @@ func TestReconcileState(t *testing.T) { }, 0, cpuset.New(), + cpuset.New(), topologymanager.NewFakeManager(), nil, + testDM, testExcl) testCases := []struct { @@ -1282,6 +1292,7 @@ func TestReconcileState(t *testing.T) { // the following tests are with --reserved-cpus configured func TestCPUManagerAddWithResvList(t *testing.T) { testExcl := false + testDM, _ := devicemanager.NewManagerStub() testPolicy, _ := NewStaticPolicy( &topology.CPUTopology{ NumCPUs: 4, @@ -1296,8 +1307,10 @@ func TestCPUManagerAddWithResvList(t *testing.T) { }, 1, cpuset.New(0), + cpuset.New(), topologymanager.NewFakeManager(), nil, + testDM, testExcl) testCases := []struct { description string @@ -1410,7 +1423,8 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) { } defer os.RemoveAll(sDir) - _, err = NewManager(testCase.cpuPolicyName, testCase.cpuPolicyOptions, 5*time.Second, machineInfo, cpuset.New(), nodeAllocatableReservation, sDir, topologymanager.NewFakeManager()) + testDM, err := devicemanager.NewManagerStub() + _, err = NewManager(testCase.cpuPolicyName, testCase.cpuPolicyOptions, 5*time.Second, machineInfo, cpuset.New(), nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM) if err == nil { t.Errorf("Expected error, but NewManager succeeded") } @@ -1424,6 +1438,7 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) { func TestCPUManagerGetAllocatableCPUs(t *testing.T) { testExcl := false + testDm, _ := devicemanager.NewManagerStub() nonePolicy, _ := NewNonePolicy(nil) staticPolicy, _ := NewStaticPolicy( &topology.CPUTopology{ @@ -1439,8 +1454,10 @@ func TestCPUManagerGetAllocatableCPUs(t *testing.T) { }, 1, cpuset.New(0), + cpuset.New(), topologymanager.NewFakeManager(), nil, + testDm, testExcl) testCases := []struct { diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go index c9c478ba1a8..93aec8b5657 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static.go +++ b/pkg/kubelet/cm/cpumanager/policy_static.go @@ -18,6 +18,7 @@ package cpumanager import ( "fmt" + "strconv" v1 "k8s.io/api/core/v1" utilfeature "k8s.io/apiserver/pkg/util/feature" @@ -27,6 +28,7 @@ import ( "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" + "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/kubelet/metrics" @@ -111,6 +113,10 @@ type staticPolicy struct { topology *topology.CPUTopology // set of CPUs that is not available for exclusive assignment reservedCPUs cpuset.CPUSet + // subset of reserved CPUs with isolcpus attribute + isolcpus cpuset.CPUSet + // parent containerManager, used to get device list + deviceManager devicemanager.Manager // If true, default CPUSet should exclude reserved CPUs excludeReserved bool // Superset of reservedCPUs. It includes not just the reservedCPUs themselves, @@ -133,7 +139,8 @@ var _ Policy = &staticPolicy{} // NewStaticPolicy returns a CPU manager policy that does not change CPU // assignments for exclusively pinned guaranteed containers after the main // container process starts. -func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, excludeReserved bool) (Policy, error) { +func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, isolCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, deviceManager devicemanager.Manager, excludeReserved bool) (Policy, error) { + opts, err := NewStaticPolicyOptions(cpuPolicyOptions) if err != nil { return nil, err @@ -148,6 +155,8 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv policy := &staticPolicy{ topology: topology, affinity: affinity, + isolcpus: isolCPUs, + deviceManager: deviceManager, excludeReserved: excludeReserved, cpusToReuse: make(map[string]cpuset.CPUSet), options: opts, @@ -184,6 +193,12 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv policy.reservedCPUs = reserved policy.reservedPhysicalCPUs = reservedPhysicalCPUs + if !isolCPUs.IsSubsetOf(reserved) { + klog.Errorf("[cpumanager] isolCPUs %v is not a subset of reserved %v", isolCPUs, reserved) + reserved = reserved.Union(isolCPUs) + klog.Warningf("[cpumanager] mismatch isolCPUs %v, force reserved %v", isolCPUs, reserved) + } + return policy, nil } @@ -217,8 +232,9 @@ func (p *staticPolicy) validateState(s state.State) error { } else { s.SetDefaultCPUSet(allCPUs) } - klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n", - allCPUs, p.reservedCPUs, s.GetDefaultCPUSet()) + klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, isolcpus:%v, default:%v\n", + allCPUs, p.reservedCPUs, p.isolcpus, s.GetDefaultCPUSet()) + return nil } @@ -323,16 +339,39 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai return nil } - cpuset := p.reservedCPUs + cpuset := p.reservedCPUs.Clone().Difference(p.isolcpus) if cpuset.IsEmpty() { // If this happens then someone messed up. - return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs) + return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v, isolcpus:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs, p.isolcpus) + } s.SetCPUSet(string(pod.UID), container.Name, cpuset) klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset) return nil } + if isolcpus := p.podIsolCPUs(pod, container); isolcpus.Size() > 0 { + // container has requested isolated CPUs + if set, ok := s.GetCPUSet(string(pod.UID), container.Name); ok { + if set.Equals(isolcpus) { + klog.Infof("[cpumanager] isolcpus container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)", + pod.Namespace, string(pod.UID), pod.Name, container.Name) + return nil + } else { + klog.Infof("[cpumanager] isolcpus container state has cpus %v, should be %v (namespace: %s, pod UID: %s, pod: %s, container: %s)", + isolcpus, set, pod.Namespace, string(pod.UID), pod.Name, container.Name) + } + } + // Note that we do not do anything about init containers here. + // It looks like devices are allocated per-pod based on effective requests/limits + // and extra devices from initContainers are not freed up when the regular containers start. + // TODO: confirm this is still true for 1.20 + s.SetCPUSet(string(pod.UID), container.Name, isolcpus) + klog.Infof("[cpumanager] isolcpus: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", + pod.Namespace, string(pod.UID), pod.Name, container.Name, isolcpus) + return nil + } + numCPUs := p.guaranteedCPUs(pod, container) if numCPUs == 0 { // container belongs in the shared pool (nothing to do; use default cpuset) @@ -398,7 +437,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai } s.SetCPUSet(string(pod.UID), container.Name, cpuset) p.updateCPUsToReuse(pod, container, cpuset) - + klog.Infof("[cpumanager] guaranteed: AddContainer "+ + "(namespace: %s, pod UID: %s, pod: %s, container: %s); numCPUS=%d, cpuset=%v", + pod.Namespace, string(pod.UID), pod.Name, container.Name, numCPUs, cpuset) return nil } @@ -723,6 +764,36 @@ func isKubeInfra(pod *v1.Pod) bool { return false } +// get the isolated CPUs (if any) from the devices associated with a specific container +func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet { + // NOTE: This is required for TestStaticPolicyAdd() since makePod() does + // not create UID. We also need a way to properly stub devicemanager. + if len(string(pod.UID)) == 0 { + return cpuset.New() + } + resContDevices := p.deviceManager.GetDevices(string(pod.UID), container.Name) + cpuSet := cpuset.New() + for resourceName, resourceDevs := range resContDevices { + // this resource name needs to match the isolcpus device plugin + if resourceName == "windriver.com/isolcpus" { + for devID, _ := range resourceDevs { + cpuStrList := []string{devID} + if len(cpuStrList) > 0 { + // loop over the list of strings, convert each one to int, add to cpuset + for _, cpuStr := range cpuStrList { + cpu, err := strconv.Atoi(cpuStr) + if err != nil { + panic(err) + } + cpuSet = cpuSet.Union(cpuset.New(cpu)) + } + } + } + } + } + return cpuSet +} + // isHintSocketAligned function return true if numa nodes in hint are socket aligned. func (p *staticPolicy) isHintSocketAligned(hint topologymanager.TopologyHint, minAffinitySize int) bool { numaNodesBitMask := hint.NUMANodeAffinity.GetBits() diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go index 90863a9a69e..0e926c1b1e6 100644 --- a/pkg/kubelet/cm/cpumanager/policy_static_test.go +++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go @@ -27,6 +27,7 @@ import ( pkgfeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" + "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/utils/cpuset" @@ -70,8 +71,9 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest { } func TestStaticPolicyName(t *testing.T) { + testDM, _ := devicemanager.NewManagerStub() testExcl := false - policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl) + policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testExcl) policyName := policy.Name() if policyName != "static" { @@ -81,6 +83,7 @@ func TestStaticPolicyName(t *testing.T) { } func TestStaticPolicyStart(t *testing.T) { + testDM, _ := devicemanager.NewManagerStub() testCases := []staticPolicyTest{ { description: "non-corrupted state", @@ -156,7 +159,7 @@ func TestStaticPolicyStart(t *testing.T) { } for _, testCase := range testCases { t.Run(testCase.description, func(t *testing.T) { - p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved) + p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testCase.excludeReserved) policy := p.(*staticPolicy) st := &mockState{ @@ -204,7 +207,6 @@ func TestStaticPolicyAdd(t *testing.T) { largeTopoCPUSet := cpuset.New(largeTopoCPUids...) largeTopoSock0CPUSet := cpuset.New(largeTopoSock0CPUids...) largeTopoSock1CPUSet := cpuset.New(largeTopoSock1CPUids...) - // these are the cases which must behave the same regardless the policy options. // So we will permutate the options to ensure this holds true. @@ -627,7 +629,9 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) { cpus = testCase.reservedCPUs.Clone() } testExcl := false - policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, tm, testCase.options, testExcl) + testDM, _ := devicemanager.NewManagerStub() + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, cpus, tm, testCase.options, testDM, testExcl) + st := &mockState{ assignments: testCase.stAssignments, @@ -674,6 +678,8 @@ func runStaticPolicyTestCaseWithFeatureGate(t *testing.T, testCase staticPolicyT } func TestStaticPolicyReuseCPUs(t *testing.T) { + excludeReserved := false + testDM, _ := devicemanager.NewManagerStub() testCases := []struct { staticPolicyTest expCSetAfterAlloc cpuset.CPUSet @@ -698,7 +704,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) { } for _, testCase := range testCases { - policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved) + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved) st := &mockState{ assignments: testCase.stAssignments, @@ -730,6 +736,8 @@ func TestStaticPolicyReuseCPUs(t *testing.T) { } func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { + excludeReserved := false + testDM, _ := devicemanager.NewManagerStub() testCases := []struct { staticPolicyTest expCSetAfterAlloc cpuset.CPUSet @@ -751,7 +759,7 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { } for _, testCase := range testCases { - policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved) + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved) st := &mockState{ assignments: testCase.stAssignments, @@ -776,6 +784,7 @@ func TestStaticPolicyDoNotReuseCPUs(t *testing.T) { func TestStaticPolicyRemove(t *testing.T) { excludeReserved := false + testDM, _ := devicemanager.NewManagerStub() testCases := []staticPolicyTest{ { description: "SingleSocketHT, DeAllocOneContainer", @@ -834,7 +843,7 @@ func TestStaticPolicyRemove(t *testing.T) { } for _, testCase := range testCases { - policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved) + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved) st := &mockState{ assignments: testCase.stAssignments, @@ -857,6 +866,7 @@ func TestStaticPolicyRemove(t *testing.T) { func TestTopologyAwareAllocateCPUs(t *testing.T) { excludeReserved := false + testDM, _ := devicemanager.NewManagerStub() testCases := []struct { description string topo *topology.CPUTopology @@ -925,7 +935,8 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) { }, } for _, tc := range testCases { - p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved) + p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved) + policy := p.(*staticPolicy) st := &mockState{ assignments: tc.stAssignments, @@ -958,6 +969,7 @@ type staticPolicyTestWithResvList struct { topo *topology.CPUTopology numReservedCPUs int reserved cpuset.CPUSet + isolcpus cpuset.CPUSet stAssignments state.ContainerCPUAssignments stDefaultCPUSet cpuset.CPUSet pod *v1.Pod @@ -968,6 +980,8 @@ type staticPolicyTestWithResvList struct { } func TestStaticPolicyStartWithResvList(t *testing.T) { + testDM, _ := devicemanager.NewManagerStub() + testExcl := false testCases := []staticPolicyTestWithResvList{ { description: "empty cpuset", @@ -997,10 +1011,9 @@ func TestStaticPolicyStartWithResvList(t *testing.T) { expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"), }, } - testExcl := false for _, testCase := range testCases { t.Run(testCase.description, func(t *testing.T) { - p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl) + p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testExcl) if !reflect.DeepEqual(err, testCase.expNewErr) { t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v", @@ -1041,6 +1054,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 1, reserved: cpuset.New(0), + isolcpus: cpuset.New(), stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"), @@ -1053,6 +1067,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 2, reserved: cpuset.New(0, 1), + isolcpus: cpuset.New(), stAssignments: state.ContainerCPUAssignments{}, stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7), pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"), @@ -1065,6 +1080,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 2, reserved: cpuset.New(0, 1), + isolcpus: cpuset.New(), stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(2, 3, 6, 7), @@ -1081,6 +1097,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { topo: topoSingleSocketHT, numReservedCPUs: 2, reserved: cpuset.New(0, 1), + isolcpus: cpuset.New(), stAssignments: state.ContainerCPUAssignments{ "fakePod": map[string]cpuset.CPUSet{ "fakeContainer100": cpuset.New(2, 3, 6, 7), @@ -1092,11 +1109,29 @@ func TestStaticPolicyAddWithResvList(t *testing.T) { expCPUAlloc: true, expCSet: cpuset.New(0, 1), }, + { + description: "InfraPod, SingleSocketHT, Isolcpus, ExpectAllocReserved", + topo: topoSingleSocketHT, + numReservedCPUs: 2, + reserved: cpuset.New(0, 1), + isolcpus: cpuset.New(1), + stAssignments: state.ContainerCPUAssignments{ + "fakePod": map[string]cpuset.CPUSet{ + "fakeContainer100": cpuset.New(2, 3, 6, 7), + }, + }, + stDefaultCPUSet: cpuset.New(4, 5), + pod: infraPod, + expErr: nil, + expCPUAlloc: true, + expCSet: cpuset.New(0), + }, } testExcl := true + testDM, _ := devicemanager.NewManagerStub() for _, testCase := range testCases { - policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl) + policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, testCase.isolcpus, topologymanager.NewFakeManager(), nil, testDM, testExcl) st := &mockState{ assignments: testCase.stAssignments, diff --git a/pkg/kubelet/cm/devicemanager/manager_stub.go b/pkg/kubelet/cm/devicemanager/manager_stub.go new file mode 100644 index 00000000000..8082b02d6de --- /dev/null +++ b/pkg/kubelet/cm/devicemanager/manager_stub.go @@ -0,0 +1,110 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package devicemanager + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" + "k8s.io/kubernetes/pkg/kubelet/config" + "k8s.io/kubernetes/pkg/kubelet/lifecycle" + "k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/kubernetes/pkg/kubelet/cm/containermap" +) + +// ManagerStub provides a simple stub implementation for the Device Manager. +type ManagerStub struct{ + // containerMap provides a mapping from (pod, container) -> containerID + // for all containers in a pod. Used to detect pods running across a restart + containerMap containermap.ContainerMap + + // containerRunningSet identifies which container among those present in `containerMap` + // was reported running by the container runtime when `containerMap` was computed. + // Used to detect pods running across a restart + containerRunningSet sets.Set[string] +} + +// NewManagerStub creates a ManagerStub. +func NewManagerStub() (*ManagerStub, error) { + return &ManagerStub{}, nil +} + +// Start simply returns nil. +func (h *ManagerStub) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, initialContainers containermap.ContainerMap, initialContainerRunningSet sets.Set[string]) error { + return nil +} + +// Stop simply returns nil. +func (h *ManagerStub) Stop() error { + return nil +} + +// Allocate simply returns nil. +func (h *ManagerStub) Allocate(pod *v1.Pod, container *v1.Container) error { + return nil +} + +// UpdatePluginResources simply returns nil. +func (h *ManagerStub) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error { + return nil +} + +// GetDeviceRunContainerOptions simply returns nil. +func (h *ManagerStub) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) (*DeviceRunContainerOptions, error) { + return nil, nil +} + +// GetCapacity simply returns nil capacity and empty removed resource list. +func (h *ManagerStub) GetCapacity() (v1.ResourceList, v1.ResourceList, []string) { + return nil, nil, []string{} +} + +// GetWatcherHandler returns plugin watcher interface +func (h *ManagerStub) GetWatcherHandler() cache.PluginHandler { + return nil +} + +// GetTopologyHints returns an empty TopologyHint map +func (h *ManagerStub) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { + return map[string][]topologymanager.TopologyHint{} +} + +// GetPodTopologyHints returns an empty TopologyHint map +func (h *ManagerStub) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint { + return map[string][]topologymanager.TopologyHint{} +} + +// GetDevices returns nil +func (h *ManagerStub) GetDevices(_, _ string) ResourceDeviceInstances { + return nil +} + +// GetAllocatableDevices returns nothing +func (h *ManagerStub) GetAllocatableDevices() ResourceDeviceInstances { + return nil +} + +// ShouldResetExtendedResourceCapacity returns false +func (h *ManagerStub) ShouldResetExtendedResourceCapacity() bool { + return false +} + +// UpdateAllocatedDevices returns nothing +func (h *ManagerStub) UpdateAllocatedDevices() { + return +} -- 2.25.1