@@ -102,10 +102,8 @@ func min(x, y int) int {
 type numaOrSocketsFirstFuncs interface {
 	takeFullFirstLevel()
 	takeFullSecondLevel()
-	takeThirdLevel()
 	sortAvailableNUMANodes() []int
 	sortAvailableSockets() []int
-	sortAvailableUncoreCaches() []int
 	sortAvailableCores() []int
 }
 
@@ -127,18 +125,12 @@ func (n *numaFirst) takeFullSecondLevel() {
 	n.acc.takeFullSockets()
 }
 
-// In Split UncoreCache Topology, we take from the sets of UncoreCache as the third level
-func (n *numaFirst) takeThirdLevel() {
-	n.acc.takeUncoreCache()
-}
-
-// If NUMA nodes are higher in the memory hierarchy than sockets, then just
-// sort the NUMA nodes directly, and return them.
-func (n *numaFirst) sortAvailableUncoreCaches() []int {
+// Sort the UncoreCaches within the NUMA nodes.
+func (a *cpuAccumulator) sortAvailableUncoreCaches() []int {
 	var result []int
-	for _, socket := range n.acc.sortAvailableNUMANodes() {
-		uncore := n.acc.details.UncoreInNUMANodes(socket).UnsortedList()
-		n.acc.sort(uncore, n.acc.details.CPUsInUncoreCaches)
+	for _, numa := range a.sortAvailableNUMANodes() {
+		uncore := a.details.UncoreInNUMANodes(numa).UnsortedList()
+		a.sort(uncore, a.details.CPUsInUncoreCaches)
 		result = append(result, uncore...)
 	}
 	return result
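The accumulator's sort helper appears to order IDs by how many free CPUs each contains, smallest count first, with ties broken by ID. Under that assumption, here is a minimal self-contained sketch of the ordering this produces for uncore-cache IDs (made-up IDs and counts, not the kubelet's types):

package main

import (
	"fmt"
	"sort"
)

// sortByFreeCPUs orders cache IDs by free-CPU count (fewest first),
// breaking ties by ID — the assumed behavior of a.sort above.
func sortByFreeCPUs(ids []int, freeCPUs map[int]int) {
	sort.Slice(ids, func(i, j int) bool {
		if freeCPUs[ids[i]] != freeCPUs[ids[j]] {
			return freeCPUs[ids[i]] < freeCPUs[ids[j]]
		}
		return ids[i] < ids[j]
	})
}

func main() {
	// Caches 0..2 inside one NUMA node, with 4, 2, and 4 free CPUs.
	ids := []int{0, 1, 2}
	free := map[int]int{0: 4, 1: 2, 2: 4}
	sortByFreeCPUs(ids, free)
	fmt.Println(ids) // [1 0 2]: fewest free CPUs first, ties by ID
}

Partially consumed caches therefore get packed before untouched ones, which is the best-fit behavior the packed strategy wants.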
@@ -189,10 +181,6 @@ func (s *socketsFirst) takeFullSecondLevel() {
 	s.acc.takeFullNUMANodes()
 }
 
-func (s *socketsFirst) takeThirdLevel() {
-	s.acc.takeUncoreCache()
-}
-
 // If sockets are higher in the memory hierarchy than NUMA nodes, then we need
 // to pull the set of NUMA nodes out of each sorted Socket, and accumulate the
 // partial order across them.
@@ -214,18 +202,6 @@ func (s *socketsFirst) sortAvailableSockets() []int {
 	return sockets
 }
 
-// If sockets higher in the memory hierarchy than NUMA nodes, then UncoreCache
-// sit directly below NUMA Nodes in the memory hierchy
-func (s *socketsFirst) sortAvailableUncoreCaches() []int {
-	var result []int
-	for _, uncore := range s.acc.sortAvailableNUMANodes() {
-		uncore := s.acc.details.UncoreInNUMANodes(uncore).UnsortedList()
-		s.acc.sort(uncore, s.acc.details.CPUsInUncoreCaches)
-		result = append(result, uncore...)
-	}
-	return result
-}
-
 // If sockets are higher in the memory hierarchy than NUMA nodes, then cores
 // sit directly below NUMA Nodes in the memory hierarchy.
 func (s *socketsFirst) sortAvailableCores() []int {
@@ -273,8 +249,8 @@ func (a *cpuAccumulator) isSocketFree(socketID int) bool {
 	return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket()
 }
 
-// Returns true if the supplied UnCoreCache is fully available in `a.details`.
-// "fully available" means that all the CPUs in it are free.
+// Returns true if the supplied UnCoreCache is fully available,
+// meaning all its CPUs are free in `a.details`.
 func (a *cpuAccumulator) isUncoreCacheFree(uncoreID int) bool {
 	return a.details.CPUsInUncoreCaches(uncoreID).Size() == a.topo.CPUDetails.CPUsInUncoreCaches(uncoreID).Size()
 }
@@ -309,19 +285,14 @@ func (a *cpuAccumulator) freeSockets() []int {
 // Returns free UncoreCache IDs as a slice sorted by sortAvailableUnCoreCache().
 func (a *cpuAccumulator) freeUncoreCache() []int {
 	free := []int{}
-	for _, uncore := range a.numaOrSocketsFirst.sortAvailableUncoreCaches() {
+	for _, uncore := range a.sortAvailableUncoreCaches() {
 		if a.isUncoreCacheFree(uncore) {
 			free = append(free, uncore)
 		}
 	}
 	return free
 }
 
-// Returns all UncoreCache IDs as a slice sorted by sortAvailableUncoreCache().
-func (a *cpuAccumulator) allUncoreCache() []int {
-	return a.numaOrSocketsFirst.sortAvailableUncoreCaches()
-}
-
 // Returns free core IDs as a slice sorted by sortAvailableCores().
 func (a *cpuAccumulator) freeCores() []int {
 	free := []int{}
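freeUncoreCache and freeCores share the same filter shape: walk the pre-sorted IDs and keep those whose "fully free" predicate holds, so the output inherits the sort order. A runnable sketch of that pattern with an invented predicate:

package main

import "fmt"

// filterFree keeps the IDs for which isFree returns true, preserving
// the order of the already-sorted input slice.
func filterFree(sorted []int, isFree func(int) bool) []int {
	free := []int{}
	for _, id := range sorted {
		if isFree(id) {
			free = append(free, id)
		}
	}
	return free
}

func main() {
	// Pretend caches 2 and 7 still have all of their CPUs free.
	fullyFree := map[int]bool{2: true, 7: true}
	fmt.Println(filterFree([]int{2, 5, 7}, func(id int) bool { return fullyFree[id] }))
	// Output: [2 7]
}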
@@ -414,7 +385,7 @@ func (a *cpuAccumulator) takeFullSockets() {
 		a.take(cpusInSocket)
 	}
 }
-func (a *cpuAccumulator) takeFullUnCore() {
+func (a *cpuAccumulator) takeFullUncore() {
 	for _, uncore := range a.freeUncoreCache() {
 		cpusInUncore := a.topo.CPUDetails.CPUsInUncoreCaches(uncore)
 		if !a.needsAtLeast(cpusInUncore.Size()) {
@@ -424,30 +395,34 @@ func (a *cpuAccumulator) takeFullUnCore() {
 	}
 }
 
+func (a *cpuAccumulator) takePartialUncore(uncoreID int) {
+	numCoresNeeded := a.numCPUsNeeded / a.topo.CPUsPerCore()
+	var freeCPUsInUncoreCache cpuset.CPUSet
+	freeCoresInUncoreCache := a.details.CoresNeededInUncoreCache(numCoresNeeded, uncoreID)
+	for _, coreID := range freeCoresInUncoreCache.List() {
+		freeCPUsInUncoreCache = freeCPUsInUncoreCache.Union(a.topo.CPUDetails.CPUsInCores(coreID))
+	}
+	klog.V(4).InfoS("takePartialUncore", "freeCPUsInUncoreCache", freeCPUsInUncoreCache.String())
+	if a.numCPUsNeeded == freeCPUsInUncoreCache.Size() {
+		a.take(freeCPUsInUncoreCache)
+	}
+}
+
 // First try to take full UncoreCache, if available and need is at least the size of the UncoreCache group.
 // Second try to take the partial UncoreCache if available and the request size can fit w/in the UncoreCache.
 func (a *cpuAccumulator) takeUncoreCache() {
-	for _, uncore := range a.allUncoreCache() {
-		numCoresNeeded := a.numCPUsNeeded / a.topo.CPUsPerCore()
-
-		// take full UncoreCache if the CPUs needed is greater a UncoreCache size
-		if a.numCPUsNeeded >= a.topo.NumCPUs/a.topo.NumUncoreCache {
-			a.takeFullUnCore()
+	cpusPerUncoreCache := a.topo.NumCPUs / a.topo.NumUncoreCache
+	for _, uncore := range a.sortAvailableUncoreCaches() {
+		// take full UncoreCache if the CPUs needed is greater than free UncoreCache size
+		if a.needsAtLeast(cpusPerUncoreCache) {
+			a.takeFullUncore()
 		}
 
-		var freeCPUsInUncoreCache cpuset.CPUSet
-		// need to get needed cores in UncoreCache
-		freeCoresInUncoreCache := a.details.CoresNeededInUncoreCache(numCoresNeeded, uncore)
-		klog.V(2).InfoS("free cores from a.details list: ", "freeCoresInUncorecache", freeCoresInUncoreCache)
-		for _, coreID := range freeCoresInUncoreCache.List() {
-			freeCPUsInUncoreCache = freeCPUsInUncoreCache.Union(a.topo.CPUDetails.CPUsInCores(coreID))
-		}
-		klog.V(2).InfoS("freeCPUsInUncorecache : ", "freeCPUsInUncorecache", freeCPUsInUncoreCache)
-		if a.numCPUsNeeded == freeCPUsInUncoreCache.Size() {
-			klog.V(4).InfoS("takePartialUncore: claiming cores from Uncorecache ID", "uncore", uncore)
-			a.take(freeCPUsInUncoreCache)
+		if a.isSatisfied() {
+			return
 		}
 
+		a.takePartialUncore(uncore)
 		if a.isSatisfied() {
 			return
 		}
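To make the two phases concrete: with 2 uncore caches of 8 CPUs each (4 cores x 2 threads) and a request for 10 CPUs, the loop takes one full cache and then claims whole cores from a cache only when they cover the remainder exactly. A self-contained sketch under those invented sizes — it mirrors the control flow above, not the kubelet's real accounting:

package main

import "fmt"

func main() {
	const cpusPerUncore, cpusPerCore = 8, 2
	needed := 10

	// Phase 1 (takeFullUncore): while the request still covers a whole
	// cache, take the cache outright.
	for needed >= cpusPerUncore {
		needed -= cpusPerUncore
		fmt.Println("took full uncore cache, still needed:", needed)
	}

	// Phase 2 (takePartialUncore): claim whole cores from a single cache,
	// but only when they satisfy the remainder exactly (the == check above).
	numCoresNeeded := needed / cpusPerCore
	if numCoresNeeded*cpusPerCore == needed {
		needed -= numCoresNeeded * cpusPerCore
		fmt.Println("took", numCoresNeeded, "core(s) from one cache, still needed:", needed)
	}
}

An odd remainder (say 3 CPUs on 2-way SMT) fails the exact-match check and falls through to the whole-core and single-thread steps later in the packed algorithm.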
@@ -543,7 +518,7 @@ func (a *cpuAccumulator) iterateCombinations(n []int, k int, f func([]int) LoopC
 	helper(n, k, 0, []int{}, f)
 }
 
-func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int) (cpuset.CPUSet, error) {
+func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, preferAlignByUncoreCache bool) (cpuset.CPUSet, error) {
 	acc := newCPUAccumulator(topo, availableCPUs, numCPUs)
 	if acc.isSatisfied() {
 		return acc.result, nil
@@ -566,66 +541,23 @@ func takeByTopologyNUMAPacked(topo *topology.CPUTopology, availableCPUs cpuset.C
 		return acc.result, nil
 	}
 
-	// 2. Acquire whole cores, if available and the container requires at least
-	//    a core's-worth of CPUs.
-	acc.takeFullCores()
-	if acc.isSatisfied() {
-		return acc.result, nil
-	}
-
-	// 3. Acquire single threads, preferring to fill partially-allocated cores
-	//    on the same sockets as the whole cores we have already taken in this
-	//    allocation.
-	acc.takeRemainingCPUs()
-	if acc.isSatisfied() {
-		return acc.result, nil
-	}
-
-	return cpuset.New(), fmt.Errorf("failed to allocate cpus")
-}
-
-// takeByTopologyUnCoreCachePacked uses the "packed" sorting strategy similar to takeByTopologyNUMAPacked.
-// It includes an additional level of sorting by uncorecache
-func takeByTopologyUncoreCachePacked(topo *topology.CPUTopology, availableCPUs cpuset.CPUSet, numCPUs int, cpuSortingStrategy CPUSortingStrategy) (cpuset.CPUSet, error) {
-	acc := newCPUAccumulator(topo, availableCPUs, numCPUs, cpuSortingStrategy)
-	if acc.isSatisfied() {
-		return acc.result, nil
-	}
-	if acc.isFailed() {
-		return cpuset.New(), fmt.Errorf("not enough cpus available to satisfy request: requested=%d, available=%d", numCPUs, availableCPUs.Size())
-	}
-
-	// Algorithm: topology-aware best-fit
-	// 1. Acquire whole NUMA nodes and sockets, if available and the container
-	//    requires at least a NUMA node or socket's-worth of CPUs. If NUMA
-	//    Nodes map to 1 or more sockets, pull from NUMA nodes first.
-	//    Otherwise pull from sockets first.
-	acc.numaOrSocketsFirst.takeFullFirstLevel()
-	if acc.isSatisfied() {
-		return acc.result, nil
-	}
-	acc.numaOrSocketsFirst.takeFullSecondLevel()
-	if acc.isSatisfied() {
-		return acc.result, nil
-	}
-
-	// 2. Acquire partial uncorecache, if there are enough CPUs available to satisfy the container requirement
-	//    Acquire the full uncorecache, if available and the container requires at least all the CPUs in the uncorecache grouping
-	acc.numaOrSocketsFirst.takeThirdLevel()
-	if acc.isSatisfied() {
-		return acc.result, nil
+	// 2. If PreferAlignByUncoreCache is enabled, acquire whole UncoreCaches
+	//    if available and the container requires at least an UncoreCache's worth
+	//    of CPUs. Otherwise, acquire CPUs from the fewest UncoreCaches possible.
+	if preferAlignByUncoreCache {
+		acc.takeUncoreCache()
+		if acc.isSatisfied() {
+			return acc.result, nil
+		}
 	}
 
 	// 3. Acquire whole cores, if available and the container requires at least
 	//    a core's-worth of CPUs.
 	//    If `CPUSortingStrategySpread` is specified, skip taking the whole core.
-	if cpuSortingStrategy != CPUSortingStrategySpread {
-		acc.takeFullCores()
-		if acc.isSatisfied() {
-			return acc.result, nil
-		}
+	acc.takeFullCores()
+	if acc.isSatisfied() {
+		return acc.result, nil
 	}
-
 	// 4. Acquire single threads, preferring to fill partially-allocated cores
 	//    on the same sockets as the whole cores we have already taken in this
 	//    allocation.
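The packed path is thus a greedy descent through ever-smaller units: NUMA node or socket, then uncore cache when preferAlignByUncoreCache is set, then core, then thread. A toy illustration of that fall-through with invented sizes (not the kubelet's actual accounting):

package main

import "fmt"

func main() {
	// Invented sizes: NUMA node = 32 CPUs, uncore cache = 8, core = 2, thread = 1.
	preferAlignByUncoreCache := true
	levels := []struct {
		name string
		size int
	}{{"NUMA node", 32}, {"uncore cache", 8}, {"core", 2}, {"thread", 1}}

	needed := 43
	for _, l := range levels {
		// The uncore level only participates when the flag is set.
		if l.name == "uncore cache" && !preferAlignByUncoreCache {
			continue
		}
		for needed >= l.size {
			needed -= l.size
			fmt.Printf("take full %s (%d CPUs), %d still needed\n", l.name, l.size, needed)
		}
	}
}

For a request of 43 CPUs this takes one NUMA node (32), one uncore cache (8), one core (2), and one thread (1), which is exactly the packing preference the step comments describe.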
@@ -704,8 +636,10 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 	// If the number of CPUs requested cannot be handed out in chunks of
 	// 'cpuGroupSize', then we just call out the packing algorithm since we
 	// can't distribute CPUs in this chunk size.
+	// PreferAlignByUncoreCache is not implemented here yet, so it is passed
+	// as false; support is planned for the beta release.
 	if (numCPUs % cpuGroupSize) != 0 {
-		return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs)
+		return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, false)
 	}
 
 	// Otherwise build an accumulator to start allocating CPUs from.
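Concretely, with cpuGroupSize = 2 (whole cores on 2-way SMT), an odd request such as 5 CPUs can never be dealt out in equal chunks, so the check above sends it straight to the packed algorithm. A trivial sketch of that guard:

package main

import "fmt"

func main() {
	// A request that is not a multiple of cpuGroupSize cannot be
	// distributed evenly, so the packed path is used instead.
	numCPUs, cpuGroupSize := 5, 2
	if numCPUs%cpuGroupSize != 0 {
		fmt.Println("fall back to takeByTopologyNUMAPacked")
	}
}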
@@ -888,7 +822,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 	// size 'cpuGroupSize' from 'bestCombo'.
 	distribution := (numCPUs / len(bestCombo) / cpuGroupSize) * cpuGroupSize
 	for _, numa := range bestCombo {
-		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution)
+		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), distribution, false)
 		acc.take(cpus)
 	}
 
@@ -903,7 +837,7 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 		if acc.details.CPUsInNUMANodes(numa).Size() < cpuGroupSize {
 			continue
 		}
-		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize)
+		cpus, _ := takeByTopologyNUMAPacked(acc.topo, acc.details.CPUsInNUMANodes(numa), cpuGroupSize, false)
 		acc.take(cpus)
 		remainder -= cpuGroupSize
 	}
@@ -927,5 +861,5 @@ func takeByTopologyNUMADistributed(topo *topology.CPUTopology, availableCPUs cpu
 
 	// If we never found a combination of NUMA nodes that we could properly
 	// distribute CPUs across, fall back to the packing algorithm.
-	return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs)
+	return takeByTopologyNUMAPacked(topo, availableCPUs, numCPUs, false)
 }