Skip to content

Commit eba6094

Browse files
authored
scheduler: support numa topology policy on pod (#1939)
Signed-off-by: KunWuLuan <[email protected]>
1 parent afa430a commit eba6094

21 files changed

+475
-40
lines changed

apis/extension/numa_aware.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ import (
2626

2727
// Defines the pod level annotations and labels
2828
const (
29+
// AnnotationNUMATopologySpec represents the NUMA allocation API defined by Koordinator.
30+
// The user specifies the desired numa policy by setting the annotation.
31+
AnnotationNUMATopologySpec = SchedulingDomainPrefix + "/numa-topology-spec"
2932
// AnnotationResourceSpec represents resource allocation API defined by Koordinator.
3033
// The user specifies the desired CPU orchestration policy by setting the annotation.
3134
AnnotationResourceSpec = SchedulingDomainPrefix + "/resource-spec"
@@ -67,6 +70,15 @@ type ResourceSpec struct {
6770
PreferredCPUExclusivePolicy CPUExclusivePolicy `json:"preferredCPUExclusivePolicy,omitempty"`
6871
}
6972

73+
type NUMATopologySpec struct {
74+
// NUMATopologyPolicy represents the NUMA topology policy applied when scheduling the pod.
75+
NUMATopologyPolicy NUMATopologyPolicy `json:"numaTopologyPolicy,omitempty"`
76+
// SingleNUMANodeExclusive controls whether a Pod that uses a single NUMA node and a Pod that
77+
// uses multiple NUMA nodes may share a NUMA node, i.e. whether this Pod can be scheduled onto
78+
// a NUMA node on which a Pod of the other kind is already running.
79+
SingleNUMANodeExclusive NumaTopologyExclusive `json:"singleNUMANodeExclusive,omitempty"`
80+
}
81+
7082
// ResourceStatus describes resource allocation result, such as how to bind CPU.
7183
type ResourceStatus struct {
7284
// CPUSet represents the allocated CPUs. It is Linux CPU list formatted string.
@@ -135,6 +147,21 @@ const (
135147
NodeNUMAAllocateStrategyMostAllocated = NUMAMostAllocated
136148
)
137149

150+
type NumaTopologyExclusive string
151+
152+
const (
153+
NumaTopologyExclusivePreferred NumaTopologyExclusive = "Preferred"
154+
NumaTopologyExclusiveRequired NumaTopologyExclusive = "Required"
155+
)
156+
157+
type NumaNodeStatus string
158+
159+
const (
160+
NumaNodeStatusIdle NumaNodeStatus = "idle"
161+
NumaNodeStatusShared NumaNodeStatus = "shared"
162+
NumaNodeStatusSingle NumaNodeStatus = "single"
163+
)
164+
138165
type NUMATopologyPolicy string
139166

140167
const (
@@ -187,6 +214,19 @@ type KubeletCPUManagerPolicy struct {
187214
ReservedCPUs string `json:"reservedCPUs,omitempty"`
188215
}
189216

217+
func GetNUMATopologySpec(annotations map[string]string) (*NUMATopologySpec, error) {
218+
numaSpec := &NUMATopologySpec{}
219+
data, ok := annotations[AnnotationNUMATopologySpec]
220+
if !ok {
221+
return numaSpec, nil
222+
}
223+
err := json.Unmarshal([]byte(data), numaSpec)
224+
if err != nil {
225+
return nil, err
226+
}
227+
return numaSpec, nil
228+
}
229+
190230
// GetResourceSpec parses ResourceSpec from annotations
191231
func GetResourceSpec(annotations map[string]string) (*ResourceSpec, error) {
192232
resourceSpec := &ResourceSpec{}

pkg/scheduler/frameworkext/framework_extender.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -446,8 +446,8 @@ func (ext *frameworkExtenderImpl) ForgetPod(pod *corev1.Pod) error {
446446
return nil
447447
}
448448

449-
func (ext *frameworkExtenderImpl) RunNUMATopologyManagerAdmit(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string, numaNodes []int, policyType apiext.NUMATopologyPolicy) *framework.Status {
450-
return ext.topologyManager.Admit(ctx, cycleState, pod, nodeName, numaNodes, policyType)
449+
func (ext *frameworkExtenderImpl) RunNUMATopologyManagerAdmit(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string, numaNodes []int, policyType apiext.NUMATopologyPolicy, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) *framework.Status {
450+
return ext.topologyManager.Admit(ctx, cycleState, pod, nodeName, numaNodes, policyType, exclusivePolicy, allNUMANodeStatus)
451451
}
452452

453453
func (ext *frameworkExtenderImpl) GetNUMATopologyHintProvider() []topologymanager.NUMATopologyHintProvider {

pkg/scheduler/frameworkext/interface.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ type FrameworkExtender interface {
6464
RunReservationFilterPlugins(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, reservationInfo *ReservationInfo, nodeName string) *framework.Status
6565
RunReservationScorePlugins(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, reservationInfos []*ReservationInfo, nodeName string) (PluginToReservationScores, *framework.Status)
6666

67-
RunNUMATopologyManagerAdmit(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string, numaNodes []int, policyType apiext.NUMATopologyPolicy) *framework.Status
67+
RunNUMATopologyManagerAdmit(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string, numaNodes []int, policyType apiext.NUMATopologyPolicy, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) *framework.Status
6868

6969
RunResizePod(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string) *framework.Status
7070
}

pkg/scheduler/frameworkext/topologymanager/manager.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import (
2727
)
2828

2929
type Interface interface {
30-
Admit(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string, numaNodes []int, policyType apiext.NUMATopologyPolicy) *framework.Status
30+
Admit(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string, numaNodes []int, policyType apiext.NUMATopologyPolicy, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) *framework.Status
3131
}
3232

3333
type NUMATopologyHintProvider interface {
@@ -55,7 +55,7 @@ func New(hintProviderFactory NUMATopologyHintProviderFactory) Interface {
5555
}
5656
}
5757

58-
func (m *topologyManager) Admit(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string, numaNodes []int, policyType apiext.NUMATopologyPolicy) *framework.Status {
58+
func (m *topologyManager) Admit(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string, numaNodes []int, policyType apiext.NUMATopologyPolicy, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) *framework.Status {
5959
s, err := cycleState.Read(affinityStateKey)
6060
if err != nil {
6161
return framework.AsStatus(err)
@@ -64,7 +64,7 @@ func (m *topologyManager) Admit(ctx context.Context, cycleState *framework.Cycle
6464

6565
policy := createNUMATopologyPolicy(policyType, numaNodes)
6666

67-
bestHint, admit := m.calculateAffinity(ctx, cycleState, policy, pod, nodeName)
67+
bestHint, admit := m.calculateAffinity(ctx, cycleState, policy, pod, nodeName, exclusivePolicy, allNUMANodeStatus)
6868
klog.V(5).Infof("Best TopologyHint for (pod: %v): %v on node: %v", klog.KObj(pod), bestHint, nodeName)
6969
if !admit {
7070
return framework.NewStatus(framework.Unschedulable, "node(s) NUMA Topology affinity error")
@@ -79,9 +79,13 @@ func (m *topologyManager) Admit(ctx context.Context, cycleState *framework.Cycle
7979
return nil
8080
}
8181

82-
func (m *topologyManager) calculateAffinity(ctx context.Context, cycleState *framework.CycleState, policy Policy, pod *corev1.Pod, nodeName string) (NUMATopologyHint, bool) {
82+
func (m *topologyManager) calculateAffinity(ctx context.Context, cycleState *framework.CycleState, policy Policy, pod *corev1.Pod, nodeName string, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) (NUMATopologyHint, bool) {
8383
providersHints := m.accumulateProvidersHints(ctx, cycleState, pod, nodeName)
84-
bestHint, admit := policy.Merge(providersHints)
84+
bestHint, admit := policy.Merge(providersHints, exclusivePolicy, allNUMANodeStatus)
85+
if !checkExclusivePolicy(bestHint, exclusivePolicy, allNUMANodeStatus) {
86+
klog.V(5).Infof("bestHint violated the exclusivePolicy requirement: bestHint: %v, policy: %v, numaStatus: %v, nodeName: %v, pod: %v",
87+
bestHint, exclusivePolicy, allNUMANodeStatus, nodeName, pod.Name)
88+
}
8589
klog.V(5).Infof("PodTopologyHint: %v", bestHint)
8690
return bestHint, admit
8791
}

pkg/scheduler/frameworkext/topologymanager/policy.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,15 @@ package topologymanager
2020
import (
2121
"k8s.io/klog/v2"
2222

23+
apiext "github.com/koordinator-sh/koordinator/apis/extension"
2324
"github.com/koordinator-sh/koordinator/pkg/util/bitmask"
2425
)
2526

2627
type Policy interface {
2728
// Name returns Policy Name
2829
Name() string
2930
// Merge returns a merged NUMATopologyHint based on input from hint providers
30-
Merge(providersHints []map[string][]NUMATopologyHint) (NUMATopologyHint, bool)
31+
Merge(providersHints []map[string][]NUMATopologyHint, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) (NUMATopologyHint, bool)
3132
}
3233

3334
// NUMATopologyHint is a struct containing the NUMANodeAffinity for a Container
@@ -62,6 +63,29 @@ func (th *NUMATopologyHint) LessThan(other NUMATopologyHint) bool {
6263
return th.NUMANodeAffinity.IsNarrowerThan(other.NUMANodeAffinity)
6364
}
6465

66+
// Check if the affinity match the exclusive policy, return true if match or false otherwise.
67+
func checkExclusivePolicy(affinity NUMATopologyHint, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) bool {
68+
// check bestHint again if default hint is the best
69+
if affinity.NUMANodeAffinity == nil {
70+
return false
71+
}
72+
if exclusivePolicy == apiext.NumaTopologyExclusiveRequired {
73+
if affinity.NUMANodeAffinity.Count() > 1 {
74+
// we should make sure no numa is in single state
75+
for _, nodeid := range affinity.NUMANodeAffinity.GetBits() {
76+
if allNUMANodeStatus[nodeid] == apiext.NumaNodeStatusSingle {
77+
return false
78+
}
79+
}
80+
} else {
81+
if allNUMANodeStatus[affinity.NUMANodeAffinity.GetBits()[0]] == apiext.NumaNodeStatusShared {
82+
return false
83+
}
84+
}
85+
}
86+
return true
87+
}
88+
6589
// Merge a TopologyHints permutation to a single hint by performing a bitwise-AND
6690
// of their affinity masks. The hint shall be preferred if all hints in the permutation
6791
// are preferred.
@@ -126,7 +150,7 @@ func filterProvidersHints(providersHints []map[string][]NUMATopologyHint) [][]NU
126150
return allProviderHints
127151
}
128152

129-
func mergeFilteredHints(numaNodes []int, filteredHints [][]NUMATopologyHint) NUMATopologyHint {
153+
func mergeFilteredHints(numaNodes []int, filteredHints [][]NUMATopologyHint, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) NUMATopologyHint {
130154
// Set the default affinity as an any-numa affinity containing the list
131155
// of NUMA Nodes available on this machine.
132156
defaultAffinity, _ := bitmask.NewBitMask(numaNodes...)
@@ -144,6 +168,9 @@ func mergeFilteredHints(numaNodes []int, filteredHints [][]NUMATopologyHint) NUM
144168
if mergedHint.NUMANodeAffinity.Count() == 0 {
145169
return
146170
}
171+
if !checkExclusivePolicy(mergedHint, exclusivePolicy, allNUMANodeStatus) {
172+
mergedHint.Preferred = false
173+
}
147174

148175
for _, v := range permutation {
149176
if v.NUMANodeAffinity != nil && mergedHint.NUMANodeAffinity.IsEqual(v.NUMANodeAffinity) {

pkg/scheduler/frameworkext/topologymanager/policy_best_effort.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ limitations under the License.
1717

1818
package topologymanager
1919

20+
import apiext "github.com/koordinator-sh/koordinator/apis/extension"
21+
2022
type bestEffortPolicy struct {
2123
//List of NUMA Nodes available on the underlying machine
2224
numaNodes []int
@@ -40,9 +42,9 @@ func (p *bestEffortPolicy) canAdmitPodResult(hint *NUMATopologyHint) bool {
4042
return true
4143
}
4244

43-
func (p *bestEffortPolicy) Merge(providersHints []map[string][]NUMATopologyHint) (NUMATopologyHint, bool) {
45+
func (p *bestEffortPolicy) Merge(providersHints []map[string][]NUMATopologyHint, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) (NUMATopologyHint, bool) {
4446
filteredProvidersHints := filterProvidersHints(providersHints)
45-
bestHint := mergeFilteredHints(p.numaNodes, filteredProvidersHints)
47+
bestHint := mergeFilteredHints(p.numaNodes, filteredProvidersHints, exclusivePolicy, allNUMANodeStatus)
4648
admit := p.canAdmitPodResult(&bestHint)
4749
return bestHint, admit
4850
}

pkg/scheduler/frameworkext/topologymanager/policy_none.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ limitations under the License.
1717

1818
package topologymanager
1919

20+
import apiext "github.com/koordinator-sh/koordinator/apis/extension"
21+
2022
type nonePolicy struct{}
2123

2224
var _ Policy = &nonePolicy{}
@@ -37,6 +39,6 @@ func (p *nonePolicy) canAdmitPodResult(hint *NUMATopologyHint) bool {
3739
return true
3840
}
3941

40-
func (p *nonePolicy) Merge(providersHints []map[string][]NUMATopologyHint) (NUMATopologyHint, bool) {
42+
func (p *nonePolicy) Merge(providersHints []map[string][]NUMATopologyHint, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) (NUMATopologyHint, bool) {
4143
return NUMATopologyHint{}, p.canAdmitPodResult(nil)
4244
}

pkg/scheduler/frameworkext/topologymanager/policy_none_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ package topologymanager
1919

2020
import (
2121
"testing"
22+
23+
apiext "github.com/koordinator-sh/koordinator/apis/extension"
2224
)
2325

2426
func TestPolicyNoneName(t *testing.T) {
@@ -104,7 +106,7 @@ func TestPolicyNoneMerge(t *testing.T) {
104106

105107
for _, tc := range tcases {
106108
policy := NewNonePolicy()
107-
result, admit := policy.Merge(tc.providersHints)
109+
result, admit := policy.Merge(tc.providersHints, apiext.NumaTopologyExclusivePreferred, []apiext.NumaNodeStatus{})
108110
if !result.IsEqual(tc.expectedHint) || admit != tc.expectedAdmit {
109111
t.Errorf("Test Case: %s: Expected merge hint to be %v, got %v", tc.name, tc.expectedHint, result)
110112
}

pkg/scheduler/frameworkext/topologymanager/policy_restricted.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ limitations under the License.
1717

1818
package topologymanager
1919

20+
import apiext "github.com/koordinator-sh/koordinator/apis/extension"
21+
2022
type restrictedPolicy struct {
2123
bestEffortPolicy
2224
}
@@ -39,9 +41,9 @@ func (p *restrictedPolicy) canAdmitPodResult(hint *NUMATopologyHint) bool {
3941
return hint.Preferred
4042
}
4143

42-
func (p *restrictedPolicy) Merge(providersHints []map[string][]NUMATopologyHint) (NUMATopologyHint, bool) {
44+
func (p *restrictedPolicy) Merge(providersHints []map[string][]NUMATopologyHint, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) (NUMATopologyHint, bool) {
4345
filteredHints := filterProvidersHints(providersHints)
44-
hint := mergeFilteredHints(p.numaNodes, filteredHints)
46+
hint := mergeFilteredHints(p.numaNodes, filteredHints, exclusivePolicy, allNUMANodeStatus)
4547
admit := p.canAdmitPodResult(&hint)
4648
return hint, admit
4749
}

pkg/scheduler/frameworkext/topologymanager/policy_single_numa_node.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ limitations under the License.
1818
package topologymanager
1919

2020
import (
21+
apiext "github.com/koordinator-sh/koordinator/apis/extension"
2122
"github.com/koordinator-sh/koordinator/pkg/util/bitmask"
2223
)
2324

@@ -62,11 +63,11 @@ func filterSingleNumaHints(allResourcesHints [][]NUMATopologyHint) [][]NUMATopol
6263
return filteredResourcesHints
6364
}
6465

65-
func (p *singleNumaNodePolicy) Merge(providersHints []map[string][]NUMATopologyHint) (NUMATopologyHint, bool) {
66+
func (p *singleNumaNodePolicy) Merge(providersHints []map[string][]NUMATopologyHint, exclusivePolicy apiext.NumaTopologyExclusive, allNUMANodeStatus []apiext.NumaNodeStatus) (NUMATopologyHint, bool) {
6667
filteredHints := filterProvidersHints(providersHints)
6768
// Filter to only include don't care and hints with a single NUMA node.
6869
singleNumaHints := filterSingleNumaHints(filteredHints)
69-
bestHint := mergeFilteredHints(p.numaNodes, singleNumaHints)
70+
bestHint := mergeFilteredHints(p.numaNodes, singleNumaHints, exclusivePolicy, allNUMANodeStatus)
7071

7172
defaultAffinity, _ := bitmask.NewBitMask(p.numaNodes...)
7273
if bestHint.NUMANodeAffinity.IsEqual(defaultAffinity) {

0 commit comments

Comments
 (0)