Skip to content

Commit ed5d78a

Browse files
ZiMengSheng and wangjianyu.wjy authored
scheduler: gpuSharedPod doesn't fit secondaryDeviceWellPlanned (#2454)
Signed-off-by: wangjianyu.wjy <[email protected]>
Co-authored-by: wangjianyu.wjy <[email protected]>
1 parent a1bf96f commit ed5d78a

File tree

4 files changed

+39
-31
lines changed

4 files changed

+39
-31
lines changed

pkg/scheduler/plugins/deviceshare/device_allocator.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ func (a *AutopilotAllocator) Prepare() *framework.Status {
7777
}
7878
state := a.state
7979
nodeDevice := a.nodeDevice
80-
requestsPerInstance, desiredCountPerDeviceType, status := a.calcRequestsAndCountByDeviceType(state.podRequests, nodeDevice, state.hints, state.primaryDeviceType)
80+
requestsPerInstance, desiredCountPerDeviceType, status := a.calcRequestsAndCountByDeviceType(state.podRequests, nodeDevice, state.hints, state.primaryDeviceType, state.podFitsSecondaryDeviceWellPlanned)
8181
if !status.IsSuccess() {
8282
return status
8383
}
@@ -169,8 +169,9 @@ func (a *AutopilotAllocator) filterNodeDevice(
169169
}
170170

171171
func (a *AutopilotAllocator) calcRequestsAndCountByDeviceType(
172-
podRequests map[schedulingv1alpha1.DeviceType]corev1.ResourceList,
173-
nodeDevice *nodeDevice, hints apiext.DeviceAllocateHints, primaryDeviceType schedulingv1alpha1.DeviceType,
172+
podRequests map[schedulingv1alpha1.DeviceType]corev1.ResourceList, nodeDevice *nodeDevice,
173+
hints apiext.DeviceAllocateHints, primaryDeviceType schedulingv1alpha1.DeviceType,
174+
podFitsSecondaryDeviceWellPlanned bool,
174175
) (map[schedulingv1alpha1.DeviceType]corev1.ResourceList, map[schedulingv1alpha1.DeviceType]int, *framework.Status) {
175176
requestPerInstance := map[schedulingv1alpha1.DeviceType]corev1.ResourceList{}
176177
desiredCountPerDeviceType := map[schedulingv1alpha1.DeviceType]int{}
@@ -184,7 +185,7 @@ func (a *AutopilotAllocator) calcRequestsAndCountByDeviceType(
184185
continue
185186
}
186187

187-
if primaryDeviceType != "" && deviceType != primaryDeviceType && nodeDevice.secondaryDeviceWellPlanned && a.phaseBeingExecuted != schedulingphase.Reserve {
188+
if primaryDeviceType != "" && deviceType != primaryDeviceType && podFitsSecondaryDeviceWellPlanned && nodeDevice.secondaryDeviceWellPlanned && a.phaseBeingExecuted != schedulingphase.Reserve {
188189
continue
189190
}
190191

pkg/scheduler/plugins/deviceshare/plugin.go

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,18 @@ type Plugin struct {
7878
}
7979

8080
type preFilterState struct {
81-
skip bool
82-
podRequests map[schedulingv1alpha1.DeviceType]corev1.ResourceList
83-
hints apiext.DeviceAllocateHints
84-
hintSelectors map[schedulingv1alpha1.DeviceType][2]labels.Selector
85-
hasSelectors bool
86-
jointAllocate *apiext.DeviceJointAllocate
87-
primaryDeviceType schedulingv1alpha1.DeviceType
88-
gpuRequirements *GPURequirements
89-
allocationResult apiext.DeviceAllocations
90-
preemptibleDevices map[string]map[schedulingv1alpha1.DeviceType]deviceResources
91-
preemptibleInRRs map[string]map[types.UID]map[schedulingv1alpha1.DeviceType]deviceResources
81+
skip bool
82+
podRequests map[schedulingv1alpha1.DeviceType]corev1.ResourceList
83+
hints apiext.DeviceAllocateHints
84+
hintSelectors map[schedulingv1alpha1.DeviceType][2]labels.Selector
85+
hasSelectors bool
86+
jointAllocate *apiext.DeviceJointAllocate
87+
primaryDeviceType schedulingv1alpha1.DeviceType
88+
podFitsSecondaryDeviceWellPlanned bool
89+
gpuRequirements *GPURequirements
90+
allocationResult apiext.DeviceAllocations
91+
preemptibleDevices map[string]map[schedulingv1alpha1.DeviceType]deviceResources
92+
preemptibleInRRs map[string]map[types.UID]map[schedulingv1alpha1.DeviceType]deviceResources
9293

9394
hasReservationAffinity bool
9495
}
@@ -106,16 +107,17 @@ type GPURequirements struct {
106107

107108
func (s *preFilterState) Clone() framework.StateData {
108109
ns := &preFilterState{
109-
skip: s.skip,
110-
podRequests: s.podRequests,
111-
hints: s.hints,
112-
hasSelectors: s.hasSelectors,
113-
gpuRequirements: s.gpuRequirements,
114-
hintSelectors: s.hintSelectors,
115-
jointAllocate: s.jointAllocate,
116-
primaryDeviceType: s.primaryDeviceType,
117-
allocationResult: s.allocationResult,
118-
hasReservationAffinity: s.hasReservationAffinity,
110+
skip: s.skip,
111+
podRequests: s.podRequests,
112+
hints: s.hints,
113+
hasSelectors: s.hasSelectors,
114+
gpuRequirements: s.gpuRequirements,
115+
hintSelectors: s.hintSelectors,
116+
jointAllocate: s.jointAllocate,
117+
primaryDeviceType: s.primaryDeviceType,
118+
podFitsSecondaryDeviceWellPlanned: s.podFitsSecondaryDeviceWellPlanned,
119+
allocationResult: s.allocationResult,
120+
hasReservationAffinity: s.hasReservationAffinity,
119121
}
120122

121123
preemptibleDevices := map[string]map[schedulingv1alpha1.DeviceType]deviceResources{}

pkg/scheduler/plugins/deviceshare/plugin_test.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -638,8 +638,9 @@ func Test_Plugin_PreFilter(t *testing.T) {
638638
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
639639
},
640640
},
641-
preemptibleDevices: map[string]map[schedulingv1alpha1.DeviceType]deviceResources{},
642-
preemptibleInRRs: map[string]map[types.UID]map[schedulingv1alpha1.DeviceType]deviceResources{},
641+
podFitsSecondaryDeviceWellPlanned: true,
642+
preemptibleDevices: map[string]map[schedulingv1alpha1.DeviceType]deviceResources{},
643+
preemptibleInRRs: map[string]map[types.UID]map[schedulingv1alpha1.DeviceType]deviceResources{},
643644
},
644645
},
645646
{
@@ -677,8 +678,9 @@ func Test_Plugin_PreFilter(t *testing.T) {
677678
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
678679
},
679680
},
680-
preemptibleDevices: map[string]map[schedulingv1alpha1.DeviceType]deviceResources{},
681-
preemptibleInRRs: map[string]map[types.UID]map[schedulingv1alpha1.DeviceType]deviceResources{},
681+
podFitsSecondaryDeviceWellPlanned: true,
682+
preemptibleDevices: map[string]map[schedulingv1alpha1.DeviceType]deviceResources{},
683+
preemptibleInRRs: map[string]map[types.UID]map[schedulingv1alpha1.DeviceType]deviceResources{},
682684
},
683685
},
684686
{
@@ -802,8 +804,9 @@ func Test_Plugin_PreFilter(t *testing.T) {
802804
apiext.ResourceGPUMemoryRatio: resource.MustParse("100"),
803805
},
804806
},
805-
preemptibleDevices: map[string]map[schedulingv1alpha1.DeviceType]deviceResources{},
806-
preemptibleInRRs: map[string]map[types.UID]map[schedulingv1alpha1.DeviceType]deviceResources{},
807+
podFitsSecondaryDeviceWellPlanned: true,
808+
preemptibleDevices: map[string]map[schedulingv1alpha1.DeviceType]deviceResources{},
809+
preemptibleInRRs: map[string]map[types.UID]map[schedulingv1alpha1.DeviceType]deviceResources{},
807810
},
808811
},
809812
{
@@ -873,6 +876,7 @@ func Test_Plugin_PreFilter(t *testing.T) {
873876
apiext.ResourceRDMA: resource.MustParse("100"),
874877
},
875878
},
879+
podFitsSecondaryDeviceWellPlanned: true,
876880
gpuRequirements: &GPURequirements{
877881
numberOfGPUs: 1,
878882
requestsPerGPU: corev1.ResourceList{

pkg/scheduler/plugins/deviceshare/utils.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ func preparePod(pod *corev1.Pod) (state *preFilterState, status *framework.Statu
322322
if err != nil {
323323
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())
324324
}
325+
state.podFitsSecondaryDeviceWellPlanned = state.gpuRequirements != nil && !state.gpuRequirements.gpuShared
325326
reservationAffinity, err := reservationutil.GetRequiredReservationAffinity(pod)
326327
if err != nil {
327328
return nil, framework.NewStatus(framework.UnschedulableAndUnresolvable, err.Error())

0 commit comments

Comments
 (0)