Skip to content

Commit 31b04ec

Browse files
Merge pull request volcano-sh#34 from lminzhw/scheduler_detailed_event
support scheduler detailed event
2 parents a92572f + 8651f7a commit 31b04ec

File tree

11 files changed

+202
-60
lines changed

11 files changed

+202
-60
lines changed

pkg/scheduler/actions/allocate/allocate.go

+3-5
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ limitations under the License.
1717
package allocate
1818

1919
import (
20-
"fmt"
21-
2220
"github.com/golang/glog"
2321

2422
"github.com/kubernetes-sigs/kube-batch/pkg/apis/scheduling/v1alpha1"
@@ -88,8 +86,7 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
8886
// ...
8987
// }
9088
if !task.InitResreq.LessEqual(node.Idle) && !task.InitResreq.LessEqual(node.Releasing) {
91-
return fmt.Errorf("task <%s/%s> ResourceFit failed on node <%s>",
92-
task.Namespace, task.Name, node.Name)
89+
return api.NewFitError(task, node, api.NodeResourceFitFailed)
9390
}
9491

9592
return ssn.PredicateFn(task, node)
@@ -149,8 +146,9 @@ func (alloc *allocateAction) Execute(ssn *framework.Session) {
149146
job.NodesFitDelta = make(api.NodeResourceMap)
150147
}
151148

152-
predicateNodes := util.PredicateNodes(task, allNodes, predicateFn)
149+
predicateNodes, fitErrors := util.PredicateNodes(task, allNodes, predicateFn)
153150
if len(predicateNodes) == 0 {
151+
job.NodesFitErrors[task.UID] = fitErrors
154152
break
155153
}
156154

pkg/scheduler/actions/backfill/backfill.go

+11
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ func (alloc *backfillAction) Execute(ssn *framework.Session) {
5454

5555
for _, task := range job.TaskStatusIndex[api.Pending] {
5656
if task.InitResreq.IsEmpty() {
57+
allocated := false
58+
fe := api.NewFitErrors()
59+
5760
// As task did not request resources, so it only need to meet predicates.
5861
// TODO (k82cn): need to prioritize nodes to avoid pod hole.
5962
for _, node := range ssn.Nodes {
@@ -62,16 +65,24 @@ func (alloc *backfillAction) Execute(ssn *framework.Session) {
6265
if err := ssn.PredicateFn(task, node); err != nil {
6366
glog.V(3).Infof("Predicates failed for task <%s/%s> on node <%s>: %v",
6467
task.Namespace, task.Name, node.Name, err)
68+
fe.SetNodeError(node.Name, err)
6569
continue
6670
}
6771

6872
glog.V(3).Infof("Binding Task <%v/%v> to node <%v>", task.Namespace, task.Name, node.Name)
6973
if err := ssn.Allocate(task, node.Name); err != nil {
7074
glog.Errorf("Failed to bind Task %v on %v in Session %v", task.UID, node.Name, ssn.UID)
75+
fe.SetNodeError(node.Name, err)
7176
continue
7277
}
78+
79+
allocated = true
7380
break
7481
}
82+
83+
if !allocated {
84+
job.NodesFitErrors[task.UID] = fe
85+
}
7586
} else {
7687
// TODO (k82cn): backfill for other case.
7788
}

pkg/scheduler/actions/preempt/preempt.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ func preempt(
188188

189189
allNodes := util.GetNodeList(nodes)
190190

191-
predicateNodes := util.PredicateNodes(preemptor, allNodes, ssn.PredicateFn)
191+
predicateNodes, _ := util.PredicateNodes(preemptor, allNodes, ssn.PredicateFn)
192192

193193
nodeScores := util.PrioritizeNodes(preemptor, predicateNodes, ssn.BatchNodeOrderFn, ssn.NodeOrderMapFn, ssn.NodeOrderReduceFn)
194194

pkg/scheduler/api/job_info.go

+12-20
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ type JobInfo struct {
139139

140140
NodesFitDelta NodeResourceMap
141141

142+
JobFitErrors string
143+
NodesFitErrors map[TaskID]*FitErrors
144+
142145
// All tasks of the Job.
143146
TaskStatusIndex map[TaskStatus]tasksMap
144147
Tasks tasksMap
@@ -164,6 +167,8 @@ func NewJobInfo(uid JobID, tasks ...*TaskInfo) *JobInfo {
164167
Allocated: EmptyResource(),
165168
TotalRequest: EmptyResource(),
166169

170+
NodesFitErrors: make(map[TaskID]*FitErrors),
171+
167172
TaskStatusIndex: map[TaskStatus]tasksMap{},
168173
Tasks: tasksMap{},
169174
}
@@ -301,6 +306,8 @@ func (ji *JobInfo) Clone() *JobInfo {
301306
TotalRequest: EmptyResource(),
302307
NodesFitDelta: make(NodeResourceMap),
303308

309+
NodesFitErrors: make(map[TaskID]*FitErrors),
310+
304311
PDB: ji.PDB,
305312
PodGroup: ji.PodGroup.DeepCopy(),
306313

@@ -338,36 +345,21 @@ func (ji JobInfo) String() string {
338345
// FitError returns detailed information on why a job's task failed to fit on
339346
// each available node
340347
func (ji *JobInfo) FitError() string {
341-
if len(ji.NodesFitDelta) == 0 {
342-
reasonMsg := fmt.Sprintf("0 nodes are available")
343-
return reasonMsg
344-
}
345-
346348
reasons := make(map[string]int)
347-
for _, v := range ji.NodesFitDelta {
348-
if v.Get(v1.ResourceCPU) < 0 {
349-
reasons["cpu"]++
350-
}
351-
if v.Get(v1.ResourceMemory) < 0 {
352-
reasons["memory"]++
353-
}
354-
355-
for rName, rQuant := range v.ScalarResources {
356-
if rQuant < 0 {
357-
reasons[string(rName)]++
358-
}
359-
}
349+
for status, taskMap := range ji.TaskStatusIndex {
350+
reasons[fmt.Sprintf("%s", status)] += len(taskMap)
360351
}
352+
reasons["minAvailable"] = int(ji.MinAvailable)
361353

362354
sortReasonsHistogram := func() []string {
363355
reasonStrings := []string{}
364356
for k, v := range reasons {
365-
reasonStrings = append(reasonStrings, fmt.Sprintf("%v insufficient %v", v, k))
357+
reasonStrings = append(reasonStrings, fmt.Sprintf("%v %v", v, k))
366358
}
367359
sort.Strings(reasonStrings)
368360
return reasonStrings
369361
}
370-
reasonMsg := fmt.Sprintf("0/%v nodes are available, %v.", len(ji.NodesFitDelta), strings.Join(sortReasonsHistogram(), ", "))
362+
reasonMsg := fmt.Sprintf("job is not ready, %v.", strings.Join(sortReasonsHistogram(), ", "))
371363
return reasonMsg
372364
}
373365

pkg/scheduler/api/job_info_test.go

+6
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ func TestAddTaskInfo(t *testing.T) {
8181
},
8282
NodeSelector: make(map[string]string),
8383
NodesFitDelta: make(NodeResourceMap),
84+
85+
NodesFitErrors: make(map[TaskID]*FitErrors),
8486
},
8587
},
8688
}
@@ -147,6 +149,8 @@ func TestDeleteTaskInfo(t *testing.T) {
147149
},
148150
NodeSelector: make(map[string]string),
149151
NodesFitDelta: make(NodeResourceMap),
152+
153+
NodesFitErrors: make(map[TaskID]*FitErrors),
150154
},
151155
},
152156
{
@@ -172,6 +176,8 @@ func TestDeleteTaskInfo(t *testing.T) {
172176
},
173177
NodeSelector: make(map[string]string),
174178
NodesFitDelta: make(NodeResourceMap),
179+
180+
NodesFitErrors: make(map[TaskID]*FitErrors),
175181
},
176182
},
177183
}

pkg/scheduler/api/types.go

+4
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ func (ts TaskStatus) String() string {
6161
switch ts {
6262
case Pending:
6363
return "Pending"
64+
case Allocated:
65+
return "Allocated"
66+
case Pipelined:
67+
return "Pipelined"
6468
case Binding:
6569
return "Binding"
6670
case Bound:

pkg/scheduler/api/unschedule_info.go

+112
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
package api
2+
3+
import (
4+
"fmt"
5+
"sort"
6+
"strings"
7+
8+
"k8s.io/kubernetes/pkg/scheduler/algorithm"
9+
)
10+
11+
const (
	// NodePodNumberExceeded means pods in node exceed the allocatable pod number
	NodePodNumberExceeded = "node(s) pod number exceeded"
	// NodeResourceFitFailed means node could not fit the request of pod
	NodeResourceFitFailed = "node(s) resource fit failed"

	// AllNodeUnavailableMsg is the default error message used by
	// FitErrors.Error when no job-level message was set via SetError.
	AllNodeUnavailableMsg = "all nodes are unavailable"
)

// FitErrors is a set of FitError collected across many nodes while trying
// to place a single task.
type FitErrors struct {
	// nodes maps node name -> the FitError recorded for that node.
	nodes map[string]*FitError
	// err is the optional job-level message used as the prefix of Error();
	// when empty, AllNodeUnavailableMsg is used instead.
	err string
}
26+
27+
// NewFitErrors returns an FitErrors
28+
func NewFitErrors() *FitErrors {
29+
f := new(FitErrors)
30+
f.nodes = make(map[string]*FitError)
31+
return f
32+
}
33+
34+
// SetError sets the common (job-level) error message in FitErrors.
// The message becomes the prefix of Error(); when never set,
// AllNodeUnavailableMsg is used instead.
func (f *FitErrors) SetError(err string) {
	f.err = err
}
38+
39+
// SetNodeError set the node error in FitErrors
40+
func (f *FitErrors) SetNodeError(nodeName string, err error) {
41+
var fe *FitError
42+
switch obj := err.(type) {
43+
case *FitError:
44+
obj.NodeName = nodeName
45+
fe = obj
46+
default:
47+
fe = &FitError{
48+
NodeName: nodeName,
49+
Reasons: []string{obj.Error()},
50+
}
51+
}
52+
53+
f.nodes[nodeName] = fe
54+
}
55+
56+
// Error returns the final error message
57+
func (f *FitErrors) Error() string {
58+
reasons := make(map[string]int)
59+
60+
for _, node := range f.nodes {
61+
for _, reason := range node.Reasons {
62+
reasons[reason]++
63+
}
64+
}
65+
66+
sortReasonsHistogram := func() []string {
67+
reasonStrings := []string{}
68+
for k, v := range reasons {
69+
reasonStrings = append(reasonStrings, fmt.Sprintf("%v %v", v, k))
70+
}
71+
sort.Strings(reasonStrings)
72+
return reasonStrings
73+
}
74+
if f.err == "" {
75+
f.err = AllNodeUnavailableMsg
76+
}
77+
reasonMsg := fmt.Sprintf(f.err+": %v.", strings.Join(sortReasonsHistogram(), ", "))
78+
return reasonMsg
79+
}
80+
81+
// FitError describes the reason why a task could not fit on a node.
type FitError struct {
	// taskNamespace and taskName identify the task that failed to fit.
	taskNamespace string
	taskName      string
	// NodeName is the node the task was tried against.
	NodeName string
	// Reasons lists the individual failure messages for that node.
	Reasons []string
}
88+
89+
// NewFitError return FitError by message
90+
func NewFitError(task *TaskInfo, node *NodeInfo, message ...string) *FitError {
91+
fe := &FitError{
92+
taskName: task.Name,
93+
taskNamespace: task.Namespace,
94+
NodeName: node.Name,
95+
Reasons: message,
96+
}
97+
return fe
98+
}
99+
100+
// NewFitErrorByReasons return FitError by reasons
101+
func NewFitErrorByReasons(task *TaskInfo, node *NodeInfo, reasons ...algorithm.PredicateFailureReason) *FitError {
102+
message := make([]string, 0, len(reasons))
103+
for _, reason := range reasons {
104+
message = append(message, reason.GetReason())
105+
}
106+
return NewFitError(task, node, message...)
107+
}
108+
109+
// Error returns the final error message
110+
func (f *FitError) Error() string {
111+
return fmt.Sprintf("task %s/%s on node %s fit failed: %s", f.taskNamespace, f.taskName, f.NodeName, strings.Join(f.Reasons, ", "))
112+
}

pkg/scheduler/cache/cache.go

+11-5
Original file line numberDiff line numberDiff line change
@@ -686,7 +686,10 @@ func (sc *SchedulerCache) String() string {
686686

687687
// RecordJobStatusEvent records related events according to job status.
688688
func (sc *SchedulerCache) RecordJobStatusEvent(job *kbapi.JobInfo) {
689-
jobErrMsg := job.FitError()
689+
baseErrorMessage := job.JobFitErrors
690+
if baseErrorMessage == "" {
691+
baseErrorMessage = kbapi.AllNodeUnavailableMsg
692+
}
690693

691694
if !shadowPodGroup(job.PodGroup) {
692695
pgUnschedulable := job.PodGroup != nil &&
@@ -696,17 +699,20 @@ func (sc *SchedulerCache) RecordJobStatusEvent(job *kbapi.JobInfo) {
696699

697700
// If pending or unschedulable, record unschedulable event.
698701
if pgUnschedulable || pdbUnschedulabe {
699-
msg := fmt.Sprintf("%v/%v tasks in gang unschedulable: %v",
700-
len(job.TaskStatusIndex[api.Pending]), len(job.Tasks), job.FitError())
701702
sc.Recorder.Eventf(job.PodGroup, v1.EventTypeWarning,
702-
string(v1alpha1.PodGroupUnschedulableType), msg)
703+
string(v1alpha1.PodGroupUnschedulableType), baseErrorMessage)
703704
}
704705
}
705706

706707
// Update podCondition for tasks Allocated and Pending before job discarded
707708
for _, status := range []api.TaskStatus{api.Allocated, api.Pending} {
708709
for _, taskInfo := range job.TaskStatusIndex[status] {
709-
if err := sc.taskUnschedulable(taskInfo, jobErrMsg); err != nil {
710+
msg := baseErrorMessage
711+
fitError := job.NodesFitErrors[taskInfo.UID]
712+
if fitError != nil {
713+
msg = fitError.Error()
714+
}
715+
if err := sc.taskUnschedulable(taskInfo, msg); err != nil {
710716
glog.Errorf("Failed to update unschedulable task status <%s/%s>: %v",
711717
taskInfo.Namespace, taskInfo.Name, err)
712718
}

pkg/scheduler/plugins/gang/gang.go

+13
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ func (gp *gangPlugin) OnSessionClose(ssn *framework.Session) {
137137
unreadyTaskCount = job.MinAvailable - job.ReadyTaskNum()
138138
msg := fmt.Sprintf("%v/%v tasks in gang unschedulable: %v",
139139
job.MinAvailable-job.ReadyTaskNum(), len(job.Tasks), job.FitError())
140+
job.JobFitErrors = msg
140141

141142
unScheduleJobCount++
142143
metrics.UpdateUnscheduleTaskCount(job.Name, int(unreadyTaskCount))
@@ -155,6 +156,18 @@ func (gp *gangPlugin) OnSessionClose(ssn *framework.Session) {
155156
glog.Errorf("Failed to update job <%s/%s> condition: %v",
156157
job.Namespace, job.Name, err)
157158
}
159+
160+
// allocated task should follow the job fit error
161+
for _, taskInfo := range job.TaskStatusIndex[api.Allocated] {
162+
fitError := job.NodesFitErrors[taskInfo.UID]
163+
if fitError != nil {
164+
continue
165+
}
166+
167+
fitError = api.NewFitErrors()
168+
job.NodesFitErrors[taskInfo.UID] = fitError
169+
fitError.SetError(msg)
170+
}
158171
}
159172
}
160173

0 commit comments

Comments
 (0)