Skip to content

Commit 380f81e

Browse files
authored
Yolgun/revert wait for first consumer (volcano-sh#42)
* Revert "fix scheduler panic issue (volcano-sh#39)". This reverts commit 176de8f. * Revert "fix waitForFirstConsumer support (volcano-sh#35)". This reverts commit ef7ea7a. Co-authored-by: Yunus Olgun <[email protected]>
1 parent e3b4238 commit 380f81e

File tree

25 files changed

+106
-1305
lines changed

25 files changed

+106
-1305
lines changed

LICENSES/vendor/github.com/agiledragon/gomonkey/v2/LICENSE

-25
This file was deleted.

go.mod

-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ module volcano.sh/volcano
33
go 1.17
44

55
require (
6-
github.com/agiledragon/gomonkey/v2 v2.9.0
76
github.com/fsnotify/fsnotify v1.4.9
87
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
98
github.com/hashicorp/go-multierror v1.0.0

go.sum

-2
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,6 @@ github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tN
6868
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
6969
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
7070
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
71-
github.com/agiledragon/gomonkey/v2 v2.9.0 h1:PDiKKybR596O6FHW+RVSG0Z7uGCBNbmbUXh3uCNQ7Hc=
72-
github.com/agiledragon/gomonkey/v2 v2.9.0/go.mod h1:ap1AmDzcVOAz1YpeJ3TCzIgstoaWLA6jbbgxfB4w2iY=
7371
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
7472
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
7573
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=

pkg/scheduler/cache/cache.go

+68-183
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@ package cache
1919
import (
2020
"context"
2121
"fmt"
22-
"os"
23-
"strconv"
24-
"strings"
2522
"sync"
2623
"time"
2724

@@ -56,7 +53,6 @@ import (
5653
vcinformerv1 "volcano.sh/apis/pkg/client/informers/externalversions/scheduling/v1beta1"
5754
"volcano.sh/volcano/cmd/scheduler/app/options"
5855
schedulingapi "volcano.sh/volcano/pkg/scheduler/api"
59-
"volcano.sh/volcano/pkg/scheduler/metrics"
6056
)
6157

6258
func init() {
@@ -115,44 +111,27 @@ type SchedulerCache struct {
115111
deletedJobs workqueue.RateLimitingInterface
116112

117113
informerFactory informers.SharedInformerFactory
118-
119-
BindFlowChannel chan *schedulingapi.TaskInfo
120-
bindCache []*schedulingapi.TaskInfo
121-
batchNum int
122114
}
123115

124116
type defaultBinder struct {
125117
kubeclient *kubernetes.Clientset
126118
}
127119

128120
//Bind will send bind request to api server
129-
func (db *defaultBinder) Bind(kubeClient *kubernetes.Clientset, tasks []*schedulingapi.TaskInfo) (error, []*schedulingapi.TaskInfo) {
130-
var errTasks []*schedulingapi.TaskInfo
131-
for _, task := range tasks {
132-
p := task.Pod
133-
if err := kubeClient.CoreV1().Pods(p.Namespace).Bind(context.TODO(),
134-
&v1.Binding{
135-
ObjectMeta: metav1.ObjectMeta{Namespace: p.Namespace, Name: p.Name, UID: p.UID, Annotations: p.Annotations},
136-
Target: v1.ObjectReference{
137-
Kind: "Node",
138-
Name: task.NodeName,
139-
},
121+
func (db *defaultBinder) Bind(p *v1.Pod, hostname string) error {
122+
if err := db.kubeclient.CoreV1().Pods(p.Namespace).Bind(context.TODO(),
123+
&v1.Binding{
124+
ObjectMeta: metav1.ObjectMeta{Namespace: p.Namespace, Name: p.Name, UID: p.UID, Annotations: p.Annotations},
125+
Target: v1.ObjectReference{
126+
Kind: "Node",
127+
Name: hostname,
140128
},
141-
metav1.CreateOptions{}); err != nil {
142-
klog.Errorf("Failed to bind pod <%v/%v> to node %s : %#v", p.Namespace, p.Name, task.NodeName, err)
143-
errTasks = append(errTasks, task)
144-
}
145-
}
146-
147-
if len(errTasks) > 0 {
148-
return fmt.Errorf("failed to bind pods"), errTasks
129+
},
130+
metav1.CreateOptions{}); err != nil {
131+
klog.Errorf("Failed to bind pod <%v/%v>: %#v", p.Namespace, p.Name, err)
132+
return err
149133
}
150-
151-
return nil, nil
152-
}
153-
154-
func NewBinder() *defaultBinder {
155-
return &defaultBinder{}
134+
return nil
156135
}
157136

158137
type defaultEvictor struct {
@@ -268,38 +247,15 @@ func (dvb *defaultVolumeBinder) AllocateVolumes(task *schedulingapi.TaskInfo, ho
268247
return err
269248
}
270249

271-
// RevertVolumes clean cache generated by AllocateVolumes
272-
func (dvb *defaultVolumeBinder) RevertVolumes(task *schedulingapi.TaskInfo, podVolumes *volumescheduling.PodVolumes) {
273-
if podVolumes != nil {
274-
klog.Infof("Revert assumed volumes for task %v/%v on node %s", task.Namespace, task.Name, task.NodeName)
275-
dvb.volumeBinder.RevertAssumedPodVolumes(podVolumes)
276-
task.VolumeReady = false
277-
task.PodVolumes = nil
278-
}
279-
}
280-
281250
// GetPodVolumes get pod volume on the host
282251
func (dvb *defaultVolumeBinder) GetPodVolumes(task *schedulingapi.TaskInfo,
283252
node *v1.Node) (podVolumes *volumescheduling.PodVolumes, err error) {
284-
boundClaims, claimsToBind, unboundClaimsImmediate, err := dvb.volumeBinder.GetPodVolumes(task.Pod)
253+
boundClaims, claimsToBind, _, err := dvb.volumeBinder.GetPodVolumes(task.Pod)
285254
if err != nil {
286255
return nil, err
287256
}
288-
if len(unboundClaimsImmediate) > 0 {
289-
return nil, fmt.Errorf("pod has unbound immediate PersistentVolumeClaims")
290-
}
291-
292-
podVolumes, reasons, err := dvb.volumeBinder.FindPodVolumes(task.Pod, boundClaims, claimsToBind, node)
293-
if err != nil {
294-
return nil, err
295-
} else if len(reasons) > 0 {
296-
var errors []string
297-
for _, reason := range reasons {
298-
errors = append(errors, string(reason))
299-
}
300-
return nil, fmt.Errorf(strings.Join(errors, ","))
301-
}
302257

258+
podVolumes, _, err = dvb.volumeBinder.FindPodVolumes(task.Pod, boundClaims, claimsToBind, node)
303259
return podVolumes, err
304260
}
305261

@@ -362,15 +318,8 @@ func newSchedulerCache(config *rest.Config, schedulerName string, defaultQueue s
362318
broadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: eventClient.CoreV1().Events("")})
363319
sc.Recorder = broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: schedulerName})
364320

365-
sc.BindFlowChannel = make(chan *schedulingapi.TaskInfo, 5000)
366-
sc.Binder = GetBindMethod()
367-
368-
var batchNum int
369-
batchNum, err = strconv.Atoi(os.Getenv("BATCH_BIND_NUM"))
370-
if err == nil && batchNum > 0 {
371-
sc.batchNum = batchNum
372-
} else {
373-
sc.batchNum = 1
321+
sc.Binder = &defaultBinder{
322+
kubeclient: sc.kubeClient,
374323
}
375324

376325
sc.Evictor = &defaultEvictor{
@@ -499,8 +448,6 @@ func (sc *SchedulerCache) Run(stopCh <-chan struct{}) {
499448

500449
// Cleanup jobs.
501450
go wait.Until(sc.processCleanupJob, 0, stopCh)
502-
503-
go wait.Until(sc.processBindTask, time.Millisecond*20, stopCh)
504451
}
505452

506453
// WaitForCacheSync sync the cache with the api server
@@ -598,24 +545,60 @@ func (sc *SchedulerCache) Evict(taskInfo *schedulingapi.TaskInfo, reason string)
598545
}
599546

600547
// Bind binds task to the target host.
601-
func (sc *SchedulerCache) Bind(tasks []*schedulingapi.TaskInfo) error {
602-
go func(taskArray []*schedulingapi.TaskInfo) {
603-
tmp := time.Now()
604-
err, errTasks := sc.Binder.Bind(sc.kubeClient, taskArray)
605-
if err == nil {
606-
klog.V(3).Infof("bind ok, latency %v", time.Since(tmp))
607-
for _, task := range tasks {
608-
sc.Recorder.Eventf(task.Pod, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v",
609-
task.Namespace, task.Name, task.NodeName)
610-
}
548+
func (sc *SchedulerCache) Bind(taskInfo *schedulingapi.TaskInfo, hostname string) error {
549+
sc.Mutex.Lock()
550+
defer sc.Mutex.Unlock()
551+
552+
job, task, err := sc.findJobAndTask(taskInfo)
553+
554+
if err != nil {
555+
return err
556+
}
557+
558+
node, found := sc.Nodes[hostname]
559+
if !found {
560+
return fmt.Errorf("failed to bind Task %v to host %v, host does not exist",
561+
task.UID, hostname)
562+
}
563+
564+
originalStatus := task.Status
565+
if err := job.UpdateTaskStatus(task, schedulingapi.Binding); err != nil {
566+
return err
567+
}
568+
569+
// Add task to the node.
570+
if err := node.AddTask(task); err != nil {
571+
// After failing to update task to a node we need to revert task status from Releasing,
572+
// otherwise task might be stuck in the Releasing state indefinitely.
573+
if err := job.UpdateTaskStatus(task, originalStatus); err != nil {
574+
klog.Errorf("Task <%s/%s> will be resynchronized after failing to revert status "+
575+
"from %s to %s after failing to update Task on Node <%s>: %v",
576+
task.Namespace, task.Name, task.Status, originalStatus, node.Name, err)
577+
sc.resyncTask(task)
578+
}
579+
return err
580+
}
581+
582+
p := task.Pod
583+
go func() {
584+
taskID := schedulingapi.PodKey(p)
585+
586+
sc.Lock()
587+
node.AddBindingTask(taskID)
588+
sc.Unlock()
589+
590+
defer func() {
591+
sc.Lock()
592+
node.RemoveBindingTask(taskID)
593+
sc.Unlock()
594+
}()
595+
596+
if err := sc.Binder.Bind(p, hostname); err != nil {
597+
sc.resyncTask(task)
611598
} else {
612-
for _, task := range errTasks {
613-
klog.V(2).Infof("resyncTask task %s", task.Name)
614-
sc.VolumeBinder.RevertVolumes(task, task.PodVolumes)
615-
sc.resyncTask(task)
616-
}
599+
sc.Recorder.Eventf(p, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v/%v to %v", p.Namespace, p.Name, hostname)
617600
}
618-
}(tasks)
601+
}()
619602

620603
return nil
621604
}
@@ -635,11 +618,6 @@ func (sc *SchedulerCache) BindVolumes(task *schedulingapi.TaskInfo, podVolumes *
635618
return sc.VolumeBinder.BindVolumes(task, podVolumes)
636619
}
637620

638-
// RevertVolumes clean cache generated by AllocateVolumes
639-
func (sc *SchedulerCache) RevertVolumes(task *schedulingapi.TaskInfo, podVolumes *volumescheduling.PodVolumes) {
640-
sc.VolumeBinder.RevertVolumes(task, podVolumes)
641-
}
642-
643621
// Client returns the kubernetes clientSet
644622
func (sc *SchedulerCache) Client() kubernetes.Interface {
645623
return sc.kubeClient
@@ -759,99 +737,6 @@ func (sc *SchedulerCache) processResyncTask() {
759737
}
760738
}
761739

762-
func (sc *SchedulerCache) AddBindTask(taskInfo *schedulingapi.TaskInfo) error {
763-
klog.V(5).Infof("add bind task %v/%v", taskInfo.Namespace, taskInfo.Name)
764-
sc.Mutex.Lock()
765-
defer sc.Mutex.Unlock()
766-
job, task, err := sc.findJobAndTask(taskInfo)
767-
if err != nil {
768-
return err
769-
}
770-
771-
node, found := sc.Nodes[taskInfo.NodeName]
772-
if !found {
773-
return fmt.Errorf("failed to bind Task %v to host %v, host does not exist",
774-
task.UID, taskInfo.NodeName)
775-
}
776-
777-
originalStatus := task.Status
778-
if err := job.UpdateTaskStatus(task, schedulingapi.Binding); err != nil {
779-
return err
780-
}
781-
782-
// Add task to the node.
783-
if err := node.AddTask(task); err != nil {
784-
// After failing to update task to a node we need to revert task status from Releasing,
785-
// otherwise task might be stuck in the Releasing state indefinitely.
786-
if err := job.UpdateTaskStatus(task, originalStatus); err != nil {
787-
klog.Errorf("Task <%s/%s> will be resynchronized after failing to revert status "+
788-
"from %s to %s after failing to update Task on Node <%s>: %v",
789-
task.Namespace, task.Name, task.Status, originalStatus, node.Name, err)
790-
sc.resyncTask(task)
791-
}
792-
return err
793-
}
794-
795-
sc.BindFlowChannel <- taskInfo
796-
797-
return nil
798-
}
799-
800-
func (sc *SchedulerCache) processBindTask() {
801-
for {
802-
select {
803-
case taskInfo, ok := <-sc.BindFlowChannel:
804-
if !ok {
805-
return
806-
}
807-
808-
sc.bindCache = append(sc.bindCache, taskInfo)
809-
if len(sc.bindCache) == sc.batchNum {
810-
sc.BindTask()
811-
}
812-
}
813-
814-
if len(sc.BindFlowChannel) == 0 {
815-
break
816-
}
817-
}
818-
819-
if len(sc.bindCache) == 0 {
820-
return
821-
}
822-
823-
sc.BindTask()
824-
}
825-
826-
func (sc *SchedulerCache) BindTask() {
827-
klog.V(5).Infof("batch bind task count %d", len(sc.bindCache))
828-
successfulTasks := make([]*schedulingapi.TaskInfo, 0)
829-
for _, task := range sc.bindCache {
830-
if err := sc.VolumeBinder.BindVolumes(task, task.PodVolumes); err != nil {
831-
klog.Errorf("task %s/%s bind Volumes failed: %#v", task.Namespace, task.Name, err)
832-
sc.VolumeBinder.RevertVolumes(task, task.PodVolumes)
833-
sc.resyncTask(task)
834-
} else {
835-
successfulTasks = append(successfulTasks, task)
836-
klog.V(5).Infof("task %s/%s bind Volumes done", task.Namespace, task.Name)
837-
}
838-
}
839-
840-
bindTasks := make([]*schedulingapi.TaskInfo, len(successfulTasks))
841-
copy(bindTasks, successfulTasks)
842-
if err := sc.Bind(bindTasks); err != nil {
843-
klog.Errorf("failed to bind task count %d: %#v", len(bindTasks), err)
844-
return
845-
}
846-
847-
for _, task := range successfulTasks {
848-
metrics.UpdateTaskScheduleDuration(metrics.Duration(task.Pod.CreationTimestamp.Time))
849-
}
850-
851-
sc.bindCache = sc.bindCache[0:0]
852-
return
853-
}
854-
855740
// Snapshot returns the complete snapshot of the cluster from cache
856741
func (sc *SchedulerCache) Snapshot() *schedulingapi.ClusterInfo {
857742
sc.Mutex.Lock()

0 commit comments

Comments (0)