@@ -68,40 +68,32 @@ func (cc *Controller) killJob(jobInfo *apis.JobInfo, nextState state.NextStateFn
68
68
69
69
switch pod .Status .Phase {
70
70
case v1 .PodRunning :
71
- err := cc .kubeClients . CoreV1 (). Pods ( pod . Namespace ). Delete ( pod . Name , nil )
71
+ err := cc .deleteJobPod ( job . Name , pod )
72
72
if err != nil {
73
73
running ++
74
- glog .Errorf ("Failed to delete pod %s for Job %s, err %#v" ,
75
- pod .Name , job .Name , err )
76
74
errs = append (errs , err )
77
75
continue
78
76
}
79
77
terminating ++
80
78
case v1 .PodPending :
81
- err := cc .kubeClients . CoreV1 (). Pods ( pod . Namespace ). Delete ( pod . Name , nil )
79
+ err := cc .deleteJobPod ( job . Name , pod )
82
80
if err != nil {
83
81
pending ++
84
- glog .Errorf ("Failed to delete pod %s for Job %s, err %#v" ,
85
- pod .Name , job .Name , err )
86
82
errs = append (errs , err )
87
83
continue
88
84
}
89
85
terminating ++
90
86
case v1 .PodSucceeded :
91
- err := cc .kubeClients . CoreV1 (). Pods ( pod . Namespace ). Delete ( pod . Name , nil )
87
+ err := cc .deleteJobPod ( job . Name , pod )
92
88
if err != nil {
93
89
succeeded ++
94
- glog .Errorf ("Failed to delete pod %s for Job %s, err %#v" ,
95
- pod .Name , job .Name , err )
96
90
errs = append (errs , err )
97
91
continue
98
92
}
99
93
case v1 .PodFailed :
100
- err := cc .kubeClients . CoreV1 (). Pods ( pod . Namespace ). Delete ( pod . Name , nil )
94
+ err := cc .deleteJobPod ( job . Name , pod )
101
95
if err != nil {
102
96
failed ++
103
- glog .Errorf ("Failed to delete pod %s for Job %s, err %#v" ,
104
- pod .Name , job .Name , err )
105
97
errs = append (errs , err )
106
98
continue
107
99
}
@@ -294,9 +286,9 @@ func (cc *Controller) syncJob(jobInfo *apis.JobInfo, nextState state.NextStateFn
294
286
for _ , pod := range podToDelete {
295
287
go func (pod * v1.Pod ) {
296
288
defer waitDeletionGroup .Done ()
297
- err := cc .kubeClients . CoreV1 (). Pods ( pod . Namespace ). Delete ( pod . Name , nil )
289
+ err := cc .deleteJobPod ( job . Name , pod )
298
290
if err != nil {
299
- // Failed to create Pod, waitCreationGroup a moment and then create it again
291
+ // Failed to delete Pod, waitCreationGroup a moment and then create it again
300
292
// This is to ensure all podsMap under the same Job created
301
293
// So gang-scheduling could schedule the Job successfully
302
294
glog .Errorf ("Failed to delete pod %s for Job %s, err %#v" ,
@@ -504,3 +496,15 @@ func (cc *Controller) createPodGroupIfNotExist(job *vkv1.Job) error {
504
496
505
497
return nil
506
498
}
499
+
500
+ func (cc * Controller ) deleteJobPod (jobName string , pod * v1.Pod ) error {
501
+ err := cc .kubeClients .CoreV1 ().Pods (pod .Namespace ).Delete (pod .Name , nil )
502
+ if err != nil && ! apierrors .IsNotFound (err ) {
503
+ glog .Errorf ("Failed to delete pod %s/%s for Job %s, err %#v" ,
504
+ pod .Namespace , pod .Name , jobName , err )
505
+
506
+ return err
507
+ }
508
+
509
+ return nil
510
+ }
0 commit comments