Skip to content

Commit 5caa9d5

Browse files
authored
Reuse the core kubernetes API reason for the BackoffLimitExceeded (kubeflow#667)
Signed-off-by: Yuki Iwai <[email protected]>
1 parent 42335bc commit 5caa9d5

File tree

2 files changed

+3
-7
lines changed

2 files changed

+3
-7
lines changed

pkg/controller/mpi_job_controller.go

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -111,10 +111,6 @@ const (
111111
// From: k8s.io/kubernetes/pkg/apis/core/validation/events.go
112112
eventMessageLimit = 1024
113113

114-
// jobBackoffLimitExceededReason is the reason that the k8s job controller
115-
// uses when the backoff limit is exceeded.
116-
jobBackoffLimitExceededReason = "BackoffLimitExceeded"
117-
118114
openMPISlotsEnv = "OMPI_MCA_orte_set_default_slots"
119115
intelMPISlotsEnv = "I_MPI_PERHOST"
120116
)
@@ -1149,7 +1145,7 @@ func (c *MPIJobController) updateMPIJobFailedStatus(mpiJob *kubeflow.MPIJob, lau
11491145
if msg == "" {
11501146
msg = fmt.Sprintf("MPIJob %s/%s has failed", mpiJob.Namespace, mpiJob.Name)
11511147
}
1152-
if reason == jobBackoffLimitExceededReason {
1148+
if reason == batchv1.JobReasonBackoffLimitExceeded {
11531149
// Concatenate the reason and message from the last failed Pod.
11541150
var lastFailedPod *corev1.Pod
11551151
for _, p := range launcherPods {

pkg/controller/mpi_job_controller_test.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -635,7 +635,7 @@ func TestLauncherFailed(t *testing.T) {
635635
launcher.Status.Conditions = append(launcher.Status.Conditions, batchv1.JobCondition{
636636
Type: batchv1.JobFailed,
637637
Status: corev1.ConditionTrue,
638-
Reason: jobBackoffLimitExceededReason,
638+
Reason: batchv1.JobReasonBackoffLimitExceeded,
639639
Message: "Job has reached the specified backoff limit",
640640
})
641641
launcher.Status.Failed = 2
@@ -668,7 +668,7 @@ func TestLauncherFailed(t *testing.T) {
668668
msg := fmt.Sprintf("MPIJob %s/%s is created.", mpiJob.Namespace, mpiJob.Name)
669669
updateMPIJobConditions(mpiJobCopy, kubeflow.JobCreated, corev1.ConditionTrue, mpiJobCreatedReason, msg)
670670
msg = "Job has reached the specified backoff limit: second message"
671-
updateMPIJobConditions(mpiJobCopy, kubeflow.JobFailed, corev1.ConditionTrue, jobBackoffLimitExceededReason+"/FailedReason2", msg)
671+
updateMPIJobConditions(mpiJobCopy, kubeflow.JobFailed, corev1.ConditionTrue, batchv1.JobReasonBackoffLimitExceeded+"/FailedReason2", msg)
672672

673673
f.expectUpdateMPIJobStatusAction(mpiJobCopy)
674674

0 commit comments

Comments
 (0)