Skip to content

Commit 7383114

Browse files
tkatila and tenzen-y authored
Add default Intel MPI env variables to MPIJob (#1804)
Signed-off-by: Tuomas Katila <[email protected]> Co-authored-by: Yuki Iwai <[email protected]>
1 parent e002b8a commit 7383114

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

pkg/controller.v1/mpi/mpijob.go

+21
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ const (
4545
initContainerCpu = "100m"
4646
initContainerEphStorage = "5Gi"
4747
initContainerMem = "512Mi"
48+
iMPIDefaultBootstrap = "rsh"
4849
)
4950

5051
const (
@@ -218,6 +219,26 @@ func isGPULauncher(mpiJob *kubeflowv1.MPIJob) bool {
218219
return false
219220
}
220221

222+
// hasIntelMPIBootstrapValues returns the existence of I_MPI_HYDRA_BOOTSTRAP
223+
// and I_MPI_HYDRA_BOOTSTRAP_EXEC values.
224+
// There are also _EXEC_EXTRA_ARGS and _AUTOFORK under the I_MPI_HYDRA_BOOTSTRAP
225+
// prefix but those are not checked on purpose.
226+
func hasIntelMPIBootstrapValues(envs []corev1.EnvVar) (bootstrap, exec bool) {
227+
for _, env := range envs {
228+
if env.Name == "I_MPI_HYDRA_BOOTSTRAP" {
229+
bootstrap = true
230+
} else if env.Name == "I_MPI_HYDRA_BOOTSTRAP_EXEC" {
231+
exec = true
232+
}
233+
234+
if bootstrap && exec {
235+
break
236+
}
237+
}
238+
239+
return bootstrap, exec
240+
}
241+
221242
func defaultReplicaLabels(genericLabels map[string]string, roleLabelVal string) map[string]string {
222243
replicaLabels := map[string]string{}
223244
for k, v := range genericLabels {

pkg/controller.v1/mpi/mpijob_controller.go

+19
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,25 @@ func (jc *MPIJobReconciler) newLauncher(mpiJob *kubeflowv1.MPIJob, kubectlDelive
11521152
})
11531153
}
11541154

1155+
// Add default Intel MPI bootstrap variables if not provided by the user.
1156+
bootstrap, exec := hasIntelMPIBootstrapValues(container.Env)
1157+
if !bootstrap {
1158+
container.Env = append(container.Env,
1159+
corev1.EnvVar{
1160+
Name: "I_MPI_HYDRA_BOOTSTRAP",
1161+
Value: iMPIDefaultBootstrap,
1162+
},
1163+
)
1164+
}
1165+
if !exec {
1166+
container.Env = append(container.Env,
1167+
corev1.EnvVar{
1168+
Name: "I_MPI_HYDRA_BOOTSTRAP_EXEC",
1169+
Value: fmt.Sprintf("%s/%s", configMountPath, kubexecScriptName),
1170+
},
1171+
)
1172+
}
1173+
11551174
container.VolumeMounts = append(container.VolumeMounts,
11561175
corev1.VolumeMount{
11571176
Name: kubectlVolumeName,

pkg/controller.v1/mpi/mpijob_controller_test.go

+85
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,91 @@ var _ = Describe("MPIJob controller", func() {
709709
})
710710
})
711711

712+
Context("Test launcher's Intel MPI handling", func() {
713+
It("Should create a launcher job with Intel MPI env variables", func() {
714+
By("By creating MPIJobs with and without preset env variables")
715+
716+
testCases := map[string]struct {
717+
envVariables map[string]string
718+
expectedEnvVariables map[string]string
719+
}{
720+
"withoutIMPIValues": {
721+
envVariables: map[string]string{
722+
"X_MPI_HYDRA_BOOTSTRAP": "foo",
723+
},
724+
expectedEnvVariables: map[string]string{
725+
"I_MPI_HYDRA_BOOTSTRAP": iMPIDefaultBootstrap,
726+
"I_MPI_HYDRA_BOOTSTRAP_EXEC": fmt.Sprintf("%s/%s", configMountPath, kubexecScriptName),
727+
},
728+
},
729+
"withIMPIBootstrap": {
730+
envVariables: map[string]string{
731+
"I_MPI_HYDRA_BOOTSTRAP": "RSH",
732+
},
733+
expectedEnvVariables: map[string]string{
734+
"I_MPI_HYDRA_BOOTSTRAP": "RSH",
735+
"I_MPI_HYDRA_BOOTSTRAP_EXEC": fmt.Sprintf("%s/%s", configMountPath, kubexecScriptName),
736+
},
737+
},
738+
"withIMPIBootstrapExec": {
739+
envVariables: map[string]string{
740+
"I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh",
741+
},
742+
expectedEnvVariables: map[string]string{
743+
"I_MPI_HYDRA_BOOTSTRAP": iMPIDefaultBootstrap,
744+
"I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh",
745+
},
746+
},
747+
"withIMPIBootstrapAndExec": {
748+
envVariables: map[string]string{
749+
"I_MPI_HYDRA_BOOTSTRAP": "RSH",
750+
"I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh",
751+
},
752+
expectedEnvVariables: map[string]string{
753+
"I_MPI_HYDRA_BOOTSTRAP": "RSH",
754+
"I_MPI_HYDRA_BOOTSTRAP_EXEC": "/script.sh",
755+
},
756+
},
757+
}
758+
759+
for testName, testCase := range testCases {
760+
ctx := context.Background()
761+
startTime := metav1.Now()
762+
completionTime := metav1.Now()
763+
764+
jobName := "test-launcher-creation-" + strings.ToLower(testName)
765+
766+
mpiJob := newMPIJob(jobName, pointer.Int32(1), 1, gpuResourceName, &startTime, &completionTime)
767+
Expect(testK8sClient.Create(ctx, mpiJob)).Should(Succeed())
768+
769+
template := &mpiJob.Spec.MPIReplicaSpecs[kubeflowv1.MPIJobReplicaTypeLauncher].Template
770+
Expect(len(template.Spec.Containers) == 1).To(BeTrue())
771+
772+
cont := &template.Spec.Containers[0]
773+
774+
for k, v := range testCase.envVariables {
775+
cont.Env = append(cont.Env,
776+
corev1.EnvVar{
777+
Name: k,
778+
Value: v,
779+
},
780+
)
781+
}
782+
783+
launcher := reconciler.newLauncher(mpiJob, "kubectl-delivery", false)
784+
785+
Expect(len(launcher.Spec.Containers) == 1).To(BeTrue())
786+
for expectedKey, expectedValue := range testCase.expectedEnvVariables {
787+
Expect(launcher.Spec.Containers[0].Env).Should(ContainElements(
788+
corev1.EnvVar{
789+
Name: expectedKey,
790+
Value: expectedValue,
791+
}),
792+
)
793+
}
794+
}
795+
})
796+
})
712797
})
713798

714799
func ReplicaStatusMatch(replicaStatuses map[common.ReplicaType]*common.ReplicaStatus,

0 commit comments

Comments
 (0)