diff --git a/example/volcano/integrations/mpi/mpi-example.yaml b/example/volcano/integrations/mpi/mpi-example.yaml index 9f0a4128a..c14d17ca4 100644 --- a/example/volcano/integrations/mpi/mpi-example.yaml +++ b/example/volcano/integrations/mpi/mpi-example.yaml @@ -1,3 +1,41 @@ +################################################ +# # +# Demo for running MPI tasks on volcano # +# # +################################################ +# +# This yaml used to demonstrate how to running a hello-world MPI +# task via Volcano Job, the MPI program is directly brought from +# official website (https://github.com/wesleykendall/mpitutorial/tree/gh-pages/tutorials/mpi-hello-world) +# more details are located in `Dockerfile`. +# +# There are two plugins that make MPI works in cluster. +# 1. **ENV**: env plugin used to generate host file for MPI communicating between pods. +# 2. **SSH**: ssh plugin used to generate required key and config for ssh tools. +# +# When Job is running, you can ensure the mpi-hello-world works correctly from master's stdout via: +# +# kubectl logs lm-mpi-job-mpimaster-0 +# +# and output would below: +# +# ------------------------------------------------------------------------- +# [[40437,1],1]: A high-performance Open MPI point-to-point messaging module +# was unable to find any relevant network interfaces: +# +# Module: OpenFabrics (openib) +# Host: lm-mpi-job-mpiworker-1 +# +# Another transport will be used instead, although this may result in +# lower performance. +# -------------------------------------------------------------------------- +# Hello world from processor lm-mpi-job-mpiworker-1, rank 1 out of 2 processors +# Hello world from processor lm-mpi-job-mpiworker-0, rank 0 out of 2 processors +# +# **NOTE**: There are two sleep command before&after MPI execution, the previous is used +# for waiting worker pods to become ready and the latter is used to guarantee +# logs are captured before exiting. + apiVersion: batch.volcano.sh/v1alpha1 kind: Job metadata: @@ -21,9 +59,10 @@ spec: - /bin/sh - -c - | - MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`; + sleep 10s mkdir -p /var/run/sshd; /usr/sbin/sshd; - mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world > /home/re; + mpiexec --allow-run-as-root --hostfile /etc/volcano/mpiworker.host -np 2 mpi_hello_world; + sleep 10s image: volcanosh/example-mpi:0.0.1 name: mpimaster ports: