
Adapt hostfile to IntelMPI
Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>
tenzen-y committed Feb 9, 2023
1 parent dd26fa7 commit ec371bd
Showing 3 changed files with 21 additions and 14 deletions.
2 changes: 1 addition & 1 deletion build/base/intel-entrypoint.sh
@@ -27,7 +27,7 @@ function resolve_host() {
 
 if [ "$K_MPI_JOB_ROLE" == "launcher" ]; then
   resolve_host "$HOSTNAME"
-  cut -d ' ' -f 1 /etc/mpi/hostfile | while read -r host
+  cut -d ':' -f 1 /etc/mpi/hostfile | while read -r host
   do
     resolve_host "$host"
   done
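The delimiter change above exists because the two MPI implementations expect different hostfile syntax: Open MPI lists each worker as `host slots=N`, while Intel MPI expects `host:N`. As a sketch, a two-worker hostfile in each style might look like this (the worker names are hypothetical, not taken from this commit):

```
# Open MPI style hostfile
demo-worker-0.demo-worker.default.svc slots=2
demo-worker-1.demo-worker.default.svc slots=2

# Intel MPI style hostfile
demo-worker-0.demo-worker.default.svc:2
demo-worker-1.demo-worker.default.svc:2
```

With the colon-delimited form, `cut -d ':' -f 1` recovers just the hostname to pass to `resolve_host`; splitting on a space would leave the whole `host:N` token intact.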
6 changes: 5 additions & 1 deletion pkg/controller/mpi_job_controller.go
@@ -1187,7 +1187,11 @@ func newConfigMap(mpiJob *kubeflow.MPIJob, workerReplicas int32) *corev1.ConfigMap
         slots = int(*mpiJob.Spec.SlotsPerWorker)
     }
     for i := 0; i < int(workerReplicas); i++ {
-        buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc slots=%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
+        if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationOpenMPI {
+            buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc slots=%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
+        } else if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationIntel {
+            buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc:%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
+        }
     }
 
     return &corev1.ConfigMap{
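The branch added above only changes how each worker line is formatted before it is written into the ConfigMap's hostfile. A minimal, self-contained sketch of the same idea follows; the `hostfileLine` helper, the literal `"Intel"` tag, and the demo pod names are illustrative assumptions, not part of the controller:

```go
package main

import (
	"fmt"
	"strings"
)

// hostfileLine renders one worker entry in the syntax the chosen MPI
// implementation expects: "host slots=N" for Open MPI, "host:N" for Intel MPI.
// Illustrative only; the controller builds the same strings inline.
func hostfileLine(impl, host string, slots int) string {
	if impl == "Intel" {
		return fmt.Sprintf("%s:%d\n", host, slots)
	}
	return fmt.Sprintf("%s slots=%d\n", host, slots)
}

func main() {
	var buf strings.Builder
	for i := 0; i < 2; i++ {
		// Hypothetical worker pod DNS name of the form <job>-worker-<i>.<job>-worker.<ns>.svc.
		host := fmt.Sprintf("demo-worker-%d.demo-worker.default.svc", i)
		buf.WriteString(hostfileLine("Intel", host, 4))
	}
	fmt.Print(buf.String())
	// Prints:
	// demo-worker-0.demo-worker.default.svc:4
	// demo-worker-1.demo-worker.default.svc:4
}
```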
27 changes: 15 additions & 12 deletions pkg/controller/mpi_job_controller_test.go
@@ -1506,49 +1506,52 @@ func TestNewConfigMap(t *testing.T) {
         workerReplicas int32
         wantCM         *corev1.ConfigMap
     }{
-        "without slots": {
+        "OpenMPI without slots": {
             mpiJob: &kubeflow.MPIJob{
                 ObjectMeta: metav1.ObjectMeta{
-                    Name:      "without-slots",
+                    Name:      "openmpi-without-slots",
                     Namespace: "tenant-a",
                 },
-                Spec: kubeflow.MPIJobSpec{},
+                Spec: kubeflow.MPIJobSpec{
+                    MPIImplementation: kubeflow.MPIImplementationOpenMPI,
+                },
             },
             workerReplicas: 2,
             wantCM: &corev1.ConfigMap{
                 ObjectMeta: metav1.ObjectMeta{
-                    Name:      "without-slots-config",
+                    Name:      "openmpi-without-slots-config",
                     Namespace: "tenant-a",
                     Labels: map[string]string{
-                        "app": "without-slots",
+                        "app": "openmpi-without-slots",
                     },
                 },
                 Data: map[string]string{
-                    "hostfile": "without-slots-worker-0.without-slots-worker.tenant-a.svc slots=1\nwithout-slots-worker-1.without-slots-worker.tenant-a.svc slots=1\n",
+                    "hostfile": "openmpi-without-slots-worker-0.openmpi-without-slots-worker.tenant-a.svc slots=1\nopenmpi-without-slots-worker-1.openmpi-without-slots-worker.tenant-a.svc slots=1\n",
                },
             },
         },
-        "with slots": {
+        "IntelMPI with slots": {
             mpiJob: &kubeflow.MPIJob{
                 ObjectMeta: metav1.ObjectMeta{
-                    Name:      "with-slots",
+                    Name:      "intelmpi-with-slots",
                     Namespace: "project-x",
                 },
                 Spec: kubeflow.MPIJobSpec{
-                    SlotsPerWorker: pointer.Int32(10),
+                    SlotsPerWorker:    pointer.Int32(10),
+                    MPIImplementation: kubeflow.MPIImplementationIntel,
                 },
             },
             workerReplicas: 1,
             wantCM: &corev1.ConfigMap{
                 ObjectMeta: metav1.ObjectMeta{
-                    Name:      "with-slots-config",
+                    Name:      "intelmpi-with-slots-config",
                     Namespace: "project-x",
                     Labels: map[string]string{
-                        "app": "with-slots",
+                        "app": "intelmpi-with-slots",
                     },
                 },
                 Data: map[string]string{
-                    "hostfile": "with-slots-worker-0.with-slots-worker.project-x.svc slots=10\n",
+                    "hostfile": "intelmpi-with-slots-worker-0.intelmpi-with-slots-worker.project-x.svc:10\n",
                 },
             },
         },
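The renamed test cases pin the expected hostfile for each implementation: `slots=` lines for Open MPI and colon-separated counts for Intel MPI. Assuming a standard checkout of the module, they can be exercised with `go test -run TestNewConfigMap ./pkg/controller/...`.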
