Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow running MPI applications as non-root #383

Merged
merged 2 commits into from
Jul 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions examples/pi/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
FROM debian:buster

RUN apt update && apt install -y --no-install-recommends \
build-essential \
libopenmpi-dev \
openmpi-bin \
openssh-server \
openssh-client \
&& rm -rf /var/lib/apt/lists/*
# Add priviledge separation directoy to run sshd as root.
RUN mkdir -p /var/run/sshd
# Add capability to run sshd as non-root.
RUN setcap CAP_NET_BIND_SERVICE=+eip /usr/sbin/sshd

RUN useradd -m mpiuser
WORKDIR /home/mpiuser
COPY --chown=mpiuser sshd_config .sshd_config
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config
COPY pi.cc /src/pi.cc
RUN mpic++ /src/pi.cc -o /home/mpiuser/pi
24 changes: 24 additions & 0 deletions examples/pi/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Pure MPI example

This example shows to run a pure MPI application.

The program prints some basic information about the workers.
Then, it calculates an approximate value for pi.

## How to build Image

```bash
docker build -t mpi-pi .
```

## Create MPIJob

Modify `pi.yaml` to set up the image name from your own registry.

Then, run:

```
kubectl create -f pi.yaml
```

The YAML shows how to run the binaries as a non-root user.
51 changes: 51 additions & 0 deletions examples/pi/pi.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2021 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mpi.h"
#include <random>
#include <cstdio>

int main(int argc, char *argv[]) {
int rank, workers, proc_name_size;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &workers);
if (rank == 0) {
printf("Workers: %d\n", workers);
}
char hostname[MPI_MAX_PROCESSOR_NAME];
MPI_Get_processor_name(hostname, &proc_name_size);
printf("Rank %d on host %s\n", rank, hostname);

std::minstd_rand generator(rank);
std::uniform_real_distribution<double> distribution(-1.0, 1.0);
double x, y;
long long worker_count = 0;
int worker_tests = 100000000;
for (int i = 0; i < worker_tests; i++) {
x = distribution(generator);
y = distribution(generator);
if (x * x + y * y <= 1.0) {
worker_count++;
}
}
long long total_count = 0;
MPI_Reduce(&worker_count, &total_count, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0) {
double pi = 4 * (double)total_count / (double)(worker_tests) / (double)(workers);
printf("pi is approximately %.16lf\n", pi);
}
MPI_Finalize();
return 0;
}
50 changes: 50 additions & 0 deletions examples/pi/pi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
name: pi
spec:
slotsPerWorker: 1
cleanPodPolicy: Running
sshAuthMountPath: /home/mpiuser/.ssh
mpiReplicaSpecs:
Launcher:
replicas: 1
template:
spec:
containers:
- image: docker.io/kubeflow/mpi-pi
name: mpi-launcher
securityContext:
runAsUser: 1000
command:
- mpirun
args:
- -np
- "2"
- /home/mpiuser/pi
resources:
limits:
cpu: 1
memory: 2Gi
Worker:
replicas: 2
template:
spec:
containers:
- image: docker.io/kubeflow/mpi-pi
name: mpi-worker
securityContext:
runAsUser: 1000
capabilities:
add:
- NET_BIND_SERVICE
command:
- /usr/sbin/sshd
args:
- -De
- -f
- /home/mpiuser/.sshd_config
resources:
limits:
cpu: 2
memory: 4Gi
2 changes: 2 additions & 0 deletions examples/pi/sshd_config
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
PidFile /home/mpiuser/sshd.pid
HostKey /home/mpiuser/.ssh/id_rsa
2 changes: 2 additions & 0 deletions manifests/base/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ spec:
type: string
enum: ["None", "Running", "All"]
description: "Defines which Pods must be deleted after the Job completes"
sshAuthMountPath:
type: string
mpiReplicaSpecs:
type: object
properties:
Expand Down
3 changes: 3 additions & 0 deletions v2/cmd/mpi-operator/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type ServerOption struct {
LockNamespace string
QPS int
Burst int
ScriptingImage string
}

// NewServerOption creates a new CMServer with a default config.
Expand Down Expand Up @@ -68,4 +69,6 @@ func (s *ServerOption) AddFlags(fs *flag.FlagSet) {

fs.IntVar(&s.QPS, "kube-api-qps", 5, "QPS indicates the maximum QPS to the master from this client.")
fs.IntVar(&s.Burst, "kube-api-burst", 10, "Maximum burst for throttle.")

fs.StringVar(&s.ScriptingImage, "scripting-image", "alpine:3.14", "Container image used for scripting, such as in init containers.")
}
3 changes: 2 additions & 1 deletion v2/cmd/mpi-operator/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ func Run(opt *options.ServerOption) error {
kubeInformerFactory.Core().V1().Pods(),
podgroupsInformer,
kubeflowInformerFactory.Kubeflow().V2beta1().MPIJobs(),
opt.GangSchedulingName)
opt.GangSchedulingName,
opt.ScriptingImage)

go kubeInformerFactory.Start(ctx.Done())
go kubeflowInformerFactory.Start(ctx.Done())
Expand Down
3 changes: 3 additions & 0 deletions v2/pkg/apis/kubeflow/v2beta1/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ func SetDefaults_MPIJob(mpiJob *MPIJob) {
if mpiJob.Spec.SlotsPerWorker == nil {
mpiJob.Spec.SlotsPerWorker = newInt32(1)
}
if mpiJob.Spec.SSHAuthMountPath == "" {
mpiJob.Spec.SSHAuthMountPath = "/root/.ssh"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you make the default path ("/root/.ssh") a constant?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that makes sense for controller code, where a value might be used more than once. However, this file is all about defaults. Creating a constant would just make the reader have to jump from one line to another to see what the default for a field is.

}

// set default to Launcher
setDefaultsTypeLauncher(mpiJob.Spec.MPIReplicaSpecs[MPIReplicaTypeLauncher])
Expand Down
25 changes: 15 additions & 10 deletions v2/pkg/apis/kubeflow/v2beta1/default_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,25 @@ func TestSetDefaults_MPIJob(t *testing.T) {
"base defaults": {
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SSHAuthMountPath: "/root/.ssh",
},
},
},
"base defaults overridden": {
job: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(10),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/home/mpiuser/.ssh",
},
},
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(10),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/home/mpiuser/.ssh",
},
},
},
Expand All @@ -58,8 +61,9 @@ func TestSetDefaults_MPIJob(t *testing.T) {
},
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[MPIReplicaType]*common.ReplicaSpec{
MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
Expand All @@ -79,8 +83,9 @@ func TestSetDefaults_MPIJob(t *testing.T) {
},
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SlotsPerWorker: newInt32(1),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyNone),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[MPIReplicaType]*common.ReplicaSpec{
MPIReplicaTypeWorker: {
Replicas: newInt32(0),
Expand Down
6 changes: 5 additions & 1 deletion v2/pkg/apis/kubeflow/v2beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,13 @@ type MPIJobSpec struct {
// Defaults to None.
CleanPodPolicy *common.CleanPodPolicy `json:"cleanPodPolicy,omitempty"`

// `MPIReplicaSpecs` contains maps from `MPIReplicaType` to `ReplicaSpec` that
// MPIReplicaSpecs contains maps from `MPIReplicaType` to `ReplicaSpec` that
// specify the MPI replicas to run.
MPIReplicaSpecs map[MPIReplicaType]*common.ReplicaSpec `json:"mpiReplicaSpecs"`

// SSHAuthMountPath is the directory where SSH keys are mounted.
// Defaults to "/root/.ssh".
SSHAuthMountPath string `json:"sshAuthMountPath,omitempty"`
}

// MPIReplicaType is the type for MPIReplica.
Expand Down
3 changes: 3 additions & 0 deletions v2/pkg/apis/kubeflow/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ func validateMPIJobSpec(spec *kubeflow.MPIJobSpec, path *field.Path) field.Error
} else if !validCleanPolicies.Has(string(*spec.CleanPodPolicy)) {
errs = append(errs, field.NotSupported(path.Child("cleanPodPolicy"), *spec.CleanPodPolicy, validCleanPolicies.List()))
}
if spec.SSHAuthMountPath == "" {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this validation contradict with the defaulting here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to keep validation independent from defaulting. But yes, if you put them together, this line of code would not be reachable.

errs = append(errs, field.Required(path.Child("sshAuthMountPath"), "must have a mount path for SSH credentials"))
}
return errs
}

Expand Down
31 changes: 20 additions & 11 deletions v2/pkg/apis/kubeflow/validation/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ func TestValidateMPIJob(t *testing.T) {
"valid": {
job: v2beta1.MPIJob{
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{
v2beta1.MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
Expand All @@ -51,8 +52,9 @@ func TestValidateMPIJob(t *testing.T) {
"valid with worker": {
job: v2beta1.MPIJob{
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{
v2beta1.MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
Expand Down Expand Up @@ -88,14 +90,19 @@ func TestValidateMPIJob(t *testing.T) {
Type: field.ErrorTypeRequired,
Field: "spec.cleanPodPolicy",
},
&field.Error{
Type: field.ErrorTypeRequired,
Field: "spec.sshAuthMountPath",
},
},
},
"empty replica specs": {
job: v2beta1.MPIJob{
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{},
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{},
},
},
wantErrs: field.ErrorList{
Expand All @@ -108,8 +115,9 @@ func TestValidateMPIJob(t *testing.T) {
"missing replica spec fields": {
job: v2beta1.MPIJob{
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{
v2beta1.MPIReplicaTypeLauncher: {},
v2beta1.MPIReplicaTypeWorker: {},
Expand Down Expand Up @@ -138,8 +146,9 @@ func TestValidateMPIJob(t *testing.T) {
"invalid replica counts": {
job: v2beta1.MPIJob{
Spec: v2beta1.MPIJobSpec{
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SlotsPerWorker: newInt32(2),
CleanPodPolicy: newCleanPodPolicy(common.CleanPodPolicyRunning),
SSHAuthMountPath: "/root/.ssh",
MPIReplicaSpecs: map[v2beta1.MPIReplicaType]*common.ReplicaSpec{
v2beta1.MPIReplicaTypeLauncher: {
Replicas: newInt32(2),
Expand Down
Loading