Add an MPI example job and an e2e test that runs it.

- example/integrations/mpi: Dockerfile that builds the mpi_hello_world binary on top of
  Open MPI + ssh, and a sample Job manifest (1 mpimaster task, 2 mpiworker tasks).
- hack/run-e2e-kind.sh: pull the MPI example image and load it into the kind cluster.
- pkg/apis/batch/v1alpha1/job.go: add json/protobuf tags to JobStatus.ControlledResources.
- test/e2e: new MPI test; taskSpec gains workingDir, which createContainers copies onto
  the container, and the container name is now the last path segment of the image name.

NOTE(review): line breaks and indentation in this patch were restored from a flattened
copy. Context-line whitespace may differ from the target tree, so apply with
`git apply --ignore-whitespace` (or `patch -l`) and confirm the result. The `index`
hashes are carried over unchanged and do not reflect later edits to the added lines.

diff --git a/example/integrations/mpi/Dockerfile b/example/integrations/mpi/Dockerfile
new file mode 100644
index 0000000000..56a2d2ed77
--- /dev/null
+++ b/example/integrations/mpi/Dockerfile
@@ -0,0 +1,18 @@
+FROM ubuntu:16.04
+LABEL maintainer="volcano"
+RUN apt-get update --fix-missing \
+    && apt-get install -y libopenmpi-dev openmpi-bin \
+    && apt-get install -y git \
+    && apt-get install -y build-essential \
+    && apt-get install -y ssh \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+RUN git clone https://github.com/wesleykendall/mpitutorial \
+    && cd mpitutorial/tutorials/mpi-hello-world/code \
+    && make \
+    && cp mpi_hello_world /home/ \
+    && apt-get autoremove -y git \
+    && apt-get autoremove -y build-essential \
+    && rm -rf "/mpitutorial"
+# Run sshd in the foreground (-D); otherwise it daemonizes and the container exits at once.
+CMD mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
diff --git a/example/integrations/mpi/mpi-example.yaml b/example/integrations/mpi/mpi-example.yaml
new file mode 100644
index 0000000000..876a277514
--- /dev/null
+++ b/example/integrations/mpi/mpi-example.yaml
@@ -0,0 +1,52 @@
+apiVersion: batch.volcano.sh/v1alpha1
+kind: Job
+metadata:
+  name: lm-mpi-job
+spec:
+  minAvailable: 2
+  schedulerName: kube-batch
+  plugins:
+    ssh: []
+    env: []
+  tasks:
+    - replicas: 1
+      name: mpimaster
+      policies:
+        - event: TaskCompleted
+          action: CompleteJob
+      template:
+        spec:
+          containers:
+            - command:
+                - /bin/sh
+                - -c
+                - |
+                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
+                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
+                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world > /home/re;
+              #TODO: use volcano repo instead in the future.
+              image: tommylike/volcano-example-mpi:0.0.1
+              name: mpimaster
+              ports:
+                - containerPort: 22
+                  name: mpijob-port
+              workingDir: /home
+          restartPolicy: OnFailure
+    - replicas: 2
+      name: mpiworker
+      template:
+        spec:
+          containers:
+            - command:
+                - /bin/sh
+                - -c
+                - |
+                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
+              image: tommylike/volcano-example-mpi:0.0.1
+              name: mpiworker
+              ports:
+                - containerPort: 22
+                  name: mpijob-port
+              workingDir: /home
+          restartPolicy: OnFailure
+---
diff --git a/hack/run-e2e-kind.sh b/hack/run-e2e-kind.sh
index f67f377ec2..f48e54982d 100755
--- a/hack/run-e2e-kind.sh
+++ b/hack/run-e2e-kind.sh
@@ -5,6 +5,8 @@ export VK_BIN=${VK_ROOT}/_output/bin
 export LOG_LEVEL=3
 export SHOW_VOLCANO_LOGS=${SHOW_VOLCANO_LOGS:-1}
 export CLEANUP_CLUSTER=${CLEANUP_CLUSTER:-1}
+#TODO: Use volcano repo instead in the future
+export MPI_EXAMPLE_IMAGE=${MPI_EXAMPLE_IMAGE:-"tommylike/volcano-example-mpi:0.0.1"}
 
 if [[ "${CLUSTER_NAME}xxx" != "xxx" ]];then
   export CLUSTER_CONTEXT="--name ${CLUSTER_NAME}"
@@ -52,10 +54,14 @@ function install-volcano {
   chmod 700 get_helm.sh && ./get_helm.sh --version v2.13.0
   helm init --service-account tiller --kubeconfig ${KUBECONFIG} --wait
 
+  echo "Pulling required docker images"
+  docker pull ${MPI_EXAMPLE_IMAGE}
+
   echo "Loading docker images into kind cluster"
   kind load docker-image ${IMAGE}-controllers:${TAG} ${CLUSTER_CONTEXT}
   kind load docker-image ${IMAGE}-scheduler:${TAG} ${CLUSTER_CONTEXT}
   kind load docker-image ${IMAGE}-admission:${TAG} ${CLUSTER_CONTEXT}
+  kind load docker-image ${MPI_EXAMPLE_IMAGE} ${CLUSTER_CONTEXT}
 
   echo "Install volcano plugin into cluster...."
   helm plugin install --kubeconfig ${KUBECONFIG} installer/chart/volcano/plugins/gen-admission-secret
diff --git a/pkg/apis/batch/v1alpha1/job.go b/pkg/apis/batch/v1alpha1/job.go
index 31e78c4c0a..de28fc46ab 100644
--- a/pkg/apis/batch/v1alpha1/job.go
+++ b/pkg/apis/batch/v1alpha1/job.go
@@ -239,7 +239,7 @@ type JobStatus struct {
 	//Current version of job
 	Version int32 `json:"version,omitempty" protobuf:"bytes,8,opt,name=version"`
 	// The resources that controlled by this job, e.g. Service, ConfigMap
-	ControlledResources map[string]string
+	ControlledResources map[string]string `json:"controlledResources,omitempty" protobuf:"bytes,9,opt,name=controlledResources"`
 }
 
 // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
diff --git a/test/e2e/mpi.go b/test/e2e/mpi.go
new file mode 100644
index 0000000000..b5c4d17075
--- /dev/null
+++ b/test/e2e/mpi.go
@@ -0,0 +1,77 @@
+/*
+Copyright 2019 The Volcano Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+package e2e
+
+import (
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+
+	vkv1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1"
+)
+
+var _ = Describe("MPI E2E Test", func() {
+	It("will run and complete finally", func() {
+		context := initTestContext()
+		defer cleanupTestContext(context)
+
+		slot := oneCPU
+
+		spec := &jobSpec{
+			name: "mpi",
+			policies: []vkv1.LifecyclePolicy{
+				{
+					Action: vkv1.CompleteJobAction,
+					Event:  vkv1.TaskCompletedEvent,
+				},
+			},
+			plugins: map[string][]string{
+				"ssh": {},
+				"env": {},
+			},
+			tasks: []taskSpec{
+				{
+					name:       "mpimaster",
+					img:        defaultMPIImage,
+					req:        slot,
+					min:        1,
+					rep:        1,
+					workingDir: "/home",
+					// Wait a few seconds so the worker pods are ready before running mpiexec.
+					command: `sleep 5;
+MPI_HOST=` + "`" + `cat /etc/volcano/mpiworker.host | tr "\n" ","` + "`" + `;
+mkdir -p /var/run/sshd; /usr/sbin/sshd;
+mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world > /home/re`,
+				},
+				{
+					name:       "mpiworker",
+					img:        defaultMPIImage,
+					req:        slot,
+					min:        2,
+					rep:        2,
+					workingDir: "/home",
+					command:    "mkdir -p /var/run/sshd; /usr/sbin/sshd -D;",
+				},
+			},
+		}
+
+		job := createJob(context, spec)
+
+		err := waitJobStates(context, job, []vkv1.JobPhase{
+			vkv1.Pending, vkv1.Running, vkv1.Completing, vkv1.Completed})
+		Expect(err).NotTo(HaveOccurred())
+	})
+
+})
diff --git a/test/e2e/util.go b/test/e2e/util.go
index c5f4864548..95f21f45c9 100644
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@@ -59,6 +59,8 @@ const (
 	masterPriority      = "master-pri"
 	defaultNginxImage   = "nginx:1.14"
 	defaultBusyBoxImage = "busybox:1.24"
+	//TODO: Use volcano repo instead in the future
+	defaultMPIImage = "tommylike/volcano-example-mpi:0.0.1"
 )
 
 func cpuResource(request string) v1.ResourceList {
@@ -279,6 +281,7 @@ type taskSpec struct {
 	min, rep   int32
 	img        string
 	command    string
+	workingDir string
 	hostport   int32
 	req        v1.ResourceList
 	affinity   *v1.Affinity
@@ -359,7 +362,7 @@ func createJobInner(context *context, jobSpec *jobSpec) (*vkv1.Job, error) {
 				Spec: v1.PodSpec{
 					SchedulerName: "kube-batch",
 					RestartPolicy: restartPolicy,
-					Containers:    createContainers(task.img, task.command, task.req, task.hostport),
+					Containers:    createContainers(task.img, task.command, task.workingDir, task.req, task.hostport),
 					Affinity:      task.affinity,
 				},
 			},
@@ -553,7 +556,8 @@ func waitJobUnschedulable(ctx *context, job *vkv1.Job) error {
 	return wait.Poll(10*time.Second, oneMinute, jobUnschedulable(ctx, job, now))
 }
 
-func createContainers(img, command string, req v1.ResourceList, hostport int32) []v1.Container {
+func createContainers(img, command, workingDir string, req v1.ResourceList, hostport int32) []v1.Container {
+	var imageRepo []string
 	container := v1.Container{
 		Image:           img,
 		ImagePullPolicy: v1.PullIfNotPresent,
@@ -562,10 +566,11 @@ func createContainers(img, command string, req v1.ResourceList, hostport int32)
 		},
 	}
 	if strings.Index(img, ":") < 0 {
-		container.Name = img
+		imageRepo = strings.Split(img, "/")
 	} else {
-		container.Name = img[:strings.Index(img, ":")]
+		imageRepo = strings.Split(img[:strings.Index(img, ":")], "/")
 	}
+	container.Name = imageRepo[len(imageRepo)-1]
 
 	if len(command) > 0 {
 		container.Command = []string{"/bin/sh"}
@@ -581,6 +586,10 @@ func createContainers(img, command string, req v1.ResourceList, hostport int32)
 		}
 	}
 
+	if len(workingDir) > 0 {
+		container.WorkingDir = workingDir
+	}
+
 	return []v1.Container{container}
 }
 