Skip to content

Commit

Permalink
Support GPU image for dnntrainer component (#530)
Browse files Browse the repository at this point in the history
* add gpu support for dnntrainer

* Add args for building the GPU image for dnntrainer and update build scripts

* add unsaved changes

* make base image tag and local image name configurable

* fix license link
  • Loading branch information
hongye-sun authored and k8s-ci-robot committed Dec 19, 2018
1 parent 09dbf5f commit f3de359
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 8 deletions.
5 changes: 5 additions & 0 deletions .cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ steps:
entrypoint: '/bin/bash'
args: ['-c', 'cd /workspace/components/kubeflow/launcher && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
id: 'buildLauncher'
# Build the GPU variant of the dnntrainer image: same build script, but with
# local image name ml-pipeline-kubeflow-tf-trainer-gpu (-l) and TensorFlow base
# image tag 1.6.0-gpu (-b).
- name: 'gcr.io/cloud-builders/docker'
entrypoint: '/bin/bash'
args: ['-c', 'cd /workspace/components/kubeflow/dnntrainer && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA -l ml-pipeline-kubeflow-tf-trainer-gpu -b 1.6.0-gpu']
id: 'buildGpuTrainer'

# Build the Dataproc-based pipeline component images
- name: 'gcr.io/cloud-builders/docker'
Expand Down Expand Up @@ -199,6 +203,7 @@ images:
# Images for the Kubeflow-based pipeline components
- 'gcr.io/$PROJECT_ID/ml-pipeline-kubeflow-deployer:$COMMIT_SHA'
- 'gcr.io/$PROJECT_ID/ml-pipeline-kubeflow-tf-trainer:$COMMIT_SHA'
- 'gcr.io/$PROJECT_ID/ml-pipeline-kubeflow-tf-trainer-gpu:$COMMIT_SHA'
- 'gcr.io/$PROJECT_ID/ml-pipeline-kubeflow-tf:$COMMIT_SHA'

# Images for the Dataproc-based pipeline components
Expand Down
11 changes: 11 additions & 0 deletions .release.cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,17 @@ steps:
args: ['tag', 'gcr.io/$PROJECT_ID/ml-pipeline-kubeflow-tf-trainer:$COMMIT_SHA', 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:$COMMIT_SHA']
id: 'tagTrainerCommitSHA'
waitFor: ['pullTrainer']
# Release the GPU trainer image: pull the image built for this commit from the
# build project, then tag it under gcr.io/ml-pipeline twice.
- name: 'gcr.io/cloud-builders/docker'
args: ['pull', 'gcr.io/$PROJECT_ID/ml-pipeline-kubeflow-tf-trainer-gpu:$COMMIT_SHA']
id: 'pullGpuTrainer'
# Tag with the release version ($TAG_NAME).
- name: 'gcr.io/cloud-builders/docker'
args: ['tag', 'gcr.io/$PROJECT_ID/ml-pipeline-kubeflow-tf-trainer-gpu:$COMMIT_SHA', 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:$TAG_NAME']
id: 'tagGpuTrainerVersionNumber'
waitFor: ['pullGpuTrainer']
# Tag with the commit SHA ($COMMIT_SHA).
- name: 'gcr.io/cloud-builders/docker'
args: ['tag', 'gcr.io/$PROJECT_ID/ml-pipeline-kubeflow-tf-trainer-gpu:$COMMIT_SHA', 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:$COMMIT_SHA']
id: 'tagGpuTrainerCommitSHA'
waitFor: ['pullGpuTrainer']
- name: 'gcr.io/cloud-builders/docker'
args: ['pull', 'gcr.io/$PROJECT_ID/ml-pipeline-kubeflow-tf:$COMMIT_SHA']
id: 'pullLauncher'
Expand Down
6 changes: 2 additions & 4 deletions components/kubeflow/dnntrainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM ubuntu:16.04
# TF_TAG selects the tensorflow/tensorflow base image tag (for example 1.6.0 or
# 1.6.0-gpu). It is declared before FROM so that it can be expanded in the FROM
# line. The default mirrors the build_image.sh default so that a plain
# `docker build .` without --build-arg still resolves to a valid base image.
ARG TF_TAG=1.6.0
FROM tensorflow/tensorflow:$TF_TAG

RUN apt-get update -y

RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools \
wget unzip git

RUN easy_install pip

RUN apt-get install --no-install-recommends -y -q build-essential && \
pip install pyyaml==3.12 six==1.11.0 \
tensorflow==1.6.0 \
tensorflow-transform==0.6.0 \
tensorflow-model-analysis==0.6.0 && \
apt-get --purge autoremove -y build-essential
Expand Down
22 changes: 18 additions & 4 deletions components/kubeflow/dnntrainer/build_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

while getopts ":hp:t:i:" opt; do
while getopts ":hp:t:i:b:l:" opt; do
case "${opt}" in
h) echo "-p: project name"
echo "-t: tag name"
echo "-i: image name. If provided, project name and tag name are not necessary"
echo "-b: tensorflow base image tag. Optional. The value can be tags listed under \
https://hub.docker.com/r/tensorflow/tensorflow/tags. Defaults to '1.6.0'."
echo "-l: local image name. Optional. Defaults to 'ml-pipeline-kubeflow-tf-trainer'"
exit
;;
p) PROJECT_ID=${OPTARG}
Expand All @@ -26,13 +29,20 @@ while getopts ":hp:t:i:" opt; do
;;
i) IMAGE_NAME=${OPTARG}
;;
\? ) echo "Usage: cmd [-p] project [-t] tag [-i] image"
b) TF_BASE_TAG=${OPTARG}
;;
l) LOCAL_IMAGE_NAME=${OPTARG}
;;
\? ) echo "Usage: cmd [-p] project [-t] tag [-i] image [-b] base image tag [-l] local image"
exit
;;
esac
done

LOCAL_IMAGE_NAME=ml-pipeline-kubeflow-tf-trainer
set -x
# Use the default local image name when -l was not supplied (or was empty).
if [ -z "${LOCAL_IMAGE_NAME}" ]; then
LOCAL_IMAGE_NAME=ml-pipeline-kubeflow-tf-trainer
fi

if [ -z "${PROJECT_ID}" ]; then
PROJECT_ID=$(gcloud config config-helper --format "value(configuration.properties.core.project)")
Expand All @@ -42,12 +52,16 @@ if [ -z "${TAG_NAME}" ]; then
TAG_NAME=$(date +v%Y%m%d)-$(git describe --tags --always --dirty)-$(git diff | shasum -a256 | cut -c -6)
fi

# Default the TensorFlow base image tag to 1.6.0 when -b was not supplied (or
# was empty); the value is passed to docker build as the TF_TAG build argument.
if [ -z "${TF_BASE_TAG}" ]; then
TF_BASE_TAG=1.6.0
fi

mkdir -p ./build
rsync -arvp ./src/ ./build/
cp ../../license.sh ./build
cp ../../third_party_licenses.csv ./build

docker build -t ${LOCAL_IMAGE_NAME} .
docker build --build-arg TF_TAG=${TF_BASE_TAG} -t ${LOCAL_IMAGE_NAME} .
if [ -z "${IMAGE_NAME}" ]; then
docker tag ${LOCAL_IMAGE_NAME} gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:${TAG_NAME}
docker push gcr.io/${PROJECT_ID}/${LOCAL_IMAGE_NAME}:${TAG_NAME}
Expand Down
9 changes: 9 additions & 0 deletions components/third_party_licenses.csv
Original file line number Diff line number Diff line change
Expand Up @@ -169,5 +169,14 @@ tensorflow-tensorboard,https://raw.githubusercontent.com/tensorflow/tensorboard/
tensorflow-data-validation,https://raw.githubusercontent.com/tensorflow/data-validation/master/LICENSE,Apache 2.0
tensorflow-metadata,https://raw.githubusercontent.com/tensorflow/metadata/master/LICENSE,Apache 2.0
defusedxml,https://raw.githubusercontent.com/tiran/defusedxml/master/LICENSE,PSF
backports.functools-lru-cache,https://raw.githubusercontent.com/jaraco/backports.functools_lru_cache/master/LICENSE,MIT
cycler,https://raw.githubusercontent.com/matplotlib/cycler/master/LICENSE,MIT
h5py,https://raw.githubusercontent.com/h5py/h5py/master/licenses/license.txt,BSD
matplotlib,https://raw.githubusercontent.com/matplotlib/matplotlib/master/LICENSE/LICENSE,PSF
Pillow,https://raw.githubusercontent.com/python-pillow/Pillow/master/LICENSE,HPND
sklearn,https://raw.githubusercontent.com/scikit-learn/scikit-learn/master/COPYING,BSD
tensorflow-gpu,https://raw.githubusercontent.com/tensorflow/tensorflow/master/LICENSE,Apache 2.0
webencodings,https://raw.githubusercontent.com/gsnedders/python-webencodings/master/LICENSE,BSD
google-api-core,https://raw.githubusercontent.com/googleapis/google-cloud-python/master/LICENSE,Apache 2.0
google-resumable-media,https://raw.githubusercontent.com/googleapis/google-resumable-media-python/master/LICENSE,Apache 2.0

0 comments on commit f3de359

Please sign in to comment.