From 32041490e003a5e13d686fd70392173d2ce1fc20 Mon Sep 17 00:00:00 2001 From: deepanker13 Date: Mon, 8 Jan 2024 15:52:03 +0530 Subject: [PATCH] code review changes --- .github/workflows/publish-example-images.yaml | 4 ++- .github/workflows/publish-sdk-images.yaml | 26 ------------------- sdk/python/kubeflow/trainer/hf_dockerfile | 2 +- 3 files changed, 4 insertions(+), 28 deletions(-) delete mode 100644 .github/workflows/publish-sdk-images.yaml diff --git a/.github/workflows/publish-example-images.yaml b/.github/workflows/publish-example-images.yaml index 616c2f1072..a9e805b60b 100644 --- a/.github/workflows/publish-example-images.yaml +++ b/.github/workflows/publish-example-images.yaml @@ -52,7 +52,9 @@ jobs: - component-name: mxnet-auto-tuning dockerfile: examples/mxnet/tune/Dockerfile context: examples/mxnet/tune - + - component-name: train-api-hf-image + dockerfile: sdk/python/kubeflow/trainer/hf_dockerfile + context: sdk/python/kubeflow/trainer # TODO (tenzen-y): Fix the below broken Dockerfiles # - component-name: pytorch-dist-mnist-mpi # dockerfile: examples/pytorch/mnist/Dockerfile-mpi diff --git a/.github/workflows/publish-sdk-images.yaml b/.github/workflows/publish-sdk-images.yaml deleted file mode 100644 index 432e17e9e8..0000000000 --- a/.github/workflows/publish-sdk-images.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: Publish Training Operator SDK Images - -on: - - push - - pull_request - -jobs: - core: - name: Publish Image - uses: ./.github/workflows/build-and-publish-images.yaml - with: - component-name: ${{ matrix.component-name }} - platforms: linux/amd64,linux/arm64,linux/ppc64le - dockerfile: ${{ matrix.dockerfile }} - context: ${{ matrix.context }} - secrets: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} - - strategy: - fail-fast: false - matrix: - include: - - component-name: train-api-hf-image - dockerfile: sdk/python/kubeflow/trainer/hf_dockerfile - context: sdk/python/kubeflow/trainer diff --git a/sdk/python/kubeflow/trainer/hf_dockerfile b/sdk/python/kubeflow/trainer/hf_dockerfile index c7671aefb9..f0ddd8f9c2 100644 --- a/sdk/python/kubeflow/trainer/hf_dockerfile +++ b/sdk/python/kubeflow/trainer/hf_dockerfile @@ -1,5 +1,5 @@ # Use an official Pytorch runtime as a parent image -FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime +FROM nvcr.io/nvidia/pytorch:23.12-py3 # Set the working directory in the container WORKDIR /app