From 2550fbab224acef5f36fc4ce585fd505f518a94a Mon Sep 17 00:00:00 2001 From: Michael Mi Date: Mon, 14 Oct 2024 21:56:43 -0700 Subject: [PATCH] ci: build cuda 12.4 for scalellm cpp images (#346) --- .github/workflows/publish_cpp_image.yml | 44 +++++++++++++++++++ .github/workflows/publish_devel_image.yml | 2 +- .github/workflows/publish_manylinux_image.yml | 8 ++-- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish_cpp_image.yml b/.github/workflows/publish_cpp_image.yml index e50a3f68..cde07668 100644 --- a/.github/workflows/publish_cpp_image.yml +++ b/.github/workflows/publish_cpp_image.yml @@ -136,3 +136,47 @@ jobs: vectorchai/scalellm_cu118:${{ inputs.tag }} vectorchai/scalellm_cu118:latest + publish_scalellm_cuda124: + runs-on: [self-hosted, linux, release] + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Build ScaleLLM for cuda 12.4 + timeout-minutes: 60 + run: | + docker pull vectorchai/scalellm_devel:cuda12.4 + docker run --rm -t \ + -v "$CI_CACHE_DIR":/ci_cache \ + -v "$GITHUB_WORKSPACE":/ScaleLLM \ + -e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \ + -e CCACHE_DIR=/ci_cache/.ccache \ + -u $(id -u):$(id -g) \ + vectorchai/scalellm_devel:cuda12.4 \ + bash /ScaleLLM/scripts/build_scalellm.sh + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_HUB_USER }} + password: ${{ secrets.DOCKER_HUB_TOKEN }} + + - name: Build and push scalellm for cuda 12.4 + uses: docker/build-push-action@v5 + with: + context: . + file: ./docker/Dockerfile.scalellm + push: true + no-cache: true + tags: | + vectorchai/scalellm_cu124:${{ inputs.tag }} + vectorchai/scalellm_cu124:latest + diff --git a/.github/workflows/publish_devel_image.yml b/.github/workflows/publish_devel_image.yml index 0adf015d..0a3fad3d 100644 --- a/.github/workflows/publish_devel_image.yml +++ b/.github/workflows/publish_devel_image.yml @@ -7,7 +7,7 @@ env: jobs: publish_base: - runs-on: [self-hosted, linux, release] + runs-on: [self-hosted, linux, build] steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/.github/workflows/publish_manylinux_image.yml b/.github/workflows/publish_manylinux_image.yml index c1327414..afa6545c 100644 --- a/.github/workflows/publish_manylinux_image.yml +++ b/.github/workflows/publish_manylinux_image.yml @@ -11,9 +11,7 @@ jobs: fail-fast: false matrix: cuda: ["11.8", "12.1", "12.4"] - runs-on: [self-hosted, linux, release] - env: - CUDA_VERSION: ${{ matrix.cuda }} + runs-on: [self-hosted, linux, build] steps: - name: Checkout repository uses: actions/checkout@v4 @@ -42,7 +40,7 @@ jobs: cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache build-args: | - CUDA_VERSION=${CUDA_VERSION} + CUDA_VERSION=${{ matrix.cuda }} tags: | - vectorchai/scalellm_manylinux:cuda${CUDA_VERSION} + vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }}