From b7ac31383af22ff7d238d5fad6274a2167f25d0b Mon Sep 17 00:00:00 2001
From: Michael Mi
Date: Wed, 24 Jul 2024 11:29:34 -0700
Subject: [PATCH] upgrade torch to 2.4.0 (#280)

---
 .github/workflows/build_wheel.yml             |  7 ++-
 .github/workflows/package_test.yml            |  6 +--
 .github/workflows/publish_devel_image.yml     |  2 +-
 .github/workflows/publish_manylinux_image.yml |  2 +-
 .github/workflows/publish_wheel.yml           |  6 +--
 .github/workflows/release_test.yml            |  6 +--
 CMakeLists.txt                                | 20 ++++-----
 README.md                                     | 19 +++++---
 docker/common/install_cuda.sh                 | 44 +++++++++++--------
 docs/source/index.rst                         |  2 +-
 docs/source/quick_start.rst                   | 34 +++++++++-----
 setup.py                                      | 11 ++++-
 12 files changed, 96 insertions(+), 63 deletions(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 6f1673dc..0b2af2c6 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -23,9 +23,12 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: ["3.8", "3.9", "3.10", "3.11"]
+        python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
         cuda: ["11.8", "12.1"]
-        torch: ["2.1.2", "2.2.2", "2.3.1"]
+        torch: ["2.2.2", "2.3.1", "2.4.0"]
+        include:
+          - cuda: "12.4"
+            torch: "2.4.0"
     runs-on: [self-hosted, linux, release]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/.github/workflows/package_test.yml b/.github/workflows/package_test.yml
index ead55f76..e15bd472 100644
--- a/.github/workflows/package_test.yml
+++ b/.github/workflows/package_test.yml
@@ -38,9 +38,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: ["3.10"]
-        cuda: ["12.1"]
-        torch: ["2.3.1"]
+        python: ["3.12"]
+        cuda: ["12.4"]
+        torch: ["2.4.0"]
     runs-on: [self-hosted, linux, build]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/.github/workflows/publish_devel_image.yml b/.github/workflows/publish_devel_image.yml
index ea17fb66..7dd81321 100644
--- a/.github/workflows/publish_devel_image.yml
+++ b/.github/workflows/publish_devel_image.yml
@@ -42,6 +42,7 @@ jobs:
           tags: |
             vectorchai/scalellm_devel:cuda12.4-ubuntu22.04
             vectorchai/scalellm_devel:cuda12.4
+            vectorchai/scalellm_devel:latest
 
       - name: Build devel image for cuda 12.1
         uses: docker/build-push-action@v5
@@ -58,7 +59,6 @@ jobs:
           tags: |
             vectorchai/scalellm_devel:cuda12.1-ubuntu22.04
             vectorchai/scalellm_devel:cuda12.1
-            vectorchai/scalellm_devel:latest
 
       - name: Build devel image for cuda 11.8
         uses: docker/build-push-action@v5
diff --git a/.github/workflows/publish_manylinux_image.yml b/.github/workflows/publish_manylinux_image.yml
index a05f2c2a..2ed14873 100644
--- a/.github/workflows/publish_manylinux_image.yml
+++ b/.github/workflows/publish_manylinux_image.yml
@@ -27,7 +27,7 @@ jobs:
       - name: Create cache directory
         run: mkdir -p $CI_CACHE_DIR/.buildx-cache
 
-      - name: Build base for cuda 12.4 (experimental)
+      - name: Build base for cuda 12.4
         uses: docker/build-push-action@v5
         with:
           context: ./docker
diff --git a/.github/workflows/publish_wheel.yml b/.github/workflows/publish_wheel.yml
index 30e2f4f5..a28f05c7 100644
--- a/.github/workflows/publish_wheel.yml
+++ b/.github/workflows/publish_wheel.yml
@@ -22,9 +22,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: ["3.8", "3.9", "3.10", "3.11"]
-        cuda: ["12.1"]
-        torch: ["2.3.1"]
+        python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        cuda: ["12.4"]
+        torch: ["2.4.0"]
     runs-on: [self-hosted, linux, release]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/.github/workflows/release_test.yml b/.github/workflows/release_test.yml
index adb2b0f4..749a7749 100644
--- a/.github/workflows/release_test.yml
+++ b/.github/workflows/release_test.yml
@@ -13,9 +13,9 @@ jobs:
     strategy:
       fail-fast: false
      matrix:
-        python: ["3.10"]
-        cuda: ["12.1"]
-        torch: ["2.3.1"]
+        python: ["3.12"]
+        cuda: ["12.4"]
+        torch: ["2.4.0"]
     runs-on: [self-hosted, linux, build]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
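The workflow hunks above all bump the same build matrix to Python 3.12, CUDA 12.4, and torch 2.4.0. Each job exports its matrix values as environment variables for the build; only `PYTHON_VERSION` is visible in these hunks, so the other variable names in the sketch below are assumptions, not confirmed by this patch. A hypothetical reproduction of one cell of the new matrix locally:

```bash
# Hypothetical local reproduction of one CI matrix cell.
# PYTHON_VERSION appears in the workflows above; CUDA_VERSION and
# TORCH_VERSION are assumed names following the same pattern.
export PYTHON_VERSION=3.12
export CUDA_VERSION=12.4
export TORCH_VERSION=2.4.0

# Build the wheel without pulling build deps into the output dir.
pip wheel . --no-deps -w dist/
```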
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20880087..0250f8a5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -194,25 +194,25 @@ if (DEFINED ENV{LIBTORCH_ROOT})
 else()
   include(FetchContent)
   if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
-    # download nightly libtorch with cuda 12.4 from pytorch.org (experimental)
+    # download libtorch 2.4.0 with cuda 12.4 from pytorch.org
     if (USE_CXX11_ABI)
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/nightly/cu124/libtorch-cxx11-abi-shared-with-deps-latest.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu124.zip")
     else()
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/nightly/cu124/libtorch-shared-with-deps-latest.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.4.0%2Bcu124.zip")
     endif()
   elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.1)
-    # download libtorch 2.3.1 with cuda 12.1 from pytorch.org
+    # download libtorch 2.4.0 with cuda 12.1 from pytorch.org
     if (USE_CXX11_ABI)
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu121.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu121.zip")
     else()
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.3.1%2Bcu121.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.4.0%2Bcu121.zip")
     endif()
   elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.8)
-    # download libtorch 2.3.1 with cuda 11.8 from pytorch.org
+    # download libtorch 2.4.0 with cuda 11.8 from pytorch.org
     if (USE_CXX11_ABI)
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu118.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu118.zip")
     else()
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.3.1%2Bcu118.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.4.0%2Bcu118.zip")
     endif()
   else()
     # error out if cuda version is not supported
@@ -232,7 +232,7 @@ else()
   FetchContent_MakeAvailable(libtorch)
 
   find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
-  message(STATUS "Downloading and using libtorch 2.3.1 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
+  message(STATUS "Downloading and using libtorch 2.4.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
 endif()
 
 # check if USE_CXX11_ABI is set correctly
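For context on the CMake hunk above: the build first honors a preinstalled libtorch via the `LIBTORCH_ROOT` environment variable (the `if (DEFINED ENV{LIBTORCH_ROOT})` branch) and only falls back to downloading libtorch 2.4.0 matching the detected CUDA toolkit. A minimal sketch of both configure paths, assuming a standard out-of-source build; the build directory name and the libtorch install path are arbitrary assumptions:

```bash
# Path 1: no LIBTORCH_ROOT set, so CMake downloads libtorch 2.4.0
# for the detected CUDA toolkit (12.4, 12.1, or 11.8).
cmake -B build -DUSE_CXX11_ABI=ON

# Path 2: skip the download by pointing at a preinstalled libtorch,
# via the LIBTORCH_ROOT check at the top of the hunk.
LIBTORCH_ROOT=/opt/libtorch cmake -B build
```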
diff --git a/README.md b/README.md
index 2105e28e..7590024a 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,15 @@
-# ScaleLLM: An efficient LLM Inference solution
-[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![GitHub Repo stars](https://img.shields.io/github/stars/vectorch-ai/ScaleLLM?style=social)](https://github.com/vectorch-ai/ScaleLLM/stargazers) [![build and test](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml)
+<h1 align="center">
+ScaleLLM: An efficient LLM Inference solution
+</h1>
 
+[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![build](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml)
+[![PyPI](https://badge.fury.io/py/scalellm.svg)](https://badge.fury.io/py/scalellm)
+[![Twitter](https://img.shields.io/twitter/url?label=%20%40VectorchAI&style=social&url=https://x.com/VectorchAI)](https://x.com/VectorchAI)
+[![Discord](https://dcbadge.vercel.app/api/server/PKe5gvBZfn?compact=true&style=flat)](https://discord.gg/PKe5gvBZfn)
 
-[![Discord](https://dcbadge.vercel.app/api/server/PKe5gvBZfn)](https://discord.gg/PKe5gvBZfn)
-[ScaleLLM]() is a cutting-edge inference system engineered for large language models (LLMs), meticulously designed to meet the demands of production environments. It extends its support to a wide range of popular open-source models, including [Llama3](https://github.com/meta-llama/llama3), [Gemma](https://github.com/google-deepmind/gemma), Bloom, GPT-NeoX, and more.
+[ScaleLLM](#) is a cutting-edge inference system engineered for large language models (LLMs), meticulously designed to meet the demands of production environments. It extends its support to a wide range of popular open-source models, including [Llama3](https://github.com/meta-llama/llama3), [Gemma](https://github.com/google-deepmind/gemma), Bloom, GPT-NeoX, and more.
 
 ScaleLLM is currently undergoing active development. We are fully committed to consistently enhancing its efficiency while also incorporating additional features. Feel free to explore our [**_Roadmap_**](https://github.com/vectorch-ai/ScaleLLM/issues/84) for more details.
@@ -45,14 +50,14 @@ ScaleLLM is currently undergoing active development. We are fully committed to c
 ScaleLLM is available as a Python Wheel package on PyPI. You can install it using pip:
 
 ```bash
-# Install scalellm with CUDA 12.1 and Pytorch 2.3
+# Install scalellm with CUDA 12.4 and PyTorch 2.4.0
 pip install scalellm
 ```
 
-If you want to install ScaleLLM with different version of CUDA and Pytorch, you can pip install it with provding index URL of the version. For example, to install ScaleLLM with CUDA 11.8 and Pytorch 2.2.2, you can use the following command:
+If you want to install ScaleLLM with a different version of CUDA and PyTorch, you can pip install it by providing the index URL of that version. For example, to install ScaleLLM with CUDA 12.1 and PyTorch 2.2.2, you can use the following command:
 
 ```bash
-pip install scalellm -i https://whl.vectorch.com/cu118/torch2.2.2/
+pip install scalellm -i https://whl.vectorch.com/cu121/torch2.2.2/
 ```
 
 ### Build from source
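The index URLs in the README hunk follow a `cu<cuda>/torch<version>` layout. Extrapolating that pattern to the newly supported torch release is hypothetical but illustrative, for example CUDA 11.8 with torch 2.4.0:

```bash
# Hypothetical: the cu118/torch2.4.0 path is assumed from the
# cu<ver>/torch<ver> index layout shown in the README hunk above.
pip install scalellm -i https://whl.vectorch.com/cu118/torch2.4.0/
```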
diff --git a/docker/common/install_cuda.sh b/docker/common/install_cuda.sh
index fd32031b..1e90973c 100755
--- a/docker/common/install_cuda.sh
+++ b/docker/common/install_cuda.sh
@@ -4,6 +4,9 @@
 
 set -ex
 
+NCCL_VERSION=v2.21.5-1
+CUDNN_VERSION=9.1.0.70
+
 function install_cusparselt_040 {
     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
     mkdir tmp_cusparselt && pushd tmp_cusparselt
@@ -27,7 +30,7 @@ function install_cusparselt_052 {
 }
 
 function install_118 {
-    echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.4.0"
+    echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
     rm -rf /usr/local/cuda-11.8 /usr/local/cuda
     # install CUDA 11.8.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
@@ -38,16 +41,16 @@ function install_118 {
 
     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn
 
     # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
     # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
-    git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
     cd nccl && make -j src.build
     cp -a build/include/* /usr/local/cuda/include/
     cp -a build/lib/* /usr/local/cuda/lib64/
@@ -60,7 +63,7 @@ function install_118 {
 }
 
 function install_121 {
-    echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
+    echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
     rm -rf /usr/local/cuda-12.1 /usr/local/cuda
     # install CUDA 12.1.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
@@ -71,16 +74,16 @@ function install_121 {
 
     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn
 
     # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
     # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
-    git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
     cd nccl && make -j src.build
     cp -a build/include/* /usr/local/cuda/include/
     cp -a build/lib/* /usr/local/cuda/lib64/
@@ -93,7 +96,7 @@ function install_121 {
 }
 
 function install_124 {
-    echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
+    echo "Installing CUDA 12.4 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
     rm -rf /usr/local/cuda-12.4 /usr/local/cuda
     # install CUDA 12.4.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
@@ -104,16 +107,16 @@ function install_124 {
 
     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn
 
     # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
     # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
-    git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
     cd nccl && make -j src.build
     cp -a build/include/* /usr/local/cuda/include/
     cp -a build/lib/* /usr/local/cuda/lib64/
@@ -201,6 +204,9 @@ function prune_124 {
     if [[ -n "$OVERRIDE_GENCODE" ]]; then
         export GENCODE=$OVERRIDE_GENCODE
     fi
+    if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
+        export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
+    fi
 
     # all CUDA libs except CuDNN and CuBLAS
     ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
@@ -232,4 +238,4 @@ do
         ;;
     esac
     shift
-done
+done
\ No newline at end of file
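The trailing `case`/`shift` loop of install_cuda.sh (visible in the last hunk) dispatches on CUDA versions passed as arguments, and the new `OVERRIDE_GENCODE_CUDNN` hook mirrors the existing `OVERRIDE_GENCODE` one. A hypothetical invocation, with the argument format assumed from that loop:

```bash
# Hypothetical usage sketch of docker/common/install_cuda.sh:
# install CUDA 12.4 with the pinned cuDNN 9.1.0.70 and NCCL v2.21.5-1,
# restricting pruned static libs to a single gencode. The override
# variable names are the ones exported in the hunks above; the
# gencode values here are illustrative.
OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80" \
OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80" \
bash docker/common/install_cuda.sh 12.4
```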
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 7592cdb7..cc95c4e7 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -12,7 +12,7 @@
 ScaleLLM is available as a Python Wheel package on `PyPI