upgrade torch to 2.4.0 (#280)
guocuimi authored Jul 24, 2024
1 parent 8efd28c commit b7ac313
Showing 12 changed files with 96 additions and 63 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/build_wheel.yml
@@ -23,9 +23,12 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.8", "3.9", "3.10", "3.11"]
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
cuda: ["11.8", "12.1"]
torch: ["2.1.2", "2.2.2", "2.3.1"]
torch: ["2.2.2", "2.3.1", "2.4.0"]
include:
- cuda: "12.4"
torch: "2.4.0"
runs-on: [self-hosted, linux, release]
env:
PYTHON_VERSION: ${{ matrix.python }}
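The matrix above fans out across Python, CUDA, and PyTorch versions, while the new `include` entry bolts on one extra combination. A rough Python sketch of how the expansion behaves — my reading of GitHub's documented matrix rules, not GitHub's actual implementation:

```python
# Sketch of how the build_wheel matrix expands; assumes GitHub's rule that
# an `include` entry which would overwrite original matrix values (here
# cuda "12.4") is added as one extra job instead of being merged into all.
from itertools import product

python_versions = ["3.8", "3.9", "3.10", "3.11", "3.12"]
cuda_versions = ["11.8", "12.1"]
torch_versions = ["2.2.2", "2.3.1", "2.4.0"]

# Cross product: 5 * 2 * 3 = 30 jobs.
jobs = [
    {"python": py, "cuda": cu, "torch": th}
    for py, cu, th in product(python_versions, cuda_versions, torch_versions)
]

# The include entry adds a 31st job carrying only cuda and torch.
jobs.append({"cuda": "12.4", "torch": "2.4.0"})

print(len(jobs))  # 31
```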
6 changes: 3 additions & 3 deletions .github/workflows/package_test.yml
@@ -38,9 +38,9 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.10"]
cuda: ["12.1"]
torch: ["2.3.1"]
python: ["3.12"]
cuda: ["12.4"]
torch: ["2.4.0"]
runs-on: [self-hosted, linux, build]
env:
PYTHON_VERSION: ${{ matrix.python }}
2 changes: 1 addition & 1 deletion .github/workflows/publish_devel_image.yml
@@ -42,6 +42,7 @@ jobs:
tags: |
vectorchai/scalellm_devel:cuda12.4-ubuntu22.04
vectorchai/scalellm_devel:cuda12.4
+ vectorchai/scalellm_devel:latest
- name: Build devel image for cuda 12.1
uses: docker/build-push-action@v5
@@ -58,7 +59,6 @@
tags: |
vectorchai/scalellm_devel:cuda12.1-ubuntu22.04
vectorchai/scalellm_devel:cuda12.1
- vectorchai/scalellm_devel:latest
- name: Build devel image for cuda 11.8
uses: docker/build-push-action@v5
2 changes: 1 addition & 1 deletion .github/workflows/publish_manylinux_image.yml
@@ -27,7 +27,7 @@ jobs:
- name: Create cache directory
run: mkdir -p $CI_CACHE_DIR/.buildx-cache

- - name: Build base for cuda 12.4 (experimental)
+ - name: Build base for cuda 12.4
uses: docker/build-push-action@v5
with:
context: ./docker
6 changes: 3 additions & 3 deletions .github/workflows/publish_wheel.yml
@@ -22,9 +22,9 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.8", "3.9", "3.10", "3.11"]
cuda: ["12.1"]
torch: ["2.3.1"]
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
cuda: ["12.4"]
torch: ["2.4.0"]
runs-on: [self-hosted, linux, release]
env:
PYTHON_VERSION: ${{ matrix.python }}
6 changes: 3 additions & 3 deletions .github/workflows/release_test.yml
@@ -13,9 +13,9 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.10"]
cuda: ["12.1"]
torch: ["2.3.1"]
python: ["3.12"]
cuda: ["12.4"]
torch: ["2.4.0"]
runs-on: [self-hosted, linux, build]
env:
PYTHON_VERSION: ${{ matrix.python }}
20 changes: 10 additions & 10 deletions CMakeLists.txt
@@ -194,25 +194,25 @@ if (DEFINED ENV{LIBTORCH_ROOT})
else()
include(FetchContent)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
- # download nightly libtorch with cuda 12.4 from pytorch.org (experimental)
+ # download libtorch 2.4.0 with cuda 12.4 from pytorch.org
if (USE_CXX11_ABI)
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/nightly/cu124/libtorch-cxx11-abi-shared-with-deps-latest.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu124.zip")
else()
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/nightly/cu124/libtorch-shared-with-deps-latest.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.4.0%2Bcu124.zip")
endif()
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.1)
- # download libtorch 2.3.1 with cuda 12.1 from pytorch.org
+ # download libtorch 2.4.0 with cuda 12.1 from pytorch.org
if (USE_CXX11_ABI)
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu121.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu121.zip")
else()
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.3.1%2Bcu121.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.4.0%2Bcu121.zip")
endif()
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.8)
- # download libtorch 2.3.1 with cuda 11.8 from pytorch.org
+ # download libtorch 2.4.0 with cuda 11.8 from pytorch.org
if (USE_CXX11_ABI)
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu118.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu118.zip")
else()
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.3.1%2Bcu118.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.4.0%2Bcu118.zip")
endif()
else()
# error out if cuda version is not supported
@@ -232,7 +232,7 @@ else()
FetchContent_MakeAvailable(libtorch)

find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
message(STATUS "Downloading and using libtorch 2.3.1 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
message(STATUS "Downloading and using libtorch 2.4.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
endif()

# check if USE_CXX11_ABI is set correctly
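The branch above boils down to picking a download URL from the CUDA toolkit version, the C++ ABI flag, and the pinned libtorch version (the `%2B` in each file name is just a URL-encoded `+`, as in `2.4.0+cu124`). A minimal Python sketch of the same selection logic, for illustration only:

```python
# Minimal sketch mirroring the URL selection in CMakeLists.txt above; the
# pinned version and the three CUDA tags come straight from the diff.
TORCH_VERSION = "2.4.0"

def _ver(v: str) -> tuple:
    # Compare versions numerically, not lexically ("11.8" < "12.4").
    return tuple(int(x) for x in v.split("."))

def libtorch_url(cuda_version: str, use_cxx11_abi: bool) -> str:
    if _ver(cuda_version) >= (12, 4):
        tag = "cu124"
    elif _ver(cuda_version) >= (12, 1):
        tag = "cu121"
    elif _ver(cuda_version) >= (11, 8):
        tag = "cu118"
    else:
        raise ValueError(f"unsupported CUDA version: {cuda_version}")
    abi = "cxx11-abi-" if use_cxx11_abi else ""
    # %2B is the URL-encoded "+" in file names like "2.4.0+cu124".
    return (f"https://download.pytorch.org/libtorch/{tag}/"
            f"libtorch-{abi}shared-with-deps-{TORCH_VERSION}%2B{tag}.zip")

print(libtorch_url("12.4", use_cxx11_abi=True))
```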
19 changes: 12 additions & 7 deletions README.md
@@ -1,10 +1,15 @@
- # ScaleLLM: An efficient LLM Inference solution
- [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![GitHub Repo stars](https://img.shields.io/github/stars/vectorch-ai/ScaleLLM?style=social)](https://github.com/vectorch-ai/ScaleLLM/stargazers) [![build and test](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml)
+ <h1 align="center">
+ ScaleLLM: An efficient LLM Inference solution
+ </h1>

+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+ [![build](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml)
+ [![PyPI](https://badge.fury.io/py/scalellm.svg)](https://badge.fury.io/py/scalellm)
+ [![Twitter](https://img.shields.io/twitter/url?label=%20%40VectorchAI&style=social&url=https://x.com/VectorchAI)](https://x.com/VectorchAI)
+ [![Discord](https://dcbadge.vercel.app/api/server/PKe5gvBZfn?compact=true&style=flat)](https://discord.gg/PKe5gvBZfn)

- [![Discord](https://dcbadge.vercel.app/api/server/PKe5gvBZfn)](https://discord.gg/PKe5gvBZfn)

- [ScaleLLM]() is a cutting-edge inference system engineered for large language models (LLMs), meticulously designed to meet the demands of production environments. It extends its support to a wide range of popular open-source models, including [Llama3](https://github.com/meta-llama/llama3), [Gemma](https://github.com/google-deepmind/gemma), Bloom, GPT-NeoX, and more.
+ [ScaleLLM](#) is a cutting-edge inference system engineered for large language models (LLMs), meticulously designed to meet the demands of production environments. It extends its support to a wide range of popular open-source models, including [Llama3](https://github.com/meta-llama/llama3), [Gemma](https://github.com/google-deepmind/gemma), Bloom, GPT-NeoX, and more.

ScaleLLM is currently undergoing active development. We are fully committed to consistently enhancing its efficiency while also incorporating additional features. Feel free to explore our [**_Roadmap_**](https://github.com/vectorch-ai/ScaleLLM/issues/84) for more details.

@@ -45,14 +50,14 @@ ScaleLLM is currently undergoing active development. We are fully committed to c

ScaleLLM is available as a Python Wheel package on PyPI. You can install it using pip:
```bash
- # Install scalellm with CUDA 12.1 and Pytorch 2.3
+ # Install scalellm with CUDA 12.4 and PyTorch 2.4.0
pip install scalellm
```

- If you want to install ScaleLLM with different version of CUDA and Pytorch, you can pip install it with provding index URL of the version. For example, to install ScaleLLM with CUDA 11.8 and Pytorch 2.2.2, you can use the following command:
+ If you want to install ScaleLLM with a different version of CUDA and PyTorch, you can pip install it by providing the index URL for that version. For example, to install ScaleLLM with CUDA 12.1 and PyTorch 2.2.2, use the following command:

```bash
- pip install scalellm -i https://whl.vectorch.com/cu118/torch2.2.2/
+ pip install scalellm -i https://whl.vectorch.com/cu121/torch2.2.2/
```
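
The index URLs follow a simple `cu<major><minor>/torch<version>` pattern. A hypothetical helper (not part of ScaleLLM) that assembles the command; which combinations are actually published on whl.vectorch.com beyond the documented ones is an assumption worth checking against the index itself:

```python
# Hypothetical helper, not part of ScaleLLM: derive the pip command for a
# CUDA/PyTorch pair from the index-URL pattern documented above.
def scalellm_pip_command(cuda: str, torch: str) -> str:
    cu_tag = "cu" + cuda.replace(".", "")  # "12.1" -> "cu121"
    index_url = f"https://whl.vectorch.com/{cu_tag}/torch{torch}/"
    return f"pip install scalellm -i {index_url}"

print(scalellm_pip_command("12.1", "2.2.2"))
# -> pip install scalellm -i https://whl.vectorch.com/cu121/torch2.2.2/
```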

### Build from source
44 changes: 25 additions & 19 deletions docker/common/install_cuda.sh
@@ -4,6 +4,9 @@

set -ex

+ NCCL_VERSION=v2.21.5-1
+ CUDNN_VERSION=9.1.0.70

function install_cusparselt_040 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
@@ -27,7 +30,7 @@ function install_cusparselt_052 {
}

function install_118 {
echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.4.0"
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
# install CUDA 11.8.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
@@ -38,16 +41,16 @@ function install_118 {

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
- wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
- tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
- cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/
- cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/
+ wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+ tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/include/* /usr/local/cuda/include/
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
- git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+ git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
@@ -60,7 +63,7 @@ function install_118 {
}

function install_121 {
echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.1 /usr/local/cuda
# install CUDA 12.1.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
@@ -71,16 +74,16 @@ function install_121 {

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
- wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
- tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
- cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
- cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+ wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+ tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
- git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+ git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
@@ -93,7 +96,7 @@ function install_121 {
}

function install_124 {
echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
echo "Installing CUDA 12.4 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
@@ -104,16 +107,16 @@ function install_124 {

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
- wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
- tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
- cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
- cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+ wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+ tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
- git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+ git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
@@ -201,6 +204,9 @@ function prune_124 {
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
fi

# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
@@ -232,4 +238,4 @@ do
;;
esac
shift
- done
\ No newline at end of file
+ done
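The script's refactor hoists the NCCL and cuDNN versions into `NCCL_VERSION` and `CUDNN_VERSION` so that all three installers share a single URL pattern, differing only in the CUDA major version baked into the archive name. A sketch of that pattern; the versions come from the diff, while the availability of any particular combination on NVIDIA's server is an assumption:

```python
# Sketch of the cuDNN archive URL pattern used by install_118/121/124 above.
CUDNN_VERSION = "9.1.0.70"

def cudnn_archive_url(cuda_major: int) -> str:
    # Archives are keyed by cuDNN version plus CUDA major version only,
    # which is why one CUDNN_VERSION variable serves 11.8, 12.1, and 12.4.
    name = f"cudnn-linux-x86_64-{CUDNN_VERSION}_cuda{cuda_major}-archive.tar.xz"
    return ("https://developer.download.nvidia.com/compute/cudnn/redist/"
            f"cudnn/linux-x86_64/{name}")

print(cudnn_archive_url(11))  # used by install_118
print(cudnn_archive_url(12))  # used by install_121 and install_124
```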
2 changes: 1 addition & 1 deletion docs/source/index.rst
@@ -12,7 +12,7 @@ ScaleLLM is available as a Python Wheel package on `PyPI <https://pypi.org/proje

.. code-block:: bash
- # Install scalellm with CUDA 12.1 and PyTorch 2.3
+ # Install scalellm with CUDA 12.4 and PyTorch 2.4.0
$ pip install scalellm
34 changes: 22 additions & 12 deletions docs/source/quick_start.rst
@@ -12,7 +12,7 @@ ScaleLLM is available as a Python Wheel package on `PyPI <https://pypi.org/proje

.. code-block:: bash
- # Install ScaleLLM with CUDA 12.1 and PyTorch 2.3
+ # Install ScaleLLM with CUDA 12.4 and PyTorch 2.4.0
$ pip install scalellm
Install other versions
@@ -21,10 +21,26 @@ If you want to install ScaleLLM with different versions of CUDA and PyTorch, you

.. tabs::

+ .. tab:: CUDA 12.4

+ .. tabs::

+ .. tab:: PyTorch 2.4.0

+ .. code-block:: bash
+ $ pip install scalellm -i https://whl.vectorch.com/cu124/torch2.4.0/
.. tab:: CUDA 12.1

.. tabs::

+ .. tab:: PyTorch 2.4.0

+ .. code-block:: bash
+ $ pip install scalellm -i https://whl.vectorch.com/cu121/torch2.4.0/
.. tab:: PyTorch 2.3.1

.. code-block:: bash
@@ -37,15 +53,15 @@ If you want to install ScaleLLM with different versions of CUDA and PyTorch, you
$ pip install scalellm -i https://whl.vectorch.com/cu121/torch2.2.2/
- .. tab:: PyTorch 2.1.2
+ .. tab:: CUDA 11.8

- .. code-block:: bash
+ .. tabs::

- $ pip install scalellm -i https://whl.vectorch.com/cu121/torch2.1.2/
+ .. tab:: PyTorch 2.4.0

- .. tab:: CUDA 11.8
+ .. code-block:: bash
- .. tabs::
+ $ pip install scalellm -i https://whl.vectorch.com/cu118/torch2.4.0/
.. tab:: PyTorch 2.3.1

@@ -59,12 +75,6 @@ If you want to install ScaleLLM with different versions of CUDA and PyTorch, you
$ pip install scalellm -i https://whl.vectorch.com/cu118/torch2.2.2/
- .. tab:: PyTorch 2.1.2

- .. code-block:: bash
- $ pip install scalellm -i https://whl.vectorch.com/cu118/torch2.1.2/
Build from source
~~~~~~~~~~~~~~~~~
11 changes: 10 additions & 1 deletion setup.py
@@ -246,13 +246,22 @@ def build_extension(self, ext: CMakeExtension):
},
classifiers=[
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Programming Language :: C++",
"Programming Language :: CUDA",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3.12",
"Environment :: GPU :: NVIDIA CUDA",
"Operating System :: POSIX",
"License :: OSI Approved :: Apache Software License",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",

],
packages=["scalellm", "scalellm/serve", "scalellm/_C", "examples"],
ext_modules=[CMakeExtension("_C", "scalellm/")],
