From b7ac31383af22ff7d238d5fad6274a2167f25d0b Mon Sep 17 00:00:00 2001
From: Michael Mi
Date: Wed, 24 Jul 2024 11:29:34 -0700
Subject: [PATCH] upgrade torch to 2.4.0 (#280)

---
 .github/workflows/build_wheel.yml             |  7 ++-
 .github/workflows/package_test.yml            |  6 +--
 .github/workflows/publish_devel_image.yml     |  2 +-
 .github/workflows/publish_manylinux_image.yml |  2 +-
 .github/workflows/publish_wheel.yml           |  6 +--
 .github/workflows/release_test.yml            |  6 +--
 CMakeLists.txt                                | 20 ++++-----
 README.md                                     | 19 +++++---
 docker/common/install_cuda.sh                 | 44 +++++++++++--------
 docs/source/index.rst                         |  2 +-
 docs/source/quick_start.rst                   | 34 +++++++++-----
 setup.py                                      | 11 ++++-
 12 files changed, 96 insertions(+), 63 deletions(-)

diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index 6f1673dc..0b2af2c6 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -23,9 +23,12 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: ["3.8", "3.9", "3.10", "3.11"]
+        python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
         cuda: ["11.8", "12.1"]
-        torch: ["2.1.2", "2.2.2", "2.3.1"]
+        torch: ["2.2.2", "2.3.1", "2.4.0"]
+        include:
+          - cuda: "12.4"
+            torch: "2.4.0"
     runs-on: [self-hosted, linux, release]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/.github/workflows/package_test.yml b/.github/workflows/package_test.yml
index ead55f76..e15bd472 100644
--- a/.github/workflows/package_test.yml
+++ b/.github/workflows/package_test.yml
@@ -38,9 +38,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: ["3.10"]
-        cuda: ["12.1"]
-        torch: ["2.3.1"]
+        python: ["3.12"]
+        cuda: ["12.4"]
+        torch: ["2.4.0"]
     runs-on: [self-hosted, linux, build]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/.github/workflows/publish_devel_image.yml b/.github/workflows/publish_devel_image.yml
index ea17fb66..7dd81321 100644
--- a/.github/workflows/publish_devel_image.yml
+++ b/.github/workflows/publish_devel_image.yml
@@ -42,6 +42,7 @@ jobs:
           tags: |
             vectorchai/scalellm_devel:cuda12.4-ubuntu22.04
             vectorchai/scalellm_devel:cuda12.4
+            vectorchai/scalellm_devel:latest
 
       - name: Build devel image for cuda 12.1
         uses: docker/build-push-action@v5
@@ -58,7 +59,6 @@ jobs:
           tags: |
             vectorchai/scalellm_devel:cuda12.1-ubuntu22.04
             vectorchai/scalellm_devel:cuda12.1
-            vectorchai/scalellm_devel:latest
 
       - name: Build devel image for cuda 11.8
         uses: docker/build-push-action@v5
diff --git a/.github/workflows/publish_manylinux_image.yml b/.github/workflows/publish_manylinux_image.yml
index a05f2c2a..2ed14873 100644
--- a/.github/workflows/publish_manylinux_image.yml
+++ b/.github/workflows/publish_manylinux_image.yml
@@ -27,7 +27,7 @@ jobs:
       - name: Create cache directory
         run: mkdir -p $CI_CACHE_DIR/.buildx-cache
 
-      - name: Build base for cuda 12.4 (experimental)
+      - name: Build base for cuda 12.4
         uses: docker/build-push-action@v5
         with:
           context: ./docker
diff --git a/.github/workflows/publish_wheel.yml b/.github/workflows/publish_wheel.yml
index 30e2f4f5..a28f05c7 100644
--- a/.github/workflows/publish_wheel.yml
+++ b/.github/workflows/publish_wheel.yml
@@ -22,9 +22,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: ["3.8", "3.9", "3.10", "3.11"]
-        cuda: ["12.1"]
-        torch: ["2.3.1"]
+        python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        cuda: ["12.4"]
+        torch: ["2.4.0"]
     runs-on: [self-hosted, linux, release]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
diff --git a/.github/workflows/release_test.yml b/.github/workflows/release_test.yml
index adb2b0f4..749a7749 100644
--- a/.github/workflows/release_test.yml
+++ b/.github/workflows/release_test.yml
@@ -13,9 +13,9 @@ jobs:
     strategy:
       fail-fast: false
      matrix:
-        python: ["3.10"]
-        cuda: ["12.1"]
-        torch: ["2.3.1"]
+        python: ["3.12"]
+        cuda: ["12.4"]
+        torch: ["2.4.0"]
     runs-on: [self-hosted, linux, build]
     env:
       PYTHON_VERSION: ${{ matrix.python }}
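The workflow hunks above all bump the same build matrix to Python 3.12, CUDA 12.4, and torch 2.4.0. Each job exports its matrix values as environment variables for the build; only `PYTHON_VERSION` is visible in these hunks, so the other variable names in the sketch below are assumptions, not confirmed by this patch. A hypothetical reproduction of one cell of the new matrix locally:

```bash
# Hypothetical local reproduction of one CI matrix cell.
# PYTHON_VERSION appears in the workflows above; CUDA_VERSION and
# TORCH_VERSION are assumed names following the same pattern.
export PYTHON_VERSION=3.12
export CUDA_VERSION=12.4
export TORCH_VERSION=2.4.0

# Build the wheel without pulling build deps into the output dir.
pip wheel . --no-deps -w dist/
```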
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20880087..0250f8a5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -194,25 +194,25 @@ if (DEFINED ENV{LIBTORCH_ROOT})
 else()
   include(FetchContent)
   if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
-    # download nightly libtorch with cuda 12.4 from pytorch.org (experimental)
+    # download libtorch 2.4.0 with cuda 12.4 from pytorch.org
     if (USE_CXX11_ABI)
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/nightly/cu124/libtorch-cxx11-abi-shared-with-deps-latest.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu124.zip")
     else()
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/nightly/cu124/libtorch-shared-with-deps-latest.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.4.0%2Bcu124.zip")
     endif()
   elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.1)
-    # download libtorch 2.3.1 with cuda 12.1 from pytorch.org
+    # download libtorch 2.4.0 with cuda 12.1 from pytorch.org
     if (USE_CXX11_ABI)
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu121.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu121.zip")
     else()
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.3.1%2Bcu121.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.4.0%2Bcu121.zip")
     endif()
   elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.8)
-    # download libtorch 2.3.1 with cuda 11.8 from pytorch.org
+    # download libtorch 2.4.0 with cuda 11.8 from pytorch.org
     if (USE_CXX11_ABI)
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu118.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu118.zip")
     else()
-      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.3.1%2Bcu118.zip")
+      set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.4.0%2Bcu118.zip")
     endif()
   else()
     # error out if cuda version is not supported
@@ -232,7 +232,7 @@ else()
   FetchContent_MakeAvailable(libtorch)
 
   find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
-  message(STATUS "Downloading and using libtorch 2.3.1 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
+  message(STATUS "Downloading and using libtorch 2.4.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
 endif()
 
 # check if USE_CXX11_ABI is set correctly
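For context on the CMake hunk above: the build first honors a preinstalled libtorch via the `LIBTORCH_ROOT` environment variable (the `if (DEFINED ENV{LIBTORCH_ROOT})` branch) and only falls back to downloading libtorch 2.4.0 matching the detected CUDA toolkit. A minimal sketch of both configure paths, assuming a standard out-of-source build; the build directory name and the libtorch install path are arbitrary assumptions:

```bash
# Path 1: no LIBTORCH_ROOT set, so CMake downloads libtorch 2.4.0
# for the detected CUDA toolkit (12.4, 12.1, or 11.8).
cmake -B build -DUSE_CXX11_ABI=ON

# Path 2: skip the download by pointing at a preinstalled libtorch,
# via the LIBTORCH_ROOT check at the top of the hunk.
LIBTORCH_ROOT=/opt/libtorch cmake -B build
```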
diff --git a/README.md b/README.md
index 2105e28e..7590024a 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,15 @@
-# ScaleLLM: An efficient LLM Inference solution
-[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![GitHub Repo stars](https://img.shields.io/github/stars/vectorch-ai/ScaleLLM?style=social)](https://github.com/vectorch-ai/ScaleLLM/stargazers) [![build and test](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml)
+<h1 align="center">
+ScaleLLM: An efficient LLM Inference solution
+</h1>
 
+[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![build](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml)
+[![PyPI](https://badge.fury.io/py/scalellm.svg)](https://badge.fury.io/py/scalellm)
+[![Twitter](https://img.shields.io/twitter/url?label=%20%40VectorchAI&style=social&url=https://x.com/VectorchAI)](https://x.com/VectorchAI)
+[![Discord](https://dcbadge.vercel.app/api/server/PKe5gvBZfn?compact=true&style=flat)](https://discord.gg/PKe5gvBZfn)
 
-[![Discord](https://dcbadge.vercel.app/api/server/PKe5gvBZfn)](https://discord.gg/PKe5gvBZfn)
-[ScaleLLM]() is a cutting-edge inference system engineered for large language models (LLMs), meticulously designed to meet the demands of production environments. It extends its support to a wide range of popular open-source models, including [Llama3](https://github.com/meta-llama/llama3), [Gemma](https://github.com/google-deepmind/gemma), Bloom, GPT-NeoX, and more.
+[ScaleLLM](#) is a cutting-edge inference system engineered for large language models (LLMs), meticulously designed to meet the demands of production environments. It extends its support to a wide range of popular open-source models, including [Llama3](https://github.com/meta-llama/llama3), [Gemma](https://github.com/google-deepmind/gemma), Bloom, GPT-NeoX, and more.
 
 ScaleLLM is currently undergoing active development. We are fully committed to consistently enhancing its efficiency while also incorporating additional features. Feel free to explore our [**_Roadmap_**](https://github.com/vectorch-ai/ScaleLLM/issues/84) for more details.
@@ -45,14 +50,14 @@ ScaleLLM is currently undergoing active development. We are fully committed to c
 ScaleLLM is available as a Python Wheel package on PyPI. You can install it using pip:
 
 ```bash
-# Install scalellm with CUDA 12.1 and Pytorch 2.3
+# Install scalellm with CUDA 12.4 and PyTorch 2.4.0
 pip install scalellm
 ```
 
-If you want to install ScaleLLM with different version of CUDA and Pytorch, you can pip install it with provding index URL of the version. For example, to install ScaleLLM with CUDA 11.8 and Pytorch 2.2.2, you can use the following command:
+If you want to install ScaleLLM with a different version of CUDA and PyTorch, you can pip install it by providing the index URL of that version. For example, to install ScaleLLM with CUDA 12.1 and PyTorch 2.2.2, you can use the following command:
 
 ```bash
-pip install scalellm -i https://whl.vectorch.com/cu118/torch2.2.2/
+pip install scalellm -i https://whl.vectorch.com/cu121/torch2.2.2/
 ```
 
 ### Build from source
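The index URLs in the README hunk follow a `cu<cuda>/torch<version>` layout. Extrapolating that pattern to the newly supported torch release is hypothetical but illustrative, for example CUDA 11.8 with torch 2.4.0:

```bash
# Hypothetical: the cu118/torch2.4.0 path is assumed from the
# cu<ver>/torch<ver> index layout shown in the README hunk above.
pip install scalellm -i https://whl.vectorch.com/cu118/torch2.4.0/
```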
diff --git a/docker/common/install_cuda.sh b/docker/common/install_cuda.sh
index fd32031b..1e90973c 100755
--- a/docker/common/install_cuda.sh
+++ b/docker/common/install_cuda.sh
@@ -4,6 +4,9 @@
 
 set -ex
 
+NCCL_VERSION=v2.21.5-1
+CUDNN_VERSION=9.1.0.70
+
 function install_cusparselt_040 {
     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
     mkdir tmp_cusparselt && pushd tmp_cusparselt
@@ -27,7 +30,7 @@ function install_cusparselt_052 {
 }
 
 function install_118 {
-    echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.4.0"
+    echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
     rm -rf /usr/local/cuda-11.8 /usr/local/cuda
     # install CUDA 11.8.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
@@ -38,16 +41,16 @@ function install_118 {
 
     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn
 
     # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
     # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
-    git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
     cd nccl && make -j src.build
     cp -a build/include/* /usr/local/cuda/include/
     cp -a build/lib/* /usr/local/cuda/lib64/
@@ -60,7 +63,7 @@ function install_118 {
 }
 
 function install_121 {
-    echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
+    echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
     rm -rf /usr/local/cuda-12.1 /usr/local/cuda
     # install CUDA 12.1.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
@@ -71,16 +74,16 @@ function install_121 {
 
     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn
 
     # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
     # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
-    git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
     cd nccl && make -j src.build
     cp -a build/include/* /usr/local/cuda/include/
     cp -a build/lib/* /usr/local/cuda/lib64/
@@ -93,7 +96,7 @@ function install_121 {
 }
 
 function install_124 {
-    echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
+    echo "Installing CUDA 12.4 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
     rm -rf /usr/local/cuda-12.4 /usr/local/cuda
     # install CUDA 12.4.0 in the same container
     wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
@@ -104,16 +107,16 @@ function install_124 {
 
     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn && cd tmp_cudnn
-    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
-    cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
     cd ..
     rm -rf tmp_cudnn
 
     # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
     # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
-    git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
     cd nccl && make -j src.build
     cp -a build/include/* /usr/local/cuda/include/
     cp -a build/lib/* /usr/local/cuda/lib64/
@@ -201,6 +204,9 @@ function prune_124 {
     if [[ -n "$OVERRIDE_GENCODE" ]]; then
         export GENCODE=$OVERRIDE_GENCODE
     fi
+    if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
+        export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
+    fi
 
     # all CUDA libs except CuDNN and CuBLAS
     ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
@@ -232,4 +238,4 @@ do
         ;;
     esac
     shift
-done
+done
\ No newline at end of file
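The trailing `case`/`shift` loop of install_cuda.sh (visible in the last hunk) dispatches on CUDA versions passed as arguments, and the new `OVERRIDE_GENCODE_CUDNN` hook mirrors the existing `OVERRIDE_GENCODE` one. A hypothetical invocation, with the argument format assumed from that loop:

```bash
# Hypothetical usage sketch of docker/common/install_cuda.sh:
# install CUDA 12.4 with the pinned cuDNN 9.1.0.70 and NCCL v2.21.5-1,
# restricting pruned static libs to a single gencode. The override
# variable names are the ones exported in the hunks above; the
# gencode values here are illustrative.
OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80" \
OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80" \
bash docker/common/install_cuda.sh 12.4
```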
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 7592cdb7..cc95c4e7 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -12,7 +12,7 @@
 ScaleLLM is available as a Python Wheel package on `PyPI