upgrade torch to 2.4.0 (#280)
guocuimi authored Jul 24, 2024
1 parent 8efd28c commit b7ac313
Showing 12 changed files with 96 additions and 63 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/build_wheel.yml
@@ -23,9 +23,12 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.8", "3.9", "3.10", "3.11"]
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
cuda: ["11.8", "12.1"]
torch: ["2.1.2", "2.2.2", "2.3.1"]
torch: ["2.2.2", "2.3.1", "2.4.0"]
include:
- cuda: "12.4"
torch: "2.4.0"
runs-on: [self-hosted, linux, release]
env:
PYTHON_VERSION: ${{ matrix.python }}
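The matrix above fans out across Python, CUDA, and PyTorch versions, while the new `include` entry bolts on one extra combination. A rough Python sketch of how the expansion behaves — my reading of GitHub's documented matrix rules, not GitHub's actual implementation:

```python
# Sketch of how the build_wheel matrix expands; assumes GitHub's rule that
# an `include` entry which would overwrite original matrix values (here
# cuda "12.4") is added as one extra job instead of being merged into all.
from itertools import product

python_versions = ["3.8", "3.9", "3.10", "3.11", "3.12"]
cuda_versions = ["11.8", "12.1"]
torch_versions = ["2.2.2", "2.3.1", "2.4.0"]

# Cross product: 5 * 2 * 3 = 30 jobs.
jobs = [
    {"python": py, "cuda": cu, "torch": th}
    for py, cu, th in product(python_versions, cuda_versions, torch_versions)
]

# The include entry adds a 31st job carrying only cuda and torch.
jobs.append({"cuda": "12.4", "torch": "2.4.0"})

print(len(jobs))  # 31
```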
6 changes: 3 additions & 3 deletions .github/workflows/package_test.yml
@@ -38,9 +38,9 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.10"]
cuda: ["12.1"]
torch: ["2.3.1"]
python: ["3.12"]
cuda: ["12.4"]
torch: ["2.4.0"]
runs-on: [self-hosted, linux, build]
env:
PYTHON_VERSION: ${{ matrix.python }}
2 changes: 1 addition & 1 deletion .github/workflows/publish_devel_image.yml
@@ -42,6 +42,7 @@ jobs:
tags: |
vectorchai/scalellm_devel:cuda12.4-ubuntu22.04
vectorchai/scalellm_devel:cuda12.4
+ vectorchai/scalellm_devel:latest
- name: Build devel image for cuda 12.1
uses: docker/build-push-action@v5
@@ -58,7 +59,6 @@
tags: |
vectorchai/scalellm_devel:cuda12.1-ubuntu22.04
vectorchai/scalellm_devel:cuda12.1
- vectorchai/scalellm_devel:latest
- name: Build devel image for cuda 11.8
uses: docker/build-push-action@v5
2 changes: 1 addition & 1 deletion .github/workflows/publish_manylinux_image.yml
@@ -27,7 +27,7 @@ jobs:
- name: Create cache directory
run: mkdir -p $CI_CACHE_DIR/.buildx-cache

- - name: Build base for cuda 12.4 (experimental)
+ - name: Build base for cuda 12.4
uses: docker/build-push-action@v5
with:
context: ./docker
6 changes: 3 additions & 3 deletions .github/workflows/publish_wheel.yml
@@ -22,9 +22,9 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.8", "3.9", "3.10", "3.11"]
cuda: ["12.1"]
torch: ["2.3.1"]
python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
cuda: ["12.4"]
torch: ["2.4.0"]
runs-on: [self-hosted, linux, release]
env:
PYTHON_VERSION: ${{ matrix.python }}
6 changes: 3 additions & 3 deletions .github/workflows/release_test.yml
@@ -13,9 +13,9 @@ jobs:
strategy:
fail-fast: false
matrix:
python: ["3.10"]
cuda: ["12.1"]
torch: ["2.3.1"]
python: ["3.12"]
cuda: ["12.4"]
torch: ["2.4.0"]
runs-on: [self-hosted, linux, build]
env:
PYTHON_VERSION: ${{ matrix.python }}
20 changes: 10 additions & 10 deletions CMakeLists.txt
@@ -194,25 +194,25 @@ if (DEFINED ENV{LIBTORCH_ROOT})
else()
include(FetchContent)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.4)
- # download nightly libtorch with cuda 12.4 from pytorch.org (experimental)
+ # download libtorch 2.4.0 with cuda 12.4 from pytorch.org
if (USE_CXX11_ABI)
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/nightly/cu124/libtorch-cxx11-abi-shared-with-deps-latest.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu124.zip")
else()
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/nightly/cu124/libtorch-shared-with-deps-latest.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu124/libtorch-shared-with-deps-2.4.0%2Bcu124.zip")
endif()
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.1)
- # download libtorch 2.3.1 with cuda 12.1 from pytorch.org
+ # download libtorch 2.4.0 with cuda 12.1 from pytorch.org
if (USE_CXX11_ABI)
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu121.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu121.zip")
else()
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.3.1%2Bcu121.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.4.0%2Bcu121.zip")
endif()
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.8)
- # download libtorch 2.3.1 with cuda 11.8 from pytorch.org
+ # download libtorch 2.4.0 with cuda 11.8 from pytorch.org
if (USE_CXX11_ABI)
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu118.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcu118.zip")
else()
- set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.3.1%2Bcu118.zip")
+ set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu118/libtorch-shared-with-deps-2.4.0%2Bcu118.zip")
endif()
else()
# error out if cuda version is not supported
@@ -232,7 +232,7 @@ else()
FetchContent_MakeAvailable(libtorch)

find_package(Torch REQUIRED PATHS ${libtorch_SOURCE_DIR} NO_DEFAULT_PATH)
message(STATUS "Downloading and using libtorch 2.3.1 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
message(STATUS "Downloading and using libtorch 2.4.0 for cuda ${CUDA_VERSION} at ${libtorch_SOURCE_DIR}")
endif()

# check if USE_CXX11_ABI is set correctly
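The branch above boils down to picking a download URL from the CUDA toolkit version, the C++ ABI flag, and the pinned libtorch version (the `%2B` in each file name is just a URL-encoded `+`, as in `2.4.0+cu124`). A minimal Python sketch of the same selection logic, for illustration only:

```python
# Minimal sketch mirroring the URL selection in CMakeLists.txt above; the
# pinned version and the three CUDA tags come straight from the diff.
TORCH_VERSION = "2.4.0"

def _ver(v: str) -> tuple:
    # Compare versions numerically, not lexically ("11.8" < "12.4").
    return tuple(int(x) for x in v.split("."))

def libtorch_url(cuda_version: str, use_cxx11_abi: bool) -> str:
    if _ver(cuda_version) >= (12, 4):
        tag = "cu124"
    elif _ver(cuda_version) >= (12, 1):
        tag = "cu121"
    elif _ver(cuda_version) >= (11, 8):
        tag = "cu118"
    else:
        raise ValueError(f"unsupported CUDA version: {cuda_version}")
    abi = "cxx11-abi-" if use_cxx11_abi else ""
    # %2B is the URL-encoded "+" in file names like "2.4.0+cu124".
    return (f"https://download.pytorch.org/libtorch/{tag}/"
            f"libtorch-{abi}shared-with-deps-{TORCH_VERSION}%2B{tag}.zip")

print(libtorch_url("12.4", use_cxx11_abi=True))
```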
19 changes: 12 additions & 7 deletions README.md
@@ -1,10 +1,15 @@
- # ScaleLLM: An efficient LLM Inference solution
- [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) [![GitHub Repo stars](https://img.shields.io/github/stars/vectorch-ai/ScaleLLM?style=social)](https://github.com/vectorch-ai/ScaleLLM/stargazers) [![build and test](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml)
+ <h1 align="center">
+ ScaleLLM: An efficient LLM Inference solution
+ </h1>

+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+ [![build](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/vectorch-ai/ScaleLLM/actions/workflows/build.yml)
+ [![PyPI](https://badge.fury.io/py/scalellm.svg)](https://badge.fury.io/py/scalellm)
+ [![Twitter](https://img.shields.io/twitter/url?label=%20%40VectorchAI&style=social&url=https://x.com/VectorchAI)](https://x.com/VectorchAI)
+ [![Discord](https://dcbadge.vercel.app/api/server/PKe5gvBZfn?compact=true&style=flat)](https://discord.gg/PKe5gvBZfn)

- [![Discord](https://dcbadge.vercel.app/api/server/PKe5gvBZfn)](https://discord.gg/PKe5gvBZfn)

- [ScaleLLM]() is a cutting-edge inference system engineered for large language models (LLMs), meticulously designed to meet the demands of production environments. It extends its support to a wide range of popular open-source models, including [Llama3](https://github.com/meta-llama/llama3), [Gemma](https://github.com/google-deepmind/gemma), Bloom, GPT-NeoX, and more.
+ [ScaleLLM](#) is a cutting-edge inference system engineered for large language models (LLMs), meticulously designed to meet the demands of production environments. It extends its support to a wide range of popular open-source models, including [Llama3](https://github.com/meta-llama/llama3), [Gemma](https://github.com/google-deepmind/gemma), Bloom, GPT-NeoX, and more.

ScaleLLM is currently undergoing active development. We are fully committed to consistently enhancing its efficiency while also incorporating additional features. Feel free to explore our [**_Roadmap_**](https://github.com/vectorch-ai/ScaleLLM/issues/84) for more details.

@@ -45,14 +50,14 @@ ScaleLLM is currently undergoing active development. We are fully committed to c

ScaleLLM is available as a Python Wheel package on PyPI. You can install it using pip:
```bash
- # Install scalellm with CUDA 12.1 and Pytorch 2.3
+ # Install scalellm with CUDA 12.4 and PyTorch 2.4.0
pip install scalellm
```

- If you want to install ScaleLLM with different version of CUDA and Pytorch, you can pip install it with provding index URL of the version. For example, to install ScaleLLM with CUDA 11.8 and Pytorch 2.2.2, you can use the following command:
+ If you want to install ScaleLLM with a different version of CUDA and PyTorch, you can pip install it by providing the index URL for that version. For example, to install ScaleLLM with CUDA 12.1 and PyTorch 2.2.2, use the following command:

```bash
- pip install scalellm -i https://whl.vectorch.com/cu118/torch2.2.2/
+ pip install scalellm -i https://whl.vectorch.com/cu121/torch2.2.2/
```
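
The index URLs follow a simple `cu<major><minor>/torch<version>` pattern. A hypothetical helper (not part of ScaleLLM) that assembles the command; which combinations are actually published on whl.vectorch.com beyond the documented ones is an assumption worth checking against the index itself:

```python
# Hypothetical helper, not part of ScaleLLM: derive the pip command for a
# CUDA/PyTorch pair from the index-URL pattern documented above.
def scalellm_pip_command(cuda: str, torch: str) -> str:
    cu_tag = "cu" + cuda.replace(".", "")  # "12.1" -> "cu121"
    index_url = f"https://whl.vectorch.com/{cu_tag}/torch{torch}/"
    return f"pip install scalellm -i {index_url}"

print(scalellm_pip_command("12.1", "2.2.2"))
# -> pip install scalellm -i https://whl.vectorch.com/cu121/torch2.2.2/
```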

### Build from source
44 changes: 25 additions & 19 deletions docker/common/install_cuda.sh
@@ -4,6 +4,9 @@

set -ex

+ NCCL_VERSION=v2.21.5-1
+ CUDNN_VERSION=9.1.0.70

function install_cusparselt_040 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
@@ -27,7 +30,7 @@ function install_cusparselt_052 {
}

function install_118 {
echo "Installing CUDA 11.8 and cuDNN 8.7 and NCCL 2.15 and cuSparseLt-0.4.0"
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
# install CUDA 11.8.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
@@ -38,16 +41,16 @@ function install_118 {

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
- wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz -O cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
- tar xf cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz
- cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/include/* /usr/local/cuda/include/
- cp -a cudnn-linux-x86_64-8.7.0.84_cuda11-archive/lib/* /usr/local/cuda/lib64/
+ wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+ tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/include/* /usr/local/cuda/include/
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
- git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+ git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
@@ -60,7 +63,7 @@ function install_118 {
}

function install_121 {
echo "Installing CUDA 12.1 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.1 /usr/local/cuda
# install CUDA 12.1.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
@@ -71,16 +74,16 @@ function install_121 {

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
- wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
- tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
- cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
- cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+ wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+ tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
- git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+ git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
@@ -93,7 +96,7 @@ function install_121 {
}

function install_124 {
echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
echo "Installing CUDA 12.4 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
@@ -104,16 +107,16 @@ function install_124 {

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
- wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
- tar xf cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz
- cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
- cp -a cudnn-linux-x86_64-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
+ wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+ tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
+ cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
- git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
+ git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
@@ -201,6 +204,9 @@ function prune_124 {
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
fi

# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
@@ -232,4 +238,4 @@ do
;;
esac
shift
- done
\ No newline at end of file
+ done
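The script's refactor hoists the NCCL and cuDNN versions into `NCCL_VERSION` and `CUDNN_VERSION` so that all three installers share a single URL pattern, differing only in the CUDA major version baked into the archive name. A sketch of that pattern; the versions come from the diff, while the availability of any particular combination on NVIDIA's server is an assumption:

```python
# Sketch of the cuDNN archive URL pattern used by install_118/121/124 above.
CUDNN_VERSION = "9.1.0.70"

def cudnn_archive_url(cuda_major: int) -> str:
    # Archives are keyed by cuDNN version plus CUDA major version only,
    # which is why one CUDNN_VERSION variable serves 11.8, 12.1, and 12.4.
    name = f"cudnn-linux-x86_64-{CUDNN_VERSION}_cuda{cuda_major}-archive.tar.xz"
    return ("https://developer.download.nvidia.com/compute/cudnn/redist/"
            f"cudnn/linux-x86_64/{name}")

print(cudnn_archive_url(11))  # used by install_118
print(cudnn_archive_url(12))  # used by install_121 and install_124
```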
2 changes: 1 addition & 1 deletion docs/source/index.rst
@@ -12,7 +12,7 @@ ScaleLLM is available as a Python Wheel package on `PyPI <https://pypi.org/proje

.. code-block:: bash
- # Install scalellm with CUDA 12.1 and PyTorch 2.3
+ # Install scalellm with CUDA 12.4 and PyTorch 2.4.0
$ pip install scalellm
34 changes: 22 additions & 12 deletions docs/source/quick_start.rst
@@ -12,7 +12,7 @@ ScaleLLM is available as a Python Wheel package on `PyPI <https://pypi.org/proje

.. code-block:: bash
- # Install ScaleLLM with CUDA 12.1 and PyTorch 2.3
+ # Install ScaleLLM with CUDA 12.4 and PyTorch 2.4.0
$ pip install scalellm
Install other versions
@@ -21,10 +21,26 @@ If you want to install ScaleLLM with different versions of CUDA and PyTorch, you

.. tabs::

+ .. tab:: CUDA 12.4

+ .. tabs::

+ .. tab:: PyTorch 2.4.0

+ .. code-block:: bash
+ $ pip install scalellm -i https://whl.vectorch.com/cu124/torch2.4.0/
.. tab:: CUDA 12.1

.. tabs::

+ .. tab:: PyTorch 2.4.0

+ .. code-block:: bash
+ $ pip install scalellm -i https://whl.vectorch.com/cu121/torch2.4.0/
.. tab:: PyTorch 2.3.1

.. code-block:: bash
@@ -37,15 +53,15 @@ If you want to install ScaleLLM with different versions of CUDA and PyTorch, you
$ pip install scalellm -i https://whl.vectorch.com/cu121/torch2.2.2/
- .. tab:: PyTorch 2.1.2
+ .. tab:: CUDA 11.8

- .. code-block:: bash
+ .. tabs::

- $ pip install scalellm -i https://whl.vectorch.com/cu121/torch2.1.2/
+ .. tab:: PyTorch 2.4.0

- .. tab:: CUDA 11.8
+ .. code-block:: bash
- .. tabs::
+ $ pip install scalellm -i https://whl.vectorch.com/cu118/torch2.4.0/
.. tab:: PyTorch 2.3.1

@@ -59,12 +75,6 @@ If you want to install ScaleLLM with different versions of CUDA and PyTorch, you
$ pip install scalellm -i https://whl.vectorch.com/cu118/torch2.2.2/
- .. tab:: PyTorch 2.1.2

- .. code-block:: bash
- $ pip install scalellm -i https://whl.vectorch.com/cu118/torch2.1.2/
Build from source
~~~~~~~~~~~~~~~~~
11 changes: 10 additions & 1 deletion setup.py
@@ -246,13 +246,22 @@ def build_extension(self, ext: CMakeExtension):
},
classifiers=[
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Programming Language :: C++",
"Programming Language :: CUDA",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3.12",
"Environment :: GPU :: NVIDIA CUDA",
"Operating System :: POSIX",
"License :: OSI Approved :: Apache Software License",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",

],
packages=["scalellm", "scalellm/serve", "scalellm/_C", "examples"],
ext_modules=[CMakeExtension("_C", "scalellm/")],
