Skip to content

Commit

Permalink
Updating cudnn from 8 to 9 on exsiting cuda 12 docker image (#20925)
Browse files Browse the repository at this point in the history
### Description
Adding support of cudnn 9

### Motivation and Context
Keep exsiting  cuda 12.2 with nvidia dirver 535
  • Loading branch information
jchen351 authored and yf711 committed Jun 18, 2024
1 parent bdcb4c5 commit aeb278a
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 256 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def run(self):
"libcudart.so.11.0",
"libcudart.so.12",
"libcudnn.so.8",
"libcudnn.so.9",
"libcufft.so.10",
"libcufft.so.11",
"libcurand.so.10",
Expand Down
65 changes: 10 additions & 55 deletions tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,12 @@ parameters:
type: string
default: '0'

resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

variables:
- name: docker_base_image
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20240531.1
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20240610.1

- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
Expand All @@ -68,9 +60,9 @@ variables:

- name: Repository
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: 'onnxruntimecuda11build'
value: 'onnxruntimecuda11manylinuxbuild'
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: 'onnxruntimecuda12build'
value: 'onnxruntimecuda12manylinuxbuild'

stages:
- stage: Linux_Build
Expand All @@ -97,12 +89,7 @@ stages:
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "
--network=host
--build-arg BASEIMAGE=$(docker_base_image)
--build-arg TRT_VERSION=$(linux_trt_version)
--build-arg BUILD_UID=$( id -u )
"
DockerBuildArgs: "--build-arg BASEIMAGE=$(docker_base_image) --build-arg BUILD_UID=$( id -u )"
Repository: $(Repository)

- task: Cache@2
Expand All @@ -123,7 +110,7 @@ stages:
- script: |
set -e -x
mkdir -p $HOME/.onnx
docker run -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
docker run --rm \
--volume /data/onnx:/data/onnx:ro \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
Expand All @@ -133,39 +120,12 @@ stages:
-e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
-e NIGHTLY_BUILD \
-e BUILD_BUILDNUMBER \
-e CCACHE_DIR=/cache \
$(Repository) \
/bin/bash -c "
set -ex; \
env; \
ccache -s; \
/opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator Ninja \
--config Release --update --build \
--skip_submodule_sync \
--build_shared_lib \
--parallel --use_binskim_compliant_compile_flags \
--build_wheel \
--enable_onnx_tests --use_cuda --cuda_version=${{parameters.CudaVersion}} --cuda_home=/usr/local/cuda-${{parameters.CudaVersion}} --cudnn_home=/usr/local/cuda-${{parameters.CudaVersion}} \
--enable_cuda_profiling --enable_cuda_nhwc_ops \
--enable_pybind --build_java \
--use_cache \
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75 \
--cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=ON \
--cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON; \
ccache -sv; \
ccache -z"
-e CCACHE_DIR=/cache -w /onnxruntime_src \
$(Repository) tools/ci_build/github/linux/build_cuda_ci.sh
workingDirectory: $(Build.SourcesDirectory)
displayName: Build Onnxruntime
- task: CmdLine@2
inputs:
script: |
rm -rf $(Build.BinariesDirectory)/Release/onnxruntime $(Build.BinariesDirectory)/Release/pybind11
rm -f $(Build.BinariesDirectory)/Release/models
find $(Build.BinariesDirectory)/Release/_deps -mindepth 1 ! -regex '^$(Build.BinariesDirectory)/Release/_deps/onnx-src\(/.*\)?' -delete
cd $(Build.BinariesDirectory)/Release
find -executable -type f > $(Build.BinariesDirectory)/Release/perms.txt
- script: $(Build.SourcesDirectory)/tools/ci_build/github/linux/delete_unused_files_before_upload.sh

- task: PublishPipelineArtifact@0
displayName: 'Publish Pipeline Artifact'
Expand Down Expand Up @@ -203,12 +163,7 @@ stages:
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
Context: tools/ci_build/github/linux/docker
DockerBuildArgs: "
--network=host
--build-arg BASEIMAGE=$(docker_base_image)
--build-arg TRT_VERSION=$(linux_trt_version)
--build-arg BUILD_UID=$( id -u )
"
DockerBuildArgs: "--build-arg BASEIMAGE=$(docker_base_image) --build-arg BUILD_UID=$( id -u )"
Repository: $(Repository)

- task: CmdLine@2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,19 @@ parameters:
values:
- 11.8
- 12.2
- 12.4
resources:
repositories:
- repository: manylinux
type: Github
endpoint: Microsoft
name: pypa/manylinux
ref: 5eda9aded5462201e6310105728d33016e637ea7

variables:
- name: docker_base_image
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20240531.1
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
${{ if eq(parameters.CudaVersion, '12.4') }}:
value: nvidia/cuda:12.4.1-cudnn-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20240610.1
- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: 10.0.1.6-1.cuda11.8
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: 10.0.1.6-1.cuda12.4
${{ if eq(parameters.CudaVersion, '12.4') }}:
value: 10.0.1.6-1.cuda12.4

jobs:
- job: Linux_Build
timeoutInMinutes: 180
Expand Down Expand Up @@ -99,7 +89,7 @@ jobs:
- task: CmdLine@2
inputs:
script: |
docker run --gpus all -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" --rm \
docker run --gpus all --rm \
--volume /data/onnx:/data/onnx:ro \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
Expand All @@ -109,26 +99,8 @@ jobs:
-e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
-e NIGHTLY_BUILD \
-e BUILD_BUILDNUMBER \
-e CCACHE_DIR=/cache \
onnxruntimetensorrt86gpubuild \
/bin/bash -c "
set -ex; \
ccache -s; \
/opt/python/cp38-cp38/bin/python3 /onnxruntime_src/tools/ci_build/build.py \
--build_dir /build --cmake_generator Ninja \
--config Release \
--skip_submodule_sync \
--build_shared_lib \
--parallel --use_binskim_compliant_compile_flags \
--build_wheel \
--enable_onnx_tests \
--use_cuda --cuda_home=/usr/local/cuda-${{ parameters.CudaVersion }} --cudnn_home=/usr/lib64/ \
--enable_pybind --build_java \
--use_tensorrt --tensorrt_home /usr \
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75 \
--use_cache; \
ccache -sv; \
ccache -z"
-e CCACHE_DIR=/cache -w /onnxruntime_src \
onnxruntimetensorrt86gpubuild tools/ci_build/github/linux/build_tensorrt_ci.sh
workingDirectory: $(Build.SourcesDirectory)

- template: templates/explicitly-defined-final-tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ stages:
Context: tools/ci_build/github/linux/docker/
${{ if eq(parameters.CudaVersion, '12.2') }}:
DockerBuildArgs: "
--build-arg BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubuntu20.04
--build-arg BASEIMAGE=nvidia/cuda:12.2.2-devel-ubuntu20.04
--build-arg TRT_VERSION=10.0.1.6-1+cuda12.4
--build-arg BUILD_UID=$( id -u )
"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,9 @@ stages:
value: false
- name: docker_base_image
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20240531.1
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20240610.1
timeoutInMinutes: 60

steps:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20240610.1
- name: linux_trt_version
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: 10.0.1.6-1.cuda11.8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ stages:
cmake_build_type: ${{ parameters.cmake_build_type }}
cuda_version: ${{ parameters.cuda_version }}
${{ if eq(parameters.cuda_version, '11.8') }}:
docker_base_image: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda11_x64_almalinux8_gcc11:20240531.1
trt_version: 10.0.1.6-1.cuda11.8
${{ if eq(parameters.cuda_version, '12.2') }}:
docker_base_image: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
docker_base_image: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20240610.1
trt_version: 10.0.1.6-1.cuda12.4
Original file line number Diff line number Diff line change
@@ -1,162 +1,4 @@
ARG BASEIMAGE=nvidia/cuda:12.2.2-cudnn8-devel-ubi8
ARG POLICY=manylinux2014
ARG PLATFORM=x86_64
ARG DEVTOOLSET_ROOTPATH=
ARG LD_LIBRARY_PATH_ARG=
ARG PREPEND_PATH=

#We need both CUDA and manylinux. But the CUDA Toolkit End User License Agreement says NVIDIA CUDA Driver Libraries(libcuda.so, libnvidia-ptxjitcompiler.so) are only distributable in applications that meet this criteria:
#1. The application was developed starting from a NVIDIA CUDA container obtained from Docker Hub or the NVIDIA GPU Cloud, and
#2. The resulting application is packaged as a Docker container and distributed to users on Docker Hub or the NVIDIA GPU Cloud only.
#So we use CUDA as the base image then add manylinux on top of it.

#Build manylinux2014 docker image begin
FROM $BASEIMAGE AS runtime_base
ARG POLICY
ARG PLATFORM
ARG DEVTOOLSET_ROOTPATH
ARG LD_LIBRARY_PATH_ARG
ARG PREPEND_PATH
LABEL maintainer="The ManyLinux project"

ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM}
ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8
ENV DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG}
ENV PATH=${PREPEND_PATH}${PATH}
ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig

# first copy the fixup mirrors script, keep the script around
COPY build_scripts/fixup-mirrors.sh /usr/local/sbin/fixup-mirrors

# setup entrypoint, this will wrap commands with `linux32` with i686 images
COPY build_scripts/install-entrypoint.sh \
build_scripts/build_utils.sh \
/build_scripts/

RUN /build_scripts/install-entrypoint.sh && rm -rf /build_scripts
COPY manylinux-entrypoint /usr/local/bin/manylinux-entrypoint
ENTRYPOINT ["manylinux-entrypoint"]

COPY build_scripts/install-runtime-packages.sh \
build_scripts/build_utils.sh \
/build_scripts/
RUN manylinux-entrypoint /build_scripts/install-runtime-packages.sh && rm -rf /build_scripts/

COPY build_scripts/build_utils.sh /build_scripts/

COPY build_scripts/install-autoconf.sh /build_scripts/
RUN export AUTOCONF_ROOT=autoconf-2.71 && \
export AUTOCONF_HASH=431075ad0bf529ef13cb41e9042c542381103e80015686222b8a9d4abef42a1c && \
export AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf && \
manylinux-entrypoint /build_scripts/install-autoconf.sh

COPY build_scripts/install-automake.sh /build_scripts/
RUN export AUTOMAKE_ROOT=automake-1.16.5 && \
export AUTOMAKE_HASH=07bd24ad08a64bc17250ce09ec56e921d6343903943e99ccf63bbf0705e34605 && \
export AUTOMAKE_DOWNLOAD_URL=http://ftp.gnu.org/gnu/automake && \
manylinux-entrypoint /build_scripts/install-automake.sh

COPY build_scripts/install-libtool.sh /build_scripts/
RUN export LIBTOOL_ROOT=libtool-2.4.7 && \
export LIBTOOL_HASH=04e96c2404ea70c590c546eba4202a4e12722c640016c12b9b2f1ce3d481e9a8 && \
export LIBTOOL_DOWNLOAD_URL=http://ftp.gnu.org/gnu/libtool && \
manylinux-entrypoint /build_scripts/install-libtool.sh

COPY build_scripts/install-libxcrypt.sh /build_scripts/
RUN export LIBXCRYPT_VERSION=4.4.28 && \
export LIBXCRYPT_HASH=db7e37901969cb1d1e8020cb73a991ef81e48e31ea5b76a101862c806426b457 && \
export LIBXCRYPT_DOWNLOAD_URL=https://github.com/besser82/libxcrypt/archive && \
export PERL_ROOT=perl-5.34.0 && \
export PERL_HASH=551efc818b968b05216024fb0b727ef2ad4c100f8cb6b43fab615fa78ae5be9a && \
export PERL_DOWNLOAD_URL=https://www.cpan.org/src/5.0 && \
manylinux-entrypoint /build_scripts/install-libxcrypt.sh

FROM runtime_base AS build_base
COPY build_scripts/install-build-packages.sh /build_scripts/
RUN manylinux-entrypoint /build_scripts/install-build-packages.sh


FROM build_base AS build_git
COPY build_scripts/build-git.sh /build_scripts/
RUN export GIT_ROOT=git-2.36.2 && \
export GIT_HASH=6dc2cdea5fb23d823ba4871cc23222c1db31dfbb6d6c6ff74c4128700df57c68 && \
export GIT_DOWNLOAD_URL=https://www.kernel.org/pub/software/scm/git && \
manylinux-entrypoint /build_scripts/build-git.sh


FROM build_base AS build_cpython
COPY build_scripts/build-sqlite3.sh /build_scripts/
RUN export SQLITE_AUTOCONF_ROOT=sqlite-autoconf-3390200 && \
export SQLITE_AUTOCONF_HASH=852be8a6183a17ba47cee0bbff7400b7aa5affd283bf3beefc34fcd088a239de && \
export SQLITE_AUTOCONF_DOWNLOAD_URL=https://www.sqlite.org/2022 && \
manylinux-entrypoint /build_scripts/build-sqlite3.sh

COPY build_scripts/build-openssl.sh /build_scripts/
RUN export OPENSSL_ROOT=openssl-1.1.1q && \
export OPENSSL_HASH=d7939ce614029cdff0b6c20f0e2e5703158a489a72b2507b8bd51bf8c8fd10ca && \
export OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source && \
manylinux-entrypoint /build_scripts/build-openssl.sh

COPY build_scripts/build-cpython.sh /build_scripts/


FROM build_cpython AS build_cpython38
COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.8.13


FROM build_cpython AS build_cpython39
COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.9.13


FROM build_cpython AS build_cpython310
COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.5

FROM build_cpython AS build_cpython311
COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.11.2

FROM build_cpython AS build_cpython312
COPY build_scripts/cpython-pubkey-312-313.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.12.1

FROM build_cpython AS all_python
COPY build_scripts/install-pypy.sh \
build_scripts/pypy.sha256 \
build_scripts/finalize-python.sh \
/build_scripts/
RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.8 7.3.9
RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.9 7.3.9
COPY --from=build_cpython38 /opt/_internal /opt/_internal/
COPY --from=build_cpython39 /opt/_internal /opt/_internal/
COPY --from=build_cpython310 /opt/_internal /opt/_internal/
COPY --from=build_cpython311 /opt/_internal /opt/_internal/
COPY --from=build_cpython312 /opt/_internal /opt/_internal/
RUN manylinux-entrypoint /build_scripts/finalize-python.sh


FROM runtime_base
COPY --from=build_git /manylinux-rootfs /
COPY --from=build_cpython /manylinux-rootfs /
COPY --from=all_python /opt/_internal /opt/_internal/
COPY build_scripts/finalize.sh \
build_scripts/python-tag-abi-tag.py \
build_scripts/requirements3.8.txt \
build_scripts/requirements3.9.txt \
build_scripts/requirements3.10.txt \
build_scripts/requirements3.11.txt \
build_scripts/requirements3.12.txt \
build_scripts/requirements-base-tools.txt \
/build_scripts/
COPY build_scripts/requirements-tools/* /build_scripts/requirements-tools/
RUN manylinux-entrypoint /build_scripts/finalize.sh && rm -rf /build_scripts

ENV SSL_CERT_FILE=/opt/_internal/certs.pem

CMD ["/bin/bash"]
FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_ubi8_gcc12:20240610.1

#Build manylinux2014 docker image end
ARG PYTHON_VERSION=3.9
Expand Down
Loading

0 comments on commit aeb278a

Please sign in to comment.