From 06997ce4320d656cd133a509c36f6d1a5ade4d07 Mon Sep 17 00:00:00 2001 From: Frank Liu Date: Mon, 18 Apr 2022 23:52:53 -0700 Subject: [PATCH] [pytorch] Upgrade to pytorch 1.11.0 (#1583) * [pytorch] Upgrade PyTorch to 1.11.0 Change-Id: I9ee7d61a1c3fa68df50da4cf9dc8f54f125b6b43 * Building Pytorch for precxx11 aarch64 using v1.11.0 * Fix build.sh typo * Adding support for precxx11/cxx11 aarch64 builds * Adding support for precxx11/cxx11 aarch64 build in github action * [pytorch] Upgrade PyTorch to 1.11.0 Change-Id: I9ee7d61a1c3fa68df50da4cf9dc8f54f125b6b43 Co-authored-by: Harris Dizdarevic --- .github/workflows/native_jni_s3_pytorch.yml | 33 +- .github/workflows/nightly_publish.yml | 4 +- .../djl/pytorch/integration/MkldnnTest.java | 5 + engines/pytorch/pytorch-jni/build.gradle | 12 +- engines/pytorch/pytorch-native/CMakeLists.txt | 4 + engines/pytorch/pytorch-native/build.cmd | 14 +- engines/pytorch/pytorch-native/build.gradle | 18 +- engines/pytorch/pytorch-native/build.sh | 9 +- ...i_djl_pytorch_jni_PyTorchLibrary_system.cc | 28 +- .../src/main/native/djl_pytorch_utils.h | 10 + .../pytorch-native/src/main/patch/cuda.cmake | 511 ++++++++++++++++++ gradle.properties | 2 +- integration/build.gradle | 1 + 13 files changed, 609 insertions(+), 42 deletions(-) create mode 100644 engines/pytorch/pytorch-native/src/main/patch/cuda.cmake diff --git a/.github/workflows/native_jni_s3_pytorch.yml b/.github/workflows/native_jni_s3_pytorch.yml index 9e6b28a5f78..2d306d71759 100644 --- a/.github/workflows/native_jni_s3_pytorch.yml +++ b/.github/workflows/native_jni_s3_pytorch.yml @@ -66,13 +66,14 @@ jobs: - name: Install Environment run: | apt-get update - DEBIAN_FRONTEND=noninteractive apt-get install -y locales cmake curl unzip software-properties-common gcc-aarch64-linux-gnu g++-aarch64-linux-gnu + DEBIAN_FRONTEND=noninteractive apt-get install -y locales curl unzip software-properties-common add-apt-repository -y ppa:deadsnakes/ppa apt-get update apt-get install -y python3 
python3-distutils curl -O https://bootstrap.pypa.io/pip/3.6/get-pip.py python3 get-pip.py - pip3 install awscli --upgrade + pip3 install awscli cmake + ln -s /usr/local/bin/cmake /usr/bin/cmake - name: Release JNI prep run: | PYTORCH_VERSION=${{ github.event.inputs.pt_version }} @@ -84,8 +85,6 @@ jobs: ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pcu10 -Ppt_version=$PYTORCH_VERSION ./gradlew :engines:pytorch:pytorch-native:cleanJNI ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pcu11 -Ppt_version=$PYTORCH_VERSION - ./gradlew :engines:pytorch:pytorch-native:cleanJNI - CXX=aarch64-linux-gnu-gcc ./gradlew :engines:pytorch:pytorch-native:compileJNI -Paarch64 -Ppt_version=$PYTORCH_VERSION - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v1 with: @@ -161,33 +160,27 @@ jobs: key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} restore-keys: | ${{ runner.os }}-gradle- - - name: Install CUDA 10.2 + - name: Install CUDA 11.3 shell: cmd run: | - curl.exe -L http://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_441.22_windows.exe -o cuda102.exe - curl.exe -L https://developer.download.nvidia.com/compute/redist/cudnn/v7.6.4/cudnn-10.1-windows7-x64-v7.6.4.38.zip -o cudnn.zip - cuda102.exe -s + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64 + curl.exe -L https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe -o cuda.exe + curl.exe -L https://developer.download.nvidia.com/compute/redist/cudnn/v8.2.1/cudnn-11.3-windows-x64-v8.2.1.32.zip -o cudnn.zip + cuda.exe -s mkdir cuda unzip.exe cudnn.zip - cp.exe -a cuda/include cuda/lib cuda/bin "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2/" - rm.exe -Rf cuda102.exe cuda.exe cudnn.zip cuda + cp.exe -a cuda/include cuda/lib cuda/bin "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.3/" + rm.exe -Rf cuda.exe cuda.exe cudnn.zip cuda - 
name: Release CPU JNI shell: cmd run: | - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64 + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64 gradlew :engines:pytorch:pytorch-native:compileJNI -Ppt_version=${{ github.event.inputs.pt_version }} - - name: Release cuda10 JNI - shell: cmd - run: | - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64 - set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v10.2" - set "PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%" - gradlew :engines:pytorch:pytorch-native:cleanJNI :engines:pytorch:pytorch-native:compileJNI -Pcu10 -Ppt_version=${{ github.event.inputs.pt_version }} - name: Release cuda11 JNI shell: cmd run: | - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64 - set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v10.2" + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64 + set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v11.3" set "PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%" gradlew :engines:pytorch:pytorch-native:cleanJNI :engines:pytorch:pytorch-native:compileJNI -Pcu11 -Ppt_version=${{ github.event.inputs.pt_version }} - name: Configure AWS Credentials diff --git a/.github/workflows/nightly_publish.yml b/.github/workflows/nightly_publish.yml index c5c4f3ce0e7..a27a5aa1381 100644 --- a/.github/workflows/nightly_publish.yml +++ b/.github/workflows/nightly_publish.yml @@ -108,8 +108,8 @@ jobs: - name: Publish to snapshot repository if: ${{ github.event.inputs.mode == '' || github.event.inputs.mode == 'snapshot' }} run: | - ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.8.1 -Psnapshot ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.9.1 -Psnapshot + ./gradlew 
clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.10.0 -Psnapshot ./gradlew clean publish -Psnapshot cd bom ./gradlew publish -Psnapshot @@ -121,8 +121,8 @@ jobs: - name: Publish to staging repository if: ${{ github.event.inputs.mode == 'staging' }} run: | - ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.8.1 -P${{ github.event.inputs.mode }} ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.9.1 -P${{ github.event.inputs.mode }} + ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.10.0 -P${{ github.event.inputs.mode }} ./gradlew clean publish -P${{ github.event.inputs.mode }} cd bom ./gradlew publish -P${{ github.event.inputs.mode }} diff --git a/engines/pytorch/pytorch-engine/src/test/java/ai/djl/pytorch/integration/MkldnnTest.java b/engines/pytorch/pytorch-engine/src/test/java/ai/djl/pytorch/integration/MkldnnTest.java index 7d86b3ae150..8ca92ee264a 100644 --- a/engines/pytorch/pytorch-engine/src/test/java/ai/djl/pytorch/integration/MkldnnTest.java +++ b/engines/pytorch/pytorch-engine/src/test/java/ai/djl/pytorch/integration/MkldnnTest.java @@ -17,6 +17,7 @@ import ai.djl.ndarray.NDManager; import ai.djl.ndarray.types.Shape; import java.util.Arrays; +import org.testng.SkipException; import org.testng.annotations.Test; /** The file is for testing PyTorch MKLDNN functionalities. 
*/ @@ -24,6 +25,10 @@ public class MkldnnTest { @Test public void testMkldnn() { + if (!"amd64".equals(System.getProperty("os.arch"))) { + throw new SkipException("MKLDNN Test requires x86_64 arch."); + } + System.setProperty("ai.djl.pytorch.use_mkldnn", "true"); try (NDManager manager = NDManager.newBaseManager()) { NDArray[] arrays = { diff --git a/engines/pytorch/pytorch-jni/build.gradle b/engines/pytorch/pytorch-jni/build.gradle index 2d9380590fe..1ea184810ec 100644 --- a/engines/pytorch/pytorch-jni/build.gradle +++ b/engines/pytorch/pytorch-jni/build.gradle @@ -23,17 +23,23 @@ processResources { "linux-x86_64/cu102/libdjl_torch.so", "linux-x86_64/cpu-precxx11/libdjl_torch.so", "osx-x86_64/cpu/libdjl_torch.dylib", - "win-x86_64/cpu/djl_torch.dll", - "win-x86_64/cu102/djl_torch.dll" + "win-x86_64/cpu/djl_torch.dll" ] - if (ptVersion.startsWith("1.10.")) { + if (ptVersion.startsWith("1.11.")) { + files.add("linux-aarch64/cpu/libdjl_torch.so") files.add("linux-x86_64/cu113/libdjl_torch.so") files.add("linux-x86_64/cu113-precxx11/libdjl_torch.so") files.add("win-x86_64/cu113/djl_torch.dll") + } else if (ptVersion.startsWith("1.10.")) { + files.add("linux-x86_64/cu113/libdjl_torch.so") + files.add("linux-x86_64/cu113-precxx11/libdjl_torch.so") + files.add("win-x86_64/cu113/djl_torch.dll") + files.add("win-x86_64/cu102/djl_torch.dll") } else { files.add("linux-x86_64/cu111/libdjl_torch.so") files.add("linux-x86_64/cu111-precxx11/libdjl_torch.so") files.add("win-x86_64/cu111/djl_torch.dll") + files.add("win-x86_64/cu102/djl_torch.dll") } String jnilibDir = "${project.projectDir}/jnilib/${djl_version}" files.each { entry -> diff --git a/engines/pytorch/pytorch-native/CMakeLists.txt b/engines/pytorch/pytorch-native/CMakeLists.txt index a321a54558e..0db26531233 100644 --- a/engines/pytorch/pytorch-native/CMakeLists.txt +++ b/engines/pytorch/pytorch-native/CMakeLists.txt @@ -48,6 +48,10 @@ set(SOURCE_FILES "src/main/native/ai_djl_pytorch_jni_cache.h" 
"src/main/native/ai_djl_pytorch_jni_cache.cc") +if(PT_OLD_VERSION) + add_compile_definitions(V1_10_X) +endif() + add_library(djl_torch SHARED ${SOURCE_FILES}) # build host if(NOT BUILD_ANDROID) diff --git a/engines/pytorch/pytorch-native/build.cmd b/engines/pytorch/pytorch-native/build.cmd index ee22e8d6a6b..90d664e928e 100644 --- a/engines/pytorch/pytorch-native/build.cmd +++ b/engines/pytorch/pytorch-native/build.cmd @@ -5,7 +5,7 @@ @rem choco install jdk8 -y set FILEPATH="libtorch" -set VERSION="%1" +set VERSION=%1 if "%2" == "cpu" ( set DOWNLOAD_URL="https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-%VERSION%%%2Bcpu.zip" ) else if "%2" == "cu102" ( @@ -28,9 +28,19 @@ if exist %FILEPATH% ( echo Finished downloading libtorch ) +if "%VERSION%" == "1.11.0" ( + copy /y src\main\patch\cuda.cmake libtorch\share\cmake\Caffe2\public\ +) +if "%VERSION%" == "1.10.0" ( + set PT_OLD_VERSION=1 +) +if "%VERSION%" == "1.9.1" ( + set PT_OLD_VERSION=1 +) + if exist build rd /q /s build md build\classes cd build javac -sourcepath ..\..\pytorch-engine\src\main\java\ ..\..\pytorch-engine\src\main\java\ai\djl\pytorch\jni\PyTorchLibrary.java -h include -d classes -cmake -DCMAKE_PREFIX_PATH=libtorch .. +cmake -DCMAKE_PREFIX_PATH=libtorch -DPT_OLD_VERSION=%PT_OLD_VERSION% .. cmake --build . 
--config Release diff --git a/engines/pytorch/pytorch-native/build.gradle b/engines/pytorch/pytorch-native/build.gradle index 73fbed5f068..f1fdd975e55 100644 --- a/engines/pytorch/pytorch-native/build.gradle +++ b/engines/pytorch/pytorch-native/build.gradle @@ -12,7 +12,7 @@ if (project.hasProperty("pt_version") && project.property("pt_version") != "") { } boolean isRelease = project.hasProperty("release") || project.hasProperty("staging") boolean isPrecxx11 = project.hasProperty("precxx11") -boolean isAarch64 = project.hasProperty("aarch64") +boolean isAarch64 = project.hasProperty("aarch64") || System.properties["os.arch"] == "aarch64" String FLAVOR = "cpu" if (project.hasProperty("cu10")) { @@ -91,14 +91,13 @@ def prepareNativeLib(String binaryRoot, String ver) { "cpu/libtorch-win-shared-with-deps-${ver}%2Bcpu.zip" : "cpu/win-x86_64", "cu102/libtorch-cxx11-abi-shared-with-deps-${ver}%2Bcu102.zip" : "cu102/linux-x86_64", "${cu11}/libtorch-cxx11-abi-shared-with-deps-${ver}%2B${cu11}.zip": "${cu11}/linux-x86_64", - "cu102/libtorch-win-shared-with-deps-${ver}%2Bcu102.zip" : "cu102/win-x86_64", "${cu11}/libtorch-win-shared-with-deps-${ver}%2B${cu11}.zip" : "${cu11}/win-x86_64", "cpu/libtorch-shared-with-deps-${ver}%2Bcpu.zip" : "cpu-precxx11/linux-x86_64", "${cu11}/libtorch-shared-with-deps-${ver}%2B${cu11}.zip" : "${cu11}-precxx11/linux-x86_64" ] def aarch64Files = [ - "${ver}/libtorch-cxx11-shared-with-deps-${ver}-aarch64.zip" : "cpu/linux-aarch64" + "${ver}/libtorch-cxx11-shared-with-deps-${ver}-aarch64.zip": "cpu/linux-aarch64" ] copyNativeLibToOutputDir(files, binaryRoot, officialPytorchUrl) @@ -130,7 +129,7 @@ def copyNativeLibToOutputDir(Map fileStoreMap, String binaryRoot } into("${outputDir}/native/lib") } - if (entry.value.contains("-precxx11")) { + if (entry.value.contains("-precxx11") && !entry.value.contains("aarch64")) { def libstd = new File("${outputDir}/native/lib/libstdc++.so.6") new 
URL("https://publish.djl.ai/extra/libstdc%2B%2B.so.6").withInputStream { i -> libstd.withOutputStream { it << i } @@ -253,11 +252,10 @@ task uploadS3 { "${BINARY_ROOT}/cpu/win-x86_64/native/lib/", "${BINARY_ROOT}/cpu-precxx11/linux-x86_64/native/lib/", "${BINARY_ROOT}/cu102/linux/native/lib/", - "${BINARY_ROOT}/cu102/win-x86_64/native/lib/", "${BINARY_ROOT}/cu113/linux-x86_64/native/lib/", "${BINARY_ROOT}/cu113/win-x86_64/native/lib/", "${BINARY_ROOT}/cu113-precxx11/linux-x86_64/native/lib/", - "${BINARY_ROOT}/cpu/linux-aarch64/native/lib" + "${BINARY_ROOT}/cpu/linux-aarch64/native/lib/" ] uploadDirs.each { item -> fileTree(item).files.name.each { @@ -315,11 +313,11 @@ flavorNames.each { flavor -> libstd.text = new URL("https://publish.djl.ai/extra/THIRD-PARTY-LICENSES_qHnMKgbdWa.txt").text } } - from ("${BINARY_ROOT}/${flavor}/${osName}/native/lib") { - into ("pytorch/${flavor}/${osName}") + from("${BINARY_ROOT}/${flavor}/${osName}/native/lib") { + into("pytorch/${flavor}/${osName}") } - from ("${BINARY_ROOT}/pytorch.properties") { - into ("native/lib") + from("${BINARY_ROOT}/pytorch.properties") { + into("native/lib") } from "src/main/resources" archiveClassifier = "${osName}" diff --git a/engines/pytorch/pytorch-native/build.sh b/engines/pytorch/pytorch-native/build.sh index af2cfe4e742..419e9ad5380 100755 --- a/engines/pytorch/pytorch-native/build.sh +++ b/engines/pytorch/pytorch-native/build.sh @@ -13,9 +13,11 @@ fi PLATFORM=$(uname | tr '[:upper:]' '[:lower:]') VERSION=$1 FLAVOR=$2 +AARCH64_CXX11ABI="-cxx11" CXX11ABI="-cxx11-abi" if [[ $3 == "precxx11" ]]; then CXX11ABI="" + AARCH64_CXX11ABI="" fi ARCH=$4 @@ -27,7 +29,7 @@ if [[ ! 
-d "libtorch" ]]; then fi if [[ $ARCH == 'aarch64' ]]; then - curl -s https://djl-ai.s3.amazonaws.com/publish/pytorch/${VERSION}/libtorch-cxx11-shared-with-deps-${VERSION}-aarch64.zip | jar xv + curl -s https://djl-ai.s3.amazonaws.com/publish/pytorch/${VERSION}/libtorch${AARCH64_CXX11ABI}-shared-with-deps-${VERSION}-aarch64.zip | jar xv else curl -s https://download.pytorch.org/libtorch/${FLAVOR}/libtorch${CXX11ABI}-shared-with-deps-${VERSION}%2B${FLAVOR}.zip | jar xv fi @@ -40,13 +42,16 @@ if [[ ! -d "libtorch" ]]; then fi fi +if [[ "$VERSION" =~ ^1\.10\..*|^1\.9\..* ]]; then + PT_OLD_VERSION=1 +fi pushd . rm -rf build mkdir build && cd build mkdir classes javac -sourcepath ../../pytorch-engine/src/main/java/ ../../pytorch-engine/src/main/java/ai/djl/pytorch/jni/PyTorchLibrary.java -h include -d classes -cmake -DCMAKE_PREFIX_PATH=libtorch .. +cmake -DCMAKE_PREFIX_PATH=libtorch -DPT_OLD_VERSION=${PT_OLD_VERSION} .. cmake --build . --config Release -- -j "${NUM_PROC}" if [[ $PLATFORM == 'darwin' ]]; then diff --git a/engines/pytorch/pytorch-native/src/main/native/ai_djl_pytorch_jni_PyTorchLibrary_system.cc b/engines/pytorch/pytorch-native/src/main/native/ai_djl_pytorch_jni_PyTorchLibrary_system.cc index 6b0384a6abd..736eea1fcac 100644 --- a/engines/pytorch/pytorch-native/src/main/native/ai_djl_pytorch_jni_PyTorchLibrary_system.cc +++ b/engines/pytorch/pytorch-native/src/main/native/ai_djl_pytorch_jni_PyTorchLibrary_system.cc @@ -12,7 +12,12 @@ */ #include // clang-format off -#include +#ifdef V1_10_X + #include +#else + #include +#endif +#include // clang-format on #include @@ -162,7 +167,8 @@ inline std::string FormatMemory(int64_t bytes) { return oss.str(); } -// the code snippet is copied from torch/csrc/autograd/profiler.cpp +// the code snippet is copied from torch/csrc/autograd/profiler_legacy.cpp +#ifdef V1_10_X static torch::jit::CodeTemplate event_template(R"( { "name": "${name}", @@ -175,6 +181,20 @@ static torch::jit::CodeTemplate event_template(R"( "cpu 
mem": "${cpu_mem}", "args": {} })"); +#else +static const at::jit::CodeTemplate event_template(R"( +{ + "name": "${name}", + "ph": "X", + "ts": ${ts}, + "dur": ${dur}, + "tid": ${tid}, + "pid": "CPU Functions", + "shape": ${shape}, + "cpu mem": "${cpu_mem}", + "args": {} +})"); +#endif // The function doesn't support GPU yet // You can refer to @@ -227,7 +247,11 @@ void WriteProfilerEventsToStream(std::ostream& out, const std::vectorsecond; int64_t memory_usage = mem_it->second; +#ifdef V1_10_X torch::jit::TemplateEnv env; +#else + at::jit::TemplateEnv env; +#endif env.s("name", start->name()); env.d("ts", profiler_start->cpuElapsedUs(*start)); env.d("dur", start->cpuElapsedUs(*evt)); diff --git a/engines/pytorch/pytorch-native/src/main/native/djl_pytorch_utils.h b/engines/pytorch/pytorch-native/src/main/native/djl_pytorch_utils.h index 5223e40ddf8..45fd527ffa7 100644 --- a/engines/pytorch/pytorch-native/src/main/native/djl_pytorch_utils.h +++ b/engines/pytorch/pytorch-native/src/main/native/djl_pytorch_utils.h @@ -30,9 +30,15 @@ namespace utils { #if !defined(__ANDROID__) // for image interpolation +#ifdef V1_10_X typedef torch::variant mode_t; +#else +typedef torch::variant + mode_t; +#endif #endif inline jint GetDTypeFromScalarType(const torch::ScalarType& type) { @@ -108,6 +114,10 @@ inline mode_t GetInterpolationMode(jint jmode) { return torch::kTrilinear; case 5: return torch::kArea; + case 6: +#ifndef V1_10_X + return torch::kNearestExact; +#endif default: throw; } diff --git a/engines/pytorch/pytorch-native/src/main/patch/cuda.cmake b/engines/pytorch/pytorch-native/src/main/patch/cuda.cmake new file mode 100644 index 00000000000..88b7158a83b --- /dev/null +++ b/engines/pytorch/pytorch-native/src/main/patch/cuda.cmake @@ -0,0 +1,511 @@ +# copied from libtorch\share\cmake\Caffe2\public\cuda.cmake +# This file is used to workaround pytorch 1.11.0-cu113 build failure on Windows +# ---[ cuda + +# Poor man's include guard +if(TARGET torch::cudart) + return() 
+endif() + +# sccache is only supported in CMake master and not in the newest official +# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable sccache. +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../Modules_CUDA_fix) + +# We don't want to statically link cudart, because we rely on it's dynamic linkage in +# python (follow along torch/cuda/__init__.py and usage of cudaGetErrorName). +# Technically, we can link cudart here statically, and link libtorch_python.so +# to a dynamic libcudart.so, but that's just wasteful. +# However, on Windows, if this one gets switched off, the error "cuda: unknown error" +# will be raised when running the following code: +# >>> import torch +# >>> torch.cuda.is_available() +# >>> torch.cuda.current_device() +# More details can be found in the following links. +# https://github.com/pytorch/pytorch/issues/20635 +# https://github.com/pytorch/pytorch/issues/17108 +if(NOT MSVC) + set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "") +endif() + +# Find CUDA. +find_package(CUDA) +if(NOT CUDA_FOUND) + message(WARNING + "Caffe2: CUDA cannot be found. 
Depending on whether you are building " + "Caffe2 or a Caffe2 dependent library, the next warning / error will " + "give you more info.") + set(CAFFE2_USE_CUDA OFF) + return() +endif() + +# Enable CUDA language support +set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}") +set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) +set(CMAKE_CUDA_STANDARD_REQUIRED ON) + +message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION}) +message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE}) +message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR}) +if(CUDA_VERSION VERSION_LESS 10.2) + message(FATAL_ERROR "PyTorch requires CUDA 10.2 or above.") +endif() + +if(CUDA_FOUND) + # Sometimes, we may mismatch nvcc with the CUDA headers we are + # compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE + # but the PATH is not consistent with CUDA_HOME. It's better safe + # than sorry: make sure everything is consistent. + if(MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio") + # When using Visual Studio, it attempts to lock the whole binary dir when + # `try_run` is called, which will cause the build to fail. 
+ string(RANDOM BUILD_SUFFIX) + set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}/${BUILD_SUFFIX}") + else() + set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}") + endif() + set(file "${PROJECT_BINARY_DIR}/detect_cuda_version.cc") + file(WRITE ${file} "" + "#include \n" + "#include \n" + "int main() {\n" + " printf(\"%d.%d\", CUDA_VERSION / 1000, (CUDA_VERSION / 10) % 100);\n" + " return 0;\n" + "}\n" + ) + if(NOT CMAKE_CROSSCOMPILING) + try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file} + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}" + LINK_LIBRARIES ${CUDA_LIBRARIES} + RUN_OUTPUT_VARIABLE cuda_version_from_header + COMPILE_OUTPUT_VARIABLE output_var + ) + if(NOT compile_result) + message(FATAL_ERROR "Caffe2: Couldn't determine version from header: " ${output_var}) + endif() + message(STATUS "Caffe2: Header version is: " ${cuda_version_from_header}) + if(NOT cuda_version_from_header STREQUAL ${CUDA_VERSION_STRING}) + # Force CUDA to be processed for again next time + # TODO: I'm not sure if this counts as an implementation detail of + # FindCUDA + set(${cuda_version_from_findcuda} ${CUDA_VERSION_STRING}) + unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE) + # Not strictly necessary, but for good luck. + unset(CUDA_VERSION CACHE) + # Error out + message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), " + "but the CUDA headers say the version is ${cuda_version_from_header}. This often occurs " + "when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to " + "non-standard locations, without also setting PATH to point to the correct nvcc. " + "Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH. " + "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.") + endif() + endif() +endif() + +# Find cuDNN. 
+if(USE_STATIC_CUDNN) + set(CUDNN_STATIC ON CACHE BOOL "") +else() + set(CUDNN_STATIC OFF CACHE BOOL "") +endif() + +find_package(CUDNN) + +if(CAFFE2_USE_CUDNN AND NOT CUDNN_FOUND) + message(WARNING + "Caffe2: Cannot find cuDNN library. Turning the option off") + set(CAFFE2_USE_CUDNN OFF) +endif() + +# Optionally, find TensorRT +if(CAFFE2_USE_TENSORRT) + find_path(TENSORRT_INCLUDE_DIR NvInfer.h + HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES include) + find_library(TENSORRT_LIBRARY nvinfer + HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES lib lib64 lib/x64) + find_package_handle_standard_args( + TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIBRARY) + if(TENSORRT_FOUND) + execute_process(COMMAND /bin/sh -c "[ -r \"${TENSORRT_INCLUDE_DIR}/NvInferVersion.h\" ] && awk '/^\#define NV_TENSORRT_MAJOR/ {print $3}' \"${TENSORRT_INCLUDE_DIR}/NvInferVersion.h\"" OUTPUT_VARIABLE TENSORRT_VERSION_MAJOR) + execute_process(COMMAND /bin/sh -c "[ -r \"${TENSORRT_INCLUDE_DIR}/NvInferVersion.h\" ] && awk '/^\#define NV_TENSORRT_MINOR/ {print $3}' \"${TENSORRT_INCLUDE_DIR}/NvInferVersion.h\"" OUTPUT_VARIABLE TENSORRT_VERSION_MINOR) + if(TENSORRT_VERSION_MAJOR) + string(STRIP ${TENSORRT_VERSION_MAJOR} TENSORRT_VERSION_MAJOR) + string(STRIP ${TENSORRT_VERSION_MINOR} TENSORRT_VERSION_MINOR) + set(TENSORRT_VERSION "${TENSORRT_VERSION_MAJOR}.${TENSORRT_VERSION_MINOR}") + #CAFFE2_USE_TRT is set in Dependencies + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTENSORRT_VERSION_MINOR=${TENSORRT_VERSION_MINOR}") + else() + message(WARNING "Caffe2: Cannot find ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h. Assuming TRT 5.0 which is no longer supported. Turning the option off.") + set(CAFFE2_USE_TENSORRT OFF) + endif() + else() + message(WARNING + "Caffe2: Cannot find TensorRT library. 
Turning the option off.") + set(CAFFE2_USE_TENSORRT OFF) + endif() +endif() + +# ---[ Extract versions +if(CAFFE2_USE_CUDNN) + # Get cuDNN version + if(EXISTS ${CUDNN_INCLUDE_PATH}/cudnn_version.h) + file(READ ${CUDNN_INCLUDE_PATH}/cudnn_version.h CUDNN_HEADER_CONTENTS) + else() + file(READ ${CUDNN_INCLUDE_PATH}/cudnn.h CUDNN_HEADER_CONTENTS) + endif() + string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" + CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" + CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}") + string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" + CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" + CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}") + string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" + CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" + CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}") + # Assemble cuDNN version + if(NOT CUDNN_VERSION_MAJOR) + set(CUDNN_VERSION "?") + else() + set(CUDNN_VERSION + "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}") + endif() + message(STATUS "Found cuDNN: v${CUDNN_VERSION} (include: ${CUDNN_INCLUDE_PATH}, library: ${CUDNN_LIBRARY_PATH})") + if(CUDNN_VERSION VERSION_LESS "7.0.0") + message(FATAL_ERROR "PyTorch requires cuDNN 7 and above.") + endif() +endif() + +# ---[ CUDA libraries wrapper + +# find libcuda.so and lbnvrtc.so +# For libcuda.so, we will find it under lib, lib64, and then the +# stubs folder, in case we are building on a system that does not +# have cuda driver installed. On windows, we also search under the +# folder lib/x64. 
+find_library(CUDA_CUDA_LIB cuda + PATHS ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES lib lib64 lib/stubs lib64/stubs lib/x64) +find_library(CUDA_NVRTC_LIB nvrtc + PATHS ${CUDA_TOOLKIT_ROOT_DIR} + PATH_SUFFIXES lib lib64 lib/x64) +if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH) + if("${PYTHON_EXECUTABLE}" STREQUAL "") + set(_python_exe "python") + else() + set(_python_exe "${PYTHON_EXECUTABLE}") + endif() + execute_process( + COMMAND "${_python_exe}" -c + "import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])" + RESULT_VARIABLE _retval + OUTPUT_VARIABLE CUDA_NVRTC_SHORTHASH) + if(NOT _retval EQUAL 0) + message(WARNING "Failed to compute shorthash for libnvrtc.so") + set(CUDA_NVRTC_SHORTHASH "XXXXXXXX") + else() + string(STRIP "${CUDA_NVRTC_SHORTHASH}" CUDA_NVRTC_SHORTHASH) + message(STATUS "${CUDA_NVRTC_LIB} shorthash is ${CUDA_NVRTC_SHORTHASH}") + endif() +endif() + +# Create new style imported libraries. +# Several of these libraries have a hardcoded path if CAFFE2_STATIC_LINK_CUDA +# is set. This path is where sane CUDA installations have their static +# libraries installed. This flag should only be used for binary builds, so +# end-users should never have this flag set. + +# cuda +add_library(caffe2::cuda UNKNOWN IMPORTED) +set_property( + TARGET caffe2::cuda PROPERTY IMPORTED_LOCATION + ${CUDA_CUDA_LIB}) +set_property( + TARGET caffe2::cuda PROPERTY INTERFACE_INCLUDE_DIRECTORIES + ${CUDA_INCLUDE_DIRS}) + +# cudart. CUDA_LIBRARIES is actually a list, so we will make an interface +# library. 
+add_library(torch::cudart INTERFACE IMPORTED) +if(CAFFE2_STATIC_LINK_CUDA) + set_property( + TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES + "${CUDA_cudart_static_LIBRARY}") + if(NOT WIN32) + set_property( + TARGET torch::cudart APPEND PROPERTY INTERFACE_LINK_LIBRARIES + rt dl) + endif() +else() + set_property( + TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES + ${CUDA_LIBRARIES}) +endif() +set_property( + TARGET torch::cudart PROPERTY INTERFACE_INCLUDE_DIRECTORIES + ${CUDA_INCLUDE_DIRS}) + +# nvToolsExt +add_library(torch::nvtoolsext INTERFACE IMPORTED) +if(MSVC) + if(NOT NVTOOLEXT_HOME) + set(NVTOOLEXT_HOME "C:/Program Files/NVIDIA Corporation/NvToolsExt") + endif() + if(DEFINED ENV{NVTOOLSEXT_PATH}) + set(NVTOOLEXT_HOME $ENV{NVTOOLSEXT_PATH}) + file(TO_CMAKE_PATH ${NVTOOLEXT_HOME} NVTOOLEXT_HOME) + endif() + set_target_properties( + torch::nvtoolsext PROPERTIES + INTERFACE_LINK_LIBRARIES ${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib + INTERFACE_INCLUDE_DIRECTORIES ${NVTOOLEXT_HOME}/include) + +elseif(APPLE) + set_property( + TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES + ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvrtc.dylib + ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib) + +else() + find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/) + set_property( + TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES + ${LIBNVTOOLSEXT}) +endif() + +# cublas. CUDA_CUBLAS_LIBRARIES is actually a list, so we will make an +# interface library similar to cudart. 
+add_library(caffe2::cublas INTERFACE IMPORTED) +if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32) + set_property( + TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES + "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a") + set_property( + TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES + "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublasLt_static.a") + # Add explicit dependency to cudart_static to fix + # libcublasLt_static.a.o): undefined reference to symbol 'cudaStreamWaitEvent' + # error adding symbols: DSO missing from command line + set_property( + TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES + "${CUDA_cudart_static_LIBRARY}" rt dl) +else() + set_property( + TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES + ${CUDA_CUBLAS_LIBRARIES}) +endif() +set_property( + TARGET caffe2::cublas PROPERTY INTERFACE_INCLUDE_DIRECTORIES + ${CUDA_INCLUDE_DIRS}) + +# cudnn public and private interfaces +# static linking is handled by USE_STATIC_CUDNN environment variable +# If library is linked dynamically, than private interface is no-op +# If library is linked statically: +# - public interface would only reference headers +# - private interface will contain the actual link instructions +if(CAFFE2_USE_CUDNN) + add_library(caffe2::cudnn-public INTERFACE IMPORTED) + set_property( + TARGET caffe2::cudnn-public PROPERTY INTERFACE_INCLUDE_DIRECTORIES + ${CUDNN_INCLUDE_PATH}) + add_library(caffe2::cudnn-private INTERFACE IMPORTED) + set_property( + TARGET caffe2::cudnn-private PROPERTY INTERFACE_INCLUDE_DIRECTORIES + ${CUDNN_INCLUDE_PATH}) + if(CUDNN_STATIC AND NOT WIN32) + if(USE_WHOLE_CUDNN) + set_property( + TARGET caffe2::cudnn-private PROPERTY INTERFACE_LINK_LIBRARIES + "-Wl,--whole-archive,\"${CUDNN_LIBRARY_PATH}\" -Wl,--no-whole-archive") + else() + set_property( + TARGET caffe2::cudnn-private PROPERTY INTERFACE_LINK_LIBRARIES + ${CUDNN_LIBRARY_PATH}) + endif() + set_property( + TARGET caffe2::cudnn-private APPEND PROPERTY INTERFACE_LINK_LIBRARIES 
+ "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl) + # Add explicit dependency on cublas to cudnn + get_target_property(__tmp caffe2::cublas INTERFACE_LINK_LIBRARIES) + set_property( + TARGET caffe2::cudnn-private APPEND PROPERTY INTERFACE_LINK_LIBRARIES + "${__tmp}") + # Lines below use target_link_libraries because we support cmake 3.5+. + # For cmake 3.13+, target_link_options to set INTERFACE_LINK_OPTIONS would be better. + # https://cmake.org/cmake/help/v3.5/command/target_link_libraries.html warns + # "Item names starting with -, but not -l or -framework, are treated as linker flags. + # Note that such flags will be treated like any other library link item for purposes + # of transitive dependencies, so they are generally safe to specify only as private + # link items that will not propagate to dependents." + # Propagating to a dependent (torch_cuda) is exactly what we want here, so we are + # flouting the warning, but I can't think of a better (3.5+ compatible) way. + target_link_libraries(caffe2::cudnn-private INTERFACE + "-Wl,--exclude-libs,libcudnn_static.a") + else() + set_property( + TARGET caffe2::cudnn-public PROPERTY INTERFACE_LINK_LIBRARIES + ${CUDNN_LIBRARY_PATH}) + endif() +endif() + +# curand +add_library(caffe2::curand UNKNOWN IMPORTED) +if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32) + set_property( + TARGET caffe2::curand PROPERTY IMPORTED_LOCATION + "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand_static.a") + set_property( + TARGET caffe2::curand PROPERTY INTERFACE_LINK_LIBRARIES + "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl) +else() + set_property( + TARGET caffe2::curand PROPERTY IMPORTED_LOCATION + ${CUDA_curand_LIBRARY}) +endif() +set_property( + TARGET caffe2::curand PROPERTY INTERFACE_INCLUDE_DIRECTORIES + ${CUDA_INCLUDE_DIRS}) + +# cufft. CUDA_CUFFT_LIBRARIES is actually a list, so we will make an +# interface library similar to cudart. 
+add_library(caffe2::cufft INTERFACE IMPORTED)
+if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
+  set_property(
+      TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
+      "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcufft_static_nocallback.a"
+      "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl)
+else()
+  set_property(
+      TARGET caffe2::cufft PROPERTY INTERFACE_LINK_LIBRARIES
+      ${CUDA_CUFFT_LIBRARIES})
+endif()
+set_property(
+    TARGET caffe2::cufft PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+    ${CUDA_INCLUDE_DIRS})
+
+# TensorRT
+if(CAFFE2_USE_TENSORRT)
+  add_library(caffe2::tensorrt UNKNOWN IMPORTED)
+  set_property(
+      TARGET caffe2::tensorrt PROPERTY IMPORTED_LOCATION
+      ${TENSORRT_LIBRARY})
+  set_property(
+      TARGET caffe2::tensorrt PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+      ${TENSORRT_INCLUDE_DIR})
+endif()
+
+# nvrtc
+add_library(caffe2::nvrtc UNKNOWN IMPORTED)
+set_property(
+    TARGET caffe2::nvrtc PROPERTY IMPORTED_LOCATION
+    ${CUDA_NVRTC_LIB})
+set_property(
+    TARGET caffe2::nvrtc PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+    ${CUDA_INCLUDE_DIRS})
+
+# Note: in theory, we can add similar dependent library wrappers. For
+# now, Caffe2 only uses the above libraries, so we will only wrap
+# these.
+
+# Special care for windows platform: we know that 32-bit windows does not
+# support cuda.
+if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
+  if(NOT (CMAKE_SIZEOF_VOID_P EQUAL 8))
+    message(FATAL_ERROR
+            "CUDA support not available with 32-bit windows. Did you "
+            "forget to set Win64 in the generator target?")
+    return()
+  endif()
+endif()
+
+# Add onnx namespace definition to nvcc
+if(ONNX_NAMESPACE)
+  list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=${ONNX_NAMESPACE}")
+else()
+  list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=onnx_c2")
+endif()
+
+# Don't activate VC env again for Ninja generators with MSVC on Windows if CUDAHOSTCXX is not defined
+# by adding --use-local-env.
+if(MSVC AND CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DEFINED ENV{CUDAHOSTCXX})
+  list(APPEND CUDA_NVCC_FLAGS "--use-local-env")
+endif()
+
+# setting nvcc arch flags
+torch_cuda_get_nvcc_gencode_flag(NVCC_FLAGS_EXTRA)
+# CMake 3.18 adds integrated support for architecture selection, but we can't rely on it
+set(CMAKE_CUDA_ARCHITECTURES OFF)
+list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
+message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA}")
+
+# disable some nvcc diagnostic that appears in boost, glog, gflags, opencv, etc.
+foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration
+    set_but_not_used field_without_dll_interface
+    base_class_has_different_dll_interface
+    dll_interface_conflict_none_assumed
+    dll_interface_conflict_dllexport_assumed
+    implicit_return_from_non_void_function
+    unsigned_compare_with_zero
+    declared_but_not_referenced
+    bad_friend_decl)
+  list(APPEND SUPPRESS_WARNING_FLAGS --diag_suppress=${diag})
+endforeach()
+string(REPLACE ";" "," SUPPRESS_WARNING_FLAGS "${SUPPRESS_WARNING_FLAGS}")
+list(APPEND CUDA_NVCC_FLAGS -Xcudafe ${SUPPRESS_WARNING_FLAGS})
+
+set(CUDA_PROPAGATE_HOST_FLAGS_BLOCKLIST "-Werror")
+if(MSVC)
+  list(APPEND CUDA_NVCC_FLAGS "--Werror" "cross-execution-space-call")
+  list(APPEND CUDA_NVCC_FLAGS "--no-host-device-move-forward")
+endif()
+
+# OpenMP flags for NVCC with Clang-cl
+if("${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC"
+    AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+  list(APPEND CUDA_PROPAGATE_HOST_FLAGS_BLOCKLIST "-Xclang" "-fopenmp")
+  if(MSVC_TOOLSET_VERSION LESS 142)
+    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-openmp")
+  else()
+    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-openmp:experimental")
+  endif()
+endif()
+
+# Debug and Release symbol support
+if(MSVC)
+  if(${CAFFE2_USE_MSVC_STATIC_RUNTIME})
+    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MTd")
+    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MT")
+    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MT")
+ string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MT") + else() + string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MDd") + string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MD") + string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MD") + string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MD") + endif() + if(CUDA_NVCC_FLAGS MATCHES "Zi") + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-FS") + endif() +elseif(CUDA_DEVICE_DEBUG) + list(APPEND CUDA_NVCC_FLAGS "-g" "-G") # -G enables device code debugging symbols +endif() + +# Set expt-relaxed-constexpr to suppress Eigen warnings +list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr") + +# Set expt-extended-lambda to support lambda on device +list(APPEND CUDA_NVCC_FLAGS "--expt-extended-lambda") + +foreach(FLAG ${CUDA_NVCC_FLAGS}) + string(FIND "${FLAG}" " " flag_space_position) + if(NOT flag_space_position EQUAL -1) + message(FATAL_ERROR "Found spaces in CUDA_NVCC_FLAGS entry '${FLAG}'") + endif() + string(APPEND CMAKE_CUDA_FLAGS " ${FLAG}") +endforeach() diff --git a/gradle.properties b/gradle.properties index a2efedbbe74..23f45258eed 100644 --- a/gradle.properties +++ b/gradle.properties @@ -9,7 +9,7 @@ systemProp.org.gradle.internal.publish.checksums.insecure=true djl_version=0.17.0 mxnet_version=1.9.0 -pytorch_version=1.10.2 +pytorch_version=1.11.0 tensorflow_version=2.7.0 tflite_version=2.6.2 dlr_version=1.6.0 diff --git a/integration/build.gradle b/integration/build.gradle index 984f121b0ec..f4dbdaa3ab2 100644 --- a/integration/build.gradle +++ b/integration/build.gradle @@ -15,6 +15,7 @@ dependencies { runtimeOnly project(":engines:mxnet:mxnet-model-zoo") runtimeOnly project(":engines:pytorch:pytorch-model-zoo") + runtimeOnly project(":engines:pytorch:pytorch-jni") runtimeOnly project(":engines:tensorflow:tensorflow-model-zoo") runtimeOnly project(":engines:ml:xgboost")