diff --git a/.github/workflows/native_jni_s3_pytorch.yml b/.github/workflows/native_jni_s3_pytorch.yml
index 7faef36252a..2d306d71759 100644
--- a/.github/workflows/native_jni_s3_pytorch.yml
+++ b/.github/workflows/native_jni_s3_pytorch.yml
@@ -66,13 +66,14 @@ jobs:
       - name: Install Environment
         run: |
           apt-get update
-          DEBIAN_FRONTEND=noninteractive apt-get install -y locales cmake curl unzip software-properties-common gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
+          DEBIAN_FRONTEND=noninteractive apt-get install -y locales curl unzip software-properties-common
           add-apt-repository -y ppa:deadsnakes/ppa
           apt-get update
           apt-get install -y python3 python3-distutils
           curl -O https://bootstrap.pypa.io/pip/3.6/get-pip.py
           python3 get-pip.py
-          pip3 install awscli --upgrade
+          pip3 install awscli cmake
+          ln -s /usr/local/bin/cmake /usr/bin/cmake
       - name: Release JNI prep
         run: |
           PYTORCH_VERSION=${{ github.event.inputs.pt_version }}
@@ -84,8 +85,6 @@ jobs:
           ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pcu10 -Ppt_version=$PYTORCH_VERSION
           ./gradlew :engines:pytorch:pytorch-native:cleanJNI
           ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pcu11 -Ppt_version=$PYTORCH_VERSION
-          ./gradlew :engines:pytorch:pytorch-native:cleanJNI
-          CXX=aarch64-linux-gnu-gcc ./gradlew :engines:pytorch:pytorch-native:compileJNI -Paarch64 -Ppt_version=$PYTORCH_VERSION
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v1
         with:
@@ -109,7 +108,6 @@ jobs:
           yum -y update
           yum -y install centos-release-scl-rh epel-release
           yum -y install devtoolset-7 rh-git218 patch cmake3
-          yum -y install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
           ln -s /usr/bin/cmake3 /usr/bin/cmake
           pip3 install awscli --upgrade
       - uses: actions/checkout@v2
@@ -134,8 +132,6 @@ jobs:
           ./gradlew -Pjni -Ppt_version=$PYTORCH_VERSION :integration:test "-Dai.djl.default_engine=PyTorch"
           ./gradlew :engines:pytorch:pytorch-native:cleanJNI
           ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pcu11 -Pprecxx11 -Ppt_version=$PYTORCH_VERSION
-          ./gradlew :engines:pytorch:pytorch-native:cleanJNI
-          CXX=aarch64-linux-gnu-gcc ./gradlew :engines:pytorch:pytorch-native:compileJNI -Pprecxx11 -Paarch64 -Ppt_version=$PYTORCH_VERSION
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v1
         with:
@@ -164,33 +160,27 @@ jobs:
           key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
           restore-keys: |
             ${{ runner.os }}-gradle-
-      - name: Install CUDA 10.2
+      - name: Install CUDA 11.3
         shell: cmd
         run: |
-          curl.exe -L http://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_441.22_windows.exe -o cuda102.exe
-          curl.exe -L https://developer.download.nvidia.com/compute/redist/cudnn/v7.6.4/cudnn-10.1-windows7-x64-v7.6.4.38.zip -o cudnn.zip
-          cuda102.exe -s
+          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
+          curl.exe -L https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe -o cuda.exe
+          curl.exe -L https://developer.download.nvidia.com/compute/redist/cudnn/v8.2.1/cudnn-11.3-windows-x64-v8.2.1.32.zip -o cudnn.zip
+          cuda.exe -s
           mkdir cuda
           unzip.exe cudnn.zip
-          cp.exe -a cuda/include cuda/lib cuda/bin "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.2/"
-          rm.exe -Rf cuda102.exe cuda.exe cudnn.zip cuda
+          cp.exe -a cuda/include cuda/lib cuda/bin "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.3/"
+          rm.exe -Rf cuda.exe cudnn.zip cuda
       - name: Release CPU JNI
         shell: cmd
         run: |
-          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
+          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
           gradlew :engines:pytorch:pytorch-native:compileJNI -Ppt_version=${{ github.event.inputs.pt_version }}
-      - name: Release cuda10 JNI
-        shell: cmd
-        run: |
-          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
-          set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v10.2"
-          set "PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%"
-          gradlew :engines:pytorch:pytorch-native:cleanJNI :engines:pytorch:pytorch-native:compileJNI -Pcu10 -Ppt_version=${{ github.event.inputs.pt_version }}
       - name: Release cuda11 JNI
         shell: cmd
         run: |
-          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
-          set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v10.2"
+          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" amd64
+          set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v11.3"
           set "PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH%"
           gradlew :engines:pytorch:pytorch-native:cleanJNI :engines:pytorch:pytorch-native:compileJNI -Pcu11 -Ppt_version=${{ github.event.inputs.pt_version }}
       - name: Configure AWS Credentials
diff --git a/.github/workflows/nightly_publish.yml b/.github/workflows/nightly_publish.yml
index c5c4f3ce0e7..a27a5aa1381 100644
--- a/.github/workflows/nightly_publish.yml
+++ b/.github/workflows/nightly_publish.yml
@@ -108,8 +108,8 @@ jobs:
       - name: Publish to snapshot repository
         if: ${{ github.event.inputs.mode == '' || github.event.inputs.mode == 'snapshot' }}
         run: |
-          ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.8.1 -Psnapshot
           ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.9.1 -Psnapshot
+          ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.10.0 -Psnapshot
           ./gradlew clean publish -Psnapshot
           cd bom
           ./gradlew publish -Psnapshot
@@ -121,8 +121,8 @@ jobs:
       - name: Publish to staging repository
         if: ${{ github.event.inputs.mode == 'staging' }}
         run: |
-          ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.8.1 -P${{ github.event.inputs.mode }}
           ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.9.1 -P${{ github.event.inputs.mode }}
+          ./gradlew clean engines:pytorch:pytorch-jni:publish -Ppt_version=1.10.0 -P${{ github.event.inputs.mode }}
           ./gradlew clean publish -P${{ github.event.inputs.mode }}
           cd bom
           ./gradlew publish -P${{ github.event.inputs.mode }}
diff --git a/engines/pytorch/pytorch-engine/src/test/java/ai/djl/pytorch/integration/MkldnnTest.java b/engines/pytorch/pytorch-engine/src/test/java/ai/djl/pytorch/integration/MkldnnTest.java
index 7d86b3ae150..8607b724110 100644
--- a/engines/pytorch/pytorch-engine/src/test/java/ai/djl/pytorch/integration/MkldnnTest.java
+++ b/engines/pytorch/pytorch-engine/src/test/java/ai/djl/pytorch/integration/MkldnnTest.java
@@ -17,6 +17,8 @@
 import ai.djl.ndarray.NDManager;
 import ai.djl.ndarray.types.Shape;
 import java.util.Arrays;
+
+import org.testng.SkipException;
 import org.testng.annotations.Test;
 
 /** The file is for testing PyTorch MKLDNN functionalities. */
@@ -24,6 +26,12 @@ public class MkldnnTest {
 
     @Test
     public void testMkldnn() {
+        // os.arch is "amd64" on Linux/Windows JVMs but "x86_64" on macOS;
+        // accept both so the test still runs on Intel Macs.
+        String arch = System.getProperty("os.arch");
+        if (!"amd64".equals(arch) && !"x86_64".equals(arch)) {
+            throw new SkipException("MKLDNN Test requires x86_64 arch.");
+        }
+
         System.setProperty("ai.djl.pytorch.use_mkldnn", "true");
         try (NDManager manager = NDManager.newBaseManager()) {
             NDArray[] arrays = {
diff --git a/engines/pytorch/pytorch-jni/build.gradle b/engines/pytorch/pytorch-jni/build.gradle
index 2d9380590fe..039c786b51f 100644
--- a/engines/pytorch/pytorch-jni/build.gradle
+++ b/engines/pytorch/pytorch-jni/build.gradle
@@ -26,7 +26,12 @@ processResources {
                 "win-x86_64/cpu/djl_torch.dll",
                 "win-x86_64/cu102/djl_torch.dll"
         ]
-        if (ptVersion.startsWith("1.10.")) {
+        if (ptVersion.startsWith("1.11.")) {
+            files.add("linux-aarch64/cpu/libdjl_torch.so")
+            files.add("linux-x86_64/cu113/libdjl_torch.so")
+            files.add("linux-x86_64/cu113-precxx11/libdjl_torch.so")
+            files.add("win-x86_64/cu113/djl_torch.dll")
+        } else if (ptVersion.startsWith("1.10.")) {
             files.add("linux-x86_64/cu113/libdjl_torch.so")
             files.add("linux-x86_64/cu113-precxx11/libdjl_torch.so")
             files.add("win-x86_64/cu113/djl_torch.dll")
diff --git a/engines/pytorch/pytorch-native/CMakeLists.txt b/engines/pytorch/pytorch-native/CMakeLists.txt
index a321a54558e..0db26531233 100644
--- a/engines/pytorch/pytorch-native/CMakeLists.txt
+++ b/engines/pytorch/pytorch-native/CMakeLists.txt
@@ -48,6 +48,10 @@ set(SOURCE_FILES
     "src/main/native/ai_djl_pytorch_jni_cache.h"
     "src/main/native/ai_djl_pytorch_jni_cache.cc")
 
+if(PT_OLD_VERSION)
+    add_compile_definitions(V1_10_X)
+endif()
+
 add_library(djl_torch SHARED ${SOURCE_FILES})
 # build host
 if(NOT BUILD_ANDROID)
diff --git a/engines/pytorch/pytorch-native/build.cmd b/engines/pytorch/pytorch-native/build.cmd
index ee22e8d6a6b..90d664e928e 100644
--- a/engines/pytorch/pytorch-native/build.cmd
+++ b/engines/pytorch/pytorch-native/build.cmd
@@ -5,7 +5,7 @@
 @rem choco install jdk8 -y
 
 set FILEPATH="libtorch"
-set VERSION="%1"
+set VERSION=%1
 if "%2" == "cpu" (
     set DOWNLOAD_URL="https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-%VERSION%%%2Bcpu.zip"
 ) else if "%2" == "cu102" (
@@ -28,9 +28,20 @@ if exist %FILEPATH% (
     echo Finished downloading libtorch
 )
 
+if "%VERSION%" == "1.11.0" (
+    copy /y src\main\patch\cuda.cmake libtorch\share\cmake\Caffe2\public\
+)
+@rem Match every 1.10.x / 1.9.x release by prefix, mirroring the regex in build.sh
+if "%VERSION:~0,5%" == "1.10." (
+    set PT_OLD_VERSION=1
+)
+if "%VERSION:~0,4%" == "1.9." (
+    set PT_OLD_VERSION=1
+)
+
 if exist build rd /q /s build
 md build\classes
 cd build
 javac -sourcepath ..\..\pytorch-engine\src\main\java\ ..\..\pytorch-engine\src\main\java\ai\djl\pytorch\jni\PyTorchLibrary.java -h include -d classes
-cmake -DCMAKE_PREFIX_PATH=libtorch ..
+cmake -DCMAKE_PREFIX_PATH=libtorch -DPT_OLD_VERSION=%PT_OLD_VERSION% ..
 cmake --build . --config Release
diff --git a/engines/pytorch/pytorch-native/build.gradle b/engines/pytorch/pytorch-native/build.gradle
index f91369d6eff..f1fdd975e55 100644
--- a/engines/pytorch/pytorch-native/build.gradle
+++ b/engines/pytorch/pytorch-native/build.gradle
@@ -12,7 +12,7 @@ if (project.hasProperty("pt_version") && project.property("pt_version") != "") {
 }
 boolean isRelease = project.hasProperty("release") || project.hasProperty("staging")
 boolean isPrecxx11 = project.hasProperty("precxx11")
-boolean isAarch64 = project.hasProperty("aarch64")
+boolean isAarch64 = project.hasProperty("aarch64") || System.properties["os.arch"] == "aarch64"
 
 String FLAVOR = "cpu"
 if (project.hasProperty("cu10")) {
@@ -97,8 +97,7 @@ def prepareNativeLib(String binaryRoot, String ver) {
     ]
 
     def aarch64Files = [
-            "${ver}/libtorch-cxx11-shared-with-deps-${ver}-aarch64.zip": "cpu/linux-aarch64",
-            "${ver}/libtorch-shared-with-deps-${ver}-aarch64.zip"      : "cpu-precxx11/linux-aarch64"
+            "${ver}/libtorch-cxx11-shared-with-deps-${ver}-aarch64.zip": "cpu/linux-aarch64"
     ]
 
     copyNativeLibToOutputDir(files, binaryRoot, officialPytorchUrl)
@@ -256,8 +255,7 @@ task uploadS3 {
                 "${BINARY_ROOT}/cu113/linux-x86_64/native/lib/",
                 "${BINARY_ROOT}/cu113/win-x86_64/native/lib/",
                 "${BINARY_ROOT}/cu113-precxx11/linux-x86_64/native/lib/",
-                "${BINARY_ROOT}/cpu/linux-aarch64/native/lib/",
-                "${BINARY_ROOT}/cpu-precxx11/linux-aarch64/native/lib/"
+                "${BINARY_ROOT}/cpu/linux-aarch64/native/lib/"
         ]
         uploadDirs.each { item ->
             fileTree(item).files.name.each {
@@ -315,11 +313,11 @@ flavorNames.each { flavor ->
                     libstd.text = new URL("https://publish.djl.ai/extra/THIRD-PARTY-LICENSES_qHnMKgbdWa.txt").text
                 }
             }
-            from ("${BINARY_ROOT}/${flavor}/${osName}/native/lib") {
-                into ("pytorch/${flavor}/${osName}")
+            from("${BINARY_ROOT}/${flavor}/${osName}/native/lib") {
+                into("pytorch/${flavor}/${osName}")
             }
-            from ("${BINARY_ROOT}/pytorch.properties") {
-                into ("native/lib")
+            from("${BINARY_ROOT}/pytorch.properties") {
+                into("native/lib")
             }
             from "src/main/resources"
             archiveClassifier = "${osName}"
diff --git a/engines/pytorch/pytorch-native/build.sh b/engines/pytorch/pytorch-native/build.sh
index 527f44699c1..419e9ad5380 100755
--- a/engines/pytorch/pytorch-native/build.sh
+++ b/engines/pytorch/pytorch-native/build.sh
@@ -42,17 +42,20 @@ if [[ ! -d "libtorch" ]]; then
   fi
 fi
 
+if [[ "$VERSION" =~ ^1\.10\..*|^1\.9\..* ]]; then
+  PT_OLD_VERSION=1
+fi
 pushd .
 
 rm -rf build
 mkdir build && cd build
 mkdir classes
 javac -sourcepath ../../pytorch-engine/src/main/java/ ../../pytorch-engine/src/main/java/ai/djl/pytorch/jni/PyTorchLibrary.java -h include -d classes
-cmake -DCMAKE_PREFIX_PATH=libtorch ..
+cmake -DCMAKE_PREFIX_PATH=libtorch -DPT_OLD_VERSION=${PT_OLD_VERSION} ..
 cmake --build . --config Release -- -j "${NUM_PROC}"
 
 if [[ $PLATFORM == 'darwin' ]]; then
   install_name_tool -add_rpath @loader_path libdjl_torch.dylib
 fi
 
-popd
\ No newline at end of file
+popd
diff --git a/engines/pytorch/pytorch-native/src/main/native/ai_djl_pytorch_jni_PyTorchLibrary_system.cc b/engines/pytorch/pytorch-native/src/main/native/ai_djl_pytorch_jni_PyTorchLibrary_system.cc
index e8be3e23798..736eea1fcac 100644
--- a/engines/pytorch/pytorch-native/src/main/native/ai_djl_pytorch_jni_PyTorchLibrary_system.cc
+++ b/engines/pytorch/pytorch-native/src/main/native/ai_djl_pytorch_jni_PyTorchLibrary_system.cc
@@ -12,10 +12,12 @@
  */
 #include <torch/torch.h>
 // clang-format off
-//#include <torch/csrc/jit/frontend/code_template.h>
-#include <ATen/code_template.h>
+#ifdef V1_10_X
+    #include <torch/csrc/jit/frontend/code_template.h>
+#else
+    #include <ATen/code_template.h>
+#endif
 #include <ATen/core/jit_type.h>
-
 // clang-format on
 
 #include <sstream>
@@ -165,8 +167,22 @@ inline std::string FormatMemory(int64_t bytes) {
   return oss.str();
 }
 
-// the code snippet is copied from torch/csrc/autograd/profiler.cpp
-static at::jit::CodeTemplate event_template(R"(
+// the code snippet is copied from torch/csrc/autograd/profiler_legacy.cpp
+#ifdef V1_10_X
+static torch::jit::CodeTemplate event_template(R"(
+{
+  "name": "${name}",
+  "ph": "X",
+  "ts": ${ts},
+  "dur": ${dur},
+  "tid": ${tid},
+  "pid": "CPU Functions",
+  "shape": ${shape},
+  "cpu mem": "${cpu_mem}",
+  "args": {}
+})");
+#else
+static const at::jit::CodeTemplate event_template(R"(
 {
   "name": "${name}",
   "ph": "X",
@@ -178,6 +194,7 @@ static at::jit::CodeTemplate event_template(R"(
   "cpu mem": "${cpu_mem}",
   "args": {}
 })");
+#endif
 
 // The function doesn't support GPU yet
 // You can refer to
@@ -230,7 +247,11 @@ void WriteProfilerEventsToStream(std::ostream& out, const std::vector<std::vecto
         LegacyEvent* start = it->second;
         int64_t memory_usage = mem_it->second;
 
+#ifdef V1_10_X
+        torch::jit::TemplateEnv env;
+#else
         at::jit::TemplateEnv env;
+#endif
         env.s("name", start->name());
         env.d("ts", profiler_start->cpuElapsedUs(*start));
         env.d("dur", start->cpuElapsedUs(*evt));
diff --git a/engines/pytorch/pytorch-native/src/main/native/djl_pytorch_utils.h b/engines/pytorch/pytorch-native/src/main/native/djl_pytorch_utils.h
index 375681be777..45fd527ffa7 100644
--- a/engines/pytorch/pytorch-native/src/main/native/djl_pytorch_utils.h
+++ b/engines/pytorch/pytorch-native/src/main/native/djl_pytorch_utils.h
@@ -30,10 +30,16 @@ namespace utils {
 
 #if !defined(__ANDROID__)
 // for image interpolation
+#ifdef V1_10_X
+typedef torch::variant<torch::enumtype::kNearest, torch::enumtype::kLinear, torch::enumtype::kBilinear,
+    torch::enumtype::kBicubic, torch::enumtype::kTrilinear, torch::enumtype::kArea>
+    mode_t;
+#else
 typedef torch::variant<torch::enumtype::kNearest, torch::enumtype::kLinear, torch::enumtype::kBilinear,
     torch::enumtype::kBicubic, torch::enumtype::kTrilinear, torch::enumtype::kArea, torch::enumtype::kNearestExact>
     mode_t;
 #endif
+#endif
 
 inline jint GetDTypeFromScalarType(const torch::ScalarType& type) {
   if (torch::kFloat32 == type) {
@@ -109,7 +115,9 @@ inline mode_t GetInterpolationMode(jint jmode) {
     case 5:
       return torch::kArea;
     case 6:
+#ifndef V1_10_X
       return torch::kNearestExact;
+#endif
     default:
       throw;
   }
diff --git a/engines/pytorch/pytorch-native/src/main/patch/cuda.cmake b/engines/pytorch/pytorch-native/src/main/patch/cuda.cmake
new file mode 100644
index 00000000000..8308f21a4c2
--- /dev/null
+++ b/engines/pytorch/pytorch-native/src/main/patch/cuda.cmake
@@ -0,0 +1,509 @@
+# ---[ cuda
+
+# Poor man's include guard
+if(TARGET torch::cudart)
+  return()
+endif()
+
+# sccache is only supported in CMake master and not in the newest official
+# release (3.11.3) yet. Hence we need our own Modules_CUDA_fix to enable sccache.
+list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/../Modules_CUDA_fix)
+
+# We don't want to statically link cudart, because we rely on its dynamic linkage in
+# python (follow along torch/cuda/__init__.py and usage of cudaGetErrorName).
+# Technically, we can link cudart here statically, and link libtorch_python.so
+# to a dynamic libcudart.so, but that's just wasteful.
+# However, on Windows, if this one gets switched off, the error "cuda: unknown error"
+# will be raised when running the following code:
+# >>> import torch
+# >>> torch.cuda.is_available()
+# >>> torch.cuda.current_device()
+# More details can be found in the following links.
+# https://github.com/pytorch/pytorch/issues/20635
+# https://github.com/pytorch/pytorch/issues/17108
+if(NOT MSVC)
+  set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
+endif()
+
+# Find CUDA.
+find_package(CUDA)
+if(NOT CUDA_FOUND)
+  message(WARNING
+    "Caffe2: CUDA cannot be found. Depending on whether you are building "
+    "Caffe2 or a Caffe2 dependent library, the next warning / error will "
+    "give you more info.")
+  set(CAFFE2_USE_CUDA OFF)
+  return()
+endif()
+
+# Enable CUDA language support
+set(CUDAToolkit_ROOT "${CUDA_TOOLKIT_ROOT_DIR}")
+set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
+set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+
+message(STATUS "Caffe2: CUDA detected: " ${CUDA_VERSION})
+message(STATUS "Caffe2: CUDA nvcc is: " ${CUDA_NVCC_EXECUTABLE})
+message(STATUS "Caffe2: CUDA toolkit directory: " ${CUDA_TOOLKIT_ROOT_DIR})
+if(CUDA_VERSION VERSION_LESS 10.2)
+  message(FATAL_ERROR "PyTorch requires CUDA 10.2 or above.")
+endif()
+
+if(CUDA_FOUND)
+  # Sometimes, we may mismatch nvcc with the CUDA headers we are
+  # compiling with, e.g., if a ccache nvcc is fed to us by CUDA_NVCC_EXECUTABLE
+  # but the PATH is not consistent with CUDA_HOME.  It's better safe
+  # than sorry: make sure everything is consistent.
+  if(MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio")
+    # When using Visual Studio, it attempts to lock the whole binary dir when
+    # `try_run` is called, which will cause the build to fail.
+    string(RANDOM BUILD_SUFFIX)
+    set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}/${BUILD_SUFFIX}")
+  else()
+    set(PROJECT_RANDOM_BINARY_DIR "${PROJECT_BINARY_DIR}")
+  endif()
+  set(file "${PROJECT_BINARY_DIR}/detect_cuda_version.cc")
+  file(WRITE ${file} ""
+    "#include <cuda.h>\n"
+    "#include <cstdio>\n"
+    "int main() {\n"
+    "  printf(\"%d.%d\", CUDA_VERSION / 1000, (CUDA_VERSION / 10) % 100);\n"
+    "  return 0;\n"
+    "}\n"
+    )
+  if(NOT CMAKE_CROSSCOMPILING)
+    try_run(run_result compile_result ${PROJECT_RANDOM_BINARY_DIR} ${file}
+      CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${CUDA_INCLUDE_DIRS}"
+      LINK_LIBRARIES ${CUDA_LIBRARIES}
+      RUN_OUTPUT_VARIABLE cuda_version_from_header
+      COMPILE_OUTPUT_VARIABLE output_var
+      )
+    if(NOT compile_result)
+      message(FATAL_ERROR "Caffe2: Couldn't determine version from header: " ${output_var})
+    endif()
+    message(STATUS "Caffe2: Header version is: " ${cuda_version_from_header})
+    if(NOT cuda_version_from_header STREQUAL ${CUDA_VERSION_STRING})
+      # Remember FindCUDA's version for the error message, then force CUDA
+      # to be processed again next time. TODO: I'm not sure if this counts
+      # as an implementation detail of FindCUDA
+      set(cuda_version_from_findcuda ${CUDA_VERSION_STRING})
+      unset(CUDA_TOOLKIT_ROOT_DIR_INTERNAL CACHE)
+      # Not strictly necessary, but for good luck.
+      unset(CUDA_VERSION CACHE)
+      # Error out
+      message(FATAL_ERROR "FindCUDA says CUDA version is ${cuda_version_from_findcuda} (usually determined by nvcc), "
+        "but the CUDA headers say the version is ${cuda_version_from_header}.  This often occurs "
+        "when you set both CUDA_HOME and CUDA_NVCC_EXECUTABLE to "
+        "non-standard locations, without also setting PATH to point to the correct nvcc.  "
+        "Perhaps, try re-running this command again with PATH=${CUDA_TOOLKIT_ROOT_DIR}/bin:$PATH.  "
+        "See above log messages for more diagnostics, and see https://github.com/pytorch/pytorch/issues/8092 for more details.")
+    endif()
+  endif()
+endif()
+
+# Find cuDNN.
+if(USE_STATIC_CUDNN)
+  set(CUDNN_STATIC ON CACHE BOOL "")
+else()
+  set(CUDNN_STATIC OFF CACHE BOOL "")
+endif()
+
+find_package(CUDNN)
+
+if(CAFFE2_USE_CUDNN AND NOT CUDNN_FOUND)
+  message(WARNING
+    "Caffe2: Cannot find cuDNN library. Turning the option off")
+  set(CAFFE2_USE_CUDNN OFF)
+endif()
+
+# Optionally, find TensorRT
+if(CAFFE2_USE_TENSORRT)
+  find_path(TENSORRT_INCLUDE_DIR NvInfer.h
+    HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR}
+    PATH_SUFFIXES include)
+  find_library(TENSORRT_LIBRARY nvinfer
+    HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR}
+    PATH_SUFFIXES lib lib64 lib/x64)
+  find_package_handle_standard_args(
+    TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIBRARY)
+  if(TENSORRT_FOUND)
+    execute_process(COMMAND /bin/sh -c "[ -r \"${TENSORRT_INCLUDE_DIR}/NvInferVersion.h\" ] && awk '/^\#define NV_TENSORRT_MAJOR/ {print $3}' \"${TENSORRT_INCLUDE_DIR}/NvInferVersion.h\"" OUTPUT_VARIABLE TENSORRT_VERSION_MAJOR)
+    execute_process(COMMAND /bin/sh -c "[ -r \"${TENSORRT_INCLUDE_DIR}/NvInferVersion.h\" ] && awk '/^\#define NV_TENSORRT_MINOR/ {print $3}' \"${TENSORRT_INCLUDE_DIR}/NvInferVersion.h\"" OUTPUT_VARIABLE TENSORRT_VERSION_MINOR)
+    if(TENSORRT_VERSION_MAJOR)
+      string(STRIP ${TENSORRT_VERSION_MAJOR} TENSORRT_VERSION_MAJOR)
+      string(STRIP ${TENSORRT_VERSION_MINOR} TENSORRT_VERSION_MINOR)
+      set(TENSORRT_VERSION "${TENSORRT_VERSION_MAJOR}.${TENSORRT_VERSION_MINOR}")
+      #CAFFE2_USE_TRT is set in Dependencies
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTENSORRT_VERSION_MINOR=${TENSORRT_VERSION_MINOR}")
+    else()
+      message(WARNING "Caffe2: Cannot find ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h. Assuming TRT 5.0 which is no longer supported. Turning the option off.")
+      set(CAFFE2_USE_TENSORRT OFF)
+    endif()
+  else()
+    message(WARNING
+      "Caffe2: Cannot find TensorRT library. Turning the option off.")
+    set(CAFFE2_USE_TENSORRT OFF)
+  endif()
+endif()
+
+# ---[ Extract versions
+if(CAFFE2_USE_CUDNN)
+  # Get cuDNN version
+  if(EXISTS ${CUDNN_INCLUDE_PATH}/cudnn_version.h)
+    file(READ ${CUDNN_INCLUDE_PATH}/cudnn_version.h CUDNN_HEADER_CONTENTS)
+  else()
+    file(READ ${CUDNN_INCLUDE_PATH}/cudnn.h CUDNN_HEADER_CONTENTS)
+  endif()
+  string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
+               CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
+               CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}")
+  string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
+               CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
+               CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}")
+  string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
+               CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
+               CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}")
+  # Assemble cuDNN version
+  if(NOT CUDNN_VERSION_MAJOR)
+    set(CUDNN_VERSION "?")
+  else()
+    set(CUDNN_VERSION
+        "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}")
+  endif()
+  message(STATUS "Found cuDNN: v${CUDNN_VERSION}  (include: ${CUDNN_INCLUDE_PATH}, library: ${CUDNN_LIBRARY_PATH})")
+  if(CUDNN_VERSION VERSION_LESS "7.0.0")
+    message(FATAL_ERROR "PyTorch requires cuDNN 7 and above.")
+  endif()
+endif()
+
+# ---[ CUDA libraries wrapper
+
+# find libcuda.so and libnvrtc.so
+# For libcuda.so, we will find it under lib, lib64, and then the
+# stubs folder, in case we are building on a system that does not
+# have cuda driver installed. On windows, we also search under the
+# folder lib/x64.
+find_library(CUDA_CUDA_LIB cuda
+    PATHS ${CUDA_TOOLKIT_ROOT_DIR}
+    PATH_SUFFIXES lib lib64 lib/stubs lib64/stubs lib/x64)
+find_library(CUDA_NVRTC_LIB nvrtc
+    PATHS ${CUDA_TOOLKIT_ROOT_DIR}
+    PATH_SUFFIXES lib lib64 lib/x64)
+if(CUDA_NVRTC_LIB AND NOT CUDA_NVRTC_SHORTHASH)
+  if("${PYTHON_EXECUTABLE}" STREQUAL "")
+    set(_python_exe "python")
+  else()
+    set(_python_exe "${PYTHON_EXECUTABLE}")
+  endif()
+  execute_process(
+    COMMAND "${_python_exe}" -c
+    "import hashlib;hash=hashlib.sha256();hash.update(open('${CUDA_NVRTC_LIB}','rb').read());print(hash.hexdigest()[:8])"
+    RESULT_VARIABLE _retval
+    OUTPUT_VARIABLE CUDA_NVRTC_SHORTHASH)
+  if(NOT _retval EQUAL 0)
+    message(WARNING "Failed to compute shorthash for libnvrtc.so")
+    set(CUDA_NVRTC_SHORTHASH "XXXXXXXX")
+  else()
+    string(STRIP "${CUDA_NVRTC_SHORTHASH}" CUDA_NVRTC_SHORTHASH)
+    message(STATUS "${CUDA_NVRTC_LIB} shorthash is ${CUDA_NVRTC_SHORTHASH}")
+  endif()
+endif()
+
+# Create new style imported libraries.
+# Several of these libraries have a hardcoded path if CAFFE2_STATIC_LINK_CUDA
+# is set. This path is where sane CUDA installations have their static
+# libraries installed. This flag should only be used for binary builds, so
+# end-users should never have this flag set.
+
+# cuda
+add_library(caffe2::cuda UNKNOWN IMPORTED)
+set_property(
+    TARGET caffe2::cuda PROPERTY IMPORTED_LOCATION
+    ${CUDA_CUDA_LIB})
+set_property(
+    TARGET caffe2::cuda PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+    ${CUDA_INCLUDE_DIRS})
+
+# cudart. CUDA_LIBRARIES is actually a list, so we will make an interface
+# library.
+add_library(torch::cudart INTERFACE IMPORTED)
+if(CAFFE2_STATIC_LINK_CUDA)
+    set_property(
+        TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
+        "${CUDA_cudart_static_LIBRARY}")
+    if(NOT WIN32)
+      set_property(
+          TARGET torch::cudart APPEND PROPERTY INTERFACE_LINK_LIBRARIES
+          rt dl)
+    endif()
+else()
+    set_property(
+        TARGET torch::cudart PROPERTY INTERFACE_LINK_LIBRARIES
+        ${CUDA_LIBRARIES})
+endif()
+set_property(
+    TARGET torch::cudart PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+    ${CUDA_INCLUDE_DIRS})
+
+# nvToolsExt
+add_library(torch::nvtoolsext INTERFACE IMPORTED)
+if(MSVC)
+  if(NOT NVTOOLEXT_HOME)
+    set(NVTOOLEXT_HOME "C:/Program Files/NVIDIA Corporation/NvToolsExt")
+  endif()
+  if(DEFINED ENV{NVTOOLSEXT_PATH})
+    set(NVTOOLEXT_HOME $ENV{NVTOOLSEXT_PATH})
+    file(TO_CMAKE_PATH ${NVTOOLEXT_HOME} NVTOOLEXT_HOME)
+  endif()
+  set_target_properties(
+      torch::nvtoolsext PROPERTIES
+      INTERFACE_LINK_LIBRARIES ${NVTOOLEXT_HOME}/lib/x64/nvToolsExt64_1.lib
+      INTERFACE_INCLUDE_DIRECTORIES ${NVTOOLEXT_HOME}/include)
+
+elseif(APPLE)
+  set_property(
+      TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES
+      ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvrtc.dylib
+      ${CUDA_TOOLKIT_ROOT_DIR}/lib/libnvToolsExt.dylib)
+
+else()
+  find_library(LIBNVTOOLSEXT libnvToolsExt.so PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/)
+  set_property(
+      TARGET torch::nvtoolsext PROPERTY INTERFACE_LINK_LIBRARIES
+      ${LIBNVTOOLSEXT})
+endif()
+
+# cublas. CUDA_CUBLAS_LIBRARIES is actually a list, so we will make an
+# interface library similar to cudart.
+add_library(caffe2::cublas INTERFACE IMPORTED)
+if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
+    set_property(
+        TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
+        "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a")
+    set_property(
+      TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
+      "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublasLt_static.a")
+    # Add explicit dependency to cudart_static to fix
+    # libcublasLt_static.a.o): undefined reference to symbol 'cudaStreamWaitEvent'
+    # error adding symbols: DSO missing from command line
+    set_property(
+      TARGET caffe2::cublas APPEND PROPERTY INTERFACE_LINK_LIBRARIES
+      "${CUDA_cudart_static_LIBRARY}" rt dl)
+else()
+    set_property(
+        TARGET caffe2::cublas PROPERTY INTERFACE_LINK_LIBRARIES
+        ${CUDA_CUBLAS_LIBRARIES})
+endif()
+set_property(
+    TARGET caffe2::cublas PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+    ${CUDA_INCLUDE_DIRS})
+
+# cudnn public and private interfaces
+# static linking is handled by USE_STATIC_CUDNN environment variable
+# If library is linked dynamically, then the private interface is a no-op
+# If library is linked statically:
+#  - public interface would only reference headers
+#  - private interface will contain the actual link instructions
+if(CAFFE2_USE_CUDNN)
+  add_library(caffe2::cudnn-public INTERFACE IMPORTED)
+  set_property(
+    TARGET caffe2::cudnn-public PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+    ${CUDNN_INCLUDE_PATH})
+  add_library(caffe2::cudnn-private INTERFACE IMPORTED)
+  set_property(
+    TARGET caffe2::cudnn-private PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+    ${CUDNN_INCLUDE_PATH})
+  if(CUDNN_STATIC AND NOT WIN32)
+    if(USE_WHOLE_CUDNN)
+      set_property(
+        TARGET caffe2::cudnn-private PROPERTY INTERFACE_LINK_LIBRARIES
+        "-Wl,--whole-archive,\"${CUDNN_LIBRARY_PATH}\" -Wl,--no-whole-archive")
+    else()
+      set_property(
+        TARGET caffe2::cudnn-private PROPERTY INTERFACE_LINK_LIBRARIES
+        ${CUDNN_LIBRARY_PATH})
+    endif()
+    set_property(
+      TARGET caffe2::cudnn-private APPEND PROPERTY INTERFACE_LINK_LIBRARIES
+      "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl)
+    # Add explicit dependency on cublas to cudnn
+    get_target_property(__tmp caffe2::cublas INTERFACE_LINK_LIBRARIES)
+    set_property(
+      TARGET caffe2::cudnn-private APPEND PROPERTY INTERFACE_LINK_LIBRARIES
+      "${__tmp}")
+    # Lines below use target_link_libraries because we support cmake 3.5+.
+    # For cmake 3.13+, target_link_options to set INTERFACE_LINK_OPTIONS would be better.
+    # https://cmake.org/cmake/help/v3.5/command/target_link_libraries.html warns
+    # "Item names starting with -, but not -l or -framework, are treated as linker flags.
+    #  Note that such flags will be treated like any other library link item for purposes
+    #  of transitive dependencies, so they are generally safe to specify only as private
+    #  link items that will not propagate to dependents."
+    # Propagating to a dependent (torch_cuda) is exactly what we want here, so we are
+    # flouting the warning, but I can't think of a better (3.5+ compatible) way.
+    target_link_libraries(caffe2::cudnn-private INTERFACE
+        "-Wl,--exclude-libs,libcudnn_static.a")
+  else()
+  set_property(
+    TARGET caffe2::cudnn-public PROPERTY INTERFACE_LINK_LIBRARIES
+    ${CUDNN_LIBRARY_PATH})
+  endif()
+endif()
+
+# curand: imported library target. With CAFFE2_STATIC_LINK_CUDA on a
+# non-Windows host it uses libcurand_static.a plus libculibos.a and dl.
+add_library(caffe2::curand UNKNOWN IMPORTED)
+set_property(TARGET caffe2::curand
+    PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS})
+if(WIN32 OR NOT CAFFE2_STATIC_LINK_CUDA)
+    # Windows or non-static build: location comes from CUDA_curand_LIBRARY.
+    set_property(TARGET caffe2::curand
+        PROPERTY IMPORTED_LOCATION ${CUDA_curand_LIBRARY})
+else()
+    set_property(TARGET caffe2::curand
+        PROPERTY IMPORTED_LOCATION
+            "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand_static.a")
+    set_property(TARGET caffe2::curand
+        PROPERTY INTERFACE_LINK_LIBRARIES
+            "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a" dl)
+endif()
+
+# cufft: CUDA_CUFFT_LIBRARIES holds a list rather than a single path, so the
+# wrapper is an INTERFACE library carrying link items (similar to cudart).
+add_library(caffe2::cufft INTERFACE IMPORTED)
+set_property(TARGET caffe2::cufft
+    PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS})
+if(WIN32 OR NOT CAFFE2_STATIC_LINK_CUDA)
+    set_property(TARGET caffe2::cufft
+        PROPERTY INTERFACE_LINK_LIBRARIES ${CUDA_CUFFT_LIBRARIES})
+else()
+    # Static case: link the nocallback archive together with culibos and dl.
+    set_property(TARGET caffe2::cufft
+        PROPERTY INTERFACE_LINK_LIBRARIES
+            "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcufft_static_nocallback.a"
+            "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a"
+            dl)
+endif()
+
+# TensorRT: the imported target exists only when CAFFE2_USE_TENSORRT is
+# enabled; paths come from TENSORRT_INCLUDE_DIR and TENSORRT_LIBRARY.
+if(CAFFE2_USE_TENSORRT)
+  add_library(caffe2::tensorrt UNKNOWN IMPORTED)
+  set_property(TARGET caffe2::tensorrt
+      PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+          ${TENSORRT_INCLUDE_DIR})
+  set_property(TARGET caffe2::tensorrt
+      PROPERTY IMPORTED_LOCATION ${TENSORRT_LIBRARY})
+endif()
+
+# nvrtc
+# Imported library target: the library file is read from CUDA_NVRTC_LIB and
+# the headers from CUDA_INCLUDE_DIRS.
+add_library(caffe2::nvrtc UNKNOWN IMPORTED)
+set_property(TARGET caffe2::nvrtc
+    PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS})
+set_property(TARGET caffe2::nvrtc
+    PROPERTY IMPORTED_LOCATION ${CUDA_NVRTC_LIB})
+
+# Note: in theory, we can add similar dependent library wrappers. For
+# now, Caffe2 only uses the above libraries, so we will only wrap
+# these.
+
+# Special care for windows platform: we know that 32-bit windows does not
+# support cuda, so configuration is aborted there. CMAKE_SYSTEM_NAME is named
+# (not ${}-expanded) so that if() dereferences it exactly once.
+if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+  if(NOT (CMAKE_SIZEOF_VOID_P EQUAL 8))
+    message(FATAL_ERROR
+            "CUDA support not available with 32-bit windows. Did you "
+            "forget to set Win64 in the generator target?")
+  endif()
+endif()
+
+# Define ONNX_NAMESPACE for nvcc, falling back to onnx_c2 when it is not set.
+if(NOT ONNX_NAMESPACE)
+  list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=onnx_c2")
+else()
+  list(APPEND CUDA_NVCC_FLAGS "-DONNX_NAMESPACE=${ONNX_NAMESPACE}")
+endif()
+
+# With MSVC + Ninja and no CUDAHOSTCXX in the environment, add
+# --use-local-env so that the VC env is not activated again.
+if(MSVC AND NOT DEFINED ENV{CUDAHOSTCXX} AND CMAKE_GENERATOR STREQUAL "Ninja")
+  list(APPEND CUDA_NVCC_FLAGS "--use-local-env")
+endif()
+
+# Set nvcc arch flags (helper is defined elsewhere; presumably it fills NVCC_FLAGS_EXTRA).
+torch_cuda_get_nvcc_gencode_flag(NVCC_FLAGS_EXTRA)
+# CMake 3.18 adds integrated architecture selection, but we can't rely on it: turn it off.
+set(CMAKE_CUDA_ARCHITECTURES OFF)
+list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
+message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA}")
+
+# Silence nvcc diagnostics that appear in boost, glog, gflags, opencv, etc.
+foreach(__diag
+    cc_clobber_ignored integer_sign_change useless_using_declaration
+    set_but_not_used field_without_dll_interface
+    base_class_has_different_dll_interface
+    dll_interface_conflict_none_assumed
+    dll_interface_conflict_dllexport_assumed
+    implicit_return_from_non_void_function unsigned_compare_with_zero
+    declared_but_not_referenced bad_friend_decl)
+  list(APPEND SUPPRESS_WARNING_FLAGS "--diag_suppress=${__diag}")
+endforeach()
+# Join the entries into one comma-separated argument that follows -Xcudafe.
+string(REPLACE ";" "," SUPPRESS_WARNING_FLAGS "${SUPPRESS_WARNING_FLAGS}")
+list(APPEND CUDA_NVCC_FLAGS -Xcudafe ${SUPPRESS_WARNING_FLAGS})
+
+set(CUDA_PROPAGATE_HOST_FLAGS_BLOCKLIST "-Werror")  # initial blocklist entry
+if(MSVC)  # MSVC-only nvcc flags
+  list(APPEND CUDA_NVCC_FLAGS "--Werror" "cross-execution-space-call"
+                              "--no-host-device-move-forward")
+endif()
+
+# OpenMP flags for NVCC with Clang-cl, i.e. Clang simulating MSVC.
+if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang"
+  AND "${CMAKE_CXX_SIMULATE_ID}" STREQUAL "MSVC")
+  list(APPEND CUDA_PROPAGATE_HOST_FLAGS_BLOCKLIST "-Xclang" "-fopenmp")
+  if(NOT MSVC_TOOLSET_VERSION LESS 142)
+    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-openmp:experimental")
+  else()
+    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-openmp")
+  endif()
+endif()
+
+# Debug and Release symbol support: per-config -Xcompiler runtime flag on MSVC.
+if(MSVC)
+  if(CAFFE2_USE_MSVC_STATIC_RUNTIME)  # named, not ${}: if() dereferences once
+    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MTd")
+    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MT")
+    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MT")
+    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MT")
+  else()
+    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -Xcompiler /MDd")
+    string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL " -Xcompiler /MD")
+    string(APPEND CMAKE_CUDA_FLAGS_RELEASE " -Xcompiler /MD")
+    string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -Xcompiler /MD")
+  endif()
+  if(CUDA_NVCC_FLAGS MATCHES "Zi")
+    list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-FS")
+  endif()
+elseif(CUDA_DEVICE_DEBUG)
+  list(APPEND CUDA_NVCC_FLAGS "-g" "-G")  # -G enables device code debugging symbols
+endif()
+
+# Experimental nvcc switches that are always appended:
+#  - expt-relaxed-constexpr to suppress Eigen warnings
+#  - expt-extended-lambda to support lambda on device
+list(APPEND CUDA_NVCC_FLAGS
+    "--expt-relaxed-constexpr" "--expt-extended-lambda")
+
+# Fold CUDA_NVCC_FLAGS into CMAKE_CUDA_FLAGS; an entry with a space is fatal.
+foreach(nvcc_flag ${CUDA_NVCC_FLAGS})
+  if(nvcc_flag MATCHES " ")
+    message(FATAL_ERROR "Found spaces in CUDA_NVCC_FLAGS entry '${nvcc_flag}'")
+  endif()
+  string(APPEND CMAKE_CUDA_FLAGS " ${nvcc_flag}")
+endforeach()
diff --git a/integration/build.gradle b/integration/build.gradle
index 984f121b0ec..f4dbdaa3ab2 100644
--- a/integration/build.gradle
+++ b/integration/build.gradle
@@ -15,6 +15,7 @@ dependencies {
 
     runtimeOnly project(":engines:mxnet:mxnet-model-zoo")
     runtimeOnly project(":engines:pytorch:pytorch-model-zoo")
+    runtimeOnly project(":engines:pytorch:pytorch-jni")
     runtimeOnly project(":engines:tensorflow:tensorflow-model-zoo")
     runtimeOnly project(":engines:ml:xgboost")