apache · apeforest · Feb 9, 2020 · Feb 8, 2020 · Feb 8, 2020 · Feb 8, 2020
@@ -889,6 +889,30 @@ build_ubuntu_gpu_cmake() {
     ninja -v
 }
 
+# Configure the source tree at /work/mxnet with CMake (Ninja generator) inside
+# /work/build and then build it, with CUDA enabled but CUDA runtime
+# compilation switched off (-DENABLE_CUDA_RTC=OFF).
+build_ubuntu_gpu_cmake_no_rtc() {
+    # -e: stop at the first failing command; -x: echo each command as it runs.
+    set -ex
+    cd /work/build
+    # Helper defined outside this function; presumably sets up ccache compiler
+    # wrappers before the configure step -- confirm against its definition.
+    build_ccache_wrappers
+    # CUDA architectures are taken from $CI_CMAKE_CUDA_ARCH, which is not set
+    # in this function. ENABLE_CUDA_RTC=OFF is the option this build exercises.
+    cmake \
+        -DUSE_SIGNAL_HANDLER=ON                 \
+        -DUSE_CUDA=ON                           \
+        -DUSE_CUDNN=ON                          \
+        -DUSE_TVM_OP=ON                         \
+        -DPython3_EXECUTABLE=/usr/bin/python3   \
+        -DUSE_MKL_IF_AVAILABLE=OFF              \
+        -DUSE_MKLML_MKL=OFF                     \
+        -DUSE_MKLDNN=ON                         \
+        -DUSE_DIST_KVSTORE=ON                   \
+        -DCMAKE_BUILD_TYPE=Release              \
+        -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
+        -DBUILD_CYTHON_MODULES=1                \
+        -DENABLE_CUDA_RTC=OFF                   \
+        -G Ninja                                \
+        /work/mxnet
+
+    # -v makes Ninja print the full command line of every build step.
+    ninja -v
+}
+
 build_ubuntu_gpu_cmake_no_tvm_op() {
     set -ex
     cd /work/build

@@ -311,6 +311,19 @@ def compile_unix_cmake_gpu_no_tvm_op() {
     }]
 }
 
+// Returns a single-entry map: the key is the label 'GPU: CMake CUDA RTC OFF'
+// and the value is a closure that checks out the sources and runs the
+// build_ubuntu_gpu_cmake_no_rtc function through utils.docker_run.
+def compile_unix_cmake_gpu_no_rtc() {
+    return ['GPU: CMake CUDA RTC OFF': {
+        // NOTE(review): the step is scheduled on NODE_LINUX_CPU even though it
+        // is a GPU build -- presumably compiling does not need a GPU; confirm.
+        node(NODE_LINUX_CPU) {
+            // Workspace directory used only by this build variant.
+            ws('workspace/build-cmake-gpu-no-rtc') {
+                // max_time is defined outside this function; the unit is minutes.
+                timeout(time: max_time, unit: 'MINUTES') {
+                    utils.init_git()
+                    // Arguments: 'ubuntu_gpu_cu101' (presumably the docker platform),
+                    // the name of the build function to run, and a trailing false
+                    // whose meaning is set by utils.docker_run -- TODO confirm.
+                    utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake_no_rtc', false)
+                }
+            }
+        }
+    }]
+}
+
 def compile_unix_tensorrt_gpu() {
     return ['TensorRT': {
       node(NODE_LINUX_CPU) {

@@ -43,6 +43,7 @@ core_logic: {
     custom_steps.compile_unix_int64_gpu(),
     custom_steps.compile_unix_full_gpu_no_tvm_op(),
     custom_steps.compile_unix_cmake_gpu_no_tvm_op(),
+    custom_steps.compile_unix_cmake_gpu_no_rtc(),
     custom_steps.compile_unix_full_gpu_mkldnn_cpp_test()
   ])
 

diff --git a/config/config.cmake b/config/config.cmake
@@ -125,5 +125,5 @@ set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total num
 # Other GPU features
 set(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
 set(NCCL_ROOT "" CACHE BOOL "NCCL install path. Supports autodetection.")
-set(ENABLE_CUDA_RTC OFF CACHE BOOL "Build with CUDA runtime compilation support")
+set(ENABLE_CUDA_RTC ON CACHE BOOL "Build with CUDA runtime compilation support")
 set(USE_NVTX ON CACHE BOOL "Build with NVTX support")
diff --git a/src/imperative/cached_op.h b/src/imperative/cached_op.h
@@ -230,7 +230,7 @@ void SetRefCounts(nnvm::Graph* fwd_graph, const nnvm::Graph& full_graph) {
 
 void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, nnvm::Graph * grad_graph,
                    const Context& context, size_t num_forward_outputs, const bool inlining) {
-#if MXNET_USE_CUDA && !defined(_WIN32)
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)
   if (context.dev_mask() == kGPU &&
       !inlining &&
       dmlc::GetEnv("MXNET_USE_FUSION", true)) {
@@ -265,7 +265,13 @@ void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, nnvm::Grap
         << "Graph contains duplicate names for some of its inputs - fusion is NOT enabled!";
      }
   }
-#endif  // MXNET_USE_CUDA
+#else
+  // Only warn the user if the MXNET_USE_FUSION env var is explicitly enabled
+  if (context.dev_mask() == kGPU && !inlining &&
+      dmlc::GetEnv("MXNET_USE_FUSION", false)) {
+    exec::WarnFusionNotSupported();
+  }
+#endif  // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC && !defined(_WIN32)
 
   *fwd_graph = nnvm::Graph();
   fwd_graph->outputs = std::vector<nnvm::NodeEntry>(full_graph->outputs.begin(),