Fix TRT integration after merge broke it. (apache#41)

Remove unnecessary target param and conditions from BuildConfig.optimize(). Since target wasn't getting passed to optimize() from build(), TRT subgraph partitioning was skipped due to unnecessary conditions. Create an empty module when there are no funcs to compile, to avoid a crash. Update TensorRT tests and add them to CI. Fix bug in TRT cmake. Remove changes to CI.
zhiics · Dec 19, 2019 · cfde295 · cfde295
1 parent 87cf899
commit cfde295
Show file tree

Hide file tree

Showing 7 changed files with 85 additions and 23 deletions.
diff --git a/cmake/modules/contrib/TensorRT.cmake b/cmake/modules/contrib/TensorRT.cmake
@@ -17,16 +17,20 @@
 
 # TensorRT Module
 
-if(IS_DIRECTORY ${USE_TENSORRT})
-    set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
-    message(STATUS "Custom TensorRT path: " ${TENSORRT_ROOT_DIR})
-    set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT_DIR}/include)
-    set(TENSORRT_LIB_DIR ${TENSORRT_ROOT_DIR}/lib)
+if(USE_TENSORRT)  # any true value enables TensorRT; a directory value is additionally used as a search hint
+    if(IS_DIRECTORY ${USE_TENSORRT})
+        set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
+    endif()
+    find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES include)
+    find_library(TENSORRT_LIB_DIR nvinfer HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES lib)  # despite the name, this holds the nvinfer library file path
+    find_package_handle_standard_args(TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIB_DIR)  # sets TENSORRT_FOUND
+    if(NOT TENSORRT_FOUND)
+        message(FATAL_ERROR "Could not find TensorRT.")  # FATAL_ERROR stops configuration; a bare ERROR is not a message() mode and would only be printed
+    endif()
     file(GLOB TENSORRT_SRCS src/contrib/subgraph/*.cc)
     include_directories(${TENSORRT_INCLUDE_DIR})
     list(APPEND RUNTIME_SRCS ${TENSORRT_SRCS})
-    find_library(TENSORRT_NVINFER_LIBRARY nvinfer ${TENSORRT_LIB_DIR})
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_NVINFER_LIBRARY})
+    list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR})
     set_source_files_properties(${RUNTIME_GRAPH_SRCS}
             PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_TENSORRT")
 endif()
diff --git a/nnvm/python/nnvm/compiler/build_module.py b/nnvm/python/nnvm/compiler/build_module.py
@@ -335,7 +335,7 @@ def build(graph, target=None, shape=None, dtype="float32",
         graph = _annotate_graph(graph, device_target,
                                 AnnotationType.DEVICE_TARGET)
         # Apply optimization
-        graph = optimize(graph, shape, dtype, layout)
+        graph = optimize(graph, shape, dtype, layout, target)
 
         # Clear extra params without nodes.
         _remove_noref_params(params, graph)

diff --git a/nnvm/src/compiler/graph_compile.cc b/nnvm/src/compiler/graph_compile.cc
@@ -321,11 +321,15 @@ nnvm::Graph GraphCompile(const nnvm::Graph& g) {
     ret.attrs["device_index"] = std::make_shared<any>(std::move(device_vec));
   }
   // Setup module.
-  static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target");
-  tvm::runtime::Module module =
-      fbuild(tvm::Map<std::string, Array<tvm::LoweredFunc>>(
-                 tar_func_map.begin(), tar_func_map.end()),
-             "", target_host);
+  tvm::runtime::Module module;
+  // When using external accelerators such as TensorRT, there might not be any
+  // functions to compile in the graph. In that case, an empty module is used.
+  if (!tar_func_map.empty()) {
+    static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target");
+    module = fbuild(tvm::Map<std::string, Array<tvm::LoweredFunc>>(
+                        tar_func_map.begin(), tar_func_map.end()),
+                    "", target_host);
+  }
 
   ret.attrs["module"] = std::make_shared<any>(std::move(module));
   ret = nnvm::ApplyPass(ret, "PlanMemory");

diff --git a/tests/python/tensorrt/test_avg_pool2d.py b/tests/python/tensorrt/test_avg_pool2d.py
@@ -21,21 +21,21 @@
 import nnvm
 import tvm
 from tvm.contrib import graph_runtime
+import json
 
 
 def test_avg_pool2d():
 
     # Generate the data
     np.random.seed(0)
     input_shape = [1, 1, 28, 28]
-    output_shape = [1, 10]
+    output_shape = [1, 1, 28, 28]
     data = np.random.random(input_shape).astype('float32')
 
     # Baseline model in MXNet
     net = gluon.nn.HybridSequential()
     with net.name_scope():
         net.add(gluon.nn.AvgPool2D(pool_size=3, strides=1, padding=1))
-        net.add(gluon.nn.Dense(10))
     net.collect_params().initialize(mx.init.Xavier(), ctx=mx.cpu())
     net.hybridize()
     baseline_input = mx.nd.array(data, ctx=mx.cpu())
@@ -48,6 +48,17 @@ def test_avg_pool2d():
         graph, lib, params = nnvm.compiler.build(sym, target,
                                                  shape={'data': input_shape},
                                                  params=params)
+
+    # Verify that TRT subgraphs are partitioned
+    def check_trt_used(graph):
+        graph = json.loads(graph.json())
+        num_trt_subgraphs = sum([1 for n in graph['nodes'] if n['op'] == '_tensorrt_subgraph_op'])
+        assert num_trt_subgraphs == 1
+    check_trt_used(graph)
+
+    # Execute
+    if not tvm.module.enabled("gpu"):
+        return
     compiled_model = graph_runtime.create(graph, lib, tvm.gpu())
     compiled_input = tvm.nd.array(data, ctx=tvm.gpu())
     compiled_model.set_input('data', compiled_input)

diff --git a/tests/python/tensorrt/test_cross_compile.py b/tests/python/tensorrt/test_cross_compile.py
@@ -22,6 +22,7 @@
 import tvm
 from tvm.contrib import graph_runtime
 from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
+import json
 
 batch_size = 1
 
@@ -96,6 +97,14 @@ def get_data_shape(model_name):
     with nnvm.compiler.build_config(opt_level=opt_level, ext_accel=ext_accel):
         graph, lib, params = nnvm.compiler.build(
             net, target, shape={"data": data_shape}, params=params, target_host=target_host)
+
+    # Verify that TRT subgraphs are partitioned
+    def check_trt_used(graph):
+        graph = json.loads(graph.json())
+        num_trt_subgraphs = sum([1 for n in graph['nodes'] if n['op'] == '_tensorrt_subgraph_op'])
+        assert num_trt_subgraphs >= 1
+    check_trt_used(graph)
+
     print("===========Compiling model %s took %.3fs" % (network, time.time() - start))
 
     print("===========Saving lowered graph for model %s" % network)

diff --git a/tests/python/tensorrt/test_tensorrt.py b/tests/python/tensorrt/test_tensorrt.py
@@ -19,6 +19,7 @@
 import logging
 logging.basicConfig(level=logging.INFO)
 import numpy as np
+import json
 
 import nnvm.compiler
 import nnvm.testing
@@ -30,15 +31,11 @@
 
 
 def test_tensorrt_image_classification_models():
-    def compile_model(graph, params, data_shapes, subgraph_backend=None, op_names=None, **kwargs):
+    def compile_model(graph, params, data_shapes, **kwargs):
         _, output_shapes = nnvm.compiler.graph_util.infer_shape(graph, **data_shapes)
         assert len(output_shapes) == 1
-        flags = kwargs
-        if subgraph_backend is not None and op_names is not None:
-            graph = nnvm.subgraph._partition(graph, subgraph_backend, op_names)
-            flags = {}
         target = tvm.target.cuda()
-        with nnvm.compiler.build_config(opt_level=3, **flags):
+        with nnvm.compiler.build_config(opt_level=3, **kwargs):
             graph, lib, params = nnvm.compiler.build(
                 graph, target, shape=data_shapes, params=params)
         return graph, lib, params, output_shapes[0]
@@ -60,7 +57,16 @@ def copy_params(params):
     def check_trt_model(baseline_module, baseline_params, graph, params, data_shape,
                         subgraph_backend=None, op_names=None, **kwargs):
         trt_graph, trt_lib, trt_params, output_shape = compile_model(graph, params, {'data': data_shape},
-                                                                     subgraph_backend, op_names, **kwargs)
+                                                                     **kwargs)
+        # Verify that TRT subgraphs are partitioned
+        def check_trt_used(graph):
+            graph = json.loads(graph.json())
+            num_trt_subgraphs = sum([1 for n in graph['nodes'] if n['op'] == '_tensorrt_subgraph_op'])
+            assert num_trt_subgraphs >= 1
+        check_trt_used(trt_graph)
+
+        if not tvm.module.enabled("gpu"):
+            return
         data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
         baseline_out = get_output(baseline_module, data, baseline_params, output_shape)
         trt_module = graph_runtime.create(trt_graph, trt_lib, tvm.gpu())
@@ -94,7 +100,8 @@ def check_trt_model(baseline_module, baseline_params, graph, params, data_shape,
                 shape={'data': data_shape}, params=copy_params(params))
         baseline_module = graph_runtime.create(baseline_graph, baseline_lib, tvm.gpu())
 
-        # test whole graph run using tensorrt, nnvm.compiler.build_config has graph partitioning turned on
+        # Test whole graph run using tensorrt. nnvm.compiler.build_config has
+        # graph partitioning turned on when ext_accel='tensorrt'.
         check_trt_model(baseline_module, baseline_params, nnvm.graph.load_json(graph_json_str),
                         copy_params(params), data_shape, ext_accel='tensorrt')
 

diff --git a/tests/scripts/task_python_tensorrt.sh b/tests/scripts/task_python_tensorrt.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e  # abort on the first command that exits non-zero
+set -u  # treat expansion of an unset variable as an error
+
+export PYTHONPATH=nnvm/python:python:topi/python  # relative paths; presumably the script must be run from the repository root (confirm)
+export LD_LIBRARY_PATH="build:${LD_LIBRARY_PATH:-}"  # ':-' falls back to an empty string so 'set -u' does not fail when the variable is unset
+
+rm -rf python/tvm/*.pyc python/tvm/*/*.pyc python/tvm/*/*/*.pyc  # delete .pyc files up to three directory levels under python/tvm
+
+TVM_FFI=ctypes python3 -m nose -v tests/python/tensorrt  # run tests/python/tensorrt under nose with TVM_FFI=ctypes set for this command only