Skip to content

Commit

Permalink
Fix TRT integration after merge broke it. (apache#41)
Browse files Browse the repository at this point in the history
Remove unnecessary target param and conditions from BuildConfig.optimize().

Since target wasn't getting passed to optimize() from build(), TRT subgraph partitioning was skipped due to unnecessary conditions.

Create empty module when there are no funcs to compile to avoid crash

Update TensorRT tests and add to CI

Fix bug in TRT cmake

Remove changes to CI
  • Loading branch information
trevor-m authored and Xingyu Zhou committed Dec 19, 2019
1 parent 87cf899 commit cfde295
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 23 deletions.
18 changes: 11 additions & 7 deletions cmake/modules/contrib/TensorRT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,20 @@

# TensorRT Module

if(IS_DIRECTORY ${USE_TENSORRT})
set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
message(STATUS "Custom TensorRT path: " ${TENSORRT_ROOT_DIR})
set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT_DIR}/include)
set(TENSORRT_LIB_DIR ${TENSORRT_ROOT_DIR}/lib)
if(USE_TENSORRT)
if(IS_DIRECTORY ${USE_TENSORRT})
set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
endif()
find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES include)
find_library(TENSORRT_LIB_DIR nvinfer HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES lib)
find_package_handle_standard_args(TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIB_DIR)
# Abort configuration when the TensorRT header or library was not located.
# FATAL_ERROR is required here: `ERROR` is not a message() mode keyword, so
# CMake would print it as part of the message text and keep configuring.
if(NOT TENSORRT_FOUND)
  message(FATAL_ERROR "Could not find TensorRT.")
endif()
file(GLOB TENSORRT_SRCS src/contrib/subgraph/*.cc)
include_directories(${TENSORRT_INCLUDE_DIR})
list(APPEND RUNTIME_SRCS ${TENSORRT_SRCS})
find_library(TENSORRT_NVINFER_LIBRARY nvinfer ${TENSORRT_LIB_DIR})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_NVINFER_LIBRARY})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR})
set_source_files_properties(${RUNTIME_GRAPH_SRCS}
PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_TENSORRT")
endif()
2 changes: 1 addition & 1 deletion nnvm/python/nnvm/compiler/build_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def build(graph, target=None, shape=None, dtype="float32",
graph = _annotate_graph(graph, device_target,
AnnotationType.DEVICE_TARGET)
# Apply optimization
graph = optimize(graph, shape, dtype, layout)
graph = optimize(graph, shape, dtype, layout, target)

# Clear extra params without nodes.
_remove_noref_params(params, graph)
Expand Down
14 changes: 9 additions & 5 deletions nnvm/src/compiler/graph_compile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -321,11 +321,15 @@ nnvm::Graph GraphCompile(const nnvm::Graph& g) {
ret.attrs["device_index"] = std::make_shared<any>(std::move(device_vec));
}
// Setup module.
static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target");
tvm::runtime::Module module =
fbuild(tvm::Map<std::string, Array<tvm::LoweredFunc>>(
tar_func_map.begin(), tar_func_map.end()),
"", target_host);
tvm::runtime::Module module;
// When using external accelerators such as TensorRT, there might not be any
// functions to compile in the graph. In that case, an empty module is used.
if (!tar_func_map.empty()) {
static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target");
module = fbuild(tvm::Map<std::string, Array<tvm::LoweredFunc>>(
tar_func_map.begin(), tar_func_map.end()),
"", target_host);
}

ret.attrs["module"] = std::make_shared<any>(std::move(module));
ret = nnvm::ApplyPass(ret, "PlanMemory");
Expand Down
15 changes: 13 additions & 2 deletions tests/python/tensorrt/test_avg_pool2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,21 @@
import nnvm
import tvm
from tvm.contrib import graph_runtime
import json


def test_avg_pool2d():

# Generate the data
np.random.seed(0)
input_shape = [1, 1, 28, 28]
output_shape = [1, 10]
output_shape = [1, 1, 28, 28]
data = np.random.random(input_shape).astype('float32')

# Baseline model in MXNet
net = gluon.nn.HybridSequential()
with net.name_scope():
net.add(gluon.nn.AvgPool2D(pool_size=3, strides=1, padding=1))
net.add(gluon.nn.Dense(10))
net.collect_params().initialize(mx.init.Xavier(), ctx=mx.cpu())
net.hybridize()
baseline_input = mx.nd.array(data, ctx=mx.cpu())
Expand All @@ -48,6 +48,17 @@ def test_avg_pool2d():
graph, lib, params = nnvm.compiler.build(sym, target,
shape={'data': input_shape},
params=params)

# Verify that TRT subgraphs are partitioned
def check_trt_used(graph):
    """Assert that the graph holds exactly one TensorRT subgraph node.

    `graph` must expose a `json()` method returning a JSON document with a
    top-level "nodes" list whose entries each carry an "op" key.
    """
    parsed = json.loads(graph.json())
    trt_nodes = [node for node in parsed['nodes']
                 if node['op'] == '_tensorrt_subgraph_op']
    num_trt_subgraphs = len(trt_nodes)
    assert num_trt_subgraphs == 1
check_trt_used(graph)

# Execute
if not tvm.module.enabled("gpu"):
return
compiled_model = graph_runtime.create(graph, lib, tvm.gpu())
compiled_input = tvm.nd.array(data, ctx=tvm.gpu())
compiled_model.set_input('data', compiled_input)
Expand Down
9 changes: 9 additions & 0 deletions tests/python/tensorrt/test_cross_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import tvm
from tvm.contrib import graph_runtime
from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
import json

batch_size = 1

Expand Down Expand Up @@ -96,6 +97,14 @@ def get_data_shape(model_name):
with nnvm.compiler.build_config(opt_level=opt_level, ext_accel=ext_accel):
graph, lib, params = nnvm.compiler.build(
net, target, shape={"data": data_shape}, params=params, target_host=target_host)

# Verify that TRT subgraphs are partitioned
def check_trt_used(graph):
    """Assert that the graph holds at least one TensorRT subgraph node.

    `graph` must expose a `json()` method returning a JSON document with a
    top-level "nodes" list whose entries each carry an "op" key.
    """
    parsed = json.loads(graph.json())
    trt_nodes = [node for node in parsed['nodes']
                 if node['op'] == '_tensorrt_subgraph_op']
    num_trt_subgraphs = len(trt_nodes)
    assert num_trt_subgraphs >= 1
check_trt_used(graph)

print("===========Compiling model %s took %.3fs" % (network, time.time() - start))

print("===========Saving lowered graph for model %s" % network)
Expand Down
23 changes: 15 additions & 8 deletions tests/python/tensorrt/test_tensorrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import logging
logging.basicConfig(level=logging.INFO)
import numpy as np
import json

import nnvm.compiler
import nnvm.testing
Expand All @@ -30,15 +31,11 @@


def test_tensorrt_image_classification_models():
def compile_model(graph, params, data_shapes, subgraph_backend=None, op_names=None, **kwargs):
def compile_model(graph, params, data_shapes, **kwargs):
_, output_shapes = nnvm.compiler.graph_util.infer_shape(graph, **data_shapes)
assert len(output_shapes) == 1
flags = kwargs
if subgraph_backend is not None and op_names is not None:
graph = nnvm.subgraph._partition(graph, subgraph_backend, op_names)
flags = {}
target = tvm.target.cuda()
with nnvm.compiler.build_config(opt_level=3, **flags):
with nnvm.compiler.build_config(opt_level=3, **kwargs):
graph, lib, params = nnvm.compiler.build(
graph, target, shape=data_shapes, params=params)
return graph, lib, params, output_shapes[0]
Expand All @@ -60,7 +57,16 @@ def copy_params(params):
def check_trt_model(baseline_module, baseline_params, graph, params, data_shape,
subgraph_backend=None, op_names=None, **kwargs):
trt_graph, trt_lib, trt_params, output_shape = compile_model(graph, params, {'data': data_shape},
subgraph_backend, op_names, **kwargs)
**kwargs)
# Verify that TRT subgraphs are partitioned
def check_trt_used(graph):
    """Assert that the graph holds at least one TensorRT subgraph node.

    `graph` must expose a `json()` method returning a JSON document with a
    top-level "nodes" list whose entries each carry an "op" key.
    """
    parsed = json.loads(graph.json())
    trt_nodes = [node for node in parsed['nodes']
                 if node['op'] == '_tensorrt_subgraph_op']
    num_trt_subgraphs = len(trt_nodes)
    assert num_trt_subgraphs >= 1
check_trt_used(trt_graph)

if not tvm.module.enabled("gpu"):
return
data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
baseline_out = get_output(baseline_module, data, baseline_params, output_shape)
trt_module = graph_runtime.create(trt_graph, trt_lib, tvm.gpu())
Expand Down Expand Up @@ -94,7 +100,8 @@ def check_trt_model(baseline_module, baseline_params, graph, params, data_shape,
shape={'data': data_shape}, params=copy_params(params))
baseline_module = graph_runtime.create(baseline_graph, baseline_lib, tvm.gpu())

# test whole graph run using tensorrt, nnvm.compiler.build_config has graph partitioning turned on
# Test whole graph run using tensorrt. nnvm.compiler.build_config has
# graph partitioning turned on when ext_accel='tensorrt'.
check_trt_model(baseline_module, baseline_params, nnvm.graph.load_json(graph_json_str),
copy_params(params), data_shape, ext_accel='tensorrt')

Expand Down
27 changes: 27 additions & 0 deletions tests/scripts/task_python_tensorrt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Fail fast: stop at the first command that errors (-e) and treat any
# reference to an unset variable as an error (-u).
set -eu

# Put the in-tree Python packages on the import path and prepend build/ to the
# dynamic-loader search path. The ":-" default keeps the expansion valid under
# "set -u" when LD_LIBRARY_PATH is not already set.
# NOTE(review): every path here is relative, so this presumably has to be run
# from the repository root; confirm against the CI caller.
PYTHONPATH=nnvm/python:python:topi/python
LD_LIBRARY_PATH="build:${LD_LIBRARY_PATH:-}"
export PYTHONPATH LD_LIBRARY_PATH

# Remove compiled bytecode directly in python/tvm and in its first two levels
# of subdirectories.
rm -rf python/tvm/*.pyc python/tvm/*/*.pyc python/tvm/*/*/*.pyc

# Run the TensorRT tests under nose, with TVM_FFI=ctypes set for this command only.
TVM_FFI=ctypes python3 -m nose -v tests/python/tensorrt

0 comments on commit cfde295

Please sign in to comment.