From 59113cfa52a46a5ea0d889bc88e467d0d3350f71 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 21 Oct 2020 15:22:54 -0700 Subject: [PATCH] feat(//py): Initial compiliant implementation of the to_backend api for PyTorch Users can now use a direct PyTorch integration by just importing the trtorch package. The only difference between torch._C._jit_to_tensorrt and trtorch.compile is that you need to use the trtorch.TensorRTCompileSpec constructor to build a wrapper around your spec dictionary Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- py/setup.py | 8 +- py/trtorch/__init__.py | 1 + py/trtorch/_compile_spec.py | 93 ++++++++++-- py/trtorch/_compiler.py | 4 +- py/trtorch/csrc/register_tensorrt_classes.cpp | 47 ++++++ py/trtorch/csrc/tensorrt_backend.cpp | 86 +++++++++++ py/trtorch/csrc/tensorrt_backend.h | 19 +++ py/trtorch/csrc/tensorrt_classes.cpp | 143 ++++++++++++++++++ py/trtorch/csrc/tensorrt_classes.h | 101 +++++++++++++ py/trtorch/csrc/trtorch_py.cpp | 106 +------------ tests/BUILD | 3 +- tests/py/BUILD | 16 +- tests/py/model_test_case.py | 19 +++ tests/py/test_api.py | 16 +- tests/py/test_to_backend_api.py | 44 ++++++ 15 files changed, 573 insertions(+), 133 deletions(-) create mode 100644 py/trtorch/csrc/register_tensorrt_classes.cpp create mode 100644 py/trtorch/csrc/tensorrt_backend.cpp create mode 100644 py/trtorch/csrc/tensorrt_backend.h create mode 100644 py/trtorch/csrc/tensorrt_classes.cpp create mode 100644 py/trtorch/csrc/tensorrt_classes.h create mode 100644 tests/py/model_test_case.py create mode 100644 tests/py/test_to_backend_api.py diff --git a/py/setup.py b/py/setup.py index 53f85dada1..01dfdfdfb7 100644 --- a/py/setup.py +++ b/py/setup.py @@ -156,7 +156,12 @@ def run(self): ext_modules = [ cpp_extension.CUDAExtension('trtorch._C', - ['trtorch/csrc/trtorch_py.cpp'], + [ + 'trtorch/csrc/trtorch_py.cpp', + 'trtorch/csrc/tensorrt_backend.cpp', + 'trtorch/csrc/tensorrt_classes.cpp', + 'trtorch/csrc/register_tensorrt_classes.cpp', + ], library_dirs=[ (dir_path + '/trtorch/lib/'), "/opt/conda/lib/python3.6/config-3.6m-x86_64-linux-gnu" @@ -165,6 +170,7 @@ def run(self): "trtorch" ], include_dirs=[ + dir_path + "trtorch/csrc", dir_path + "/../", dir_path + "/../bazel-TRTorch/external/tensorrt/include", ], diff --git a/py/trtorch/__init__.py b/py/trtorch/__init__.py index 88e1ca6db9..772b6ff08f 100644 --- a/py/trtorch/__init__.py +++ b/py/trtorch/__init__.py @@ -9,6 +9,7 @@ from trtorch._version import __version__ from trtorch._compiler import * +from trtorch._compile_spec import TensorRTCompileSpec from trtorch._types import * from trtorch import logging diff --git a/py/trtorch/_compile_spec.py b/py/trtorch/_compile_spec.py index aa060bd085..6f0ff49d4a 100644 --- a/py/trtorch/_compile_spec.py +++ b/py/trtorch/_compile_spec.py @@ -73,16 +73,21 @@ def _parse_op_precision(precision: Any) -> _types.dtype: def _parse_device_type(device: Any) -> _types.DeviceType: if isinstance(device, torch.device): - if torch.device.type == 'cuda': + if device.type == 'cuda': return _types.DeviceType.gpu else: - raise TypeError("Valid device choices are GPU (and DLA if on Jetson platforms) however got device type" + str(device.type)) - + ValueError("Got a device type other than GPU or DLA (type: " + str(device.type) + ")") elif isinstance(device, _types.DeviceType): return device - + elif isinstance(device, str): + if device == "gpu" or device == "GPU": + return _types.DeviceType.gpu + elif device == "dla" or device == "DLA": + return _types.DeviceType.dla + else: + 
ValueError("Got a device type other than GPU or DLA (type: " + str(device) + ")") else: - raise TypeError("Device specification must be of type torch.device or trtorch.DeviceType, but got: " + str(type(device))) + raise TypeError("Device specification must be of type torch.device, string or trtorch.DeviceType, but got: " + str(type(device))) def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: info = trtorch._C.CompileSpec() @@ -110,11 +115,11 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: assert isinstance(compile_spec["allow_gpu_fallback"], bool) info.allow_gpu_fallback = compile_spec["allow_gpu_fallback"] - if "device" in compile_spec: - info.device = _parse_device_type(compile_spec["device"]) + if "device_type" in compile_spec: + info.device = _parse_device_type(compile_spec["device_type"]) if "capability" in compile_spec: - assert isinstance(compile_spec["capability"], type.EngineCapability) + assert isinstance(compile_spec["capability"], _types.EngineCapability) info.capability = compile_spec["capability"] if "num_min_timing_iters" in compile_spec: @@ -133,4 +138,74 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: assert type(compile_spec["max_batch_size"]) is int info.max_batch_size = compile_spec["max_batch_size"] - return info \ No newline at end of file + return info + +def TensorRTCompileSpec(compile_spec: Dict[str, Any]): + """ + Utility to create a formated spec dictionary for using the PyTorch TensorRT backend + + Args: + compile_spec (dict): Compilation settings including operating precision, target device, etc. + One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs + to the graph. All other keys are optional. Entries for each method to be compiled. + + .. code-block:: py + + CompileSpec = { + "forward" : trtorch.TensorRTCompileSpec({ + "input_shapes": [ + (1, 3, 224, 224), # Static input shape for input #1 + { + "min": (1, 3, 224, 224), + "opt": (1, 3, 512, 512), + "max": (1, 3, 1024, 1024) + } # Dynamic input shape for input #2 + ], + "op_precision": torch.half, # Operating precision set to FP16 + "refit": false, # enable refit + "debug": false, # enable debuggable engine + "strict_types": false, # kernels should strictly run in operating precision + "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels + "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels + "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels + "workspace_size": 0, # Maximum size of workspace given to TensorRT + "max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set) + }) + } + + Input Sizes can be specified as torch sizes, tuples or lists. Op precisions can be specified using + torch datatypes or trtorch datatypes and you can use either torch devices or the trtorch device type enum + to select device type. 
+ + Returns: + torch.classes.tensorrt.CompileSpec: List of methods and formated spec objects to be provided to ``torch._C._jit_to_tensorrt`` + """ + + parsed_spec = _parse_compile_spec(compile_spec) + + backend_spec = torch.classes.tensorrt.CompileSpec() + + for i in parsed_spec.input_ranges: + ir = torch.classes.tensorrt.InputRange() + ir.set_min(i.min) + ir.set_opt(i.opt) + ir.set_max(i.max) + backend_spec.append_input_range(ir) + + backend_spec.set_op_precision(int(parsed_spec.op_precision)) + backend_spec.set_refit(parsed_spec.refit) + backend_spec.set_debug(parsed_spec.debug) + backend_spec.set_refit(parsed_spec.refit) + backend_spec.set_strict_types(parsed_spec.strict_types) + backend_spec.set_allow_gpu_fallback(parsed_spec.allow_gpu_fallback) + backend_spec.set_device(int(parsed_spec.device)) + backend_spec.set_capability(int(parsed_spec.capability)) + backend_spec.set_num_min_timing_iters(parsed_spec.num_min_timing_iters) + backend_spec.set_num_avg_timing_iters(parsed_spec.num_avg_timing_iters) + backend_spec.set_workspace_size(parsed_spec.workspace_size) + backend_spec.set_max_batch_size(parsed_spec.max_batch_size) + + return backend_spec + diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py index 1c35dbe4a1..443db12a7b 100644 --- a/py/trtorch/_compiler.py +++ b/py/trtorch/_compiler.py @@ -39,7 +39,7 @@ def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.Scri "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels @@ -91,7 +91,7 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels diff --git a/py/trtorch/csrc/register_tensorrt_classes.cpp b/py/trtorch/csrc/register_tensorrt_classes.cpp new file mode 100644 index 0000000000..7d66ca6580 --- /dev/null +++ b/py/trtorch/csrc/register_tensorrt_classes.cpp @@ -0,0 +1,47 @@ +#include "tensorrt_classes.h" + +namespace trtorch { +namespace backend { +namespace { + void RegisterTRTCompileSpec() { + #define ADD_FIELD_GET_SET_REGISTRATION(registry, class_name, field_name) \ + (registry).def("set_"#field_name, &class_name::set_##field_name); \ + (registry).def("get_"#field_name, &class_name::get_##field_name); + + static 
auto TRTORCH_UNUSED TRTInputRangeTSRegistrtion = torch::class_("tensorrt", "InputRange") + .def(torch::init<>()); + + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, min); + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, opt); + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, max); + + static auto TRTORCH_UNUSED TRTCompileSpecTSRegistrtion = torch::class_("tensorrt", "CompileSpec") + .def(torch::init<>()) + .def("append_input_range", &trtorch::pyapi::CompileSpec::appendInputRange) + .def("__str__", &trtorch::pyapi::CompileSpec::stringify); + + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, op_precision); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, refit); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, debug); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, strict_types); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, allow_gpu_fallback); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, device); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, capability); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, num_min_timing_iters); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, num_avg_timing_iters); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, workspace_size); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, max_batch_size); + } + +struct TRTTSRegistrations { + TRTTSRegistrations() { + RegisterTRTCompileSpec(); + } +}; + +static TRTTSRegistrations register_trt_classes = TRTTSRegistrations(); +} +} // namespace backend +} // namespace trtorch + + diff --git a/py/trtorch/csrc/tensorrt_backend.cpp b/py/trtorch/csrc/tensorrt_backend.cpp new file mode 100644 index 0000000000..1d679450c6 --- /dev/null +++ b/py/trtorch/csrc/tensorrt_backend.cpp @@ -0,0 +1,86 @@ +#include "torch/csrc/jit/passes/lower_graph.h" + +#include "tensorrt_backend.h" +#include "tensorrt_classes.h" + +#include "core/compiler.h" +#include "core/lowering/lowering.h" +#include "core/runtime/runtime.h" + +namespace trtorch { +namespace backend { + +c10::IValue TensorRTBackend::preprocess(c10::IValue mod, c10::impl::GenericDict method_compile_spec) { + auto mod_ = mod.toModule(); + LOG_DEBUG("Placing module in eval mode if not already"); + mod_.eval(); + mod_ = core::lowering::LowerModule(mod_); + + auto spec = + c10::impl::toTypedDict(method_compile_spec); + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + TRTORCH_CHECK(core::CheckMethodOperatorSupport(mod.toModule(), it->key()), + "Method " << it->key() << "cannot be compiled by TRTorch"); + } + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + const auto& method_name = it->key(); + auto method = mod_.get_method(method_name); + auto graph = method.graph(); + core::lowering::LowerGraph(graph); + } + + return mod_._ivalue(); +} + +c10::impl::GenericDict TensorRTBackend::compile(c10::IValue processed_mod, c10::impl::GenericDict method_compile_spec) { + auto mod = processed_mod.toModule(); + auto spec = + c10::impl::toTypedDict(method_compile_spec); + + auto 
handles = c10::impl::GenericDict(c10::StringType::get(), c10::getCustomClassType>()); + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + const auto& method_name = it->key(); + auto method = mod.get_method(method_name); + auto g = method.graph(); + + auto raw_spec = it->value().toGenericDict().at(it->key()).toCustomClass(); + LOG_DEBUG(raw_spec->stringify()); + auto cfg = raw_spec->toInternalCompileSpec(); + auto convert_cfg = std::move(cfg.convert_info); + auto graph_and_ivalues = torch::jit::LowerGraph(*g, mod._ivalue()); + + g = graph_and_ivalues.first; + auto params = graph_and_ivalues.second; + auto named_params = core::conversion::get_named_params(g->inputs(), params); + + auto serialized_engine = core::conversion::ConvertBlockToEngine(g->block(), convert_cfg, named_params); + auto engine_handle = c10::make_intrusive(it->key(), serialized_engine); + handles.insert(method.name(), at::IValue(engine_handle)); + } + + return c10::impl::toGenericDict(handles); +} + + +c10::impl::GenericList TensorRTBackend::execute(c10::IValue handle, c10::impl::GenericList inputs) { + TRTORCH_ASSERT(inputs.size() > 0, "Trying to execute on empty list of arguments"); + auto engine = handle.toCustomClass(); + std::vector in_vec; + for (size_t i = 0, e = inputs.size(); i < e; ++i) { + c10::IValue val = inputs[i]; + TRTORCH_CHECK(val.isTensor(), "TensorRT currently only accepts Tensors as inputs"); + in_vec.push_back(val.toTensor()); + } + auto outputs = core::runtime::execute_engine(in_vec, engine); + return c10::impl::toList(c10::List(outputs)); +} + +namespace { +static auto reg = torch::jit::backend("tensorrt"); +} + +} // namespace backend +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_backend.h b/py/trtorch/csrc/tensorrt_backend.h new file mode 100644 index 0000000000..6150604b3e --- /dev/null +++ b/py/trtorch/csrc/tensorrt_backend.h @@ -0,0 +1,19 @@ +#pragma once +#include "torch/csrc/jit/api/module.h" +#include "torch/csrc/jit/backends/backend.h" + +namespace trtorch { +namespace backend { + +class TensorRTBackend: public torch::jit::PyTorchBackendInterface { + public: + explicit TensorRTBackend() {} + virtual ~TensorRTBackend() = default; + + c10::IValue preprocess(c10::IValue mod, c10::impl::GenericDict method_compile_spec) override; + c10::impl::GenericDict compile(c10::IValue processed_mod, c10::impl::GenericDict method_compile_spec) override; + c10::impl::GenericList execute(c10::IValue handle, c10::impl::GenericList inputs) override; +}; + +} // namespace backend +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_classes.cpp b/py/trtorch/csrc/tensorrt_classes.cpp new file mode 100644 index 0000000000..43e63d553b --- /dev/null +++ b/py/trtorch/csrc/tensorrt_classes.cpp @@ -0,0 +1,143 @@ + +#include "tensorrt_classes.h" + +namespace trtorch { +namespace pyapi { + +std::string to_str(InputRange& value) { + auto vec_to_str = [](std::vector shape) -> std::string { + std::stringstream ss; + ss << '['; + for(auto i : shape) { + ss << i << ','; + } + ss << ']'; + return ss.str(); + }; + + std::stringstream ss; + ss << " {" << std::endl; + ss << " min: " << vec_to_str(value.min) << ',' << std::endl; + ss << " opt: " << vec_to_str(value.opt) << ',' << std::endl; + ss << " max: " << vec_to_str(value.max) << ',' << std::endl; + ss << " }" << std::endl; + return ss.str(); +} + +std::string to_str(DataType value) { + switch (value) { + case DataType::kHalf: + return "Half"; + case DataType::kChar: + return 
"Int8"; + case DataType::kFloat: + default: + return "Float"; + } +} + +nvinfer1::DataType toTRTDataType(DataType value) { + switch (value) { + case DataType::kChar: + return nvinfer1::DataType::kINT8; + case DataType::kHalf: + return nvinfer1::DataType::kHALF; + case DataType::kFloat: + default: + return nvinfer1::DataType::kFLOAT; + } +} + +std::string to_str(DeviceType value) { + switch (value) { + case DeviceType::kDLA: + return "DLA"; + case DeviceType::kGPU: + default: + return "GPU"; + } +} + +nvinfer1::DeviceType toTRTDeviceType(DeviceType value) { + switch (value) { + case DeviceType::kDLA: + return nvinfer1::DeviceType::kDLA; + case DeviceType::kGPU: + default: + return nvinfer1::DeviceType::kGPU; + } +} + +std::string to_str(EngineCapability value) { + switch (value) { + case EngineCapability::kSAFE_GPU: + return "Safe GPU"; + case EngineCapability::kSAFE_DLA: + return "Safe DLA"; + case EngineCapability::kDEFAULT: + default: + return "Default"; + } +} + +nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { + switch (value) { + case EngineCapability::kSAFE_DLA: + return nvinfer1::EngineCapability::kSAFE_DLA; + case EngineCapability::kSAFE_GPU: + return nvinfer1::EngineCapability::kSAFE_GPU; + case EngineCapability::kDEFAULT: + default: + return nvinfer1::EngineCapability::kDEFAULT; + } +} + +core::CompileSpec CompileSpec::toInternalCompileSpec() { + std::vector internal_input_ranges; + for (auto i : input_ranges) { + internal_input_ranges.push_back(i.toInternalInputRange()); + } + auto info = core::CompileSpec(internal_input_ranges); + info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); + info.convert_info.engine_settings.refit = refit; + info.convert_info.engine_settings.debug = debug; + info.convert_info.engine_settings.strict_types = strict_types; + info.convert_info.engine_settings.allow_gpu_fallback = allow_gpu_fallback; + info.convert_info.engine_settings.device = toTRTDeviceType(device); + info.convert_info.engine_settings.capability = toTRTEngineCapability(capability); + TRTORCH_CHECK(num_min_timing_iters >= 0, "num_min_timing_iters must be 0 or greater"); + info.convert_info.engine_settings.num_min_timing_iters = num_min_timing_iters; + TRTORCH_CHECK(num_avg_timing_iters >= 0, "num_avg_timing_iters must be 0 or greater"); + info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters; + TRTORCH_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater"); + info.convert_info.engine_settings.workspace_size = workspace_size; + TRTORCH_CHECK(max_batch_size >= 0, "max_batch_size must be 0 or greater"); + info.convert_info.engine_settings.max_batch_size = max_batch_size; + return info; +} + +std::string CompileSpec::stringify() { + std::stringstream ss; + ss << "TensorRT Compile Spec: {" << std::endl; + ss << " \"Input Shapes\": [" << std::endl; + for (auto i : input_ranges) { + ss << to_str(i); + } + ss << " ]" << std::endl; + ss << " \"Op Precision\": " << to_str(op_precision) << std::endl; + ss << " \"Refit\": " << refit << std::endl; + ss << " \"Debug\": " << debug << std::endl; + ss << " \"Strict Types\": " << strict_types << std::endl; + ss << " \"Allow GPU Fallback\": " << allow_gpu_fallback << std::endl; + ss << " \"Device\": " << to_str(capability) << std::endl; + ss << " \"Engine Capability\": " << to_str(capability) << std::endl; + ss << " \"Num Min Timing Iters\": " << num_min_timing_iters << std::endl; + ss << " \"Num Avg Timing Iters\": " << num_avg_timing_iters << std::endl; + ss << " 
\"Workspace Size\": " << workspace_size << std::endl; + ss << " \"Max Batch Size\": " << max_batch_size << std::endl; + ss << "}"; + return ss.str(); +} + +} // namespace pyapi +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_classes.h b/py/trtorch/csrc/tensorrt_classes.h new file mode 100644 index 0000000000..e98a093358 --- /dev/null +++ b/py/trtorch/csrc/tensorrt_classes.h @@ -0,0 +1,101 @@ +#pragma once + +#include "core/compiler.h" +#include "core/conversion/conversion.h" +#include "torch/torch.h" +#include "torch/script.h" +#include "torch/custom_class.h" + +namespace trtorch { +namespace pyapi { + +#define ADD_FIELD_GET_SET(field_name, type) \ + void set_##field_name(type val) {field_name = val;} \ + type get_##field_name() {return field_name;} + +struct InputRange : torch::CustomClassHolder { + std::vector min; + std::vector opt; + std::vector max; + + core::conversion::InputRange toInternalInputRange() { + return core::conversion::InputRange(min, opt, max); + } + + ADD_FIELD_GET_SET(min, std::vector); + ADD_FIELD_GET_SET(opt, std::vector); + ADD_FIELD_GET_SET(max, std::vector); +}; + +std::string to_str(InputRange& value); + + +enum class DataType : int8_t { + kFloat, + kHalf, + kChar, +}; + +std::string to_str(DataType value); +nvinfer1::DataType toTRTDataType(DataType value); + +enum DeviceType : int8_t { + kGPU, + kDLA, +}; + +std::string to_str(DeviceType value); +nvinfer1::DeviceType toTRTDeviceType(DeviceType value); + +enum class EngineCapability : int8_t { + kDEFAULT, + kSAFE_GPU, + kSAFE_DLA, +}; + +std::string to_str(EngineCapability value); +nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value); + +// TODO: Make this error message more informative +#define ADD_ENUM_GET_SET(field_name, type, max_val) \ + void set_##field_name(int64_t val) { \ + TRTORCH_CHECK(val < max_val, "Invalid enum value for field"); \ + field_name = static_cast(val); \ + } \ + int64_t get_##field_name() {return static_cast(field_name);} + +struct CompileSpec : torch::CustomClassHolder { + core::CompileSpec toInternalCompileSpec(); + std::string stringify(); + void appendInputRange(const c10::intrusive_ptr& ir) { + input_ranges.push_back(*ir); + } + + ADD_ENUM_GET_SET(op_precision, DataType, 3); + ADD_FIELD_GET_SET(refit, bool); + ADD_FIELD_GET_SET(debug, bool); + ADD_FIELD_GET_SET(strict_types, bool); + ADD_FIELD_GET_SET(allow_gpu_fallback, bool); + ADD_ENUM_GET_SET(device, DeviceType, 2); + ADD_ENUM_GET_SET(capability, EngineCapability, 3); + ADD_FIELD_GET_SET(num_min_timing_iters, int64_t); + ADD_FIELD_GET_SET(num_avg_timing_iters, int64_t); + ADD_FIELD_GET_SET(workspace_size, int64_t); + ADD_FIELD_GET_SET(max_batch_size, int64_t); + + std::vector input_ranges; + DataType op_precision = DataType::kFloat; + bool refit = false; + bool debug = false; + bool strict_types = false; + bool allow_gpu_fallback = true; + DeviceType device = DeviceType::kGPU; + EngineCapability capability = EngineCapability::kDEFAULT; + int64_t num_min_timing_iters = 2; + int64_t num_avg_timing_iters = 1; + int64_t workspace_size = 0; + int64_t max_batch_size = 0; +}; + +} // namespace pyapi +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/trtorch_py.cpp b/py/trtorch/csrc/trtorch_py.cpp index da6d2b2688..4f9363542d 100644 --- a/py/trtorch/csrc/trtorch_py.cpp +++ b/py/trtorch/csrc/trtorch_py.cpp @@ -1,11 +1,12 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" -//TODO: Remove when we have access to PyTorch to_backend 
autoregistration -#include "core/backend.h" + +#include "tensorrt_classes.h" #include "core/compiler.h" #include "core/conversion/conversion.h" #include "torch/torch.h" #include "torch/script.h" +#include "torch/custom_class.h" #include "torch/csrc/jit/python/pybind_utils.h" #include "Python.h" @@ -14,103 +15,6 @@ namespace py = pybind11; namespace trtorch { namespace pyapi { -struct InputRange { - std::vector min; - std::vector opt; - std::vector max; - - core::conversion::InputRange toInternalInputRange() { - return core::conversion::InputRange(min, opt, max); - } -}; - -enum class DataType : int8_t { - kFloat, - kHalf, - kChar, -}; - -nvinfer1::DataType toTRTDataType(DataType value) { - switch (value) { - case DataType::kChar: - return nvinfer1::DataType::kINT8; - case DataType::kHalf: - return nvinfer1::DataType::kHALF; - case DataType::kFloat: - default: - return nvinfer1::DataType::kFLOAT; - } -} - -enum DeviceType : int8_t { - kGPU, - kDLA, -}; - -nvinfer1::DeviceType toTRTDeviceType(DeviceType value) { - switch (value) { - case DeviceType::kDLA: - return nvinfer1::DeviceType::kDLA; - case DeviceType::kGPU: - default: - return nvinfer1::DeviceType::kGPU; - } -} - -enum class EngineCapability : int8_t { - kDEFAULT, - kSAFE_GPU, - kSAFE_DLA, -}; - -nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { - switch (value) { - case EngineCapability::kSAFE_DLA: - return nvinfer1::EngineCapability::kSAFE_DLA; - case EngineCapability::kSAFE_GPU: - return nvinfer1::EngineCapability::kSAFE_GPU; - case EngineCapability::kDEFAULT: - default: - return nvinfer1::EngineCapability::kDEFAULT; - } -} - -struct CompileSpec { - - core::CompileSpec toInternalCompileSpec() { - for (auto i : input_ranges) { - internal_input_ranges.push_back(i.toInternalInputRange()); - } - auto info = core::CompileSpec(internal_input_ranges); - info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); - info.convert_info.engine_settings.refit = refit; - info.convert_info.engine_settings.debug = debug; - info.convert_info.engine_settings.strict_types = strict_types; - info.convert_info.engine_settings.allow_gpu_fallback = allow_gpu_fallback; - info.convert_info.engine_settings.device = toTRTDeviceType(device); - info.convert_info.engine_settings.capability = toTRTEngineCapability(capability); - info.convert_info.engine_settings.num_min_timing_iters = num_min_timing_iters; - info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters; - info.convert_info.engine_settings.workspace_size = workspace_size; - info.convert_info.engine_settings.max_batch_size = max_batch_size; - return info; - } - - std::vector input_ranges; - std::vector internal_input_ranges; - DataType op_precision = DataType::kFloat; - bool refit = false; - bool debug = false; - bool strict_types = false; - bool allow_gpu_fallback = true; - DeviceType device = DeviceType::kGPU; - EngineCapability capability = EngineCapability::kDEFAULT; - uint64_t num_min_timing_iters = 2; - uint64_t num_avg_timing_iters = 1; - uint64_t workspace_size = 0; - uint64_t max_batch_size = 0; -}; - torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec& info) { py::gil_scoped_acquire gil; auto trt_mod = core::CompileGraph(mod, info.toInternalCompileSpec()); @@ -227,11 +131,7 @@ PYBIND11_MODULE(_C, m) { .value("INFO", core::util::logging::LogLevel::kINFO) .value("DEBUG", core::util::logging::LogLevel::kDEBUG) .export_values(); - - //TODO: Remove when we have access to PyTorch autoregistration - 
//m.def("to_tensorrt", backend::GetTensorRTBackend().generateToBackendFn()); } - } // namespace pyapi } // namespace trtorch diff --git a/tests/BUILD b/tests/BUILD index f784798a57..81a43aecbc 100644 --- a/tests/BUILD +++ b/tests/BUILD @@ -17,6 +17,7 @@ test_suite( test_suite( name = "python_api_tests", tests = [ - "//tests/py:test_api" + "//tests/py:test_api", + "//tests/py:test_to_backend_api" ] ) \ No newline at end of file diff --git a/tests/py/BUILD b/tests/py/BUILD index 054e1cbbb3..0d643d65d8 100644 --- a/tests/py/BUILD +++ b/tests/py/BUILD @@ -5,9 +5,21 @@ load("@py_test_deps//:requirements.bzl", "requirement") py_test( name = "test_api", srcs = [ - "test_api.py" + "test_api.py", + "model_test_case.py" ], deps = [ requirement("torchvision") ] -) \ No newline at end of file +) + +py_test( + name = "test_to_backend_api", + srcs = [ + "test_to_backend_api.py", + "model_test_case.py" + ], + deps = [ + requirement("torchvision") + ] +) diff --git a/tests/py/model_test_case.py b/tests/py/model_test_case.py new file mode 100644 index 0000000000..3730f6507b --- /dev/null +++ b/tests/py/model_test_case.py @@ -0,0 +1,19 @@ +import unittest +import trtorch +import torch +import torchvision.models as models + +class ModelTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', model=None): + super(ModelTestCase, self).__init__(methodName) + self.model = model + self.model.eval().to("cuda") + + @staticmethod + def parametrize(testcase_class, model=None): + testloader = unittest.TestLoader() + testnames = testloader.getTestCaseNames(testcase_class) + suite = unittest.TestSuite() + for name in testnames: + suite.addTest(testcase_class(name, model=model)) + return suite \ No newline at end of file diff --git a/tests/py/test_api.py b/tests/py/test_api.py index e0cd113db6..2d9d2d1e56 100644 --- a/tests/py/test_api.py +++ b/tests/py/test_api.py @@ -3,21 +3,7 @@ import torch import torchvision.models as models - -class ModelTestCase(unittest.TestCase): - def __init__(self, methodName='runTest', model=None): - super(ModelTestCase, self).__init__(methodName) - self.model = model - self.model.eval().to("cuda") - - @staticmethod - def parametrize(testcase_class, model=None): - testloader = unittest.TestLoader() - testnames = testloader.getTestCaseNames(testcase_class) - suite = unittest.TestSuite() - for name in testnames: - suite.addTest(testcase_class(name, model=model)) - return suite +from model_test_case import ModelTestCase class TestCompile(ModelTestCase): def setUp(self): diff --git a/tests/py/test_to_backend_api.py b/tests/py/test_to_backend_api.py new file mode 100644 index 0000000000..e643aa6ce2 --- /dev/null +++ b/tests/py/test_to_backend_api.py @@ -0,0 +1,44 @@ +import unittest +import trtorch +import torch +import torchvision.models as models + +from model_test_case import ModelTestCase + +class TestToBackendLowering(ModelTestCase): + def setUp(self): + self.input = torch.randn((1, 3, 300, 300)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + self.spec = { + "forward": trtorch.TensorRTCompileSpec({ + "input_shapes": [[1, 3, 300, 300]], + "op_precision": torch.float, + "refit": False, + "debug": False, + "strict_types": False, + "allow_gpu_fallback": True, + "device_type": "gpu", + "capability": trtorch.EngineCapability.default, + "num_min_timing_iters": 2, + "num_avg_timing_iters": 1, + "max_batch_size": 0, + }) + } + + def test_to_backend_lowering(self): + trt_mod = torch._C._jit_to_tensorrt(self.scripted_model._c, {"forward": self.spec}) + same = 
(trt_mod.forward(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-3) + +def test_suite(): + suite = unittest.TestSuite() + suite.addTest(TestToBackendLowering.parametrize(TestToBackendLowering, model=models.mobilenet_v2(pretrained=True))) + + return suite + +suite = test_suite() + +runner = unittest.TextTestRunner() +result = runner.run(suite) + +exit(int(not result.wasSuccessful())) \ No newline at end of file
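
---
For reviewers, a minimal usage sketch of the new to_backend path, pieced together from the commit message and tests/py/test_to_backend_api.py above. The model choice, input size, and the nested {"forward": ...} spec dictionary all mirror that test as written rather than a documented contract, so treat this as an illustration under those assumptions, not a definitive API reference.

import torch
import torchvision.models as models
import trtorch  # importing trtorch registers the "tensorrt" backend and its TorchScript classes

# Script the model as usual
model = models.mobilenet_v2(pretrained=True).eval().to("cuda")
scripted_model = torch.jit.script(model)

# Build the wrapped spec with trtorch.TensorRTCompileSpec -- the only extra step
# compared to trtorch.compile -- keyed by the method to compile.
spec = {
    "forward": trtorch.TensorRTCompileSpec({
        "input_shapes": [[1, 3, 300, 300]],
        "op_precision": torch.float,
        "refit": False,
        "debug": False,
        "strict_types": False,
        "allow_gpu_fallback": True,
        "device_type": "gpu",
        "capability": trtorch.EngineCapability.default,
        "num_min_timing_iters": 2,
        "num_avg_timing_iters": 1,
        "max_batch_size": 0,
    })
}

# The test above hands the spec to the backend nested one level deeper,
# i.e. {"forward": spec}; this sketch follows that call shape verbatim.
trt_mod = torch._C._jit_to_tensorrt(scripted_model._c, {"forward": spec})

x = torch.randn((1, 3, 300, 300)).to("cuda")
print(trt_mod.forward(x))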