From 59113cfa52a46a5ea0d889bc88e467d0d3350f71 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Wed, 21 Oct 2020 15:22:54 -0700 Subject: [PATCH] feat(//py): Initial compiliant implementation of the to_backend api for PyTorch Users can now use a direct PyTorch integration by just importing the trtorch package. The only difference between torch._C._jit_to_tensorrt and trtorch.compile is that you need to use the trtorch.TensorRTCompileSpec constructor to build a wrapper around your spec dictionary Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- py/setup.py | 8 +- py/trtorch/__init__.py | 1 + py/trtorch/_compile_spec.py | 93 ++++++++++-- py/trtorch/_compiler.py | 4 +- py/trtorch/csrc/register_tensorrt_classes.cpp | 47 ++++++ py/trtorch/csrc/tensorrt_backend.cpp | 86 +++++++++++ py/trtorch/csrc/tensorrt_backend.h | 19 +++ py/trtorch/csrc/tensorrt_classes.cpp | 143 ++++++++++++++++++ py/trtorch/csrc/tensorrt_classes.h | 101 +++++++++++++ py/trtorch/csrc/trtorch_py.cpp | 106 +------------ tests/BUILD | 3 +- tests/py/BUILD | 16 +- tests/py/model_test_case.py | 19 +++ tests/py/test_api.py | 16 +- tests/py/test_to_backend_api.py | 44 ++++++ 15 files changed, 573 insertions(+), 133 deletions(-) create mode 100644 py/trtorch/csrc/register_tensorrt_classes.cpp create mode 100644 py/trtorch/csrc/tensorrt_backend.cpp create mode 100644 py/trtorch/csrc/tensorrt_backend.h create mode 100644 py/trtorch/csrc/tensorrt_classes.cpp create mode 100644 py/trtorch/csrc/tensorrt_classes.h create mode 100644 tests/py/model_test_case.py create mode 100644 tests/py/test_to_backend_api.py diff --git a/py/setup.py b/py/setup.py index 53f85dada1..01dfdfdfb7 100644 --- a/py/setup.py +++ b/py/setup.py @@ -156,7 +156,12 @@ def run(self): ext_modules = [ cpp_extension.CUDAExtension('trtorch._C', - ['trtorch/csrc/trtorch_py.cpp'], + [ + 'trtorch/csrc/trtorch_py.cpp', + 'trtorch/csrc/tensorrt_backend.cpp', + 'trtorch/csrc/tensorrt_classes.cpp', + 'trtorch/csrc/register_tensorrt_classes.cpp', + ], library_dirs=[ (dir_path + '/trtorch/lib/'), "/opt/conda/lib/python3.6/config-3.6m-x86_64-linux-gnu" @@ -165,6 +170,7 @@ def run(self): "trtorch" ], include_dirs=[ + dir_path + "trtorch/csrc", dir_path + "/../", dir_path + "/../bazel-TRTorch/external/tensorrt/include", ], diff --git a/py/trtorch/__init__.py b/py/trtorch/__init__.py index 88e1ca6db9..772b6ff08f 100644 --- a/py/trtorch/__init__.py +++ b/py/trtorch/__init__.py @@ -9,6 +9,7 @@ from trtorch._version import __version__ from trtorch._compiler import * +from trtorch._compile_spec import TensorRTCompileSpec from trtorch._types import * from trtorch import logging diff --git a/py/trtorch/_compile_spec.py b/py/trtorch/_compile_spec.py index aa060bd085..6f0ff49d4a 100644 --- a/py/trtorch/_compile_spec.py +++ b/py/trtorch/_compile_spec.py @@ -73,16 +73,21 @@ def _parse_op_precision(precision: Any) -> _types.dtype: def _parse_device_type(device: Any) -> _types.DeviceType: if isinstance(device, torch.device): - if torch.device.type == 'cuda': + if device.type == 'cuda': return _types.DeviceType.gpu else: - raise TypeError("Valid device choices are GPU (and DLA if on Jetson platforms) however got device type" + str(device.type)) - + ValueError("Got a device type other than GPU or DLA (type: " + str(device.type) + ")") elif isinstance(device, _types.DeviceType): return device - + elif isinstance(device, str): + if device == "gpu" or device == "GPU": + return _types.DeviceType.gpu + elif device == "dla" or device == "DLA": + return _types.DeviceType.dla + else: + 
ValueError("Got a device type other than GPU or DLA (type: " + str(device) + ")") else: - raise TypeError("Device specification must be of type torch.device or trtorch.DeviceType, but got: " + str(type(device))) + raise TypeError("Device specification must be of type torch.device, string or trtorch.DeviceType, but got: " + str(type(device))) def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: info = trtorch._C.CompileSpec() @@ -110,11 +115,11 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: assert isinstance(compile_spec["allow_gpu_fallback"], bool) info.allow_gpu_fallback = compile_spec["allow_gpu_fallback"] - if "device" in compile_spec: - info.device = _parse_device_type(compile_spec["device"]) + if "device_type" in compile_spec: + info.device = _parse_device_type(compile_spec["device_type"]) if "capability" in compile_spec: - assert isinstance(compile_spec["capability"], type.EngineCapability) + assert isinstance(compile_spec["capability"], _types.EngineCapability) info.capability = compile_spec["capability"] if "num_min_timing_iters" in compile_spec: @@ -133,4 +138,74 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: assert type(compile_spec["max_batch_size"]) is int info.max_batch_size = compile_spec["max_batch_size"] - return info \ No newline at end of file + return info + +def TensorRTCompileSpec(compile_spec: Dict[str, Any]): + """ + Utility to create a formated spec dictionary for using the PyTorch TensorRT backend + + Args: + compile_spec (dict): Compilation settings including operating precision, target device, etc. + One key is required which is ``input_shapes``, describing the input sizes or ranges for inputs + to the graph. All other keys are optional. Entries for each method to be compiled. + + .. code-block:: py + + CompileSpec = { + "forward" : trtorch.TensorRTCompileSpec({ + "input_shapes": [ + (1, 3, 224, 224), # Static input shape for input #1 + { + "min": (1, 3, 224, 224), + "opt": (1, 3, 512, 512), + "max": (1, 3, 1024, 1024) + } # Dynamic input shape for input #2 + ], + "op_precision": torch.half, # Operating precision set to FP16 + "refit": false, # enable refit + "debug": false, # enable debuggable engine + "strict_types": false, # kernels should strictly run in operating precision + "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU + "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels + "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels + "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels + "workspace_size": 0, # Maximum size of workspace given to TensorRT + "max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set) + }) + } + + Input Sizes can be specified as torch sizes, tuples or lists. Op precisions can be specified using + torch datatypes or trtorch datatypes and you can use either torch devices or the trtorch device type enum + to select device type. 
+ + Returns: + torch.classes.tensorrt.CompileSpec: List of methods and formated spec objects to be provided to ``torch._C._jit_to_tensorrt`` + """ + + parsed_spec = _parse_compile_spec(compile_spec) + + backend_spec = torch.classes.tensorrt.CompileSpec() + + for i in parsed_spec.input_ranges: + ir = torch.classes.tensorrt.InputRange() + ir.set_min(i.min) + ir.set_opt(i.opt) + ir.set_max(i.max) + backend_spec.append_input_range(ir) + + backend_spec.set_op_precision(int(parsed_spec.op_precision)) + backend_spec.set_refit(parsed_spec.refit) + backend_spec.set_debug(parsed_spec.debug) + backend_spec.set_refit(parsed_spec.refit) + backend_spec.set_strict_types(parsed_spec.strict_types) + backend_spec.set_allow_gpu_fallback(parsed_spec.allow_gpu_fallback) + backend_spec.set_device(int(parsed_spec.device)) + backend_spec.set_capability(int(parsed_spec.capability)) + backend_spec.set_num_min_timing_iters(parsed_spec.num_min_timing_iters) + backend_spec.set_num_avg_timing_iters(parsed_spec.num_avg_timing_iters) + backend_spec.set_workspace_size(parsed_spec.workspace_size) + backend_spec.set_max_batch_size(parsed_spec.max_batch_size) + + return backend_spec + diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py index 1c35dbe4a1..443db12a7b 100644 --- a/py/trtorch/_compiler.py +++ b/py/trtorch/_compiler.py @@ -39,7 +39,7 @@ def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.Scri "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels @@ -91,7 +91,7 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st "debug": false, # enable debuggable engine "strict_types": false, # kernels should strictly run in operating precision "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - "device": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) + "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels diff --git a/py/trtorch/csrc/register_tensorrt_classes.cpp b/py/trtorch/csrc/register_tensorrt_classes.cpp new file mode 100644 index 0000000000..7d66ca6580 --- /dev/null +++ b/py/trtorch/csrc/register_tensorrt_classes.cpp @@ -0,0 +1,47 @@ +#include "tensorrt_classes.h" + +namespace trtorch { +namespace backend { +namespace { + void RegisterTRTCompileSpec() { + #define ADD_FIELD_GET_SET_REGISTRATION(registry, class_name, field_name) \ + (registry).def("set_"#field_name, &class_name::set_##field_name); \ + (registry).def("get_"#field_name, &class_name::get_##field_name); + + static 
auto TRTORCH_UNUSED TRTInputRangeTSRegistrtion = torch::class_("tensorrt", "InputRange") + .def(torch::init<>()); + + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, min); + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, opt); + ADD_FIELD_GET_SET_REGISTRATION(TRTInputRangeTSRegistrtion, trtorch::pyapi::InputRange, max); + + static auto TRTORCH_UNUSED TRTCompileSpecTSRegistrtion = torch::class_("tensorrt", "CompileSpec") + .def(torch::init<>()) + .def("append_input_range", &trtorch::pyapi::CompileSpec::appendInputRange) + .def("__str__", &trtorch::pyapi::CompileSpec::stringify); + + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, op_precision); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, refit); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, debug); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, strict_types); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, allow_gpu_fallback); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, device); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, capability); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, num_min_timing_iters); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, num_avg_timing_iters); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, workspace_size); + ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistrtion, trtorch::pyapi::CompileSpec, max_batch_size); + } + +struct TRTTSRegistrations { + TRTTSRegistrations() { + RegisterTRTCompileSpec(); + } +}; + +static TRTTSRegistrations register_trt_classes = TRTTSRegistrations(); +} +} // namespace backend +} // namespace trtorch + + diff --git a/py/trtorch/csrc/tensorrt_backend.cpp b/py/trtorch/csrc/tensorrt_backend.cpp new file mode 100644 index 0000000000..1d679450c6 --- /dev/null +++ b/py/trtorch/csrc/tensorrt_backend.cpp @@ -0,0 +1,86 @@ +#include "torch/csrc/jit/passes/lower_graph.h" + +#include "tensorrt_backend.h" +#include "tensorrt_classes.h" + +#include "core/compiler.h" +#include "core/lowering/lowering.h" +#include "core/runtime/runtime.h" + +namespace trtorch { +namespace backend { + +c10::IValue TensorRTBackend::preprocess(c10::IValue mod, c10::impl::GenericDict method_compile_spec) { + auto mod_ = mod.toModule(); + LOG_DEBUG("Placing module in eval mode if not already"); + mod_.eval(); + mod_ = core::lowering::LowerModule(mod_); + + auto spec = + c10::impl::toTypedDict(method_compile_spec); + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + TRTORCH_CHECK(core::CheckMethodOperatorSupport(mod.toModule(), it->key()), + "Method " << it->key() << "cannot be compiled by TRTorch"); + } + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + const auto& method_name = it->key(); + auto method = mod_.get_method(method_name); + auto graph = method.graph(); + core::lowering::LowerGraph(graph); + } + + return mod_._ivalue(); +} + +c10::impl::GenericDict TensorRTBackend::compile(c10::IValue processed_mod, c10::impl::GenericDict method_compile_spec) { + auto mod = processed_mod.toModule(); + auto spec = + c10::impl::toTypedDict(method_compile_spec); + + auto 
handles = c10::impl::GenericDict(c10::StringType::get(), c10::getCustomClassType>()); + + for (auto it = spec.begin(), end = spec.end(); it != end; ++it) { + const auto& method_name = it->key(); + auto method = mod.get_method(method_name); + auto g = method.graph(); + + auto raw_spec = it->value().toGenericDict().at(it->key()).toCustomClass(); + LOG_DEBUG(raw_spec->stringify()); + auto cfg = raw_spec->toInternalCompileSpec(); + auto convert_cfg = std::move(cfg.convert_info); + auto graph_and_ivalues = torch::jit::LowerGraph(*g, mod._ivalue()); + + g = graph_and_ivalues.first; + auto params = graph_and_ivalues.second; + auto named_params = core::conversion::get_named_params(g->inputs(), params); + + auto serialized_engine = core::conversion::ConvertBlockToEngine(g->block(), convert_cfg, named_params); + auto engine_handle = c10::make_intrusive(it->key(), serialized_engine); + handles.insert(method.name(), at::IValue(engine_handle)); + } + + return c10::impl::toGenericDict(handles); +} + + +c10::impl::GenericList TensorRTBackend::execute(c10::IValue handle, c10::impl::GenericList inputs) { + TRTORCH_ASSERT(inputs.size() > 0, "Trying to execute on empty list of arguments"); + auto engine = handle.toCustomClass(); + std::vector in_vec; + for (size_t i = 0, e = inputs.size(); i < e; ++i) { + c10::IValue val = inputs[i]; + TRTORCH_CHECK(val.isTensor(), "TensorRT currently only accepts Tensors as inputs"); + in_vec.push_back(val.toTensor()); + } + auto outputs = core::runtime::execute_engine(in_vec, engine); + return c10::impl::toList(c10::List(outputs)); +} + +namespace { +static auto reg = torch::jit::backend("tensorrt"); +} + +} // namespace backend +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_backend.h b/py/trtorch/csrc/tensorrt_backend.h new file mode 100644 index 0000000000..6150604b3e --- /dev/null +++ b/py/trtorch/csrc/tensorrt_backend.h @@ -0,0 +1,19 @@ +#pragma once +#include "torch/csrc/jit/api/module.h" +#include "torch/csrc/jit/backends/backend.h" + +namespace trtorch { +namespace backend { + +class TensorRTBackend: public torch::jit::PyTorchBackendInterface { + public: + explicit TensorRTBackend() {} + virtual ~TensorRTBackend() = default; + + c10::IValue preprocess(c10::IValue mod, c10::impl::GenericDict method_compile_spec) override; + c10::impl::GenericDict compile(c10::IValue processed_mod, c10::impl::GenericDict method_compile_spec) override; + c10::impl::GenericList execute(c10::IValue handle, c10::impl::GenericList inputs) override; +}; + +} // namespace backend +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_classes.cpp b/py/trtorch/csrc/tensorrt_classes.cpp new file mode 100644 index 0000000000..43e63d553b --- /dev/null +++ b/py/trtorch/csrc/tensorrt_classes.cpp @@ -0,0 +1,143 @@ + +#include "tensorrt_classes.h" + +namespace trtorch { +namespace pyapi { + +std::string to_str(InputRange& value) { + auto vec_to_str = [](std::vector shape) -> std::string { + std::stringstream ss; + ss << '['; + for(auto i : shape) { + ss << i << ','; + } + ss << ']'; + return ss.str(); + }; + + std::stringstream ss; + ss << " {" << std::endl; + ss << " min: " << vec_to_str(value.min) << ',' << std::endl; + ss << " opt: " << vec_to_str(value.opt) << ',' << std::endl; + ss << " max: " << vec_to_str(value.max) << ',' << std::endl; + ss << " }" << std::endl; + return ss.str(); +} + +std::string to_str(DataType value) { + switch (value) { + case DataType::kHalf: + return "Half"; + case DataType::kChar: + return 
"Int8"; + case DataType::kFloat: + default: + return "Float"; + } +} + +nvinfer1::DataType toTRTDataType(DataType value) { + switch (value) { + case DataType::kChar: + return nvinfer1::DataType::kINT8; + case DataType::kHalf: + return nvinfer1::DataType::kHALF; + case DataType::kFloat: + default: + return nvinfer1::DataType::kFLOAT; + } +} + +std::string to_str(DeviceType value) { + switch (value) { + case DeviceType::kDLA: + return "DLA"; + case DeviceType::kGPU: + default: + return "GPU"; + } +} + +nvinfer1::DeviceType toTRTDeviceType(DeviceType value) { + switch (value) { + case DeviceType::kDLA: + return nvinfer1::DeviceType::kDLA; + case DeviceType::kGPU: + default: + return nvinfer1::DeviceType::kGPU; + } +} + +std::string to_str(EngineCapability value) { + switch (value) { + case EngineCapability::kSAFE_GPU: + return "Safe GPU"; + case EngineCapability::kSAFE_DLA: + return "Safe DLA"; + case EngineCapability::kDEFAULT: + default: + return "Default"; + } +} + +nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { + switch (value) { + case EngineCapability::kSAFE_DLA: + return nvinfer1::EngineCapability::kSAFE_DLA; + case EngineCapability::kSAFE_GPU: + return nvinfer1::EngineCapability::kSAFE_GPU; + case EngineCapability::kDEFAULT: + default: + return nvinfer1::EngineCapability::kDEFAULT; + } +} + +core::CompileSpec CompileSpec::toInternalCompileSpec() { + std::vector internal_input_ranges; + for (auto i : input_ranges) { + internal_input_ranges.push_back(i.toInternalInputRange()); + } + auto info = core::CompileSpec(internal_input_ranges); + info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); + info.convert_info.engine_settings.refit = refit; + info.convert_info.engine_settings.debug = debug; + info.convert_info.engine_settings.strict_types = strict_types; + info.convert_info.engine_settings.allow_gpu_fallback = allow_gpu_fallback; + info.convert_info.engine_settings.device = toTRTDeviceType(device); + info.convert_info.engine_settings.capability = toTRTEngineCapability(capability); + TRTORCH_CHECK(num_min_timing_iters >= 0, "num_min_timing_iters must be 0 or greater"); + info.convert_info.engine_settings.num_min_timing_iters = num_min_timing_iters; + TRTORCH_CHECK(num_avg_timing_iters >= 0, "num_avg_timing_iters must be 0 or greater"); + info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters; + TRTORCH_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater"); + info.convert_info.engine_settings.workspace_size = workspace_size; + TRTORCH_CHECK(max_batch_size >= 0, "max_batch_size must be 0 or greater"); + info.convert_info.engine_settings.max_batch_size = max_batch_size; + return info; +} + +std::string CompileSpec::stringify() { + std::stringstream ss; + ss << "TensorRT Compile Spec: {" << std::endl; + ss << " \"Input Shapes\": [" << std::endl; + for (auto i : input_ranges) { + ss << to_str(i); + } + ss << " ]" << std::endl; + ss << " \"Op Precision\": " << to_str(op_precision) << std::endl; + ss << " \"Refit\": " << refit << std::endl; + ss << " \"Debug\": " << debug << std::endl; + ss << " \"Strict Types\": " << strict_types << std::endl; + ss << " \"Allow GPU Fallback\": " << allow_gpu_fallback << std::endl; + ss << " \"Device\": " << to_str(capability) << std::endl; + ss << " \"Engine Capability\": " << to_str(capability) << std::endl; + ss << " \"Num Min Timing Iters\": " << num_min_timing_iters << std::endl; + ss << " \"Num Avg Timing Iters\": " << num_avg_timing_iters << std::endl; + ss << " 
\"Workspace Size\": " << workspace_size << std::endl; + ss << " \"Max Batch Size\": " << max_batch_size << std::endl; + ss << "}"; + return ss.str(); +} + +} // namespace pyapi +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/tensorrt_classes.h b/py/trtorch/csrc/tensorrt_classes.h new file mode 100644 index 0000000000..e98a093358 --- /dev/null +++ b/py/trtorch/csrc/tensorrt_classes.h @@ -0,0 +1,101 @@ +#pragma once + +#include "core/compiler.h" +#include "core/conversion/conversion.h" +#include "torch/torch.h" +#include "torch/script.h" +#include "torch/custom_class.h" + +namespace trtorch { +namespace pyapi { + +#define ADD_FIELD_GET_SET(field_name, type) \ + void set_##field_name(type val) {field_name = val;} \ + type get_##field_name() {return field_name;} + +struct InputRange : torch::CustomClassHolder { + std::vector min; + std::vector opt; + std::vector max; + + core::conversion::InputRange toInternalInputRange() { + return core::conversion::InputRange(min, opt, max); + } + + ADD_FIELD_GET_SET(min, std::vector); + ADD_FIELD_GET_SET(opt, std::vector); + ADD_FIELD_GET_SET(max, std::vector); +}; + +std::string to_str(InputRange& value); + + +enum class DataType : int8_t { + kFloat, + kHalf, + kChar, +}; + +std::string to_str(DataType value); +nvinfer1::DataType toTRTDataType(DataType value); + +enum DeviceType : int8_t { + kGPU, + kDLA, +}; + +std::string to_str(DeviceType value); +nvinfer1::DeviceType toTRTDeviceType(DeviceType value); + +enum class EngineCapability : int8_t { + kDEFAULT, + kSAFE_GPU, + kSAFE_DLA, +}; + +std::string to_str(EngineCapability value); +nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value); + +// TODO: Make this error message more informative +#define ADD_ENUM_GET_SET(field_name, type, max_val) \ + void set_##field_name(int64_t val) { \ + TRTORCH_CHECK(val < max_val, "Invalid enum value for field"); \ + field_name = static_cast(val); \ + } \ + int64_t get_##field_name() {return static_cast(field_name);} + +struct CompileSpec : torch::CustomClassHolder { + core::CompileSpec toInternalCompileSpec(); + std::string stringify(); + void appendInputRange(const c10::intrusive_ptr& ir) { + input_ranges.push_back(*ir); + } + + ADD_ENUM_GET_SET(op_precision, DataType, 3); + ADD_FIELD_GET_SET(refit, bool); + ADD_FIELD_GET_SET(debug, bool); + ADD_FIELD_GET_SET(strict_types, bool); + ADD_FIELD_GET_SET(allow_gpu_fallback, bool); + ADD_ENUM_GET_SET(device, DeviceType, 2); + ADD_ENUM_GET_SET(capability, EngineCapability, 3); + ADD_FIELD_GET_SET(num_min_timing_iters, int64_t); + ADD_FIELD_GET_SET(num_avg_timing_iters, int64_t); + ADD_FIELD_GET_SET(workspace_size, int64_t); + ADD_FIELD_GET_SET(max_batch_size, int64_t); + + std::vector input_ranges; + DataType op_precision = DataType::kFloat; + bool refit = false; + bool debug = false; + bool strict_types = false; + bool allow_gpu_fallback = true; + DeviceType device = DeviceType::kGPU; + EngineCapability capability = EngineCapability::kDEFAULT; + int64_t num_min_timing_iters = 2; + int64_t num_avg_timing_iters = 1; + int64_t workspace_size = 0; + int64_t max_batch_size = 0; +}; + +} // namespace pyapi +} // namespace trtorch \ No newline at end of file diff --git a/py/trtorch/csrc/trtorch_py.cpp b/py/trtorch/csrc/trtorch_py.cpp index da6d2b2688..4f9363542d 100644 --- a/py/trtorch/csrc/trtorch_py.cpp +++ b/py/trtorch/csrc/trtorch_py.cpp @@ -1,11 +1,12 @@ #include "pybind11/pybind11.h" #include "pybind11/stl.h" -//TODO: Remove when we have access to PyTorch to_backend 
autoregistration -#include "core/backend.h" + +#include "tensorrt_classes.h" #include "core/compiler.h" #include "core/conversion/conversion.h" #include "torch/torch.h" #include "torch/script.h" +#include "torch/custom_class.h" #include "torch/csrc/jit/python/pybind_utils.h" #include "Python.h" @@ -14,103 +15,6 @@ namespace py = pybind11; namespace trtorch { namespace pyapi { -struct InputRange { - std::vector min; - std::vector opt; - std::vector max; - - core::conversion::InputRange toInternalInputRange() { - return core::conversion::InputRange(min, opt, max); - } -}; - -enum class DataType : int8_t { - kFloat, - kHalf, - kChar, -}; - -nvinfer1::DataType toTRTDataType(DataType value) { - switch (value) { - case DataType::kChar: - return nvinfer1::DataType::kINT8; - case DataType::kHalf: - return nvinfer1::DataType::kHALF; - case DataType::kFloat: - default: - return nvinfer1::DataType::kFLOAT; - } -} - -enum DeviceType : int8_t { - kGPU, - kDLA, -}; - -nvinfer1::DeviceType toTRTDeviceType(DeviceType value) { - switch (value) { - case DeviceType::kDLA: - return nvinfer1::DeviceType::kDLA; - case DeviceType::kGPU: - default: - return nvinfer1::DeviceType::kGPU; - } -} - -enum class EngineCapability : int8_t { - kDEFAULT, - kSAFE_GPU, - kSAFE_DLA, -}; - -nvinfer1::EngineCapability toTRTEngineCapability(EngineCapability value) { - switch (value) { - case EngineCapability::kSAFE_DLA: - return nvinfer1::EngineCapability::kSAFE_DLA; - case EngineCapability::kSAFE_GPU: - return nvinfer1::EngineCapability::kSAFE_GPU; - case EngineCapability::kDEFAULT: - default: - return nvinfer1::EngineCapability::kDEFAULT; - } -} - -struct CompileSpec { - - core::CompileSpec toInternalCompileSpec() { - for (auto i : input_ranges) { - internal_input_ranges.push_back(i.toInternalInputRange()); - } - auto info = core::CompileSpec(internal_input_ranges); - info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision); - info.convert_info.engine_settings.refit = refit; - info.convert_info.engine_settings.debug = debug; - info.convert_info.engine_settings.strict_types = strict_types; - info.convert_info.engine_settings.allow_gpu_fallback = allow_gpu_fallback; - info.convert_info.engine_settings.device = toTRTDeviceType(device); - info.convert_info.engine_settings.capability = toTRTEngineCapability(capability); - info.convert_info.engine_settings.num_min_timing_iters = num_min_timing_iters; - info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters; - info.convert_info.engine_settings.workspace_size = workspace_size; - info.convert_info.engine_settings.max_batch_size = max_batch_size; - return info; - } - - std::vector input_ranges; - std::vector internal_input_ranges; - DataType op_precision = DataType::kFloat; - bool refit = false; - bool debug = false; - bool strict_types = false; - bool allow_gpu_fallback = true; - DeviceType device = DeviceType::kGPU; - EngineCapability capability = EngineCapability::kDEFAULT; - uint64_t num_min_timing_iters = 2; - uint64_t num_avg_timing_iters = 1; - uint64_t workspace_size = 0; - uint64_t max_batch_size = 0; -}; - torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec& info) { py::gil_scoped_acquire gil; auto trt_mod = core::CompileGraph(mod, info.toInternalCompileSpec()); @@ -227,11 +131,7 @@ PYBIND11_MODULE(_C, m) { .value("INFO", core::util::logging::LogLevel::kINFO) .value("DEBUG", core::util::logging::LogLevel::kDEBUG) .export_values(); - - //TODO: Remove when we have access to PyTorch autoregistration - 
//m.def("to_tensorrt", backend::GetTensorRTBackend().generateToBackendFn()); } - } // namespace pyapi } // namespace trtorch diff --git a/tests/BUILD b/tests/BUILD index f784798a57..81a43aecbc 100644 --- a/tests/BUILD +++ b/tests/BUILD @@ -17,6 +17,7 @@ test_suite( test_suite( name = "python_api_tests", tests = [ - "//tests/py:test_api" + "//tests/py:test_api", + "//tests/py:test_to_backend_api" ] ) \ No newline at end of file diff --git a/tests/py/BUILD b/tests/py/BUILD index 054e1cbbb3..0d643d65d8 100644 --- a/tests/py/BUILD +++ b/tests/py/BUILD @@ -5,9 +5,21 @@ load("@py_test_deps//:requirements.bzl", "requirement") py_test( name = "test_api", srcs = [ - "test_api.py" + "test_api.py", + "model_test_case.py" ], deps = [ requirement("torchvision") ] -) \ No newline at end of file +) + +py_test( + name = "test_to_backend_api", + srcs = [ + "test_to_backend_api.py", + "model_test_case.py" + ], + deps = [ + requirement("torchvision") + ] +) diff --git a/tests/py/model_test_case.py b/tests/py/model_test_case.py new file mode 100644 index 0000000000..3730f6507b --- /dev/null +++ b/tests/py/model_test_case.py @@ -0,0 +1,19 @@ +import unittest +import trtorch +import torch +import torchvision.models as models + +class ModelTestCase(unittest.TestCase): + def __init__(self, methodName='runTest', model=None): + super(ModelTestCase, self).__init__(methodName) + self.model = model + self.model.eval().to("cuda") + + @staticmethod + def parametrize(testcase_class, model=None): + testloader = unittest.TestLoader() + testnames = testloader.getTestCaseNames(testcase_class) + suite = unittest.TestSuite() + for name in testnames: + suite.addTest(testcase_class(name, model=model)) + return suite \ No newline at end of file diff --git a/tests/py/test_api.py b/tests/py/test_api.py index e0cd113db6..2d9d2d1e56 100644 --- a/tests/py/test_api.py +++ b/tests/py/test_api.py @@ -3,21 +3,7 @@ import torch import torchvision.models as models - -class ModelTestCase(unittest.TestCase): - def __init__(self, methodName='runTest', model=None): - super(ModelTestCase, self).__init__(methodName) - self.model = model - self.model.eval().to("cuda") - - @staticmethod - def parametrize(testcase_class, model=None): - testloader = unittest.TestLoader() - testnames = testloader.getTestCaseNames(testcase_class) - suite = unittest.TestSuite() - for name in testnames: - suite.addTest(testcase_class(name, model=model)) - return suite +from model_test_case import ModelTestCase class TestCompile(ModelTestCase): def setUp(self): diff --git a/tests/py/test_to_backend_api.py b/tests/py/test_to_backend_api.py new file mode 100644 index 0000000000..e643aa6ce2 --- /dev/null +++ b/tests/py/test_to_backend_api.py @@ -0,0 +1,44 @@ +import unittest +import trtorch +import torch +import torchvision.models as models + +from model_test_case import ModelTestCase + +class TestToBackendLowering(ModelTestCase): + def setUp(self): + self.input = torch.randn((1, 3, 300, 300)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + self.spec = { + "forward": trtorch.TensorRTCompileSpec({ + "input_shapes": [[1, 3, 300, 300]], + "op_precision": torch.float, + "refit": False, + "debug": False, + "strict_types": False, + "allow_gpu_fallback": True, + "device_type": "gpu", + "capability": trtorch.EngineCapability.default, + "num_min_timing_iters": 2, + "num_avg_timing_iters": 1, + "max_batch_size": 0, + }) + } + + def test_to_backend_lowering(self): + trt_mod = torch._C._jit_to_tensorrt(self.scripted_model._c, {"forward": self.spec}) + same = 
(trt_mod.forward(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-3) + +def test_suite(): + suite = unittest.TestSuite() + suite.addTest(TestToBackendLowering.parametrize(TestToBackendLowering, model=models.mobilenet_v2(pretrained=True))) + + return suite + +suite = test_suite() + +runner = unittest.TextTestRunner() +result = runner.run(suite) + +exit(int(not result.wasSuccessful())) \ No newline at end of file
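
---
For reviewers, a minimal usage sketch of the new to_backend path, pieced together from the commit message and tests/py/test_to_backend_api.py above. The model choice, input size, and the nested {"forward": ...} spec dictionary all mirror that test as written rather than a documented contract, so treat this as an illustration under those assumptions, not a definitive API reference.

import torch
import torchvision.models as models
import trtorch  # importing trtorch registers the "tensorrt" backend and its TorchScript classes

# Script the model as usual
model = models.mobilenet_v2(pretrained=True).eval().to("cuda")
scripted_model = torch.jit.script(model)

# Build the wrapped spec with trtorch.TensorRTCompileSpec -- the only extra step
# compared to trtorch.compile -- keyed by the method to compile.
spec = {
    "forward": trtorch.TensorRTCompileSpec({
        "input_shapes": [[1, 3, 300, 300]],
        "op_precision": torch.float,
        "refit": False,
        "debug": False,
        "strict_types": False,
        "allow_gpu_fallback": True,
        "device_type": "gpu",
        "capability": trtorch.EngineCapability.default,
        "num_min_timing_iters": 2,
        "num_avg_timing_iters": 1,
        "max_batch_size": 0,
    })
}

# The test above hands the spec to the backend nested one level deeper,
# i.e. {"forward": spec}; this sketch follows that call shape verbatim.
trt_mod = torch._C._jit_to_tensorrt(scripted_model._c, {"forward": spec})

x = torch.randn((1, 3, 300, 300)).to("cuda")
print(trt_mod.forward(x))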