From 2e9a7f3af00efa7ffa89fa512fe118cda65339d0 Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Fri, 6 Nov 2020 19:11:17 +0000 Subject: [PATCH 01/12] handling dynamism in TensorRT to support OD models refactoring test tensort code added comments to dynamic check wrapper log.warn changed to logger.info TRT codegen taking slice_mode into account TRT codegen to handle both stride_mode refactoring TRT codegen adding a test for dynamic offload [TRT] bug in codegen for slice_mode=end ctx determined from target in test + io test was missing --- python/tvm/relay/op/contrib/tensorrt.py | 244 ++++++---- src/relay/backend/contrib/tensorrt/codegen.cc | 22 +- src/runtime/contrib/tensorrt/tensorrt_ops.cc | 2 +- tests/python/contrib/test_tensorrt.py | 448 +++++++++++------- 4 files changed, 468 insertions(+), 248 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 24c468fee0fe..9cdc50c24075 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -18,11 +18,12 @@ """TensorRT supported operators.""" import logging import numpy as np +import os import tvm from tvm import relay from tvm.relay import transform from tvm.relay.build_module import bind_params_by_name -from tvm.relay.expr import Call, Constant, Tuple, GlobalVar +from tvm.relay.expr import Call, Constant, Tuple, GlobalVar, Var, TupleGetItem from tvm.relay.expr_functor import ExprMutator logger = logging.getLogger("TensorRT") @@ -134,15 +135,18 @@ def partition_for_tensorrt( if params: mod["main"] = bind_params_by_name(mod["main"], params) + seq = tvm.transform.Sequential( [ transform.InferType(), RemoveDropoutPass(), transform.RemoveUnusedFunctions(), transform.ConvertLayout( - {"nn.conv2d": ["NCHW", "default"], "nn.conv3d": ["NCDHW", "default"]} + {"nn.conv2d": ["NCHW", "default"], + "nn.conv3d": ["NCDHW", "default"]} ), transform.FoldConstant(), + transform.InferType(), transform.AnnotateTarget("tensorrt"), transform.MergeCompilerRegions(), transform.PartitionGraph(), @@ -152,13 +156,51 @@ def partition_for_tensorrt( with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): mod = seq(mod) mod = prune_tensorrt_subgraphs(mod) + return mod, config +def check_dynamism(args, op_name): + """ + This function checks for dynamism inside any of the args in the op. + Can be used to offload dynamic ops that are not supported by TRT to + be offloaded to relay VM. + + Raises a NotImplementedError if the type of the arg is not of types + Call, Var, Constant, or TupleGetItem. 
+ + Parameters + ---------- + args: a TRT array of the arguments of the op + op_name: name of the op for debugging purposes only + + Returns + ---------- + True if dynamism is present, False otherwise + """ + for arg in args: + if isinstance(arg, (Call, Var, Constant, TupleGetItem)): + for dim_shape in arg.checked_type.shape: + if isinstance(dim_shape, tvm.tir.expr.Any): + return True + elif isinstance(arg, Tuple): + return check_dynamism(arg.fields, op_name) + else: + logger.info( + "Arg not supported in TensorRT for ", + op_name, + type(arg), + ) + return True + return False + def _register_external_op_helper_with_checker(op_name, checker): @tvm.ir.register_op_attr(op_name, "target.tensorrt") def _func_wrapper(expr): attrs, args = expr.attrs, expr.args + # ops with dynamic shapes are offloaded to VM + if check_dynamism(args, op_name): + return False if any([x.checked_type.dtype != "float32" for x in args]): logger.info("Only float32 inputs are supported for TensorRT.") return False @@ -173,6 +215,29 @@ def _register_external_op_helper(op_name, supported=True): ) +def _register_external_dynamic_check_func(op_name, checker): + """ + Wrapper to check dynamic shapes inside any of the args in the op + + Parameters + ---------- + op_name: name of the op for debugging purposes only + checker: additional checker function specific to the op + + Returns + ---------- + wrapped checker function with dynamism check + """ + @tvm.ir.register_op_attr(op_name, "target.tensorrt") + def _func_wrapper(expr): + attrs, args = expr.attrs, expr.args + # ops with dynamic shapes are offloaded to VM + if check_dynamism(args, op_name): + return False + return checker(expr) + return _func_wrapper + + # Ops which are always supported _register_external_op_helper("nn.relu") _register_external_op_helper("sigmoid") @@ -192,7 +257,49 @@ def _register_external_op_helper(op_name, supported=True): _register_external_op_helper("clip") -@tvm.ir.register_op_attr("add", "target.tensorrt") +def reduce_annotate_fn(attrs, args, op_name): + """Helper for reduce operations.""" + if not attrs.axis or len(attrs.axis) == 0: + logger.info("%s: cannot reduce to scalar.", op_name) + return False + if attrs.exclude: + logger.info("%s: exclude not supported.", op_name) + return False + if get_tensorrt_use_implicit_batch_mode() and any([x == 0 for x in map(int, attrs.axis)]): + logger.info("%s: can't modify batch dimension.", op_name) + return False + return True + + +_register_external_op_helper_with_checker("sum", reduce_annotate_fn) +_register_external_op_helper_with_checker("prod", reduce_annotate_fn) +_register_external_op_helper_with_checker("max", reduce_annotate_fn) +_register_external_op_helper_with_checker("min", reduce_annotate_fn) +_register_external_op_helper_with_checker("mean", reduce_annotate_fn) + + +def trt_version_annotate_fn(version): + """Helper for ops which require a minimum TRT version""" + + def _func_wrapper(attrs, args, op_name): + if get_tensorrt_version() < version: + logger.info( + "%s: requires TensorRT version %s or higher.", op_name, ".".join(map(str, version)) + ) + return False + return True + + return _func_wrapper + + +_register_external_op_helper_with_checker("nn.leaky_relu", trt_version_annotate_fn((5, 1, 5))) +_register_external_op_helper_with_checker("sin", trt_version_annotate_fn((5, 1, 5))) +_register_external_op_helper_with_checker("cos", trt_version_annotate_fn((5, 1, 5))) +_register_external_op_helper_with_checker("atan", trt_version_annotate_fn((5, 1, 5))) 
+_register_external_op_helper_with_checker("ceil", trt_version_annotate_fn((5, 1, 5))) + + + def add_annotate_fn(expr): # pylint: disable=unused-variable """Check if add is supported by TensorRT.""" @@ -211,8 +318,6 @@ def add_annotate_fn(expr): # pylint: disable=unused-variable return False return True - -@tvm.ir.register_op_attr("nn.batch_norm", "target.tensorrt") def batch_norm_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.batch_norm is supported by TensorRT.""" @@ -226,7 +331,6 @@ def batch_norm_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.softmax", "target.tensorrt") def softmax_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.softmax is supported by TensorRT.""" @@ -240,7 +344,7 @@ def softmax_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.conv2d", "target.tensorrt") + def conv2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.conv2d is supported by TensorRT.""" @@ -260,7 +364,7 @@ def conv2d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.dense", "target.tensorrt") + def dense_annotate_fn(expr): # pylint: disable=unused-variable """Check if dense is supported by TensorRT.""" @@ -279,7 +383,7 @@ def dense_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.bias_add", "target.tensorrt") + def bias_add_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.bias_add is supported by TensorRT.""" @@ -294,7 +398,6 @@ def bias_add_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.max_pool2d", "target.tensorrt") def max_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.max_pool2d is supported by TensorRT.""" @@ -311,7 +414,6 @@ def max_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.avg_pool2d", "target.tensorrt") def avg_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.avg_pool2d is supported by TensorRT.""" @@ -341,7 +443,6 @@ def avg_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.global_max_pool2d", "target.tensorrt") def global_max_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.global_max_pool2d is supported by TensorRT.""" @@ -355,7 +456,7 @@ def global_max_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.global_avg_pool2d", "target.tensorrt") + def global_avg_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.global_avg_pool2d is supported by TensorRT.""" @@ -369,7 +470,6 @@ def global_avg_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("expand_dims", "target.tensorrt") def expand_dims_annotate_fn(expr): # pylint: disable=unused-variable """Check if expand_dims is supported by TensorRT.""" @@ -383,7 +483,6 @@ def expand_dims_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("squeeze", "target.tensorrt") def squeeze_annotate_fn(expr): # pylint: disable=unused-variable """Check if squeeze is supported by TensorRT.""" @@ -400,7 +499,6 @@ def squeeze_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("concatenate", "target.tensorrt") def concatenate_annotate_fn(expr): # pylint: 
disable=unused-variable """Check if concatenate is supported by TensorRT.""" @@ -421,7 +519,6 @@ def concatenate_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.conv2d_transpose", "target.tensorrt") def conv2d_transpose_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.conv2d_transpose is supported by TensorRT.""" @@ -446,7 +543,6 @@ def conv2d_transpose_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("transpose", "target.tensorrt") def transpose_annotate_fn(expr): # pylint: disable=unused-variable """Check if transpose is supported by TensorRT.""" @@ -460,7 +556,6 @@ def transpose_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("layout_transform", "target.tensorrt") def layout_transform_annotate_fn(expr): # pylint: disable=unused-variable """Check if layout_transform is supported by TensorRT.""" @@ -480,8 +575,6 @@ def layout_transform_annotate_fn(expr): # pylint: disable=unused-variable return False return True - -@tvm.ir.register_op_attr("reshape", "target.tensorrt") def reshape_annotate_fn(expr): # pylint: disable=unused-variable """Check if reshape is supported by TensorRT.""" @@ -513,8 +606,6 @@ def reshape_annotate_fn(expr): # pylint: disable=unused-variable return False return True - -@tvm.ir.register_op_attr("nn.pad", "target.tensorrt") def pad_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.pad is supported by TensorRT.""" @@ -536,49 +627,6 @@ def pad_annotate_fn(expr): # pylint: disable=unused-variable return True -def reduce_annotate_fn(attrs, args, op_name): - """Helper for reduce operations.""" - if not attrs.axis or len(attrs.axis) == 0: - logger.info("%s: cannot reduce to scalar.", op_name) - return False - if attrs.exclude: - logger.info("%s: exclude not supported.", op_name) - return False - if get_tensorrt_use_implicit_batch_mode() and any([x == 0 for x in map(int, attrs.axis)]): - logger.info("%s: can't modify batch dimension.", op_name) - return False - return True - - -_register_external_op_helper_with_checker("sum", reduce_annotate_fn) -_register_external_op_helper_with_checker("prod", reduce_annotate_fn) -_register_external_op_helper_with_checker("max", reduce_annotate_fn) -_register_external_op_helper_with_checker("min", reduce_annotate_fn) -_register_external_op_helper_with_checker("mean", reduce_annotate_fn) - - -def trt_version_annotate_fn(version): - """Helper for ops which require a minimum TRT version""" - - def _func_wrapper(attrs, args, op_name): - if get_tensorrt_version() < version: - logger.info( - "%s: requires TensorRT version %s or higher.", op_name, ".".join(map(str, version)) - ) - return False - return True - - return _func_wrapper - - -_register_external_op_helper_with_checker("nn.leaky_relu", trt_version_annotate_fn((5, 1, 5))) -_register_external_op_helper_with_checker("sin", trt_version_annotate_fn((5, 1, 5))) -_register_external_op_helper_with_checker("cos", trt_version_annotate_fn((5, 1, 5))) -_register_external_op_helper_with_checker("atan", trt_version_annotate_fn((5, 1, 5))) -_register_external_op_helper_with_checker("ceil", trt_version_annotate_fn((5, 1, 5))) - - -@tvm.ir.register_op_attr("strided_slice", "target.tensorrt") def strided_slice_annotate_fn(expr): # pylint: disable=unused-variable """Check if strided_slice is supported by TensorRT.""" @@ -598,14 +646,26 @@ def strided_slice_annotate_fn(expr): # pylint: disable=unused-variable if batch_dim_begin_modified or 
batch_dim_end_modified: logger.info("strided_slice: can't modify batch dimension.") return False + if any([x is not None and x <= 0 for x in attrs.strides]): logger.info("strided_slice: stride must be positive") return False + + for i in range(0, len(args[0].checked_type.shape)): + begin = int(attrs.begin[i]) + end = ( + int(attrs.end[i]) + if attrs.end[i] is not None and int(attrs.end[i]) != -1 + else args[0].checked_type.shape[i] + ) + if int(end) - int(begin) < 1: + print("strided_slice: size of slice must be at least 1") + return False + return True -@tvm.ir.register_op_attr("nn.adaptive_max_pool2d", "target.tensorrt") -def adapative_max_pool2d_annotate_fn(expr): # pylint: disable=unused-variable +def adaptive_max_pool2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.adaptive_max_pool2d is supported by TensorRT.""" attrs, args = expr.attrs, expr.args @@ -618,8 +678,7 @@ def adapative_max_pool2d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.adaptive_avg_pool2d", "target.tensorrt") -def adapative_avg_pool2d_annotate_fn(expr): # pylint: disable=unused-variable +def adaptive_avg_pool2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.adaptive_avg_pool2d is supported by TensorRT.""" attrs, args = expr.attrs, expr.args @@ -632,7 +691,7 @@ def adapative_avg_pool2d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.conv3d", "target.tensorrt") + def conv3d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.conv3d is supported by TensorRT.""" @@ -654,7 +713,6 @@ def conv3d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.max_pool3d", "target.tensorrt") def max_pool_3d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.max_pool3d is supported by TensorRT.""" @@ -670,7 +728,6 @@ def max_pool_3d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.avg_pool3d", "target.tensorrt") def avg_pool_3d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.avg_pool3d is supported by TensorRT.""" @@ -686,7 +743,6 @@ def avg_pool_3d_annotate_fn(expr): # pylint: disable=unused-variable return True -@tvm.ir.register_op_attr("nn.conv3d_transpose", "target.tensorrt") def conv3d_transpose_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.conv3d_transpose is supported by TensorRT.""" @@ -715,6 +771,34 @@ def conv3d_transpose_annotate_fn(expr): # pylint: disable=unused-variable return False return True +_register_external_dynamic_check_func("add", add_annotate_fn) +_register_external_dynamic_check_func("nn.batch_norm", batch_norm_annotate_fn) +_register_external_dynamic_check_func("nn.softmax", softmax_annotate_fn) +_register_external_dynamic_check_func("nn.conv2d", conv2d_annotate_fn) +_register_external_dynamic_check_func("nn.dense", dense_annotate_fn) +_register_external_dynamic_check_func("nn.bias_add", bias_add_annotate_fn) +_register_external_dynamic_check_func("nn.max_pool2d", max_pool_2d_annotate_fn) +_register_external_dynamic_check_func("nn.avg_pool2d", avg_pool_2d_annotate_fn) +_register_external_dynamic_check_func("nn.global_max_pool2d", global_max_pool_2d_annotate_fn) +_register_external_dynamic_check_func("nn.global_avg_pool2d", global_avg_pool_2d_annotate_fn) +_register_external_dynamic_check_func("expand_dims", expand_dims_annotate_fn) +_register_external_dynamic_check_func("squeeze", squeeze_annotate_fn) 
+_register_external_dynamic_check_func("concatenate", concatenate_annotate_fn) +_register_external_dynamic_check_func("nn.conv2d_transpose", conv2d_transpose_annotate_fn) +_register_external_dynamic_check_func("transpose", transpose_annotate_fn) +_register_external_dynamic_check_func("layout_transform", layout_transform_annotate_fn) +_register_external_dynamic_check_func("reshape", reshape_annotate_fn) +_register_external_dynamic_check_func("nn.pad", pad_annotate_fn) +_register_external_dynamic_check_func("strided_slice", strided_slice_annotate_fn) +_register_external_dynamic_check_func("nn.adaptive_max_pool2d", adaptive_max_pool2d_annotate_fn) +_register_external_dynamic_check_func("nn.adaptive_avg_pool2d", adaptive_avg_pool2d_annotate_fn) +_register_external_dynamic_check_func("nn.conv3d", conv3d_annotate_fn) +_register_external_dynamic_check_func("nn.max_pool3d", max_pool_3d_annotate_fn) +_register_external_dynamic_check_func("nn.avg_pool3d", avg_pool_3d_annotate_fn) +_register_external_dynamic_check_func("nn.conv3d_transpose", conv3d_transpose_annotate_fn) + + + def is_valid_subgraph(params, body): """Final check on whether the subgraph is valid and should be offloaded to TensorRT.""" @@ -755,7 +839,6 @@ class SubgraphRemover(ExprMutator): """ Reverts subgraphs in subgraphs_to_remove back to TVM instead of using an external codegen. """ - def __init__(self, subgraphs_to_remove, mod, new_mod): ExprMutator.__init__(self) self.subgraphs_to_remove = subgraphs_to_remove @@ -773,14 +856,11 @@ def visit_call(self, call): var_map[param] = super().visit(arg) new_body = relay.bind(func.body, var_map) return new_body - if name != "main": - # Copy the GlobalVar (subgraph function) to the new module and call. + elif name != "main": args = [] for arg in call.args: args.append(super().visit(arg)) - subgraph_gv = relay.GlobalVar(name) - self.new_mod[subgraph_gv] = self.mod[name] - return subgraph_gv(*args) + return call.op(*args) return super().visit_call(call) subgraphs_to_remove = [] @@ -792,7 +872,7 @@ def visit_call(self, call): if not is_valid_subgraph(mod[name].params, mod[name].body): subgraphs_to_remove.append(name) # Create new pruned module - new_mod = tvm.IRModule() + new_mod = tvm.IRModule(mod.functions, mod.type_definitions) new_mod["main"] = SubgraphRemover(subgraphs_to_remove, mod, new_mod).visit(mod["main"]) return new_mod @@ -817,3 +897,5 @@ def visit_tuple_getitem(self, op): class RemoveDropoutPass: def transform_function(self, func, mod, _): return RemoveDropout().visit(func) + + diff --git a/src/relay/backend/contrib/tensorrt/codegen.cc b/src/relay/backend/contrib/tensorrt/codegen.cc index 26f674dcd7b5..5f53c1f93444 100644 --- a/src/relay/backend/contrib/tensorrt/codegen.cc +++ b/src/relay/backend/contrib/tensorrt/codegen.cc @@ -133,26 +133,34 @@ class TensorRTJSONSerializer : public backend::contrib::JSONSerializer { auto process_slice_index = [](Integer x, int default_value, int dim_value) { if (!x.defined()) return default_value; int value = x.as()->value; - if (value < 0) value += dim_value; + value = (value < 0 ) ? 
dim_value + value : value; return value; }; std::vector start, size, strides; for (size_t i = 0; i < attrs->begin.value().size(); ++i) { const int begin_value = process_slice_index(attrs->begin.value()[i], 0, ishape[i]); - const int end_value = process_slice_index(attrs->end.value()[i], ishape[i], ishape[i]); + ICHECK_GE(begin_value, 0); + start.push_back(std::to_string(begin_value)); const int stride_value = (default_strides || i >= attrs->strides.value().size() || !attrs->strides.value()[i].defined()) ? 1 : attrs->strides.value()[i].as()->value; ICHECK_GT(stride_value, 0); - const int size_value = (end_value - begin_value + stride_value - 1) / stride_value; - ICHECK_GE(begin_value, 0); + strides.push_back(std::to_string(stride_value)); + int size_value; + if (attrs->slice_mode == "end"){ + const int end_value = process_slice_index(attrs->end.value()[i], ishape[i], ishape[i]); + size_value = (end_value - begin_value + stride_value - 1) / stride_value; + } + else if (attrs->slice_mode == "size"){ + // with slice_mode = "size", attrs->end_value mean the size of the slice + int end_value = attrs->end.value()[i].as()->value; + size_value = (end_value == -1) ? ishape[i] - begin_value: end_value; + } ICHECK_GT(size_value, 0); - start.push_back(std::to_string(begin_value)); size.push_back(std::to_string(size_value)); - strides.push_back(std::to_string(stride_value)); - } + } std::vector start_attr, size_attr, strides_attr; start_attr.emplace_back(start); size_attr.emplace_back(size); diff --git a/src/runtime/contrib/tensorrt/tensorrt_ops.cc b/src/runtime/contrib/tensorrt/tensorrt_ops.cc index a86f107941bc..415caf50ea4d 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_ops.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_ops.cc @@ -944,7 +944,7 @@ class ReduceOpConverter : public TensorRTOpConverter { #if TRT_VERSION_GE(5, 1, 5) class StridedSliceOpConverter : public TensorRTOpConverter { public: - StridedSliceOpConverter() : TensorRTOpConverter({kTensor, kWeight, kWeight, kWeight}) {} + StridedSliceOpConverter() : TensorRTOpConverter({kTensor}) {} // , kWeight, kWeight, kWeight}) {} void Convert(TensorRTOpConverterParams* params) const { auto input = params->inputs.at(0).tensor; diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 8e8e54e8650a..e96f93b5b266 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -17,13 +17,16 @@ import numpy as np import time import pytest +import itertools import tvm import tvm.relay.testing from tvm import relay from tvm.relay.op.contrib import tensorrt from tvm.contrib import graph_runtime - +from tvm.runtime.vm import VirtualMachine +from tvm.relay import Any, GlobalVar, transform +from mxnet.gluon.model_zoo.vision import get_model def skip_codegen_test(): """Skip test if TensorRT and CUDA codegen are not present""" @@ -46,6 +49,23 @@ def skip_runtime_test(): return False +def vmobj_to_list(o): + if isinstance(o, tvm.nd.NDArray): + return [o.asnumpy()] + elif isinstance(o, tvm.runtime.container.ADT) or isinstance(o, list): + return [vmobj_to_list(f) for f in o] + else: + raise RuntimeError("Unknown object type: %s" % type(o)) + + +def assert_result_dict_holds(result_dict): + for k1, k2 in itertools.combinations(result_dict, 2): + res1 = vmobj_to_list(result_dict[k1]) + res2 = vmobj_to_list(result_dict[k2]) + for r1, r2 in zip(res1, res2): + tvm.testing.assert_allclose(r1, r2, rtol=1e-3, atol=1e-3) + + def run_and_verify_func(config, target="cuda"): """Test a Relay func by 
compiling, running, and comparing TVM and TRT outputs. @@ -64,100 +84,71 @@ def run_and_verify_func(config, target="cuda"): for k, v in input_shapes.items() if k not in is_param } - - # Run TRT - mod = tvm.IRModule() - mod["main"] = f - mod, config = tensorrt.partition_for_tensorrt(mod, params) - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): - graph, lib, graph_params = relay.build(mod, target, params=params) - if skip_runtime_test(): - return ctx = tvm.context(target) - mod = graph_runtime.create(graph, lib, ctx=ctx) - mod.set_input(**graph_params) - mod.run(**input_dict) - results = [mod.get_output(i) for i in range(mod.get_num_outputs())] - - # Run reference - mod = tvm.IRModule() - mod["main"] = f - with tvm.transform.PassContext(opt_level=3): - graph, lib, graph_params = relay.build(mod, target, params=params) - mod = graph_runtime.create(graph, lib, ctx=ctx) - mod.set_input(**graph_params) - mod.run(**input_dict) - ref_results = [mod.get_output(i) for i in range(mod.get_num_outputs())] - assert len(results) == len(ref_results) - for i in range(len(results)): - res = results[i].asnumpy() - ref_res = ref_results[i].asnumpy() - assert res.shape == ref_res.shape - tvm.testing.assert_allclose(res, ref_res, rtol=1e-3, atol=1e-3) + result_dict = dict() + for mode in ["graph", "vm"]: + for use_trt in [False, True]: + mod = tvm.IRModule() + mod["main"] = f + result_key = mode + ("_trt" if use_trt else "") + if use_trt: + mod, config = tensorrt.partition_for_tensorrt(mod, params) + with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): + exec = relay.create_executor(mode, mod=mod, ctx=ctx, target=target) + else: + with tvm.transform.PassContext(opt_level=3): + exec = relay.create_executor(mode, mod=mod, ctx=ctx, target=target) + if not skip_runtime_test(): + result_dict[result_key] = exec.evaluate()(**input_dict, **params) + + if not skip_runtime_test(): + assert_result_dict_holds(result_dict) def run_and_verify_model(model): if skip_codegen_test(): return - def compile_and_run(i_data, input_shape, dtype, use_trt=True, num_iteration=1): - import mxnet as mx - from mxnet.gluon.model_zoo.vision import get_model - - def check_trt_used(graph): - import json - - graph = json.loads(graph) - num_trt_subgraphs = sum( - [ - 1 - for n in graph["nodes"] - if n.get("attrs", {}).get("func_name", "").startswith("tensorrt_") - ] - ) - assert num_trt_subgraphs >= 1 + def check_trt_used(mod): + num_trt_subgraphs = sum( + [1 if gv.name_hint == "tensorrt_0" else 0 for gv in mod.get_global_vars()] + ) + assert num_trt_subgraphs == 1 - block = get_model(model, pretrained=True) - mod, params = relay.frontend.from_mxnet(block, shape={"data": input_shape}, dtype=dtype) + def compile_and_run(mod, params, i_data, mode="vm", use_trt=True): + assert mode in ["graph", "vm"] if use_trt: mod, config = tensorrt.partition_for_tensorrt(mod, params) + check_trt_used(mod) with tvm.transform.PassContext( - opt_level=3, config={"relay.ext.tensorrt.options": config} + opt_level=3, config={"relay.ext.tensorrt.options": config} ): - graph, lib, params = relay.build(mod, "cuda", params=params) - check_trt_used(graph) + exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda") else: with tvm.transform.PassContext(opt_level=3): - graph, lib, params = relay.build(mod, "cuda", params=params) - - if skip_runtime_test(): - return - mod = graph_runtime.create(graph, lib, ctx=tvm.gpu(0)) - mod.set_input(**params) - # Warmup - for i in 
range(10): - mod.run(data=i_data) - # Time - times = [] - for i in range(num_iteration): - start_time = time.time() - mod.run(data=i_data) - res = mod.get_output(0) - times.append(time.time() - start_time) - latency = 1000.0 * np.mean(times) - print(model, latency) + exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda") + + res = exec.evaluate()(i_data, **params) if not skip_runtime_test() else None return res dtype = "float32" input_shape = (1, 3, 224, 224) i_data = np.random.uniform(-1, 1, input_shape).astype(dtype) - res = compile_and_run(i_data, input_shape, dtype, use_trt=True) - if skip_runtime_test(): - return - ref_res = compile_and_run(i_data, input_shape, dtype, use_trt=False) - tvm.testing.assert_allclose(res.asnumpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-3) + block = get_model(model, pretrained=True) + mod, params = relay.frontend.from_mxnet(block, shape={"data": input_shape}, dtype=dtype) + + result_dict = dict() + for mode in ["vm", "graph"]: + for use_trt in [True, False]: + result_key = mode + ("_trt" if use_trt else "") + result_dict[result_key] = compile_and_run( + mod, params, i_data, mode=mode, use_trt=use_trt + ) + + if not skip_runtime_test(): + assert_result_dict_holds(result_dict) def test_tensorrt_simple(): @@ -174,19 +165,29 @@ def test_tensorrt_simple(): out = relay.nn.relu(w) f = relay.Function([x, y, z], out) - mod = tvm.IRModule() - mod["main"] = f - mod, config = tensorrt.partition_for_tensorrt(mod) - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): - graph, lib, params = relay.build(mod, "cuda") - if skip_runtime_test(): - return - mod = graph_runtime.create(graph, lib, ctx=tvm.gpu(0)) x_data = np.random.uniform(-1, 1, xshape).astype(dtype) y_data = np.random.uniform(-1, 1, yshape).astype(dtype) z_data = np.random.uniform(-1, 1, zshape).astype(dtype) - mod.run(x=x_data, y=y_data, z=z_data) - results = [mod.get_output(i).asnumpy() for i in range(mod.get_num_outputs())] + + + result_dict = dict() + for mode in ["vm", "graph"]: + for use_trt in [True, False]: + mod = tvm.IRModule() + mod["main"] = f + result_key = mode + ("_trt" if use_trt else "") + if use_trt: + mod, config = tensorrt.partition_for_tensorrt(mod) + with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): + relay_exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda") + else: + with tvm.transform.PassContext(opt_level=3): + relay_exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda") + if not skip_runtime_test(): + result_dict[result_key] = relay_exec.evaluate()(x_data, y_data, z_data) + + if not skip_runtime_test(): + assert_result_dict_holds(result_dict) def test_tensorrt_simple_cpu_io(): @@ -211,6 +212,8 @@ def test_tensorrt_not_compatible(): return dtype = "float32" xshape = (1, 32, 14, 14) + x_data = np.random.uniform(-1, 1, xshape).astype(dtype) + x = relay.var("x", shape=(xshape), dtype=dtype) y = relay.add(x, x) z = relay.erf(y) @@ -219,40 +222,95 @@ def test_tensorrt_not_compatible(): mod = tvm.IRModule() mod["main"] = f mod, config = tensorrt.partition_for_tensorrt(mod) - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): - graph, lib, params = relay.build(mod, "cuda") - if skip_runtime_test(): - return - mod = graph_runtime.create(graph, lib, ctx=tvm.gpu(0)) - x_data = np.random.uniform(-1, 1, xshape).astype(dtype) - mod.run(x=x_data) - results = [mod.get_output(i).asnumpy() for i in 
range(mod.get_num_outputs())] + for mode in ["graph", "vm"]: + with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): + exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda") + if not skip_runtime_test(): + results = exec.evaluate()(x_data) + -def test_tensorrt_serialize(): +def test_tensorrt_serialize(data_shape=(1, 3, 224, 224), data_type="float32"): if skip_codegen_test(): return - import mxnet - from mxnet.gluon.model_zoo.vision import get_model + i_data = np.random.uniform(0, 1, data_shape).astype(data_type) block = get_model("resnet18_v1", pretrained=True) - mod, params = relay.frontend.from_mxnet( - block, shape={"data": (1, 3, 224, 224)}, dtype="float32" - ) - # Compile - mod, config = tensorrt.partition_for_tensorrt(mod, params) - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): - lib = relay.build(mod, "cuda", params=params) - # Serialize - lib.export_library("compiled.so") - # Deserialize - loaded_lib = tvm.runtime.load_module("compiled.so") - # Run - if skip_runtime_test(): - return - gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib["default"](tvm.gpu(0))) - i_data = np.random.uniform(0, 1, (1, 3, 224, 224)).astype("float32") - gen_module.run(data=i_data) + mod, params = relay.frontend.from_mxnet(block, shape={"data": data_shape}, dtype=data_type) + mod, config = tensorrt.partition_for_tensorrt(mod) + + def compile_vm(mod, params): + with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): + vm_exec = relay.vm.compile(mod, target="cuda", params=params) + code, lib = vm_exec.save() + return code, lib + + def run_vm(code, lib): + vm_exec = tvm.runtime.vm.Executable.load_exec(code, lib) + vm = VirtualMachine(vm_exec, tvm.gpu(0)) + result = vm.invoke("main", data=i_data) + return result + + def save_vm(code, lib): + # save and load the code and lib file. 
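+        # Note: the VM executable is persisted as two artifacts returned by vm_exec.save():
+        # the serialized VM code (`code`) and the compiled kernel library (`lib`). Both are
+        # required to rebuild the executable via tvm.runtime.vm.Executable.load_exec below.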
+ lib.export_library("path_lib.so") + with open("path_code.ro", "wb") as fo: + fo.write(code) + + def load_vm(): + lib = tvm.runtime.load_module("path_lib.so") + code = bytearray(open("path_code.ro", "rb").read()) + return lib, code + + def compile_graph(mod, params): + with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): + graph, lib, params = relay.build(mod, params=params, target="cuda") + params = relay.save_param_dict(params) + return graph, lib, params + + def run_graph(graph, lib, params): + mod_ = graph_runtime.create(graph, lib, ctx=tvm.gpu(0)) + mod_.load_params(params) + mod_.run(data=i_data) + res = mod_.get_output(0) + return res + + def save_graph(graph, lib, params): + # Serialize + with open("compiled.json", "w") as f_graph_json: + f_graph_json.write(graph) + with open("compiled.params", "wb") as f_params: + f_params.write(params) + lib.export_library("compiled.so") + + def load_graph(): + # Deserialize + with open("compiled.json", "r") as f_graph_json: + graph = f_graph_json.read() + with open("compiled.params", "rb") as f_params: + params = bytearray(f_params.read()) + lib = tvm.runtime.load_module("compiled.so") + return graph, lib, params + + # Test serialization with graph runtime + graph, lib, graph_params = compile_graph(mod, params) + save_graph(graph, lib, graph_params) + loaded_graph, loaded_lib, loaded_params = load_graph() + + # Test serialization with VM + code_vm, lib_vm = compile_vm(mod, params) + save_vm(code_vm, lib_vm) + loaded_lib_vm, loaded_code_vm = load_vm() + + if not skip_runtime_test(): + result_dict = dict() + result_dict['graph'] = run_graph(graph, lib, graph_params) + result_dict['graph_ref'] = run_graph(loaded_graph, loaded_lib, loaded_params) + + result_dict['vm'] = run_vm(code_vm, lib_vm) + result_dict['vm_ref'] = run_vm(loaded_code_vm, loaded_lib_vm) + + assert_result_dict_holds(result_dict) def test_conv2d(): @@ -701,27 +759,34 @@ def get_graph(op, x_shape=(1, 2, 3, 4), axis=(2, 3), keepdims=False): def test_strided_slice(): - def get_graph(x_shape, begin, end, strides=None): + def get_graph(x_shape, begin, end, strides=None, slice_mode="size"): x = relay.var("x", shape=(x_shape), dtype="float32") if strides: out = relay.strided_slice( x, - relay.expr.const(begin, dtype="int32"), - relay.expr.const(end, dtype="int32"), - relay.expr.const(strides, dtype="int32"), + begin, + end, + strides, + slice_mode=slice_mode, ) else: out = relay.strided_slice( x, - relay.expr.const(begin, dtype="int32"), - relay.expr.const(end, dtype="int32"), + begin, + end, + slice_mode=slice_mode, ) f = relay.Function([x], out) return f, {"x": x_shape}, [] - run_and_verify_func(get_graph((1, 3, 6, 7), [0, 0, 0, 0], [1, 1, 6, 7])) - run_and_verify_func(get_graph((1, 3, 6, 7), [0, 1, 0, 0], [1, 2, 6, 6])) - run_and_verify_func(get_graph((1, 10), [0, 0], [1, 10], [1, 2])) + + for slice_mode in ["size", "end"]: + run_and_verify_func(get_graph((1, 3, 6, 7), (0, 0, 0, 0), (1, 1, 6, 7), slice_mode=slice_mode)) + run_and_verify_func(get_graph((1, 3, 6, 7), [0, 1, 0, 0], [1, 2, 6, 6], slice_mode=slice_mode)) + run_and_verify_func(get_graph((2, 3, 6, 7), [0, 0, 0, 0], [-1,-1,-1,-1], slice_mode=slice_mode )) + run_and_verify_func(get_graph((2, 3, 6, 7), [0, 1, 0, 0], [-1,-1,-1,-1], slice_mode=slice_mode )) + run_and_verify_func(get_graph((1, 6), [0, 1], [1, 3], slice_mode=slice_mode )) + def test_adaptive_pool2d(): @@ -841,6 +906,39 @@ def get_graph( run_and_verify_func(get_graph(strides=(2, 2, 2))) run_and_verify_func(get_graph(strides=(2, 
2, 2), output_padding=(1, 1, 1))) +def test_tensorrt_ops(): + # Op tests + test_conv2d() + test_conv2d_nhwc() + test_conv2d_weights_const() + test_conv2d_weights_transposed() + test_dense() + test_bias_add() + test_pool2d() + test_global_pool2d() + test_batch_flatten() + test_expand_dims() + test_squeeze() + test_concatenate() + test_conv2d_transpose() + test_reshape() + test_transpose() + test_float_const() + test_pad() + test_softmax() + test_batch_norm() + test_unary() + test_clip() + test_leaky_relu() + test_binary() + test_reduce() + test_strided_slice() + test_adaptive_pool2d() + test_multiple_outputs() + test_conv3d() + test_pool3d() + test_conv3d_transpose() + def test_alexnet(): run_and_verify_model("alexnet") @@ -874,44 +972,7 @@ def test_densenet121(): run_and_verify_model("densenet121") -if __name__ == "__main__": - test_tensorrt_not_compatible() - test_tensorrt_simple() - test_tensorrt_simple_cpu_io() - test_tensorrt_serialize() - - # Op tests - test_conv2d() - test_conv2d_nhwc() - test_conv2d_weights_const() - test_conv2d_weights_transposed() - test_dense() - test_bias_add() - test_pool2d() - test_global_pool2d() - test_batch_flatten() - test_expand_dims() - test_squeeze() - test_concatenate() - test_conv2d_transpose() - test_reshape() - test_transpose() - test_float_const() - test_pad() - test_softmax() - test_batch_norm() - test_unary() - test_clip() - test_leaky_relu() - test_binary() - test_reduce() - test_strided_slice() - test_adaptive_pool2d() - test_multiple_outputs() - test_conv3d() - test_pool3d() - test_conv3d_transpose() - +def test_tensorrt_integration(): # Integration tests test_alexnet() test_resnet18_v1() @@ -921,3 +982,72 @@ def test_densenet121(): test_mobilenet_v2() test_vgg11() test_densenet121() + + + +def test_dynamic_offload(data_shape=(1, 32, 8, 8), k_shape=(1, 32, 3, 3)): + """ + This test checks for proper dynamic offloading of relay graphs. An addition between + the outputs of two conv2d's is performed, one of them having all static args whereas + the other has a arg with dynamic shape. It is expected for the TRT partitioner to + offload the conv2d with dynamic arg to TVM while running the other in TRT. 
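+    Correctness is checked structurally: the partitioned module is compared against a
+    hand-built expected module (see get_expected below) in which only the fully static
+    conv2d is wrapped in the `tensorrt_0` subgraph.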
+ """ + x = relay.var('x', shape=(data_shape[0], data_shape[1], Any(), Any()), dtype='float32') + y = relay.var('y', shape=(data_shape), dtype='float32') + kernel = relay.var('kernel', shape=(k_shape), dtype='float32') + + def get_expected(): + + def set_func_attr(func, compile_name, symbol_name): + func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Compiler", compile_name) + func = func.with_attr("global_symbol", symbol_name) + return func + + # Create a nested TRT function that matches the expected output + mod = tvm.IRModule() + var1 = relay.var('tensorrt_0_i0', shape=(data_shape), dtype='float32') + kernel_trt = relay.var('tensorrt_0_i1', shape=(k_shape), dtype='float32') + out1 = relay.nn.conv2d(var1, kernel_trt, channels=k_shape[0], kernel_size=k_shape[2:4]) + f1 = GlobalVar("tensorrt_0") + func = relay.Function([var1, kernel_trt], out1) + func = set_func_attr(func, "tensorrt", "tensorrt_0") + mod[f1] = func + mod = relay.transform.InferType()(mod) + + # Create the main function + out1 = relay.nn.conv2d(x, kernel, channels=k_shape[0], kernel_size=k_shape[2:4]) + out = relay.add(out1, f1(y, kernel)) + f = relay.Function([x, y, kernel], out) + mod["main"] = f + mod = relay.transform.InferType()(mod) + + return mod + + # Create relay function that will be offloaded to TRT + out1 = relay.nn.conv2d(x, kernel, channels=k_shape[0], kernel_size=k_shape[2:4]) + out2 = relay.nn.conv2d(y, kernel, channels=k_shape[0], kernel_size=k_shape[2:4]) + out = relay.add(out1, out2) + f = relay.Function([x, y, kernel], out) + + # Pass the function to TRT compilation + mod = tvm.IRModule() + mod["main"] = f + mod = relay.transform.InferType()(mod) + mod_trt, config = tensorrt.partition_for_tensorrt(mod, params={}) + + # Get the expected relay graph and compare + mod_exp = get_expected() + tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True) + return + +if __name__ == "__main__": + test_tensorrt_not_compatible() + test_tensorrt_simple() + test_tensorrt_simple_cpu_io() + test_tensorrt_serialize() + test_tensorrt_ops() + test_tensorrt_integration() + test_dynamic_offload() + From 416ea33ae4fc8b690abe8ea719aa934f7329e3df Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Thu, 12 Nov 2020 19:15:01 +0000 Subject: [PATCH 02/12] Addressed the formatting/refactoring comments --- python/tvm/relay/op/contrib/tensorrt.py | 55 ++---- src/runtime/contrib/tensorrt/tensorrt_ops.cc | 2 +- tests/python/contrib/test_tensorrt.py | 193 ++++++++++--------- 3 files changed, 124 insertions(+), 126 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 9cdc50c24075..3455e3db03b3 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -18,7 +18,6 @@ """TensorRT supported operators.""" import logging import numpy as np -import os import tvm from tvm import relay from tvm.relay import transform @@ -142,11 +141,9 @@ def partition_for_tensorrt( RemoveDropoutPass(), transform.RemoveUnusedFunctions(), transform.ConvertLayout( - {"nn.conv2d": ["NCHW", "default"], - "nn.conv3d": ["NCDHW", "default"]} + {"nn.conv2d": ["NCHW", "default"], "nn.conv3d": ["NCDHW", "default"]} ), transform.FoldConstant(), - transform.InferType(), transform.AnnotateTarget("tensorrt"), transform.MergeCompilerRegions(), transform.PartitionGraph(), @@ -159,24 +156,24 @@ def partition_for_tensorrt( return mod, config + def check_dynamism(args, 
op_name): """ - This function checks for dynamism inside any of the args in the op. - Can be used to offload dynamic ops that are not supported by TRT to - be offloaded to relay VM. - - Raises a NotImplementedError if the type of the arg is not of types - Call, Var, Constant, or TupleGetItem. + Check for dynamism inside any of the args in the op. Parameters ---------- - args: a TRT array of the arguments of the op - op_name: name of the op for debugging purposes only - + args : tvm.ir.container.Array + Arguments of the op. Each of the argument shape is checked for presence of dynamic + components. + op_name: str + Name of the op for debugging purposes only. Returns ---------- - True if dynamism is present, False otherwise + ret : bool + True if dynamism is present, False otherwise """ + print(type(op_name)) for arg in args: if isinstance(arg, (Call, Var, Constant, TupleGetItem)): for dim_shape in arg.checked_type.shape: @@ -216,18 +213,8 @@ def _register_external_op_helper(op_name, supported=True): def _register_external_dynamic_check_func(op_name, checker): - """ - Wrapper to check dynamic shapes inside any of the args in the op - - Parameters - ---------- - op_name: name of the op for debugging purposes only - checker: additional checker function specific to the op + """Wrapper to check dynamic shapes inside any of the args in the op.""" - Returns - ---------- - wrapped checker function with dynamism check - """ @tvm.ir.register_op_attr(op_name, "target.tensorrt") def _func_wrapper(expr): attrs, args = expr.attrs, expr.args @@ -235,6 +222,7 @@ def _func_wrapper(expr): if check_dynamism(args, op_name): return False return checker(expr) + return _func_wrapper @@ -299,7 +287,6 @@ def _func_wrapper(attrs, args, op_name): _register_external_op_helper_with_checker("ceil", trt_version_annotate_fn((5, 1, 5))) - def add_annotate_fn(expr): # pylint: disable=unused-variable """Check if add is supported by TensorRT.""" @@ -318,6 +305,7 @@ def add_annotate_fn(expr): # pylint: disable=unused-variable return False return True + def batch_norm_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.batch_norm is supported by TensorRT.""" @@ -344,7 +332,6 @@ def softmax_annotate_fn(expr): # pylint: disable=unused-variable return True - def conv2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.conv2d is supported by TensorRT.""" @@ -364,7 +351,6 @@ def conv2d_annotate_fn(expr): # pylint: disable=unused-variable return True - def dense_annotate_fn(expr): # pylint: disable=unused-variable """Check if dense is supported by TensorRT.""" @@ -383,7 +369,6 @@ def dense_annotate_fn(expr): # pylint: disable=unused-variable return True - def bias_add_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.bias_add is supported by TensorRT.""" @@ -456,7 +441,6 @@ def global_max_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable return True - def global_avg_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.global_avg_pool2d is supported by TensorRT.""" @@ -575,6 +559,7 @@ def layout_transform_annotate_fn(expr): # pylint: disable=unused-variable return False return True + def reshape_annotate_fn(expr): # pylint: disable=unused-variable """Check if reshape is supported by TensorRT.""" @@ -606,6 +591,7 @@ def reshape_annotate_fn(expr): # pylint: disable=unused-variable return False return True + def pad_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.pad is supported by TensorRT.""" @@ -691,7 +677,6 @@ def 
adaptive_avg_pool2d_annotate_fn(expr): # pylint: disable=unused-variable return True - def conv3d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.conv3d is supported by TensorRT.""" @@ -771,6 +756,7 @@ def conv3d_transpose_annotate_fn(expr): # pylint: disable=unused-variable return False return True + _register_external_dynamic_check_func("add", add_annotate_fn) _register_external_dynamic_check_func("nn.batch_norm", batch_norm_annotate_fn) _register_external_dynamic_check_func("nn.softmax", softmax_annotate_fn) @@ -798,8 +784,6 @@ def conv3d_transpose_annotate_fn(expr): # pylint: disable=unused-variable _register_external_dynamic_check_func("nn.conv3d_transpose", conv3d_transpose_annotate_fn) - - def is_valid_subgraph(params, body): """Final check on whether the subgraph is valid and should be offloaded to TensorRT.""" # Remove invalid subgraphs for implicit batch mode. @@ -839,6 +823,7 @@ class SubgraphRemover(ExprMutator): """ Reverts subgraphs in subgraphs_to_remove back to TVM instead of using an external codegen. """ + def __init__(self, subgraphs_to_remove, mod, new_mod): ExprMutator.__init__(self) self.subgraphs_to_remove = subgraphs_to_remove @@ -860,7 +845,7 @@ def visit_call(self, call): args = [] for arg in call.args: args.append(super().visit(arg)) - return call.op(*args) + return call.op(*args) return super().visit_call(call) subgraphs_to_remove = [] @@ -897,5 +882,3 @@ def visit_tuple_getitem(self, op): class RemoveDropoutPass: def transform_function(self, func, mod, _): return RemoveDropout().visit(func) - - diff --git a/src/runtime/contrib/tensorrt/tensorrt_ops.cc b/src/runtime/contrib/tensorrt/tensorrt_ops.cc index 415caf50ea4d..5d29e9435ad1 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_ops.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_ops.cc @@ -944,7 +944,7 @@ class ReduceOpConverter : public TensorRTOpConverter { #if TRT_VERSION_GE(5, 1, 5) class StridedSliceOpConverter : public TensorRTOpConverter { public: - StridedSliceOpConverter() : TensorRTOpConverter({kTensor}) {} // , kWeight, kWeight, kWeight}) {} + StridedSliceOpConverter() : TensorRTOpConverter({kTensor}) {} void Convert(TensorRTOpConverterParams* params) const { auto input = params->inputs.at(0).tensor; diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index e96f93b5b266..525f53853a1a 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -28,6 +28,7 @@ from tvm.relay import Any, GlobalVar, transform from mxnet.gluon.model_zoo.vision import get_model + def skip_codegen_test(): """Skip test if TensorRT and CUDA codegen are not present""" if not tvm.runtime.enabled("cuda") or not tvm.gpu(0).exist: @@ -94,7 +95,9 @@ def run_and_verify_func(config, target="cuda"): result_key = mode + ("_trt" if use_trt else "") if use_trt: mod, config = tensorrt.partition_for_tensorrt(mod, params) - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): + with tvm.transform.PassContext( + opt_level=3, config={"relay.ext.tensorrt.options": config} + ): exec = relay.create_executor(mode, mod=mod, ctx=ctx, target=target) else: with tvm.transform.PassContext(opt_level=3): @@ -123,7 +126,7 @@ def compile_and_run(mod, params, i_data, mode="vm", use_trt=True): mod, config = tensorrt.partition_for_tensorrt(mod, params) check_trt_used(mod) with tvm.transform.PassContext( - opt_level=3, config={"relay.ext.tensorrt.options": config} + opt_level=3, config={"relay.ext.tensorrt.options": config} ): 
exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda") else: @@ -144,7 +147,7 @@ def compile_and_run(mod, params, i_data, mode="vm", use_trt=True): for use_trt in [True, False]: result_key = mode + ("_trt" if use_trt else "") result_dict[result_key] = compile_and_run( - mod, params, i_data, mode=mode, use_trt=use_trt + mod, params, i_data, mode=mode, use_trt=use_trt ) if not skip_runtime_test(): @@ -169,7 +172,6 @@ def test_tensorrt_simple(): y_data = np.random.uniform(-1, 1, yshape).astype(dtype) z_data = np.random.uniform(-1, 1, zshape).astype(dtype) - result_dict = dict() for mode in ["vm", "graph"]: for use_trt in [True, False]: @@ -178,7 +180,9 @@ def test_tensorrt_simple(): result_key = mode + ("_trt" if use_trt else "") if use_trt: mod, config = tensorrt.partition_for_tensorrt(mod) - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): + with tvm.transform.PassContext( + opt_level=3, config={"relay.ext.tensorrt.options": config} + ): relay_exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda") else: with tvm.transform.PassContext(opt_level=3): @@ -229,8 +233,7 @@ def test_tensorrt_not_compatible(): results = exec.evaluate()(x_data) - -def test_tensorrt_serialize(data_shape=(1, 3, 224, 224), data_type="float32"): +def test_tensorrt_serialize_graph_runtime(data_shape=(1, 3, 224, 224), data_type="float32"): if skip_codegen_test(): return @@ -239,29 +242,6 @@ def test_tensorrt_serialize(data_shape=(1, 3, 224, 224), data_type="float32"): mod, params = relay.frontend.from_mxnet(block, shape={"data": data_shape}, dtype=data_type) mod, config = tensorrt.partition_for_tensorrt(mod) - def compile_vm(mod, params): - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): - vm_exec = relay.vm.compile(mod, target="cuda", params=params) - code, lib = vm_exec.save() - return code, lib - - def run_vm(code, lib): - vm_exec = tvm.runtime.vm.Executable.load_exec(code, lib) - vm = VirtualMachine(vm_exec, tvm.gpu(0)) - result = vm.invoke("main", data=i_data) - return result - - def save_vm(code, lib): - # save and load the code and lib file. 
- lib.export_library("path_lib.so") - with open("path_code.ro", "wb") as fo: - fo.write(code) - - def load_vm(): - lib = tvm.runtime.load_module("path_lib.so") - code = bytearray(open("path_code.ro", "rb").read()) - return lib, code - def compile_graph(mod, params): with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): graph, lib, params = relay.build(mod, params=params, target="cuda") @@ -297,6 +277,45 @@ def load_graph(): save_graph(graph, lib, graph_params) loaded_graph, loaded_lib, loaded_params = load_graph() + if not skip_runtime_test(): + result_dict = dict() + result_dict["graph"] = run_graph(graph, lib, graph_params) + result_dict["graph_ref"] = run_graph(loaded_graph, loaded_lib, loaded_params) + assert_result_dict_holds(result_dict) + + +def test_tensorrt_serialize_vm(data_shape=(1, 3, 224, 224), data_type="float32"): + if skip_codegen_test(): + return + + i_data = np.random.uniform(0, 1, data_shape).astype(data_type) + block = get_model("resnet18_v1", pretrained=True) + mod, params = relay.frontend.from_mxnet(block, shape={"data": data_shape}, dtype=data_type) + mod, config = tensorrt.partition_for_tensorrt(mod) + + def compile_vm(mod, params): + with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): + vm_exec = relay.vm.compile(mod, target="cuda", params=params) + code, lib = vm_exec.save() + return code, lib + + def run_vm(code, lib): + vm_exec = tvm.runtime.vm.Executable.load_exec(code, lib) + vm = VirtualMachine(vm_exec, tvm.gpu(0)) + result = vm.invoke("main", data=i_data) + return result + + def save_vm(code, lib): + # save and load the code and lib file. + lib.export_library("path_lib.so") + with open("path_code.ro", "wb") as fo: + fo.write(code) + + def load_vm(): + lib = tvm.runtime.load_module("path_lib.so") + code = bytearray(open("path_code.ro", "rb").read()) + return lib, code + # Test serialization with VM code_vm, lib_vm = compile_vm(mod, params) save_vm(code_vm, lib_vm) @@ -304,12 +323,8 @@ def load_graph(): if not skip_runtime_test(): result_dict = dict() - result_dict['graph'] = run_graph(graph, lib, graph_params) - result_dict['graph_ref'] = run_graph(loaded_graph, loaded_lib, loaded_params) - - result_dict['vm'] = run_vm(code_vm, lib_vm) - result_dict['vm_ref'] = run_vm(loaded_code_vm, loaded_lib_vm) - + result_dict["vm"] = run_vm(code_vm, lib_vm) + result_dict["vm_ref"] = run_vm(loaded_code_vm, loaded_lib_vm) assert_result_dict_holds(result_dict) @@ -779,14 +794,20 @@ def get_graph(x_shape, begin, end, strides=None, slice_mode="size"): f = relay.Function([x], out) return f, {"x": x_shape}, [] - for slice_mode in ["size", "end"]: - run_and_verify_func(get_graph((1, 3, 6, 7), (0, 0, 0, 0), (1, 1, 6, 7), slice_mode=slice_mode)) - run_and_verify_func(get_graph((1, 3, 6, 7), [0, 1, 0, 0], [1, 2, 6, 6], slice_mode=slice_mode)) - run_and_verify_func(get_graph((2, 3, 6, 7), [0, 0, 0, 0], [-1,-1,-1,-1], slice_mode=slice_mode )) - run_and_verify_func(get_graph((2, 3, 6, 7), [0, 1, 0, 0], [-1,-1,-1,-1], slice_mode=slice_mode )) - run_and_verify_func(get_graph((1, 6), [0, 1], [1, 3], slice_mode=slice_mode )) - + run_and_verify_func( + get_graph((1, 3, 6, 7), (0, 0, 0, 0), (1, 1, 6, 7), slice_mode=slice_mode) + ) + run_and_verify_func( + get_graph((1, 3, 6, 7), [0, 1, 0, 0], [1, 2, 6, 6], slice_mode=slice_mode) + ) + run_and_verify_func( + get_graph((2, 3, 6, 7), [0, 0, 0, 0], [-1, -1, -1, -1], slice_mode=slice_mode) + ) + run_and_verify_func( + get_graph((2, 3, 6, 7), [0, 1, 0, 0], [-1, -1, -1, 
-1], slice_mode=slice_mode) + ) + run_and_verify_func(get_graph((1, 6), [0, 1], [1, 3], slice_mode=slice_mode)) def test_adaptive_pool2d(): @@ -906,39 +927,6 @@ def get_graph( run_and_verify_func(get_graph(strides=(2, 2, 2))) run_and_verify_func(get_graph(strides=(2, 2, 2), output_padding=(1, 1, 1))) -def test_tensorrt_ops(): - # Op tests - test_conv2d() - test_conv2d_nhwc() - test_conv2d_weights_const() - test_conv2d_weights_transposed() - test_dense() - test_bias_add() - test_pool2d() - test_global_pool2d() - test_batch_flatten() - test_expand_dims() - test_squeeze() - test_concatenate() - test_conv2d_transpose() - test_reshape() - test_transpose() - test_float_const() - test_pad() - test_softmax() - test_batch_norm() - test_unary() - test_clip() - test_leaky_relu() - test_binary() - test_reduce() - test_strided_slice() - test_adaptive_pool2d() - test_multiple_outputs() - test_conv3d() - test_pool3d() - test_conv3d_transpose() - def test_alexnet(): run_and_verify_model("alexnet") @@ -984,20 +972,18 @@ def test_tensorrt_integration(): test_densenet121() - def test_dynamic_offload(data_shape=(1, 32, 8, 8), k_shape=(1, 32, 3, 3)): """ - This test checks for proper dynamic offloading of relay graphs. An addition between + This test checks for proper dynamic offloading of relay graphs. An addition between the outputs of two conv2d's is performed, one of them having all static args whereas the other has a arg with dynamic shape. It is expected for the TRT partitioner to offload the conv2d with dynamic arg to TVM while running the other in TRT. """ - x = relay.var('x', shape=(data_shape[0], data_shape[1], Any(), Any()), dtype='float32') - y = relay.var('y', shape=(data_shape), dtype='float32') - kernel = relay.var('kernel', shape=(k_shape), dtype='float32') + x = relay.var("x", shape=(data_shape[0], data_shape[1], Any(), Any()), dtype="float32") + y = relay.var("y", shape=(data_shape), dtype="float32") + kernel = relay.var("kernel", shape=(k_shape), dtype="float32") def get_expected(): - def set_func_attr(func, compile_name, symbol_name): func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) @@ -1007,8 +993,8 @@ def set_func_attr(func, compile_name, symbol_name): # Create a nested TRT function that matches the expected output mod = tvm.IRModule() - var1 = relay.var('tensorrt_0_i0', shape=(data_shape), dtype='float32') - kernel_trt = relay.var('tensorrt_0_i1', shape=(k_shape), dtype='float32') + var1 = relay.var("tensorrt_0_i0", shape=(data_shape), dtype="float32") + kernel_trt = relay.var("tensorrt_0_i1", shape=(k_shape), dtype="float32") out1 = relay.nn.conv2d(var1, kernel_trt, channels=k_shape[0], kernel_size=k_shape[2:4]) f1 = GlobalVar("tensorrt_0") func = relay.Function([var1, kernel_trt], out1) @@ -1016,13 +1002,12 @@ def set_func_attr(func, compile_name, symbol_name): mod[f1] = func mod = relay.transform.InferType()(mod) - # Create the main function + # Create the main function out1 = relay.nn.conv2d(x, kernel, channels=k_shape[0], kernel_size=k_shape[2:4]) out = relay.add(out1, f1(y, kernel)) f = relay.Function([x, y, kernel], out) mod["main"] = f mod = relay.transform.InferType()(mod) - return mod # Create relay function that will be offloaded to TRT @@ -1042,12 +1027,42 @@ def set_func_attr(func, compile_name, symbol_name): tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True) return + if __name__ == "__main__": test_tensorrt_not_compatible() test_tensorrt_simple() test_tensorrt_simple_cpu_io() - 
test_tensorrt_serialize() - test_tensorrt_ops() + test_tensorrt_serialize_graph_runtime() + test_tensorrt_serialize_vm() test_tensorrt_integration() test_dynamic_offload() - + test_conv2d() + test_conv2d_nhwc() + test_conv2d_weights_const() + test_conv2d_weights_transposed() + test_dense() + test_bias_add() + test_pool2d() + test_global_pool2d() + test_batch_flatten() + test_expand_dims() + test_squeeze() + test_concatenate() + test_conv2d_transpose() + test_reshape() + test_transpose() + test_float_const() + test_pad() + test_softmax() + test_batch_norm() + test_unary() + test_clip() + test_leaky_relu() + test_binary() + test_reduce() + test_strided_slice() + test_adaptive_pool2d() + test_multiple_outputs() + test_conv3d() + test_pool3d() + test_conv3d_transpose() From c33be64e56295b38855f467a090d7df50b569dbf Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Thu, 12 Nov 2020 19:19:33 +0000 Subject: [PATCH 03/12] Addressed comment in TRT codegen Lint formatting --- src/relay/backend/contrib/tensorrt/codegen.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/relay/backend/contrib/tensorrt/codegen.cc b/src/relay/backend/contrib/tensorrt/codegen.cc index 5f53c1f93444..cb648333df8d 100644 --- a/src/relay/backend/contrib/tensorrt/codegen.cc +++ b/src/relay/backend/contrib/tensorrt/codegen.cc @@ -133,7 +133,7 @@ class TensorRTJSONSerializer : public backend::contrib::JSONSerializer { auto process_slice_index = [](Integer x, int default_value, int dim_value) { if (!x.defined()) return default_value; int value = x.as()->value; - value = (value < 0 ) ? dim_value + value : value; + if (value < 0) value += dim_value; return value; }; @@ -149,18 +149,17 @@ class TensorRTJSONSerializer : public backend::contrib::JSONSerializer { ICHECK_GT(stride_value, 0); strides.push_back(std::to_string(stride_value)); int size_value; - if (attrs->slice_mode == "end"){ + if (attrs->slice_mode == "end") { const int end_value = process_slice_index(attrs->end.value()[i], ishape[i], ishape[i]); size_value = (end_value - begin_value + stride_value - 1) / stride_value; - } - else if (attrs->slice_mode == "size"){ + } else if (attrs->slice_mode == "size") { // with slice_mode = "size", attrs->end_value mean the size of the slice int end_value = attrs->end.value()[i].as()->value; - size_value = (end_value == -1) ? ishape[i] - begin_value: end_value; + size_value = (end_value == -1) ? 
ishape[i] - begin_value : end_value; } ICHECK_GT(size_value, 0); size.push_back(std::to_string(size_value)); - } + } std::vector start_attr, size_attr, strides_attr; start_attr.emplace_back(start); size_attr.emplace_back(size); From 99a767e39fd0bfaf782f23680082b9fde5bc3e1a Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Thu, 12 Nov 2020 19:24:11 +0000 Subject: [PATCH 04/12] Lint error --- src/runtime/contrib/tensorrt/tensorrt_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_ops.cc b/src/runtime/contrib/tensorrt/tensorrt_ops.cc index 5d29e9435ad1..057743c3b588 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_ops.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_ops.cc @@ -944,7 +944,7 @@ class ReduceOpConverter : public TensorRTOpConverter { #if TRT_VERSION_GE(5, 1, 5) class StridedSliceOpConverter : public TensorRTOpConverter { public: - StridedSliceOpConverter() : TensorRTOpConverter({kTensor}) {} + StridedSliceOpConverter() : TensorRTOpConverter({kTensor}) {} void Convert(TensorRTOpConverterParams* params) const { auto input = params->inputs.at(0).tensor; From 11a9968449730721f6e99b49dcda38553f034078 Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Thu, 12 Nov 2020 22:21:00 +0000 Subject: [PATCH 05/12] using slice_mode during strided slice registration in tensorrt.py --- python/tvm/relay/op/contrib/tensorrt.py | 30 ++++++++++++++++--------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 3455e3db03b3..0b6ed4076379 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -173,7 +173,6 @@ def check_dynamism(args, op_name): ret : bool True if dynamism is present, False otherwise """ - print(type(op_name)) for arg in args: if isinstance(arg, (Call, Var, Constant, TupleGetItem)): for dim_shape in arg.checked_type.shape: @@ -183,7 +182,7 @@ def check_dynamism(args, op_name): return check_dynamism(arg.fields, op_name) else: logger.info( - "Arg not supported in TensorRT for ", + "Arg not supported in TensorRT for %s with type %s", op_name, type(arg), ) @@ -217,7 +216,7 @@ def _register_external_dynamic_check_func(op_name, checker): @tvm.ir.register_op_attr(op_name, "target.tensorrt") def _func_wrapper(expr): - attrs, args = expr.attrs, expr.args + args = expr.args # ops with dynamic shapes are offloaded to VM if check_dynamism(args, op_name): return False @@ -639,13 +638,24 @@ def strided_slice_annotate_fn(expr): # pylint: disable=unused-variable for i in range(0, len(args[0].checked_type.shape)): begin = int(attrs.begin[i]) - end = ( - int(attrs.end[i]) - if attrs.end[i] is not None and int(attrs.end[i]) != -1 - else args[0].checked_type.shape[i] - ) - if int(end) - int(begin) < 1: - print("strided_slice: size of slice must be at least 1") + if attrs.slice_mode == "end": + end = ( + int(attrs.end[i]) + if attrs.end[i] is not None and int(attrs.end[i]) != -1 + else args[0].checked_type.shape[i] + ) + size = int(end) - int(begin) + elif attrs.slice_mode == "size": + size = ( + int(attrs.end[i]) + if attrs.end[i] is not None and int(attrs.end[i]) != -1 + else args[0].checked_type.shape[i] - begin + ) + else: + logger.warning("strided_slice: unknown slice mode encountered") + + if int(size) < 1: + logger.info("strided_slice: size of slice must be at least 1") return False return True From 8fe7b6297d329e5e900db07357b66ec18de78bbd Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: 
Thu, 12 Nov 2020 22:32:38 +0000 Subject: [PATCH 06/12] removed a few blank lines --- python/tvm/relay/op/contrib/tensorrt.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 0b6ed4076379..13ddf3a27bea 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -134,7 +134,6 @@ def partition_for_tensorrt( if params: mod["main"] = bind_params_by_name(mod["main"], params) - seq = tvm.transform.Sequential( [ transform.InferType(), @@ -153,7 +152,6 @@ def partition_for_tensorrt( with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): mod = seq(mod) mod = prune_tensorrt_subgraphs(mod) - return mod, config @@ -631,11 +629,9 @@ def strided_slice_annotate_fn(expr): # pylint: disable=unused-variable if batch_dim_begin_modified or batch_dim_end_modified: logger.info("strided_slice: can't modify batch dimension.") return False - if any([x is not None and x <= 0 for x in attrs.strides]): logger.info("strided_slice: stride must be positive") return False - for i in range(0, len(args[0].checked_type.shape)): begin = int(attrs.begin[i]) if attrs.slice_mode == "end": From 2210543e4d29b0908f734b550ec2bdf8957c878f Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Thu, 12 Nov 2020 22:42:04 +0000 Subject: [PATCH 07/12] addressing cli comment on elif-return --- python/tvm/relay/op/contrib/tensorrt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 13ddf3a27bea..7f8c725f0801 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -847,7 +847,7 @@ def visit_call(self, call): var_map[param] = super().visit(arg) new_body = relay.bind(func.body, var_map) return new_body - elif name != "main": + if name != "main": args = [] for arg in call.args: args.append(super().visit(arg)) From b041eee00f80cda5fc35af788a9ad77aa15fae9f Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Fri, 13 Nov 2020 03:09:56 +0000 Subject: [PATCH 08/12] Added decorator for tensorrt functions with dynamism check skip_codegen added for test_tensorrt::test_dynamic_offload --- python/tvm/relay/op/contrib/tensorrt.py | 75 ++++++++++++------------- tests/python/contrib/test_tensorrt.py | 4 ++ 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 7f8c725f0801..f9ed0b320d37 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -208,20 +208,18 @@ def _register_external_op_helper(op_name, supported=True): op_name, lambda attrs, args, op_name: supported ) - -def _register_external_dynamic_check_func(op_name, checker): +def _register_external_dynamic_check_func(op_name): """Wrapper to check dynamic shapes inside any of the args in the op.""" - - @tvm.ir.register_op_attr(op_name, "target.tensorrt") - def _func_wrapper(expr): - args = expr.args - # ops with dynamic shapes are offloaded to VM - if check_dynamism(args, op_name): - return False - return checker(expr) - - return _func_wrapper - + def _decorator_helper(checker): + @tvm.ir.register_op_attr(op_name, "target.tensorrt") + def _func_wrapper(expr): + args = expr.args + # ops with dynamic shapes are offloaded to VM + if check_dynamism(args, op_name): + return False + return checker(expr) + return _func_wrapper + return _decorator_helper # 
Ops which are always supported _register_external_op_helper("nn.relu") @@ -284,6 +282,7 @@ def _func_wrapper(attrs, args, op_name): _register_external_op_helper_with_checker("ceil", trt_version_annotate_fn((5, 1, 5))) +@_register_external_dynamic_check_func("add") def add_annotate_fn(expr): # pylint: disable=unused-variable """Check if add is supported by TensorRT.""" @@ -302,7 +301,7 @@ def add_annotate_fn(expr): # pylint: disable=unused-variable return False return True - +@_register_external_dynamic_check_func("nn.batch_norm") def batch_norm_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.batch_norm is supported by TensorRT.""" @@ -316,6 +315,7 @@ def batch_norm_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.softmax") def softmax_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.softmax is supported by TensorRT.""" @@ -329,6 +329,7 @@ def softmax_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.conv2d") def conv2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.conv2d is supported by TensorRT.""" @@ -348,6 +349,7 @@ def conv2d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.dense") def dense_annotate_fn(expr): # pylint: disable=unused-variable """Check if dense is supported by TensorRT.""" @@ -366,6 +368,7 @@ def dense_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.bias_add") def bias_add_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.bias_add is supported by TensorRT.""" @@ -380,6 +383,7 @@ def bias_add_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.max_pool2d") def max_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.max_pool2d is supported by TensorRT.""" @@ -396,6 +400,7 @@ def max_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.avg_pool2d") def avg_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.avg_pool2d is supported by TensorRT.""" @@ -425,6 +430,7 @@ def avg_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.global_max_pool2d") def global_max_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.global_max_pool2d is supported by TensorRT.""" @@ -438,6 +444,7 @@ def global_max_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.global_avg_pool2d") def global_avg_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.global_avg_pool2d is supported by TensorRT.""" @@ -451,6 +458,7 @@ def global_avg_pool_2d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("expand_dims") def expand_dims_annotate_fn(expr): # pylint: disable=unused-variable """Check if expand_dims is supported by TensorRT.""" @@ -464,6 +472,7 @@ def expand_dims_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("squeeze") def squeeze_annotate_fn(expr): # pylint: disable=unused-variable """Check if squeeze is supported by TensorRT.""" @@ -480,6 +489,7 @@ def squeeze_annotate_fn(expr): # pylint: disable=unused-variable return True 
+@_register_external_dynamic_check_func("concatenate") def concatenate_annotate_fn(expr): # pylint: disable=unused-variable """Check if concatenate is supported by TensorRT.""" @@ -500,6 +510,7 @@ def concatenate_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.conv2d_transpose") def conv2d_transpose_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.conv2d_transpose is supported by TensorRT.""" @@ -524,6 +535,7 @@ def conv2d_transpose_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("transpose") def transpose_annotate_fn(expr): # pylint: disable=unused-variable """Check if transpose is supported by TensorRT.""" @@ -537,6 +549,7 @@ def transpose_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("layout_transform") def layout_transform_annotate_fn(expr): # pylint: disable=unused-variable """Check if layout_transform is supported by TensorRT.""" @@ -557,6 +570,7 @@ def layout_transform_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("reshape") def reshape_annotate_fn(expr): # pylint: disable=unused-variable """Check if reshape is supported by TensorRT.""" @@ -589,6 +603,7 @@ def reshape_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.pad") def pad_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.pad is supported by TensorRT.""" @@ -610,6 +625,7 @@ def pad_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("strided_slice") def strided_slice_annotate_fn(expr): # pylint: disable=unused-variable """Check if strided_slice is supported by TensorRT.""" @@ -657,6 +673,7 @@ def strided_slice_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.adaptive_max_pool2d") def adaptive_max_pool2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.adaptive_max_pool2d is supported by TensorRT.""" @@ -670,6 +687,7 @@ def adaptive_max_pool2d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.adaptive_avg_pool2d") def adaptive_avg_pool2d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.adaptive_avg_pool2d is supported by TensorRT.""" @@ -683,6 +701,7 @@ def adaptive_avg_pool2d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.conv3d") def conv3d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.conv3d is supported by TensorRT.""" @@ -704,6 +723,7 @@ def conv3d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.max_pool3d") def max_pool_3d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.max_pool3d is supported by TensorRT.""" @@ -719,6 +739,7 @@ def max_pool_3d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.avg_pool3d") def avg_pool_3d_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.avg_pool3d is supported by TensorRT.""" @@ -734,6 +755,7 @@ def avg_pool_3d_annotate_fn(expr): # pylint: disable=unused-variable return True +@_register_external_dynamic_check_func("nn.conv3d_transpose") def conv3d_transpose_annotate_fn(expr): # pylint: disable=unused-variable 
"""Check if nn.conv3d_transpose is supported by TensorRT.""" @@ -763,31 +785,6 @@ def conv3d_transpose_annotate_fn(expr): # pylint: disable=unused-variable return True -_register_external_dynamic_check_func("add", add_annotate_fn) -_register_external_dynamic_check_func("nn.batch_norm", batch_norm_annotate_fn) -_register_external_dynamic_check_func("nn.softmax", softmax_annotate_fn) -_register_external_dynamic_check_func("nn.conv2d", conv2d_annotate_fn) -_register_external_dynamic_check_func("nn.dense", dense_annotate_fn) -_register_external_dynamic_check_func("nn.bias_add", bias_add_annotate_fn) -_register_external_dynamic_check_func("nn.max_pool2d", max_pool_2d_annotate_fn) -_register_external_dynamic_check_func("nn.avg_pool2d", avg_pool_2d_annotate_fn) -_register_external_dynamic_check_func("nn.global_max_pool2d", global_max_pool_2d_annotate_fn) -_register_external_dynamic_check_func("nn.global_avg_pool2d", global_avg_pool_2d_annotate_fn) -_register_external_dynamic_check_func("expand_dims", expand_dims_annotate_fn) -_register_external_dynamic_check_func("squeeze", squeeze_annotate_fn) -_register_external_dynamic_check_func("concatenate", concatenate_annotate_fn) -_register_external_dynamic_check_func("nn.conv2d_transpose", conv2d_transpose_annotate_fn) -_register_external_dynamic_check_func("transpose", transpose_annotate_fn) -_register_external_dynamic_check_func("layout_transform", layout_transform_annotate_fn) -_register_external_dynamic_check_func("reshape", reshape_annotate_fn) -_register_external_dynamic_check_func("nn.pad", pad_annotate_fn) -_register_external_dynamic_check_func("strided_slice", strided_slice_annotate_fn) -_register_external_dynamic_check_func("nn.adaptive_max_pool2d", adaptive_max_pool2d_annotate_fn) -_register_external_dynamic_check_func("nn.adaptive_avg_pool2d", adaptive_avg_pool2d_annotate_fn) -_register_external_dynamic_check_func("nn.conv3d", conv3d_annotate_fn) -_register_external_dynamic_check_func("nn.max_pool3d", max_pool_3d_annotate_fn) -_register_external_dynamic_check_func("nn.avg_pool3d", avg_pool_3d_annotate_fn) -_register_external_dynamic_check_func("nn.conv3d_transpose", conv3d_transpose_annotate_fn) def is_valid_subgraph(params, body): diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 525f53853a1a..96ffd4d06758 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -979,6 +979,10 @@ def test_dynamic_offload(data_shape=(1, 32, 8, 8), k_shape=(1, 32, 3, 3)): the other has a arg with dynamic shape. It is expected for the TRT partitioner to offload the conv2d with dynamic arg to TVM while running the other in TRT. 
""" + + if skip_codegen_test(): + return + x = relay.var("x", shape=(data_shape[0], data_shape[1], Any(), Any()), dtype="float32") y = relay.var("y", shape=(data_shape), dtype="float32") kernel = relay.var("kernel", shape=(k_shape), dtype="float32") From 282db6d7f424022edcfd4d0f7aff17ddb514e33d Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Fri, 13 Nov 2020 04:57:05 +0000 Subject: [PATCH 09/12] addressed comments in PR + black linting --- python/tvm/relay/op/contrib/tensorrt.py | 8 ++- tests/python/contrib/test_tensorrt.py | 78 ++++++++----------------- 2 files changed, 31 insertions(+), 55 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index f9ed0b320d37..739d49c412e8 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -208,8 +208,10 @@ def _register_external_op_helper(op_name, supported=True): op_name, lambda attrs, args, op_name: supported ) + def _register_external_dynamic_check_func(op_name): """Wrapper to check dynamic shapes inside any of the args in the op.""" + def _decorator_helper(checker): @tvm.ir.register_op_attr(op_name, "target.tensorrt") def _func_wrapper(expr): @@ -218,9 +220,12 @@ def _func_wrapper(expr): if check_dynamism(args, op_name): return False return checker(expr) + return _func_wrapper + return _decorator_helper + # Ops which are always supported _register_external_op_helper("nn.relu") _register_external_op_helper("sigmoid") @@ -301,6 +306,7 @@ def add_annotate_fn(expr): # pylint: disable=unused-variable return False return True + @_register_external_dynamic_check_func("nn.batch_norm") def batch_norm_annotate_fn(expr): # pylint: disable=unused-variable """Check if nn.batch_norm is supported by TensorRT.""" @@ -785,8 +791,6 @@ def conv3d_transpose_annotate_fn(expr): # pylint: disable=unused-variable return True - - def is_valid_subgraph(params, body): """Final check on whether the subgraph is valid and should be offloaded to TensorRT.""" # Remove invalid subgraphs for implicit batch mode. 
diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 96ffd4d06758..993ec111c1a0 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -23,7 +23,7 @@ import tvm.relay.testing from tvm import relay from tvm.relay.op.contrib import tensorrt -from tvm.contrib import graph_runtime +from tvm.contrib import graph_runtime, utils from tvm.runtime.vm import VirtualMachine from tvm.relay import Any, GlobalVar, transform from mxnet.gluon.model_zoo.vision import get_model @@ -233,14 +233,17 @@ def test_tensorrt_not_compatible(): results = exec.evaluate()(x_data) -def test_tensorrt_serialize_graph_runtime(data_shape=(1, 3, 224, 224), data_type="float32"): +def test_tensorrt_serialize_graph_runtime(): if skip_codegen_test(): return + data_shape = (1, 3, 224, 224) + data_type = "float32" i_data = np.random.uniform(0, 1, data_shape).astype(data_type) block = get_model("resnet18_v1", pretrained=True) mod, params = relay.frontend.from_mxnet(block, shape={"data": data_shape}, dtype=data_type) mod, config = tensorrt.partition_for_tensorrt(mod) + tmpdir = utils.tempdir() def compile_graph(mod, params): with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): @@ -257,19 +260,19 @@ def run_graph(graph, lib, params): def save_graph(graph, lib, params): # Serialize - with open("compiled.json", "w") as f_graph_json: + with open(tmpdir.relpath("compiled.json"), "w") as f_graph_json: f_graph_json.write(graph) - with open("compiled.params", "wb") as f_params: + with open(tmpdir.relpath("compiled.params"), "wb") as f_params: f_params.write(params) - lib.export_library("compiled.so") + lib.export_library(tmpdir.relpath("compiled.so")) def load_graph(): # Deserialize - with open("compiled.json", "r") as f_graph_json: + with open(tmpdir.relpath("compiled.json"), "r") as f_graph_json: graph = f_graph_json.read() - with open("compiled.params", "rb") as f_params: + with open(tmpdir.relpath("compiled.params"), "rb") as f_params: params = bytearray(f_params.read()) - lib = tvm.runtime.load_module("compiled.so") + lib = tvm.runtime.load_module(tmpdir.relpath("compiled.so")) return graph, lib, params # Test serialization with graph runtime @@ -284,14 +287,17 @@ def load_graph(): assert_result_dict_holds(result_dict) -def test_tensorrt_serialize_vm(data_shape=(1, 3, 224, 224), data_type="float32"): +def test_tensorrt_serialize_vm(): if skip_codegen_test(): return + data_shape = (1, 3, 224, 224) + data_type = "float32" i_data = np.random.uniform(0, 1, data_shape).astype(data_type) block = get_model("resnet18_v1", pretrained=True) mod, params = relay.frontend.from_mxnet(block, shape={"data": data_shape}, dtype=data_type) mod, config = tensorrt.partition_for_tensorrt(mod) + tmpdir = utils.tempdir() def compile_vm(mod, params): with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): @@ -307,13 +313,13 @@ def run_vm(code, lib): def save_vm(code, lib): # save and load the code and lib file. 
- lib.export_library("path_lib.so") - with open("path_code.ro", "wb") as fo: + lib.export_library(tmpdir.relpath("path_lib.so")) + with open(tmpdir.relpath("path_code.ro"), "wb") as fo: fo.write(code) def load_vm(): - lib = tvm.runtime.load_module("path_lib.so") - code = bytearray(open("path_code.ro", "rb").read()) + lib = tvm.runtime.load_module(tmpdir.relpath("path_lib.so")) + code = bytearray(open(tmpdir.relpath("path_code.ro"), "rb").read()) return lib, code # Test serialization with VM @@ -972,17 +978,20 @@ def test_tensorrt_integration(): test_densenet121() -def test_dynamic_offload(data_shape=(1, 32, 8, 8), k_shape=(1, 32, 3, 3)): +def test_dynamic_offload(): """ This test checks for proper dynamic offloading of relay graphs. An addition between the outputs of two conv2d's is performed, one of them having all static args whereas the other has a arg with dynamic shape. It is expected for the TRT partitioner to offload the conv2d with dynamic arg to TVM while running the other in TRT. """ - + if skip_codegen_test(): return + data_shape = (1, 32, 8, 8) + k_shape = (1, 32, 3, 3) + x = relay.var("x", shape=(data_shape[0], data_shape[1], Any(), Any()), dtype="float32") y = relay.var("y", shape=(data_shape), dtype="float32") kernel = relay.var("kernel", shape=(k_shape), dtype="float32") @@ -1029,44 +1038,7 @@ def set_func_attr(func, compile_name, symbol_name): # Get the expected relay graph and compare mod_exp = get_expected() tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True) - return if __name__ == "__main__": - test_tensorrt_not_compatible() - test_tensorrt_simple() - test_tensorrt_simple_cpu_io() - test_tensorrt_serialize_graph_runtime() - test_tensorrt_serialize_vm() - test_tensorrt_integration() - test_dynamic_offload() - test_conv2d() - test_conv2d_nhwc() - test_conv2d_weights_const() - test_conv2d_weights_transposed() - test_dense() - test_bias_add() - test_pool2d() - test_global_pool2d() - test_batch_flatten() - test_expand_dims() - test_squeeze() - test_concatenate() - test_conv2d_transpose() - test_reshape() - test_transpose() - test_float_const() - test_pad() - test_softmax() - test_batch_norm() - test_unary() - test_clip() - test_leaky_relu() - test_binary() - test_reduce() - test_strided_slice() - test_adaptive_pool2d() - test_multiple_outputs() - test_conv3d() - test_pool3d() - test_conv3d_transpose() + pytest.main([__file__]) From b6d8dc88765736d608542299c2136988513b0a8d Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Fri, 13 Nov 2020 07:54:29 +0000 Subject: [PATCH 10/12] resolved import error in test_tensorrt --- tests/python/contrib/test_tensorrt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 993ec111c1a0..a6d862de92b3 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -18,6 +18,8 @@ import time import pytest import itertools +import mxnet as mx +from mxnet.gluon.model_zoo.vision import get_model import tvm import tvm.relay.testing @@ -26,7 +28,6 @@ from tvm.contrib import graph_runtime, utils from tvm.runtime.vm import VirtualMachine from tvm.relay import Any, GlobalVar, transform -from mxnet.gluon.model_zoo.vision import get_model def skip_codegen_test(): From 0b1ac24ae46dcf61861a0497d8b97ef4d3ae058c Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Fri, 13 Nov 2020 14:07:49 +0000 Subject: [PATCH 11/12] import mxnet location changed to pass CI --- tests/python/contrib/test_tensorrt.py | 9 
+++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index a6d862de92b3..84c6f3a66535 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -18,8 +18,6 @@ import time import pytest import itertools -import mxnet as mx -from mxnet.gluon.model_zoo.vision import get_model import tvm import tvm.relay.testing @@ -114,6 +112,9 @@ def run_and_verify_model(model): if skip_codegen_test(): return + import mxnet as mx + from mxnet.gluon.model_zoo.vision import get_model + def check_trt_used(mod): num_trt_subgraphs = sum( [1 if gv.name_hint == "tensorrt_0" else 0 for gv in mod.get_global_vars()] @@ -237,6 +238,8 @@ def test_tensorrt_not_compatible(): def test_tensorrt_serialize_graph_runtime(): if skip_codegen_test(): return + import mxnet as mx + from mxnet.gluon.model_zoo.vision import get_model data_shape = (1, 3, 224, 224) data_type = "float32" @@ -291,6 +294,8 @@ def load_graph(): def test_tensorrt_serialize_vm(): if skip_codegen_test(): return + import mxnet as mx + from mxnet.gluon.model_zoo.vision import get_model data_shape = (1, 3, 224, 224) data_type = "float32" From 3b8182aac5c5a856a679fd4edbd48bd6bdbb1216 Mon Sep 17 00:00:00 2001 From: Rohan Mukherjee Date: Fri, 13 Nov 2020 19:06:17 +0000 Subject: [PATCH 12/12] test_integration removed as components were run by pytest anyway --- tests/python/contrib/test_tensorrt.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 84c6f3a66535..8b61323a71ad 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -972,18 +972,6 @@ def test_densenet121(): run_and_verify_model("densenet121") -def test_tensorrt_integration(): - # Integration tests - test_alexnet() - test_resnet18_v1() - test_resnet18_v2() - test_squeezenet() - test_mobilenet() - test_mobilenet_v2() - test_vgg11() - test_densenet121() - - def test_dynamic_offload(): """ This test checks for proper dynamic offloading of relay graphs. An addition between
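
Note: the dynamism handling exercised by test_dynamic_offload can also be reproduced in isolation. A minimal sketch (the module below is hypothetical and assumes a TVM build with the TensorRT codegen enabled):

    import tvm
    from tvm import relay
    from tvm.relay import Any
    from tvm.relay.op.contrib import tensorrt

    # A conv2d whose data tensor has dynamic spatial dimensions (Any()).
    x = relay.var("x", shape=(1, 32, Any(), Any()), dtype="float32")
    kernel = relay.var("kernel", shape=(1, 32, 3, 3), dtype="float32")
    out = relay.nn.conv2d(x, kernel, channels=1, kernel_size=(3, 3))
    mod = tvm.IRModule.from_expr(relay.Function([x, kernel], out))

    # check_dynamism makes the partitioner leave this conv2d to TVM (run on
    # the VM); a fully static graph would instead be offloaded to a
    # tensorrt_0 subgraph.
    mod, config = tensorrt.partition_for_tensorrt(mod)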