diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 24c468fee0fe..739d49c412e8 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -22,7 +22,7 @@
 from tvm import relay
 from tvm.relay import transform
 from tvm.relay.build_module import bind_params_by_name
-from tvm.relay.expr import Call, Constant, Tuple, GlobalVar
+from tvm.relay.expr import Call, Constant, Tuple, GlobalVar, Var, TupleGetItem
 from tvm.relay.expr_functor import ExprMutator
 
 logger = logging.getLogger("TensorRT")
@@ -155,10 +155,46 @@ def partition_for_tensorrt(
     return mod, config
 
 
+def check_dynamism(args, op_name):
+    """
+    Check for dynamism inside any of the args in the op.
+
+    Parameters
+    ----------
+    args : tvm.ir.container.Array
+        Arguments of the op. Each argument's shape is checked for the presence of
+        dynamic components.
+    op_name : str
+        Name of the op for debugging purposes only.
+
+    Returns
+    -------
+    ret : bool
+        True if dynamism is present, False otherwise.
+    """
+    for arg in args:
+        if isinstance(arg, (Call, Var, Constant, TupleGetItem)):
+            for dim_shape in arg.checked_type.shape:
+                if isinstance(dim_shape, tvm.tir.expr.Any):
+                    return True
+        elif isinstance(arg, Tuple):
+            return check_dynamism(arg.fields, op_name)
+        else:
+            logger.info(
+                "Arg not supported in TensorRT for %s with type %s",
+                op_name,
+                type(arg),
+            )
+            return True
+    return False
+
+
 def _register_external_op_helper_with_checker(op_name, checker):
     @tvm.ir.register_op_attr(op_name, "target.tensorrt")
     def _func_wrapper(expr):
         attrs, args = expr.attrs, expr.args
+        # ops with dynamic shapes are offloaded to VM
+        if check_dynamism(args, op_name):
+            return False
         if any([x.checked_type.dtype != "float32" for x in args]):
             logger.info("Only float32 inputs are supported for TensorRT.")
             return False
@@ -173,6 +209,23 @@ def _register_external_op_helper(op_name, supported=True):
     )
 
 
+def _register_external_dynamic_check_func(op_name):
+    """Wrapper to check dynamic shapes inside any of the args in the op."""
+
+    def _decorator_helper(checker):
+        @tvm.ir.register_op_attr(op_name, "target.tensorrt")
+        def _func_wrapper(expr):
+            args = expr.args
+            # ops with dynamic shapes are offloaded to VM
+            if check_dynamism(args, op_name):
+                return False
+            return checker(expr)
+
+        return _func_wrapper
+
+    return _decorator_helper
+
+
 # Ops which are always supported
 _register_external_op_helper("nn.relu")
 _register_external_op_helper("sigmoid")
@@ -192,7 +245,49 @@ def _register_external_op_helper(op_name, supported=True):
 _register_external_op_helper("clip")
 
 
-@tvm.ir.register_op_attr("add", "target.tensorrt")
+def reduce_annotate_fn(attrs, args, op_name):
+    """Helper for reduce operations."""
+    if not attrs.axis or len(attrs.axis) == 0:
+        logger.info("%s: cannot reduce to scalar.", op_name)
+        return False
+    if attrs.exclude:
+        logger.info("%s: exclude not supported.", op_name)
+        return False
+    if get_tensorrt_use_implicit_batch_mode() and any([x == 0 for x in map(int, attrs.axis)]):
+        logger.info("%s: can't modify batch dimension.", op_name)
+        return False
+    return True
+
+
+_register_external_op_helper_with_checker("sum", reduce_annotate_fn)
+_register_external_op_helper_with_checker("prod", reduce_annotate_fn)
+_register_external_op_helper_with_checker("max", reduce_annotate_fn)
+_register_external_op_helper_with_checker("min", reduce_annotate_fn)
+_register_external_op_helper_with_checker("mean", reduce_annotate_fn)
+
+
+def trt_version_annotate_fn(version):
+    """Helper for ops which require a minimum TRT version."""
+
+    def _func_wrapper(attrs, args, op_name):
+        if get_tensorrt_version() < version:
+            logger.info(
+                "%s: requires TensorRT version %s or higher.", op_name, ".".join(map(str, version))
+            )
+            return False
+        return True
+
+    return _func_wrapper
+
+
+_register_external_op_helper_with_checker("nn.leaky_relu", trt_version_annotate_fn((5, 1, 5)))
+_register_external_op_helper_with_checker("sin", trt_version_annotate_fn((5, 1, 5)))
+_register_external_op_helper_with_checker("cos", trt_version_annotate_fn((5, 1, 5)))
+_register_external_op_helper_with_checker("atan", trt_version_annotate_fn((5, 1, 5)))
+_register_external_op_helper_with_checker("ceil", trt_version_annotate_fn((5, 1, 5)))
+
+
+@_register_external_dynamic_check_func("add")
 def add_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if add is supported by TensorRT."""
 
@@ -212,7 +307,7 @@ def add_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.batch_norm", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.batch_norm")
 def batch_norm_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.batch_norm is supported by TensorRT."""
 
@@ -226,7 +321,7 @@ def batch_norm_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.softmax", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.softmax")
 def softmax_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.softmax is supported by TensorRT."""
 
@@ -240,7 +335,7 @@ def softmax_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.conv2d", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.conv2d")
 def conv2d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.conv2d is supported by TensorRT."""
 
@@ -260,7 +355,7 @@ def conv2d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.dense", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.dense")
 def dense_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if dense is supported by TensorRT."""
 
@@ -279,7 +374,7 @@ def dense_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.bias_add", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.bias_add")
 def bias_add_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.bias_add is supported by TensorRT."""
 
@@ -294,7 +389,7 @@ def bias_add_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.max_pool2d", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.max_pool2d")
 def max_pool_2d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.max_pool2d is supported by TensorRT."""
 
@@ -311,7 +406,7 @@ def max_pool_2d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.avg_pool2d", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.avg_pool2d")
 def avg_pool_2d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.avg_pool2d is supported by TensorRT."""
 
@@ -341,7 +436,7 @@ def avg_pool_2d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.global_max_pool2d", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.global_max_pool2d")
 def global_max_pool_2d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.global_max_pool2d is supported by TensorRT."""
 
@@ -355,7 +450,7 @@ def global_max_pool_2d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.global_avg_pool2d", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.global_avg_pool2d")
 def global_avg_pool_2d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.global_avg_pool2d is supported by TensorRT."""
 
@@ -369,7 +464,7 @@ def global_avg_pool_2d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("expand_dims", "target.tensorrt")
+@_register_external_dynamic_check_func("expand_dims")
 def expand_dims_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if expand_dims is supported by TensorRT."""
 
@@ -383,7 +478,7 @@ def expand_dims_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("squeeze", "target.tensorrt")
+@_register_external_dynamic_check_func("squeeze")
 def squeeze_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if squeeze is supported by TensorRT."""
 
@@ -400,7 +495,7 @@ def squeeze_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("concatenate", "target.tensorrt")
+@_register_external_dynamic_check_func("concatenate")
 def concatenate_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if concatenate is supported by TensorRT."""
 
@@ -421,7 +516,7 @@ def concatenate_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.conv2d_transpose", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.conv2d_transpose")
 def conv2d_transpose_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.conv2d_transpose is supported by TensorRT."""
 
@@ -446,7 +541,7 @@ def conv2d_transpose_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("transpose", "target.tensorrt")
+@_register_external_dynamic_check_func("transpose")
 def transpose_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if transpose is supported by TensorRT."""
 
@@ -460,7 +555,7 @@ def transpose_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("layout_transform", "target.tensorrt")
+@_register_external_dynamic_check_func("layout_transform")
 def layout_transform_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if layout_transform is supported by TensorRT."""
 
@@ -481,7 +576,7 @@ def layout_transform_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("reshape", "target.tensorrt")
+@_register_external_dynamic_check_func("reshape")
 def reshape_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if reshape is supported by TensorRT."""
 
@@ -514,7 +609,7 @@ def reshape_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.pad", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.pad")
 def pad_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.pad is supported by TensorRT."""
 
@@ -536,49 +631,7 @@ def pad_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-def reduce_annotate_fn(attrs, args, op_name):
-    """Helper for reduce operations."""
-    if not attrs.axis or len(attrs.axis) == 0:
-        logger.info("%s: cannot reduce to scalar.", op_name)
-        return False
-    if attrs.exclude:
-        logger.info("%s: exclude not supported.", op_name)
-        return False
-    if get_tensorrt_use_implicit_batch_mode() and any([x == 0 for x in map(int, attrs.axis)]):
-        logger.info("%s: can't modify batch dimension.", op_name)
-        return False
-    return True
-
-
-_register_external_op_helper_with_checker("sum", reduce_annotate_fn)
-_register_external_op_helper_with_checker("prod", reduce_annotate_fn)
-_register_external_op_helper_with_checker("max", reduce_annotate_fn)
-_register_external_op_helper_with_checker("min", reduce_annotate_fn)
-_register_external_op_helper_with_checker("mean", reduce_annotate_fn)
-
-
-def trt_version_annotate_fn(version):
-    """Helper for ops which require a minimum TRT version"""
-
-    def _func_wrapper(attrs, args, op_name):
-        if get_tensorrt_version() < version:
-            logger.info(
-                "%s: requires TensorRT version %s or higher.", op_name, ".".join(map(str, version))
-            )
-            return False
-        return True
-
-    return _func_wrapper
-
-
-_register_external_op_helper_with_checker("nn.leaky_relu", trt_version_annotate_fn((5, 1, 5)))
-_register_external_op_helper_with_checker("sin", trt_version_annotate_fn((5, 1, 5)))
-_register_external_op_helper_with_checker("cos", trt_version_annotate_fn((5, 1, 5)))
-_register_external_op_helper_with_checker("atan", trt_version_annotate_fn((5, 1, 5)))
-_register_external_op_helper_with_checker("ceil", trt_version_annotate_fn((5, 1, 5)))
-
-
-@tvm.ir.register_op_attr("strided_slice", "target.tensorrt")
+@_register_external_dynamic_check_func("strided_slice")
 def strided_slice_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if strided_slice is supported by TensorRT."""
 
@@ -601,11 +654,34 @@ def strided_slice_annotate_fn(expr):  # pylint: disable=unused-variable
     if any([x is not None and x <= 0 for x in attrs.strides]):
         logger.info("strided_slice: stride must be positive")
         return False
+    for i in range(0, len(args[0].checked_type.shape)):
+        begin = int(attrs.begin[i])
+        if attrs.slice_mode == "end":
+            end = (
+                int(attrs.end[i])
+                if attrs.end[i] is not None and int(attrs.end[i]) != -1
+                else args[0].checked_type.shape[i]
+            )
+            size = int(end) - int(begin)
+        elif attrs.slice_mode == "size":
+            size = (
+                int(attrs.end[i])
+                if attrs.end[i] is not None and int(attrs.end[i]) != -1
+                else args[0].checked_type.shape[i] - begin
+            )
+        else:
+            logger.warning("strided_slice: unknown slice mode encountered")
+            return False
+
+        if int(size) < 1:
+            logger.info("strided_slice: size of slice must be at least 1")
+            return False
+
     return True
 
 
-@tvm.ir.register_op_attr("nn.adaptive_max_pool2d", "target.tensorrt")
-def adapative_max_pool2d_annotate_fn(expr):  # pylint: disable=unused-variable
+@_register_external_dynamic_check_func("nn.adaptive_max_pool2d")
+def adaptive_max_pool2d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.adaptive_max_pool2d is supported by TensorRT."""
 
     attrs, args = expr.attrs, expr.args
@@ -618,8 +693,8 @@ def adapative_max_pool2d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.adaptive_avg_pool2d", "target.tensorrt")
-def adapative_avg_pool2d_annotate_fn(expr):  # pylint: disable=unused-variable
+@_register_external_dynamic_check_func("nn.adaptive_avg_pool2d")
+def adaptive_avg_pool2d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.adaptive_avg_pool2d is supported by TensorRT."""
 
     attrs, args = expr.attrs, expr.args
@@ -632,7 +707,7 @@ def adapative_avg_pool2d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.conv3d", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.conv3d")
 def conv3d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.conv3d is supported by TensorRT."""
 
@@ -654,7 +729,7 @@ def conv3d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.max_pool3d", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.max_pool3d")
 def max_pool_3d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.max_pool3d is supported by TensorRT."""
 
@@ -670,7 +745,7 @@ def max_pool_3d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.avg_pool3d", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.avg_pool3d")
 def avg_pool_3d_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.avg_pool3d is supported by TensorRT."""
 
@@ -686,7 +761,7 @@ def avg_pool_3d_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
-@tvm.ir.register_op_attr("nn.conv3d_transpose", "target.tensorrt")
+@_register_external_dynamic_check_func("nn.conv3d_transpose")
 def conv3d_transpose_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if nn.conv3d_transpose is supported by TensorRT."""
 
@@ -774,13 +849,10 @@ def visit_call(self, call):
                     new_body = relay.bind(func.body, var_map)
                     return new_body
                 if name != "main":
-                    # Copy the GlobalVar (subgraph function) to the new module and call.
                     args = []
                     for arg in call.args:
                         args.append(super().visit(arg))
-                    subgraph_gv = relay.GlobalVar(name)
-                    self.new_mod[subgraph_gv] = self.mod[name]
-                    return subgraph_gv(*args)
+                    return call.op(*args)
             return super().visit_call(call)
 
     subgraphs_to_remove = []
@@ -792,7 +864,7 @@ def visit_call(self, call):
         if not is_valid_subgraph(mod[name].params, mod[name].body):
             subgraphs_to_remove.append(name)
     # Create new pruned module
-    new_mod = tvm.IRModule()
+    new_mod = tvm.IRModule(mod.functions, mod.type_definitions)
     new_mod["main"] = SubgraphRemover(subgraphs_to_remove, mod, new_mod).visit(mod["main"])
     return new_mod
diff --git a/src/relay/backend/contrib/tensorrt/codegen.cc b/src/relay/backend/contrib/tensorrt/codegen.cc
index 26f674dcd7b5..cb648333df8d 100644
--- a/src/relay/backend/contrib/tensorrt/codegen.cc
+++ b/src/relay/backend/contrib/tensorrt/codegen.cc
@@ -140,18 +140,25 @@ class TensorRTJSONSerializer : public backend::contrib::JSONSerializer {
     std::vector<std::string> start, size, strides;
     for (size_t i = 0; i < attrs->begin.value().size(); ++i) {
       const int begin_value = process_slice_index(attrs->begin.value()[i], 0, ishape[i]);
-      const int end_value = process_slice_index(attrs->end.value()[i], ishape[i], ishape[i]);
+      ICHECK_GE(begin_value, 0);
+      start.push_back(std::to_string(begin_value));
       const int stride_value = (default_strides || i >= attrs->strides.value().size() ||
                                 !attrs->strides.value()[i].defined())
                                    ? 1
                                    : attrs->strides.value()[i].as<IntImmNode>()->value;
       ICHECK_GT(stride_value, 0);
-      const int size_value = (end_value - begin_value + stride_value - 1) / stride_value;
-      ICHECK_GE(begin_value, 0);
+      strides.push_back(std::to_string(stride_value));
+      int size_value;
+      if (attrs->slice_mode == "end") {
+        const int end_value = process_slice_index(attrs->end.value()[i], ishape[i], ishape[i]);
+        size_value = (end_value - begin_value + stride_value - 1) / stride_value;
+      } else if (attrs->slice_mode == "size") {
+        // with slice_mode = "size", attrs->end_value means the size of the slice
+        int end_value = attrs->end.value()[i].as<IntImmNode>()->value;
+        size_value = (end_value == -1) ? ishape[i] - begin_value : end_value;
+      }
       ICHECK_GT(size_value, 0);
-      start.push_back(std::to_string(begin_value));
       size.push_back(std::to_string(size_value));
-      strides.push_back(std::to_string(stride_value));
     }
     std::vector<dmlc::any> start_attr, size_attr, strides_attr;
     start_attr.emplace_back(start);
diff --git a/src/runtime/contrib/tensorrt/tensorrt_ops.cc b/src/runtime/contrib/tensorrt/tensorrt_ops.cc
index a86f107941bc..057743c3b588 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_ops.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_ops.cc
@@ -944,7 +944,7 @@ class ReduceOpConverter : public TensorRTOpConverter {
 #if TRT_VERSION_GE(5, 1, 5)
 class StridedSliceOpConverter : public TensorRTOpConverter {
  public:
-  StridedSliceOpConverter() : TensorRTOpConverter({kTensor, kWeight, kWeight, kWeight}) {}
+  StridedSliceOpConverter() : TensorRTOpConverter({kTensor}) {}
 
   void Convert(TensorRTOpConverterParams* params) const {
     auto input = params->inputs.at(0).tensor;
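The serializer's per-dimension size computation above differs by slice mode: with slice_mode = "end" the end index is resolved against the input shape and ceil-divided by the stride, while with slice_mode = "size" the end value already holds the size, with -1 meaning "to the end of the axis". A Python mirror of that arithmetic (an illustrative sketch, not code from this patch):

def slice_size(dim, begin, end, stride, slice_mode):
    # dim is the input extent on this axis; mirrors the C++ size_value logic.
    if slice_mode == "end":
        return (end - begin + stride - 1) // stride  # ceil-div by stride
    return dim - begin if end == -1 else end  # slice_mode == "size"

assert slice_size(10, 2, 8, 2, "end") == 3    # elements 2, 4, 6
assert slice_size(10, 2, -1, 1, "size") == 8  # runs to the end of the axis
assert slice_size(10, 2, 4, 1, "size") == 4   # takes exactly 4 elements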
diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index 8e8e54e8650a..8b61323a71ad 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -17,12 +17,15 @@
 import numpy as np
 import time
 import pytest
+import itertools
 
 import tvm
 import tvm.relay.testing
 from tvm import relay
 from tvm.relay.op.contrib import tensorrt
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_runtime, utils
+from tvm.runtime.vm import VirtualMachine
+from tvm.relay import Any, GlobalVar, transform
 
 
 def skip_codegen_test():
@@ -46,6 +49,23 @@ def skip_runtime_test():
     return False
 
 
+def vmobj_to_list(o):
+    if isinstance(o, tvm.nd.NDArray):
+        return [o.asnumpy()]
+    elif isinstance(o, tvm.runtime.container.ADT) or isinstance(o, list):
+        return [vmobj_to_list(f) for f in o]
+    else:
+        raise RuntimeError("Unknown object type: %s" % type(o))
+
+
+def assert_result_dict_holds(result_dict):
+    for k1, k2 in itertools.combinations(result_dict, 2):
+        res1 = vmobj_to_list(result_dict[k1])
+        res2 = vmobj_to_list(result_dict[k2])
+        for r1, r2 in zip(res1, res2):
+            tvm.testing.assert_allclose(r1, r2, rtol=1e-3, atol=1e-3)
+
+
 def run_and_verify_func(config, target="cuda"):
     """Test a Relay func by compiling, running, and comparing TVM and TRT outputs.
 
@@ -64,100 +84,76 @@ def run_and_verify_func(config, target="cuda"):
         for k, v in input_shapes.items()
         if k not in is_param
     }
-
-    # Run TRT
-    mod = tvm.IRModule()
-    mod["main"] = f
-    mod, config = tensorrt.partition_for_tensorrt(mod, params)
-    with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
-        graph, lib, graph_params = relay.build(mod, target, params=params)
-    if skip_runtime_test():
-        return
     ctx = tvm.context(target)
-    mod = graph_runtime.create(graph, lib, ctx=ctx)
-    mod.set_input(**graph_params)
-    mod.run(**input_dict)
-    results = [mod.get_output(i) for i in range(mod.get_num_outputs())]
-
-    # Run reference
-    mod = tvm.IRModule()
-    mod["main"] = f
-    with tvm.transform.PassContext(opt_level=3):
-        graph, lib, graph_params = relay.build(mod, target, params=params)
-    mod = graph_runtime.create(graph, lib, ctx=ctx)
-    mod.set_input(**graph_params)
-    mod.run(**input_dict)
-    ref_results = [mod.get_output(i) for i in range(mod.get_num_outputs())]
-
-    assert len(results) == len(ref_results)
-    for i in range(len(results)):
-        res = results[i].asnumpy()
-        ref_res = ref_results[i].asnumpy()
-        assert res.shape == ref_res.shape
-        tvm.testing.assert_allclose(res, ref_res, rtol=1e-3, atol=1e-3)
+    result_dict = dict()
+    for mode in ["graph", "vm"]:
+        for use_trt in [False, True]:
+            mod = tvm.IRModule()
+            mod["main"] = f
+            result_key = mode + ("_trt" if use_trt else "")
+            if use_trt:
+                mod, config = tensorrt.partition_for_tensorrt(mod, params)
+                with tvm.transform.PassContext(
+                    opt_level=3, config={"relay.ext.tensorrt.options": config}
+                ):
+                    exec = relay.create_executor(mode, mod=mod, ctx=ctx, target=target)
+            else:
+                with tvm.transform.PassContext(opt_level=3):
+                    exec = relay.create_executor(mode, mod=mod, ctx=ctx, target=target)
+            if not skip_runtime_test():
+                result_dict[result_key] = exec.evaluate()(**input_dict, **params)
+
+    if not skip_runtime_test():
+        assert_result_dict_holds(result_dict)
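# ---------------------------------------------------------------------------
# Usage sketch (hypothetical, not part of this patch): run_and_verify_func now
# cross-checks every combination of executor mode ("graph"/"vm") and TRT
# offload, so a new op test only has to supply (func, input_shapes, is_param):
#
#     def test_relu_example():
#         x = relay.var("x", shape=(1, 8, 3, 3), dtype="float32")
#         f = relay.Function([x], relay.nn.relu(x))
#         run_and_verify_func((f, {"x": (1, 8, 3, 3)}, []))
# ---------------------------------------------------------------------------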
 
 
 def run_and_verify_model(model):
     if skip_codegen_test():
         return
 
-    def compile_and_run(i_data, input_shape, dtype, use_trt=True, num_iteration=1):
-        import mxnet as mx
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        def check_trt_used(graph):
-            import json
+    import mxnet as mx
+    from mxnet.gluon.model_zoo.vision import get_model
 
-            graph = json.loads(graph)
-            num_trt_subgraphs = sum(
-                [
-                    1
-                    for n in graph["nodes"]
-                    if n.get("attrs", {}).get("func_name", "").startswith("tensorrt_")
-                ]
-            )
-            assert num_trt_subgraphs >= 1
+    def check_trt_used(mod):
+        num_trt_subgraphs = sum(
+            [1 if gv.name_hint == "tensorrt_0" else 0 for gv in mod.get_global_vars()]
+        )
+        assert num_trt_subgraphs == 1
 
-        block = get_model(model, pretrained=True)
-        mod, params = relay.frontend.from_mxnet(block, shape={"data": input_shape}, dtype=dtype)
+    def compile_and_run(mod, params, i_data, mode="vm", use_trt=True):
+        assert mode in ["graph", "vm"]
 
         if use_trt:
             mod, config = tensorrt.partition_for_tensorrt(mod, params)
+            check_trt_used(mod)
             with tvm.transform.PassContext(
                 opt_level=3, config={"relay.ext.tensorrt.options": config}
             ):
-                graph, lib, params = relay.build(mod, "cuda", params=params)
-            check_trt_used(graph)
+                exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda")
         else:
             with tvm.transform.PassContext(opt_level=3):
-                graph, lib, params = relay.build(mod, "cuda", params=params)
-
-        if skip_runtime_test():
-            return
-        mod = graph_runtime.create(graph, lib, ctx=tvm.gpu(0))
-        mod.set_input(**params)
-        # Warmup
-        for i in range(10):
-            mod.run(data=i_data)
-        # Time
-        times = []
-        for i in range(num_iteration):
-            start_time = time.time()
-            mod.run(data=i_data)
-            res = mod.get_output(0)
-            times.append(time.time() - start_time)
-        latency = 1000.0 * np.mean(times)
-        print(model, latency)
+            exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda")
+
+        res = exec.evaluate()(i_data, **params) if not skip_runtime_test() else None
         return res
 
     dtype = "float32"
     input_shape = (1, 3, 224, 224)
     i_data = np.random.uniform(-1, 1, input_shape).astype(dtype)
-    res = compile_and_run(i_data, input_shape, dtype, use_trt=True)
-    if skip_runtime_test():
-        return
-    ref_res = compile_and_run(i_data, input_shape, dtype, use_trt=False)
-    tvm.testing.assert_allclose(res.asnumpy(), ref_res.asnumpy(), rtol=1e-3, atol=1e-3)
+    block = get_model(model, pretrained=True)
+    mod, params = relay.frontend.from_mxnet(block, shape={"data": input_shape}, dtype=dtype)
+
+    result_dict = dict()
+    for mode in ["vm", "graph"]:
+        for use_trt in [True, False]:
+            result_key = mode + ("_trt" if use_trt else "")
+            result_dict[result_key] = compile_and_run(
+                mod, params, i_data, mode=mode, use_trt=use_trt
+            )
+
+    if not skip_runtime_test():
+        assert_result_dict_holds(result_dict)
 
 
 def test_tensorrt_simple():
@@ -174,19 +170,30 @@ def test_tensorrt_simple():
     out = relay.nn.relu(w)
     f = relay.Function([x, y, z], out)
 
-    mod = tvm.IRModule()
-    mod["main"] = f
-    mod, config = tensorrt.partition_for_tensorrt(mod)
-    with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
-        graph, lib, params = relay.build(mod, "cuda")
-    if skip_runtime_test():
-        return
-    mod = graph_runtime.create(graph, lib, ctx=tvm.gpu(0))
     x_data = np.random.uniform(-1, 1, xshape).astype(dtype)
     y_data = np.random.uniform(-1, 1, yshape).astype(dtype)
     z_data = np.random.uniform(-1, 1, zshape).astype(dtype)
-    mod.run(x=x_data, y=y_data, z=z_data)
-    results = [mod.get_output(i).asnumpy() for i in range(mod.get_num_outputs())]
+
+    result_dict = dict()
+    for mode in ["vm", "graph"]:
+        for use_trt in [True, False]:
+            mod = tvm.IRModule()
+            mod["main"] = f
+            result_key = mode + ("_trt" if use_trt else "")
+            if use_trt:
+                mod, config = tensorrt.partition_for_tensorrt(mod)
+                with tvm.transform.PassContext(
+                    opt_level=3, config={"relay.ext.tensorrt.options": config}
+                ):
+                    relay_exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda")
+            else:
+                with tvm.transform.PassContext(opt_level=3):
+                    relay_exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda")
+            if not skip_runtime_test():
+                result_dict[result_key] = relay_exec.evaluate()(x_data, y_data, z_data)
+
+    if not skip_runtime_test():
+        assert_result_dict_holds(result_dict)
 
 
 def test_tensorrt_simple_cpu_io():
@@ -211,6 +218,8 @@ def test_tensorrt_not_compatible():
         return
     dtype = "float32"
     xshape = (1, 32, 14, 14)
+    x_data = np.random.uniform(-1, 1, xshape).astype(dtype)
+
     x = relay.var("x", shape=(xshape), dtype=dtype)
     y = relay.add(x, x)
     z = relay.erf(y)
@@ -219,40 +228,116 @@ def test_tensorrt_not_compatible():
     mod = tvm.IRModule()
     mod["main"] = f
     mod, config = tensorrt.partition_for_tensorrt(mod)
-    with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
-        graph, lib, params = relay.build(mod, "cuda")
-    if skip_runtime_test():
-        return
-    mod = graph_runtime.create(graph, lib, ctx=tvm.gpu(0))
-    x_data = np.random.uniform(-1, 1, xshape).astype(dtype)
-    mod.run(x=x_data)
-    results = [mod.get_output(i).asnumpy() for i in range(mod.get_num_outputs())]
+    for mode in ["graph", "vm"]:
+        with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
+            exec = relay.create_executor(mode, mod=mod, ctx=tvm.gpu(0), target="cuda")
+        if not skip_runtime_test():
+            results = exec.evaluate()(x_data)
 
 
-def test_tensorrt_serialize():
+def test_tensorrt_serialize_graph_runtime():
     if skip_codegen_test():
         return
-    import mxnet
+    import mxnet as mx
     from mxnet.gluon.model_zoo.vision import get_model
 
+    data_shape = (1, 3, 224, 224)
+    data_type = "float32"
+    i_data = np.random.uniform(0, 1, data_shape).astype(data_type)
     block = get_model("resnet18_v1", pretrained=True)
-    mod, params = relay.frontend.from_mxnet(
-        block, shape={"data": (1, 3, 224, 224)}, dtype="float32"
-    )
-    # Compile
-    mod, config = tensorrt.partition_for_tensorrt(mod, params)
-    with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
-        lib = relay.build(mod, "cuda", params=params)
-    # Serialize
-    lib.export_library("compiled.so")
-    # Deserialize
-    loaded_lib = tvm.runtime.load_module("compiled.so")
-    # Run
-    if skip_runtime_test():
+    mod, params = relay.frontend.from_mxnet(block, shape={"data": data_shape}, dtype=data_type)
+    mod, config = tensorrt.partition_for_tensorrt(mod)
+    tmpdir = utils.tempdir()
+
+    def compile_graph(mod, params):
+        with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
+            graph, lib, params = relay.build(mod, params=params, target="cuda")
+        params = relay.save_param_dict(params)
+        return graph, lib, params
+
+    def run_graph(graph, lib, params):
+        mod_ = graph_runtime.create(graph, lib, ctx=tvm.gpu(0))
+        mod_.load_params(params)
+        mod_.run(data=i_data)
+        res = mod_.get_output(0)
+        return res
+
+    def save_graph(graph, lib, params):
+        # Serialize
+        with open(tmpdir.relpath("compiled.json"), "w") as f_graph_json:
+            f_graph_json.write(graph)
+        with open(tmpdir.relpath("compiled.params"), "wb") as f_params:
+            f_params.write(params)
+        lib.export_library(tmpdir.relpath("compiled.so"))
+
+    def load_graph():
+        # Deserialize
+        with open(tmpdir.relpath("compiled.json"), "r") as f_graph_json:
+            graph = f_graph_json.read()
+        with open(tmpdir.relpath("compiled.params"), "rb") as f_params:
+            params = bytearray(f_params.read())
+        lib = tvm.runtime.load_module(tmpdir.relpath("compiled.so"))
+        return graph, lib, params
+
+    # Test serialization with graph runtime
+    graph, lib, graph_params = compile_graph(mod, params)
+    save_graph(graph, lib, graph_params)
+    loaded_graph, loaded_lib, loaded_params = load_graph()
+
+    if not skip_runtime_test():
+        result_dict = dict()
+        result_dict["graph"] = run_graph(graph, lib, graph_params)
+        result_dict["graph_ref"] = run_graph(loaded_graph, loaded_lib, loaded_params)
+        assert_result_dict_holds(result_dict)
+
+
+def test_tensorrt_serialize_vm():
+    if skip_codegen_test():
         return
-    gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib["default"](tvm.gpu(0)))
-    i_data = np.random.uniform(0, 1, (1, 3, 224, 224)).astype("float32")
-    gen_module.run(data=i_data)
+    import mxnet as mx
+    from mxnet.gluon.model_zoo.vision import get_model
+
+    data_shape = (1, 3, 224, 224)
+    data_type = "float32"
+    i_data = np.random.uniform(0, 1, data_shape).astype(data_type)
+    block = get_model("resnet18_v1", pretrained=True)
+    mod, params = relay.frontend.from_mxnet(block, shape={"data": data_shape}, dtype=data_type)
+    mod, config = tensorrt.partition_for_tensorrt(mod)
+    tmpdir = utils.tempdir()
+
+    def compile_vm(mod, params):
+        with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
+            vm_exec = relay.vm.compile(mod, target="cuda", params=params)
+        code, lib = vm_exec.save()
+        return code, lib
+
+    def run_vm(code, lib):
+        vm_exec = tvm.runtime.vm.Executable.load_exec(code, lib)
+        vm = VirtualMachine(vm_exec, tvm.gpu(0))
+        result = vm.invoke("main", data=i_data)
+        return result
+
+    def save_vm(code, lib):
+        # Save the code and lib files.
+        lib.export_library(tmpdir.relpath("path_lib.so"))
+        with open(tmpdir.relpath("path_code.ro"), "wb") as fo:
+            fo.write(code)
+
+    def load_vm():
+        lib = tvm.runtime.load_module(tmpdir.relpath("path_lib.so"))
+        code = bytearray(open(tmpdir.relpath("path_code.ro"), "rb").read())
+        return lib, code
+
+    # Test serialization with VM
+    code_vm, lib_vm = compile_vm(mod, params)
+    save_vm(code_vm, lib_vm)
+    loaded_lib_vm, loaded_code_vm = load_vm()
+
+    if not skip_runtime_test():
+        result_dict = dict()
+        result_dict["vm"] = run_vm(code_vm, lib_vm)
+        result_dict["vm_ref"] = run_vm(loaded_code_vm, loaded_lib_vm)
+        assert_result_dict_holds(result_dict)
 
 
 def test_conv2d():
@@ -701,27 +786,40 @@ def get_graph(op, x_shape=(1, 2, 3, 4), axis=(2, 3), keepdims=False):
 
 
 def test_strided_slice():
-    def get_graph(x_shape, begin, end, strides=None):
+    def get_graph(x_shape, begin, end, strides=None, slice_mode="size"):
         x = relay.var("x", shape=(x_shape), dtype="float32")
         if strides:
             out = relay.strided_slice(
                 x,
-                relay.expr.const(begin, dtype="int32"),
-                relay.expr.const(end, dtype="int32"),
-                relay.expr.const(strides, dtype="int32"),
+                begin,
+                end,
+                strides,
+                slice_mode=slice_mode,
             )
         else:
             out = relay.strided_slice(
                 x,
-                relay.expr.const(begin, dtype="int32"),
-                relay.expr.const(end, dtype="int32"),
+                begin,
+                end,
+                slice_mode=slice_mode,
            )
         f = relay.Function([x], out)
         return f, {"x": x_shape}, []
 
-    run_and_verify_func(get_graph((1, 3, 6, 7), [0, 0, 0, 0], [1, 1, 6, 7]))
-    run_and_verify_func(get_graph((1, 3, 6, 7), [0, 1, 0, 0], [1, 2, 6, 6]))
-    run_and_verify_func(get_graph((1, 10), [0, 0], [1, 10], [1, 2]))
+    for slice_mode in ["size", "end"]:
+        run_and_verify_func(
+            get_graph((1, 3, 6, 7), (0, 0, 0, 0), (1, 1, 6, 7), slice_mode=slice_mode)
+        )
+        run_and_verify_func(
+            get_graph((1, 3, 6, 7), [0, 1, 0, 0], [1, 2, 6, 6], slice_mode=slice_mode)
+        )
+        run_and_verify_func(
+            get_graph((2, 3, 6, 7), [0, 0, 0, 0], [-1, -1, -1, -1], slice_mode=slice_mode)
+        )
+        run_and_verify_func(
+            get_graph((2, 3, 6, 7), [0, 1, 0, 0], [-1, -1, -1, -1], slice_mode=slice_mode)
+        )
+        run_and_verify_func(get_graph((1, 6), [0, 1], [1, 3], slice_mode=slice_mode))
 
 
 def test_adaptive_pool2d():
@@ -874,50 +972,67 @@ def test_densenet121():
     run_and_verify_model("densenet121")
 
 
+def test_dynamic_offload():
+    """
+    This test checks for proper dynamic offloading of relay graphs. An addition between
+    the outputs of two conv2d's is performed, one of them having all static args whereas
+    the other has an arg with a dynamic shape. The TRT partitioner is expected to leave
+    the conv2d with the dynamic arg to TVM while offloading the other one to TRT.
+    """
+
+    if skip_codegen_test():
+        return
+
+    data_shape = (1, 32, 8, 8)
+    k_shape = (1, 32, 3, 3)
+
+    x = relay.var("x", shape=(data_shape[0], data_shape[1], Any(), Any()), dtype="float32")
+    y = relay.var("y", shape=(data_shape), dtype="float32")
+    kernel = relay.var("kernel", shape=(k_shape), dtype="float32")
+
+    def get_expected():
+        def set_func_attr(func, compile_name, symbol_name):
+            func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
+            func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
+            func = func.with_attr("Compiler", compile_name)
+            func = func.with_attr("global_symbol", symbol_name)
+            return func
+
+        # Create a nested TRT function that matches the expected output
+        mod = tvm.IRModule()
+        var1 = relay.var("tensorrt_0_i0", shape=(data_shape), dtype="float32")
+        kernel_trt = relay.var("tensorrt_0_i1", shape=(k_shape), dtype="float32")
+        out1 = relay.nn.conv2d(var1, kernel_trt, channels=k_shape[0], kernel_size=k_shape[2:4])
+        f1 = GlobalVar("tensorrt_0")
+        func = relay.Function([var1, kernel_trt], out1)
+        func = set_func_attr(func, "tensorrt", "tensorrt_0")
+        mod[f1] = func
+        mod = relay.transform.InferType()(mod)
+
+        # Create the main function
+        out1 = relay.nn.conv2d(x, kernel, channels=k_shape[0], kernel_size=k_shape[2:4])
+        out = relay.add(out1, f1(y, kernel))
+        f = relay.Function([x, y, kernel], out)
+        mod["main"] = f
+        mod = relay.transform.InferType()(mod)
+        return mod
+
+    # Create relay function that will be offloaded to TRT
+    out1 = relay.nn.conv2d(x, kernel, channels=k_shape[0], kernel_size=k_shape[2:4])
+    out2 = relay.nn.conv2d(y, kernel, channels=k_shape[0], kernel_size=k_shape[2:4])
+    out = relay.add(out1, out2)
+    f = relay.Function([x, y, kernel], out)
+
+    # Pass the function to TRT compilation
+    mod = tvm.IRModule()
+    mod["main"] = f
+    mod = relay.transform.InferType()(mod)
+    mod_trt, config = tensorrt.partition_for_tensorrt(mod, params={})
+
+    # Get the expected relay graph and compare
+    mod_exp = get_expected()
+    tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True)
+
+
 if __name__ == "__main__":
-    test_tensorrt_not_compatible()
-    test_tensorrt_simple()
-    test_tensorrt_simple_cpu_io()
-    test_tensorrt_serialize()
-
-    # Op tests
-    test_conv2d()
-    test_conv2d_nhwc()
-    test_conv2d_weights_const()
-    test_conv2d_weights_transposed()
-    test_dense()
-    test_bias_add()
-    test_pool2d()
-    test_global_pool2d()
-    test_batch_flatten()
-    test_expand_dims()
-    test_squeeze()
-    test_concatenate()
-    test_conv2d_transpose()
-    test_reshape()
-    test_transpose()
-    test_float_const()
-    test_pad()
-    test_softmax()
-    test_batch_norm()
-    test_unary()
-    test_clip()
-    test_leaky_relu()
-    test_binary()
-    test_reduce()
-    test_strided_slice()
-    test_adaptive_pool2d()
-    test_multiple_outputs()
-    test_conv3d()
-    test_pool3d()
-    test_conv3d_transpose()
-
-    # Integration tests
-    test_alexnet()
-    test_resnet18_v1()
-    test_resnet18_v2()
-    test_squeezenet()
-    test_mobilenet()
-    test_mobilenet_v2()
-    test_vgg11()
-    test_densenet121()
+    pytest.main([__file__])