diff --git a/docsrc/py_api/trtorch.rst b/docsrc/py_api/trtorch.rst index 233cd5083f..058c1afa72 100644 --- a/docsrc/py_api/trtorch.rst +++ b/docsrc/py_api/trtorch.rst @@ -1,5 +1,8 @@ .. _trtorch_py: +.. automodule trtorch + :undoc-members: + trtorch =============== diff --git a/py/trtorch/_compile_spec.py b/py/trtorch/_compile_spec.py index 67b71fc5f4..d21ec5bdd1 100644 --- a/py/trtorch/_compile_spec.py +++ b/py/trtorch/_compile_spec.py @@ -64,7 +64,7 @@ def _parse_op_precision(precision: Any) -> _types.dtype: raise TypeError("Provided an unsupported dtype as operating precision (support: int8, half, float), got: " + str(precision)) - elif isinstance(precision, _types.DataTypes): + elif isinstance(precision, _types.dtype): return precision else: @@ -170,6 +170,8 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec: inputs = [trtorch.Input._from_tensor(i) if isinstance(i, torch.Tensor) else i for i in compile_spec["inputs"]] info.inputs = [i._to_internal() for i in inputs] + assert (len(info.inputs) > 0), "Require at least one input definition to compile model" + if "op_precision" in compile_spec and "enabled_precisions" in compile_spec: raise KeyError( "Found both key \"op_precision\", and \"enabled_precisions\" in compile spec, please port forward to using only \"enabled_precisions\"" diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py index 6d89c27250..78aad5912e 100644 --- a/py/trtorch/_compiler.py +++ b/py/trtorch/_compiler.py @@ -3,13 +3,29 @@ from torch import nn import trtorch._C +from trtorch._types import EngineCapability from trtorch._compile_spec import _parse_compile_spec from trtorch._version import __version__ from trtorch.Device import Device from types import FunctionType - -def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.ScriptModule: +def compile(module: torch.jit.ScriptModule, + inputs=[], + device=Device._current_device(), + disable_tf32=False, + sparse_weights=False, + 
enabled_precisions=set(), + refit=False, + debug=False, + strict_types=False, + capability=EngineCapability.default, + num_min_timing_iters=2, + num_avg_timing_iters=1, + workspace_size=0, + max_batch_size=0, + calibrator=None, + truncate_long_and_double=False, + torch_fallback={"enabled": False}) -> torch.jit.ScriptModule: """Compile a TorchScript module for NVIDIA GPUs using TensorRT Takes a existing TorchScript module and a set of settings to configure the compiler @@ -17,58 +33,57 @@ def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.Scri Converts specifically the forward method of a TorchScript Module - Args: + Arguments: module (torch.jit.ScriptModule): Source module, a result of tracing or scripting a PyTorch ``torch.nn.Module`` - compile_spec (dict): Compilation settings including operating precision, target device, etc. - One key is required which is ``inputs``, describing the input sizes or ranges for inputs - to the graph as well as expect types and formats for those inputs. All other keys are optional - - .. 
code-block:: py - - compile_spec = { - "inputs": [ - trtorch.Input((1, 3, 224, 224)), # Static NCHW input shape for input #1 - trtorch.Input( - min_shape=(1, 224, 224, 3), - opt_shape=(1, 512, 512, 3), - max_shape=(1, 1024, 1024, 3), - dtype=torch.int32 - format=torch.channel_last - ) # Dynamic input shape for input #2 - ], - "device": { - "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) - "gpu_id": 0, # Target gpu id to run engine (Use Xavier as gpu id for DLA) - "dla_core": 0, # (DLA only) Target dla core id to run engine - "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - }, - "disable_tf32": False, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas - "sparse_weights": Enable sparsity for convolution and fully connected layers. - "enabled_precisions": {torch.float, torch.half}, # Enabling FP16 kernels - "refit": false, # enable refit - "debug": false, # enable debuggable engine - "strict_types": false, # kernels should strictly run in operating precision - "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels - "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels - "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels - "workspace_size": 0, # Maximum size of workspace given to TensorRT - "max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set) - "torch_fallback": { - "enabled": True, # Turn on or turn off falling back to PyTorch if operations are not supported in TensorRT - "force_fallback_ops": [ - "aten::max_pool2d" # List of specific ops to require running in PyTorch - ], - "force_fallback_modules": [ - "mypymod.mytorchmod" # List of specific torch modules to 
require running in PyTorch - ], - "min_block_size": 3 # Minimum number of ops an engine must incapsulate to be run in TensorRT - } - } - - Input Sizes can be specified as torch sizes, tuples or lists. dtypes can be specified using + + Keyword Arguments: + inputs (List[Union(trtorch.Input, torch.Tensor)]): List of specifications of input shape, dtype and memory layout for inputs to the module. This argument is required. Input Sizes can be specified as torch sizes, tuples or lists. dtypes can be specified using torch datatypes or trtorch datatypes and you can use either torch devices or the trtorch device type enum - to select device type. + to select device type. :: + + inputs=[ + trtorch.Input((1, 3, 224, 224)), # Static NCHW input shape for input #1 + trtorch.Input( + min_shape=(1, 224, 224, 3), + opt_shape=(1, 512, 512, 3), + max_shape=(1, 1024, 1024, 3), + dtype=torch.int32, + format=torch.channels_last + ), # Dynamic input shape for input #2 + torch.randn((1, 3, 224, 244)) # Use an example tensor and let trtorch infer settings + ] + + device (Union(trtorch.Device, torch.device, dict)): Target device for TensorRT engines to run on :: + + device=trtorch.Device("dla:1", allow_gpu_fallback=True) + + disable_tf32 (bool): Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas + sparse_weights (bool): Enable sparsity for convolution and fully connected layers. + enabled_precisions (Set(Union(torch.dtype, trtorch.dtype))): The set of datatypes that TensorRT can use when selecting kernels + refit (bool): Enable refitting + debug (bool): Enable debuggable engine + strict_types (bool): Kernels should strictly run in a particular operating precision. 
Enabled precision should only have one type in the set + capability (trtorch.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels + num_min_timing_iters (int): Number of minimization timing iterations used to select kernels + num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels + workspace_size (int): Maximum size of workspace given to TensorRT + max_batch_size (int): Maximum batch size (must be >= 1 to be set, 0 means not set) + truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32 + calibrator (Union(trtorch._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration + torch_fallback (dict): Settings related to partial compilation. Partial compilation will run any unsupported operations and any operators or submodules specified by the user in PyTorch :: + + torch_fallback={ + "enabled": True, + "force_fallback_ops": [ + "aten::max_pool2d" # List of specific ops to require running in PyTorch + ], + "force_fallback_modules": [ + "mypymod.mytorchmod" # List of specific torch modules to require running in PyTorch + ], + "min_block_size": 3 # Minimum number of ops an engine must encapsulate to be run in TensorRT + } + Returns: torch.jit.ScriptModule: Compiled TorchScript Module, when run it will execute via TensorRT @@ -78,60 +93,88 @@ def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.Scri raise TypeError( "torch.jit.ScriptFunction currently is not directly supported, wrap the function in a module to compile") - compiled_cpp_mod = trtorch._C.compile_graph(module._c, _parse_compile_spec(compile_spec)) + spec = { + "inputs": inputs, + "device": device, + "disable_tf32": disable_tf32, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 
23-bit mantissas + "sparse_weights": sparse_weights, #Enable sparsity for convolution and fully connected layers. + "enabled_precisions": enabled_precisions, # Enabling FP16 kernels + "refit": refit, # enable refit + "debug": debug, # enable debuggable engine + "strict_types": strict_types, # kernels should strictly run in operating precision + "capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels + "num_min_timing_iters": num_min_timing_iters, # Number of minimization timing iterations used to select kernels + "num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels + "workspace_size": workspace_size, # Maximum size of workspace given to TensorRT + "max_batch_size": max_batch_size, # Maximum batch size (must be >= 1 to be set, 0 means not set) + "calibrator": calibrator, + "truncate_long_and_double": truncate_long_and_double, + "torch_fallback": torch_fallback + } + + compiled_cpp_mod = trtorch._C.compile_graph(module._c, _parse_compile_spec(spec)) compiled_module = torch.jit._recursive.wrap_cpp_module(compiled_cpp_mod) return compiled_module -def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: str, compile_spec: Any) -> str: +def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: str, inputs=[], + device=Device._current_device(), + disable_tf32=False, + sparse_weights=False, + enabled_precisions=set(), + refit=False, + debug=False, + strict_types=False, + capability=EngineCapability.default, + num_min_timing_iters=2, + num_avg_timing_iters=1, + workspace_size=0, + max_batch_size=0, + truncate_long_and_double=False, + calibrator=None) -> str: """Convert a TorchScript module method to a serialized TensorRT engine Converts a specified method of a module to a serialized TensorRT engine given a dictionary of conversion settings - Args: + Arguments: module (torch.jit.ScriptModule): Source module, a result of tracing or scripting a 
PyTorch ``torch.nn.Module`` method_name (str): Name of method to convert - compile_spec (dict): Compilation settings including operating precision, target device, etc. - One key is required which is ``inputs``, describing the input sizes or ranges for inputs - to the graph as well as expect types and formats for those inputs. All other keys are optional - - .. code-block:: py - - CompileSpec = { - "inputs": [ - trtorch.Input((1, 3, 224, 224)), # Static NCHW input shape for input #1 - trtorch.Input( - min_shape=(1, 224, 224, 3), - opt_shape=(1, 512, 512, 3), - max_shape=(1, 1024, 1024, 3), - dtype=torch.int32 - format=torch.channel_last - ) # Dynamic input shape for input #2 - ], - "device": { - "device_type": torch.device("cuda"), # Type of device to run engine on (for DLA use trtorch.DeviceType.DLA) - "gpu_id": 0, # Target gpu id to run engine (Use Xavier as gpu id for DLA) - "dla_core": 0, # (DLA only) Target dla core id to run engine - "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU - }, - "enabled_precisions": {torch.float, torch.half}, # Enabling FP16 kernels - # List of datatypes that should be configured for each input. Supported options torch.{float|half|int8|int32|bool}. - "disable_tf32": False, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas - "sparse_weights": Enable sparsity for convolution and fully connected layers. 
- "refit": false, # enable refit - "debug": false, # enable debuggable engine - "strict_types": false, # kernels should strictly run in operating precision - "capability": trtorch.EngineCapability.DEFAULT, # Restrict kernel selection to safe gpu kernels or safe dla kernels - "num_min_timing_iters": 2, # Number of minimization timing iterations used to select kernels - "num_avg_timing_iters": 1, # Number of averaging timing iterations used to select kernels - "workspace_size": 0, # Maximum size of workspace given to TensorRT - "max_batch_size": 0, # Maximum batch size (must be >= 1 to be set, 0 means not set) - } - - Input Sizes can be specified as torch sizes, tuples or lists. dtypes can be specified using + + Keyword Args: + inputs (List[Union(trtorch.Input, torch.Tensor)]): List of specifications of input shape, dtype and memory layout for inputs to the module. This argument is required. Input Sizes can be specified as torch sizes, tuples or lists. dtypes can be specified using torch datatypes or trtorch datatypes and you can use either torch devices or the trtorch device type enum - to select device type. + to select device type. :: + + inputs=[ + trtorch.Input((1, 3, 224, 224)), # Static NCHW input shape for input #1 + trtorch.Input( + min_shape=(1, 224, 224, 3), + opt_shape=(1, 512, 512, 3), + max_shape=(1, 1024, 1024, 3), + dtype=torch.int32, + format=torch.channels_last + ), # Dynamic input shape for input #2 + torch.randn((1, 3, 224, 244)) # Use an example tensor and let trtorch infer settings + ] + + device (Union(trtorch.Device, torch.device, dict)): Target device for TensorRT engines to run on :: + + device=trtorch.Device("dla:1", allow_gpu_fallback=True) + + disable_tf32 (bool): Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas + sparse_weights (bool): Enable sparsity for convolution and fully connected layers. 
+ enabled_precisions (Set(Union(torch.dtype, trtorch.dtype))): The set of datatypes that TensorRT can use when selecting kernels + refit (bool): Enable refitting + debug (bool): Enable debuggable engine + strict_types (bool): Kernels should strictly run in a particular operating precision. Enabled precision should only have one type in the set + capability (trtorch.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels + num_min_timing_iters (int): Number of minimization timing iterations used to select kernels + num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels + workspace_size (int): Maximum size of workspace given to TensorRT + max_batch_size (int): Maximum batch size (must be >= 1 to be set, 0 means not set) + truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32 + calibrator (Union(trtorch._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration Returns: bytes: Serialized TensorRT engine, can either be saved to a file or deserialized via TensorRT APIs @@ -140,6 +183,25 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st raise TypeError( "torch.jit.ScriptFunctions currently are not directly supported, wrap the function in a module to compile") + compile_spec = { + "inputs": inputs, + "device": device, + "disable_tf32": disable_tf32, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas + "sparse_weights": sparse_weights, #Enable sparsity for convolution and fully connected layers. 
+ "enabled_precisions": enabled_precisions, # Enabling FP16 kernels + "refit": refit, # enable refit + "debug": debug, # enable debuggable engine + "strict_types": strict_types, # kernels should strictly run in operating precision + "capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels + "num_min_timing_iters": num_min_timing_iters, # Number of minimization timing iterations used to select kernels + "num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels + "workspace_size": workspace_size, # Maximum size of workspace given to TensorRT + "max_batch_size": max_batch_size, # Maximum batch size (must be >= 1 to be set, 0 means not set) + "torch_fallback": {"enabled": False}, + "calibrator": calibrator, + "truncate_long_and_double": truncate_long_and_double + } + return trtorch._C.convert_graph_to_trt_engine(module._c, method_name, _parse_compile_spec(compile_spec)) diff --git a/py/trtorch/csrc/tensorrt_classes.cpp b/py/trtorch/csrc/tensorrt_classes.cpp index 92b382f2da..bbab4942fb 100644 --- a/py/trtorch/csrc/tensorrt_classes.cpp +++ b/py/trtorch/csrc/tensorrt_classes.cpp @@ -38,6 +38,13 @@ nvinfer1::DataType toTRTDataType(DataType value) { } } +Device::Device(const core::runtime::CudaDevice& internal_dev) { + device_type = DeviceType::kGPU; + gpu_id = internal_dev.id; + dla_core = -1; + allow_gpu_fallback = false; +} + nvinfer1::TensorFormat toTRTTensorFormat(TensorFormat value) { switch (value) { case TensorFormat::kChannelLast: diff --git a/py/trtorch/csrc/tensorrt_classes.h b/py/trtorch/csrc/tensorrt_classes.h index 355ad8c695..b7b5d08873 100644 --- a/py/trtorch/csrc/tensorrt_classes.h +++ b/py/trtorch/csrc/tensorrt_classes.h @@ -74,6 +74,8 @@ struct Device : torch::CustomClassHolder { allow_gpu_fallback(false) // allow_gpu_fallback {} + Device(const core::runtime::CudaDevice& internal_dev); + ADD_ENUM_GET_SET(device_type, DeviceType, static_cast(DeviceType::kDLA)); 
ADD_FIELD_GET_SET(gpu_id, int64_t); ADD_FIELD_GET_SET(dla_core, int64_t); diff --git a/py/trtorch/csrc/trtorch_py.cpp b/py/trtorch/csrc/trtorch_py.cpp index 300ae33720..2ec9e38754 100644 --- a/py/trtorch/csrc/trtorch_py.cpp +++ b/py/trtorch/csrc/trtorch_py.cpp @@ -103,6 +103,10 @@ void set_device(const int device_id) { core::set_device(device_id); } +Device get_current_device() { + return Device(core::runtime::get_current_device()); +} + torch::jit::Module CompileGraph(const torch::jit::Module& mod, CompileSpec& info) { py::gil_scoped_acquire gil; auto trt_mod = core::CompileGraph(mod, info.toInternalCompileSpec()); @@ -315,6 +319,8 @@ PYBIND11_MODULE(_C, m) { m.def("_set_is_colored_output_on", &logging::set_is_colored_output_on, "Set if the logging output should be colored"); m.def("_log", &logging::log, "Add a message to the logger"); m.def("set_device", &trtorch::pyapi::set_device, "Set CUDA device id"); + m.def("_get_current_device", &trtorch::pyapi::get_current_device, "Get the current active CUDA device"); + py::enum_(m, "LogLevel", py::arithmetic()) .value("INTERNAL_ERROR", core::util::logging::LogLevel::kINTERNAL_ERROR) diff --git a/tests/py/test_api.py b/tests/py/test_api.py index ca308e54da..76ff13f2c1 100644 --- a/tests/py/test_api.py +++ b/tests/py/test_api.py @@ -23,13 +23,18 @@ def test_compile_traced(self): "enabled_precisions": {torch.float} } - trt_mod = trtorch.compile(self.traced_model, compile_spec) + trt_mod = trtorch.compile(self.traced_model, **compile_spec) same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() self.assertTrue(same < 2e-2) def test_compile_script(self): + trt_mod = trtorch.compile(self.scripted_model, inputs=[self.input], device=trtorch.Device(gpu_id=0), enabled_precisions={torch.float}) + same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + + def test_from_torch_tensor(self): compile_spec = { - "inputs": [trtorch.Input(shape=self.input.shape)], + 
"inputs": [self.input], "device": { "device_type": trtorch.DeviceType.GPU, "gpu_id": 0, @@ -37,13 +42,21 @@ def test_compile_script(self): "enabled_precisions": {torch.float} } - trt_mod = trtorch.compile(self.scripted_model, compile_spec) + trt_mod = trtorch.compile(self.scripted_model, **compile_spec) same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() self.assertTrue(same < 2e-2) - def test_from_torch_tensor(self): + def test_device(self): + compile_spec = {"inputs": [self.input], "device": trtorch.Device("gpu:0"), "enabled_precisions": {torch.float}} + + trt_mod = trtorch.compile(self.scripted_model, **compile_spec) + same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + + + def test_compile_script_from_dict(self): compile_spec = { - "inputs": [self.input], + "inputs": [trtorch.Input(shape=self.input.shape)], "device": { "device_type": trtorch.DeviceType.GPU, "gpu_id": 0, @@ -51,16 +64,11 @@ def test_from_torch_tensor(self): "enabled_precisions": {torch.float} } - trt_mod = trtorch.compile(self.scripted_model, compile_spec) - same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() + trt_mod = trtorch.compile(self.traced_model, **compile_spec) + same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() self.assertTrue(same < 2e-2) - def test_device(self): - compile_spec = {"inputs": [self.input], "device": trtorch.Device("gpu:0"), "enabled_precisions": {torch.float}} - trt_mod = trtorch.compile(self.scripted_model, compile_spec) - same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() - self.assertTrue(same < 2e-2) class TestCompileHalf(ModelTestCase): @@ -80,7 +88,7 @@ def test_compile_script_half(self): "enabled_precisions": {torch.half} } - trt_mod = trtorch.compile(self.scripted_model, compile_spec) + trt_mod = trtorch.compile(self.scripted_model, **compile_spec) same = (trt_mod(self.input.half()) - 
self.scripted_model(self.input.half())).abs().max() trtorch.logging.log(trtorch.logging.Level.Debug, "Max diff: " + str(same)) self.assertTrue(same < 3e-2) @@ -103,7 +111,7 @@ def test_compile_script_half_by_default(self): "enabled_precisions": {torch.float, torch.half} } - trt_mod = trtorch.compile(self.scripted_model, compile_spec) + trt_mod = trtorch.compile(self.scripted_model, **compile_spec) same = (trt_mod(self.input.half()) - self.scripted_model(self.input.half())).abs().max() trtorch.logging.log(trtorch.logging.Level.Debug, "Max diff: " + str(same)) self.assertTrue(same < 3e-2) @@ -132,7 +140,7 @@ def test_compile_script(self): } } - trt_mod = trtorch.compile(self.scripted_model, compile_spec) + trt_mod = trtorch.compile(self.scripted_model, **compile_spec) same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() self.assertTrue(same < 2e-3) @@ -160,7 +168,7 @@ def test_compile_script(self): } } - trt_mod = trtorch.compile(self.scripted_model, compile_spec) + trt_mod = trtorch.compile(self.scripted_model, **compile_spec) same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() self.assertTrue(same < 2e-3) @@ -183,7 +191,7 @@ def test_pt_to_trt_to_pt(self): } } - trt_engine = trtorch.convert_method_to_trt_engine(self.ts_model, "forward", compile_spec) + trt_engine = trtorch.convert_method_to_trt_engine(self.ts_model, "forward", **compile_spec) trt_mod = trtorch.embed_engine_in_new_module(trt_engine, trtorch.Device("cuda:0")) same = (trt_mod(self.input) - self.ts_model(self.input)).abs().max() self.assertTrue(same < 2e-3) diff --git a/tests/py/test_api_dla.py b/tests/py/test_api_dla.py index e58e3c5955..5d954cbec2 100644 --- a/tests/py/test_api_dla.py +++ b/tests/py/test_api_dla.py @@ -40,7 +40,7 @@ def test_compile_traced(self): "enabled_precisions": {torch.half} } - trt_mod = trtorch.compile(self.traced_model, compile_spec) + trt_mod = trtorch.compile(self.traced_model, **compile_spec) same = (trt_mod(self.input) - 
self.traced_model(self.input)).abs().max() self.assertTrue(same < 2e-2) @@ -56,7 +56,7 @@ def test_compile_script(self): "enabled_precisions": {torch.half} } - trt_mod = trtorch.compile(self.scripted_model, compile_spec) + trt_mod = trtorch.compile(self.scripted_model, **compile_spec) same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() self.assertTrue(same < 2e-2) diff --git a/tests/py/test_multi_gpu.py b/tests/py/test_multi_gpu.py index d3aabb1f25..54117b9e3c 100644 --- a/tests/py/test_multi_gpu.py +++ b/tests/py/test_multi_gpu.py @@ -32,7 +32,7 @@ def test_compile_traced(self): } } - trt_mod = trtorch.compile(self.traced_model, compile_spec) + trt_mod = trtorch.compile(self.traced_model, **compile_spec) trtorch.set_device(self.target_gpu) same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() trtorch.set_device(0) @@ -51,7 +51,7 @@ def test_compile_script(self): } } - trt_mod = trtorch.compile(self.scripted_model, compile_spec) + trt_mod = trtorch.compile(self.scripted_model, **compile_spec) trtorch.set_device(self.target_gpu) same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() trtorch.set_device(0) @@ -84,7 +84,7 @@ def test_compile_traced(self): } } - trt_mod = trtorch.compile(self.traced_model, compile_spec) + trt_mod = trtorch.compile(self.traced_model, **compile_spec) # Changing the device ID deliberately. It should still run on correct device ID by context switching trtorch.set_device(1) same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() @@ -103,7 +103,7 @@ def test_compile_script(self): } } - trt_mod = trtorch.compile(self.scripted_model, compile_spec) + trt_mod = trtorch.compile(self.scripted_model, **compile_spec) # Changing the device ID deliberately. 
It should still run on correct device ID by context switching trtorch.set_device(1) same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() diff --git a/tests/py/test_ptq_dataloader_calibrator.py b/tests/py/test_ptq_dataloader_calibrator.py index 7349a58f70..7db04986bf 100644 --- a/tests/py/test_ptq_dataloader_calibrator.py +++ b/tests/py/test_ptq_dataloader_calibrator.py @@ -72,7 +72,7 @@ def test_compile_script(self): } } - trt_mod = trtorch.compile(self.model, compile_spec) + trt_mod = trtorch.compile(self.model, **compile_spec) int8_test_acc = self.compute_accuracy(self.testing_dataloader, trt_mod) log(Level.Info, "[TRT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc)) acc_diff = fp32_test_acc - int8_test_acc