diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
index b8d4994fca..a72e52c522 100644
--- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
+++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -440,7 +440,7 @@ def check_weight_equal(
         except Exception:
             return torch.all(sd_weight == network_weight)

-    @needs_refit
+    @needs_refit  # type: ignore[misc]
     def _save_weight_mapping(self) -> None:
         """
         Construct the weight name mapping from engine weight name to state_dict weight name.
@@ -577,7 +577,7 @@ def _save_weight_mapping(self) -> None:
         gc.collect()
         torch.cuda.empty_cache()

-    @needs_refit
+    @needs_refit  # type: ignore[misc]
     def _insert_engine_to_cache(self, hash_val: str, serialized_engine: bytes) -> None:
         # TODO: @Evan is waiting for TRT's feature to cache the weight-stripped engine
         # if not self.compilation_settings.strip_engine_weights:
@@ -605,7 +605,7 @@ def _insert_engine_to_cache(self, hash_val: str, serialized_engine: bytes) -> No
             ),
         )

-    @needs_refit
+    @needs_refit  # type: ignore[misc]
     def _pull_cached_engine(self, hash_val: str) -> Optional[TRTInterpreterResult]:
         # query the cached TRT engine
         cached_data = self.engine_cache.check(hash_val)  # type: ignore[union-attr]
@@ -941,7 +941,14 @@ def output(self, target: str, args: Any, kwargs: Any) -> List[Any]:
                 f"Specified output dtypes ({len(self.output_dtypes)}) differ from number of outputs ({len(outputs)})"
             )

+        marked_outputs_ids = []
         for i, output in enumerate(outputs):
+            # In some cases the same output tensor may be marked multiple times, e.g. by _to_copy,
+            # so skip marking if the output has already been marked
+            if id(output) in marked_outputs_ids:
+                continue
+            marked_outputs_ids.append(id(output))
+
             name = f"output{i}"

             output_dtype = dtype.unknown
diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
index fe9a01b06c..c4dd431818 100644
--- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
+++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py
@@ -1094,7 +1094,7 @@ def aten_ops_clone_copy_dtype(
         name,
         args[0],
         kwargs.get("dtype", args[0].dtype),
-        force_layer=True,
+        force_layer=False,
     )


@@ -1226,7 +1226,7 @@ def aten_ops_sum(
             name,
             sum_,
             kwargs["output_dtype"],
-            force_layer=True,
+            force_layer=False,
         )
     else:
         return sum_
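
For context on the output() change above, a minimal standalone sketch of the dedup-by-id pattern the patch introduces. The function name mark_outputs_once and the network/outputs parameters are illustrative stand-ins, not names from the patch; the real logic lives inside TRTInterpreter.output.

    import tensorrt as trt

    def mark_outputs_once(network: trt.INetworkDefinition, outputs: list) -> None:
        # Hypothetical stand-in for the loop in TRTInterpreter.output.
        # Ops such as aten._to_copy can return their input ITensor unchanged,
        # so `outputs` may contain the same tensor object more than once;
        # marking it twice would rename it and register a duplicate output.
        marked_outputs_ids = []
        for i, output in enumerate(outputs):
            if id(output) in marked_outputs_ids:
                continue
            marked_outputs_ids.append(id(output))
            output.name = f"output{i}"
            network.mark_output(output)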