diff --git a/tests/py/dynamo/conversion/test_bitwise_and_aten.py b/tests/py/dynamo/conversion/test_bitwise_and_aten.py index a29a8061db..c42fd2e61f 100644 --- a/tests/py/dynamo/conversion/test_bitwise_and_aten.py +++ b/tests/py/dynamo/conversion/test_bitwise_and_aten.py @@ -141,7 +141,12 @@ def forward(self, lhs_val, rhs_val): mod, inputs, dynamic_shapes=({1: dyn_dim}, {0: dyn_dim}) ) trt_mod = torch_tensorrt.dynamo.compile( - fx_mod, inputs=inputs, enable_precisions={torch.bool}, min_block_size=1 + fx_mod, + inputs=inputs, + enable_precisions={torch.bool}, + min_block_size=1, + cache_built_engines=False, + reuse_cached_engines=False, ) with torch.no_grad(): cuda_inputs = [] diff --git a/tests/py/dynamo/conversion/test_embedding_bag_aten.py b/tests/py/dynamo/conversion/test_embedding_bag_aten.py index d935134ff2..3fef3d70cf 100644 --- a/tests/py/dynamo/conversion/test_embedding_bag_aten.py +++ b/tests/py/dynamo/conversion/test_embedding_bag_aten.py @@ -484,7 +484,12 @@ def forward(self, weights, indices, offsets, per_sample_weights=None): dynamic_shapes["per_sample_weights"] = {} fx_mod = torch.export.export(mod, inputs, dynamic_shapes=dynamic_shapes) trt_mod = torch_tensorrt.dynamo.compile( - fx_mod, inputs=inputs, enable_precisions=torch.float32, min_block_size=1 + fx_mod, + inputs=inputs, + enable_precisions=torch.float32, + min_block_size=1, + cache_built_engines=False, + reuse_cached_engines=False, ) # use the inputs with different shape to inference: if per_sample_weights is None: diff --git a/tests/py/dynamo/conversion/test_index_select_aten.py b/tests/py/dynamo/conversion/test_index_select_aten.py index 3d0b41b791..b1339efdcf 100644 --- a/tests/py/dynamo/conversion/test_index_select_aten.py +++ b/tests/py/dynamo/conversion/test_index_select_aten.py @@ -109,7 +109,12 @@ def forward(self, source_tensor, indice_tensor): fx_mod = torch.export.export(mod, inputs, dynamic_shapes=dynamic_shapes) trt_mod = torch_tensorrt.dynamo.compile( - fx_mod, inputs=inputs, enable_precisions=torch.float32, min_block_size=1 + fx_mod, + inputs=inputs, + enable_precisions=torch.float32, + min_block_size=1, + cache_built_engines=False, + reuse_cached_engines=False, ) # use different shape of inputs for inference: inputs = (source_tensor_1, indice_tensor) diff --git a/tests/py/dynamo/models/test_dtype_support.py b/tests/py/dynamo/models/test_dtype_support.py index 29faf4eff3..b486784e52 100644 --- a/tests/py/dynamo/models/test_dtype_support.py +++ b/tests/py/dynamo/models/test_dtype_support.py @@ -41,6 +41,8 @@ def forward(self, x): truncate_double=True, min_block_size=1, use_python_runtime=False, + cache_built_engines=False, + reuse_cached_engines=False, ) torch_model_results = mod(in_tensor) @@ -79,6 +81,8 @@ def forward(self, x): truncate_double=True, min_block_size=1, use_python_runtime=True, + cache_built_engines=False, + reuse_cached_engines=False, ) torch_model_results = mod(in_tensor) @@ -123,6 +127,8 @@ def forward(self, x): truncate_double=False, min_block_size=1, use_python_runtime=False, + cache_built_engines=False, + reuse_cached_engines=False, ) torch_model_results = mod(in_tensor) @@ -162,6 +168,8 @@ def forward(self, x): truncate_double=False, min_block_size=1, use_python_runtime=True, + cache_built_engines=False, + reuse_cached_engines=False, ) torch_model_results = mod(in_tensor) @@ -214,6 +222,8 @@ def forward(self, x): enabled_precisions={torch.float, torch.bfloat16, torch.half}, min_block_size=1, use_python_runtime=False, + cache_built_engines=False, + reuse_cached_engines=False, ) torch_model_results = mod(in_tensor) @@ -252,6 +262,8 @@ def forward(self, x): enabled_precisions={torch.float, torch.bfloat16, torch.half}, min_block_size=1, use_python_runtime=True, + cache_built_engines=False, + reuse_cached_engines=False, ) torch_model_results = mod(in_tensor) @@ -289,6 +301,8 @@ def forward(self, x): debug=True, min_block_size=1, device=device, + cache_built_engines=False, + reuse_cached_engines=False, ) torch_model_results = mod(*inputs) diff --git a/tests/py/dynamo/models/test_dyn_models.py b/tests/py/dynamo/models/test_dyn_models.py index 50fa9a2f50..f1a05b19bd 100644 --- a/tests/py/dynamo/models/test_dyn_models.py +++ b/tests/py/dynamo/models/test_dyn_models.py @@ -39,6 +39,8 @@ def forward(self, x): "ir": ir, "pass_through_build_failures": True, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } if ir == "torch_compile": input_bs4 = torch.randn((4, 3, 224, 224)).to("cuda") @@ -96,6 +98,8 @@ def forward(self, x): "pass_through_build_failures": True, "torch_executed_ops": {"torch.ops.aten.abs.default"}, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } if ir == "torch_compile": @@ -147,6 +151,8 @@ def forward(self, x): "ir": ir, "pass_through_build_failures": True, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } if ir == "torch_compile": @@ -190,6 +196,8 @@ def test_resnet_dynamic(ir): "ir": ir, "pass_through_build_failures": True, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } if ir == "torch_compile": @@ -252,6 +260,8 @@ def forward(self, x): "pass_through_build_failures": True, "optimization_level": 1, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) @@ -284,6 +294,8 @@ def forward(self, x): "enabled_precisions": {torch.float}, "ir": ir, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } inputs_bs2 = torch.randn(2, 2, 10).to("cuda") if ir == "torch_compile": @@ -338,6 +350,8 @@ def forward(self, x): "pass_through_build_failures": True, "min_block_size": 1, "torch_executed_ops": {"torch.ops.aten.add.Tensor"}, + "cache_built_engines": False, + "reuse_cached_engines": False, } # Compile the model diff --git a/tests/py/dynamo/models/test_engine_cache.py b/tests/py/dynamo/models/test_engine_cache.py index 1a5b874eb4..24bb96c4f2 100644 --- a/tests/py/dynamo/models/test_engine_cache.py +++ b/tests/py/dynamo/models/test_engine_cache.py @@ -21,6 +21,8 @@ def __init__( engine_cache_dir: str, ) -> None: self.engine_cache_dir = engine_cache_dir + if not os.path.exists(self.engine_cache_dir): + os.makedirs(self.engine_cache_dir, exist_ok=True) def save( self, @@ -99,18 +101,18 @@ def test_dynamo_compile_with_default_disk_engine_cache(self): cos_sim = cosine_similarity(results[0], results[1]) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, - msg=f"test_dynamo_compile_with_default_disk_engine_cache: results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) cos_sim = cosine_similarity(results[1], results[2]) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, - msg=f"test_dynamo_compile_with_default_disk_engine_cache: results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) assertions.assertTrue( times[0] > times[2], - msg=f"test_dynamo_compile_with_default_disk_engine_cache: Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", ) def test_dynamo_compile_with_custom_engine_cache(self): @@ -167,18 +169,18 @@ def test_dynamo_compile_with_custom_engine_cache(self): cos_sim = cosine_similarity(results[0], results[1]) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, - msg=f"test_dynamo_compile_with_custom_engine_cache: results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) cos_sim = cosine_similarity(results[1], results[2]) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, - msg=f"test_dynamo_compile_with_custom_engine_cache: results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) assertions.assertTrue( times[0] > times[2], - msg=f"test_dynamo_compile_with_custom_engine_cache: Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", ) def test_torch_compile_with_default_disk_engine_cache(self): @@ -231,18 +233,18 @@ def test_torch_compile_with_default_disk_engine_cache(self): cos_sim = cosine_similarity(results[0], results[1]) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, - msg=f"test_torch_compile_with_default_disk_engine_cache: results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) cos_sim = cosine_similarity(results[1], results[2]) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, - msg=f"test_torch_compile_with_default_disk_engine_cache: results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) assertions.assertTrue( times[0] > times[2], - msg=f"test_torch_compile_with_default_disk_engine_cache: Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", ) def test_torch_compile_with_custom_engine_cache(self): @@ -295,16 +297,16 @@ def test_torch_compile_with_custom_engine_cache(self): cos_sim = cosine_similarity(results[0], results[1]) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, - msg=f"test_torch_compile_with_custom_engine_cache: results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + msg=f"results[0] doesn't match with results[1]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) cos_sim = cosine_similarity(results[1], results[2]) assertions.assertTrue( cos_sim > COSINE_THRESHOLD, - msg=f"test_torch_compile_with_custom_engine_cache: results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", + msg=f"results[1] doesn't match with results[2]. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}", ) assertions.assertTrue( times[0] > times[2], - msg=f"test_torch_compile_with_custom_engine_cache: Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", + msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms", ) diff --git a/tests/py/dynamo/models/test_export_kwargs_serde.py b/tests/py/dynamo/models/test_export_kwargs_serde.py index 08b23d55e0..52a927e518 100644 --- a/tests/py/dynamo/models/test_export_kwargs_serde.py +++ b/tests/py/dynamo/models/test_export_kwargs_serde.py @@ -63,6 +63,8 @@ def forward(self, x, b=5, c=None, d=None): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torch.export.export(model, args=tuple(args), kwargs=kwargs) @@ -122,6 +124,8 @@ def forward(self, x, b=5, c=None, d=None): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -190,6 +194,8 @@ def forward(self, x, b=5, c=None, d=None): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -271,6 +277,8 @@ def forward(self, x, b=None, c=None, d=None, e=[]): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -358,6 +366,8 @@ def forward(self, x, b=None, c=None, d=None, e=[]): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -444,6 +454,8 @@ def forward(self, x, b=None, c=None, d=None, e=[]): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -505,6 +517,8 @@ def forward(self, x, b=5, c=None, d=None): "optimization_level": 1, "min_block_size": 1, "ir": "dynamo", + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torch.export.export(model, args=tuple(args), kwargs=kwargs) diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py index c0c0ba0f22..146cc2addf 100644 --- a/tests/py/dynamo/models/test_export_serde.py +++ b/tests/py/dynamo/models/test_export_serde.py @@ -42,6 +42,8 @@ def forward(self, x): ], "ir": ir, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -94,6 +96,8 @@ def forward(self, x): ], "ir": ir, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -150,6 +154,8 @@ def forward(self, x): ) ], "ir": ir, + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -209,6 +215,8 @@ def forward(self, x): "ir": ir, "min_block_size": 1, "torch_executed_ops": {"torch.ops.aten.relu.default"}, + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -250,6 +258,8 @@ def test_resnet18(ir): ], "ir": ir, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -293,6 +303,8 @@ def test_resnet18_dynamic(ir): ], "ir": ir, "min_block_size": 1, + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -340,6 +352,8 @@ def forward(self, x): "ir": ir, "min_block_size": 1, "torch_executed_ops": {"torch.ops.aten.convolution.default"}, + "cache_built_engines": False, + "reuse_cached_engines": False, } exp_program = torchtrt.dynamo.trace(model, **compile_spec) @@ -388,7 +402,14 @@ def forward(self, x): model = MyModule().eval().cuda() input = torch.randn((1, 3, 224, 224)).to("cuda") - trt_gm = torchtrt.compile(model, ir=ir, inputs=[input], min_block_size=1) + trt_gm = torchtrt.compile( + model, + ir=ir, + inputs=[input], + min_block_size=1, + cache_built_engines=False, + reuse_cached_engines=False, + ) assertions.assertTrue( isinstance(trt_gm, torch.fx.GraphModule), msg=f"test_save_load_ts output type does not match with torch.fx.GraphModule", diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py index 2d45af2b49..ba6cb0c776 100644 --- a/tests/py/dynamo/models/test_models.py +++ b/tests/py/dynamo/models/test_models.py @@ -30,6 +30,8 @@ def test_resnet18(ir): "pass_through_build_failures": True, "optimization_level": 1, "ir": "torch_compile", + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) @@ -61,6 +63,8 @@ def test_mobilenet_v2(ir): "optimization_level": 1, "min_block_size": 10, "ir": "torch_compile", + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) @@ -92,6 +96,8 @@ def test_efficientnet_b0(ir): "optimization_level": 1, "min_block_size": 10, "ir": "torch_compile", + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) @@ -132,6 +138,8 @@ def test_bert_base_uncased(ir): "optimization_level": 1, "min_block_size": 15, "ir": "torch_compile", + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) @@ -166,6 +174,8 @@ def test_resnet18_half(ir): "pass_through_build_failures": True, "optimization_level": 1, "ir": "torch_compile", + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py index df71d6b58a..bf19c3c5e6 100644 --- a/tests/py/dynamo/models/test_models_export.py +++ b/tests/py/dynamo/models/test_models_export.py @@ -31,6 +31,8 @@ def test_resnet18(ir): "pass_through_build_failures": True, "optimization_level": 1, "min_block_size": 8, + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) @@ -61,6 +63,8 @@ def test_mobilenet_v2(ir): "pass_through_build_failures": True, "optimization_level": 1, "min_block_size": 8, + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) @@ -91,6 +95,8 @@ def test_efficientnet_b0(ir): "pass_through_build_failures": True, "optimization_level": 1, "min_block_size": 8, + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) @@ -130,6 +136,8 @@ def test_bert_base_uncased(ir): "truncate_double": True, "ir": ir, "min_block_size": 10, + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) model_outputs = model(input, input2) @@ -168,6 +176,8 @@ def test_resnet18_half(ir): "pass_through_build_failures": True, "optimization_level": 1, "min_block_size": 8, + "cache_built_engines": False, + "reuse_cached_engines": False, } trt_mod = torchtrt.compile(model, **compile_spec) @@ -223,6 +233,8 @@ def calibrate_loop(model): enabled_precisions={torch.float8_e4m3fn}, min_block_size=1, debug=True, + cache_built_engines=False, + reuse_cached_engines=False, ) outputs_trt = trt_model(input_tensor) assert torch.allclose(output_pyt, outputs_trt, rtol=1e-3, atol=1e-2) @@ -272,6 +284,8 @@ def calibrate_loop(model): enabled_precisions={torch.int8}, min_block_size=1, debug=True, + cache_built_engines=False, + reuse_cached_engines=False, ) outputs_trt = trt_model(input_tensor) assert torch.allclose(output_pyt, outputs_trt, rtol=1e-3, atol=1e-2)