diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 7f76c50e72908d..48df3fccdecd4b 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -370,6 +370,13 @@ AnalysisPredictor::AnalysisPredictor(const AnalysisConfig &config)
            "is enabled in Paddle-TRT, we set the id of these predictors to "
            "negative sharing_identifier you specified : "
         << predictor_id_;
+    PADDLE_ENFORCE_EQ(
+        config_.new_executor_enabled(),
+        true,
+        platform::errors::InvalidArgument(
+            "Please call config.enable_new_executor() in Python or "
+            "config.EnableNewExecutor() in C++ when you want to share the "
+            "engine context memory of multiple predictors."));
   } else {
     predictor_id_ = inference::GetUniqueId();
   }
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index 0d170eae31cfb1..77b0ebe1ce4937 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -835,14 +835,30 @@ class TensorRTEngineOp : public framework::OperatorBase {
     params.calibrator = calibrator_.get();
     params.device_id = dev_place.device;
     params.with_dynamic_shape = with_dynamic_shape_;
-    params.context_memory_sharing = Attr<bool>("context_memory_sharing");
-    params.use_dla = Attr<bool>("use_dla");
-    params.dla_core = Attr<int>("dla_core");
-    params.disable_trt_plugin_fp16 = Attr<bool>("disable_trt_plugin_fp16");
-    params.enable_low_precision_io = Attr<bool>("enable_low_precision_io");
-    params.use_inspector = Attr<bool>("use_inspector");
-    params.engine_info_path = Attr<std::string>("engine_info_path");
-
+    if (HasAttr("context_memory_sharing")) {
+      params.context_memory_sharing = Attr<bool>("context_memory_sharing");
+    }
+    if (HasAttr("use_dla")) {
+      params.use_dla = Attr<bool>("use_dla");
+    }
+    if (HasAttr("dla_core")) {
+      params.dla_core = Attr<int>("dla_core");
+    }
+    if (HasAttr("disable_trt_plugin_fp16")) {
+      params.disable_trt_plugin_fp16 = Attr<bool>("disable_trt_plugin_fp16");
+    }
+    if (HasAttr("enable_low_precision_io")) {
+      params.enable_low_precision_io = Attr<bool>("enable_low_precision_io");
+    }
+    if (HasAttr("use_inspector")) {
+      params.use_inspector = Attr<bool>("use_inspector");
+    }
+    if (HasAttr("engine_info_path")) {
+      params.engine_info_path = Attr<std::string>("engine_info_path");
+    }
+    if (HasAttr("optimization_level")) {
+      params.optimization_level = Attr<int>("optimization_level");
+    }
     if (!shape_range_info_path_.empty()) {
       inference::DeserializeShapeRangeInfo(shape_range_info_path_,
                                            &params.min_input_shape,
diff --git a/test/ir/inference/test_trt_inference_fp16_io.py b/test/ir/inference/test_trt_inference_fp16_io.py
index 0f30090a324934..31cccac681b618 100644
--- a/test/ir/inference/test_trt_inference_fp16_io.py
+++ b/test/ir/inference/test_trt_inference_fp16_io.py
@@ -106,8 +106,9 @@ def init_predictor(self, low_precision_io: bool):
             use_static=False,
             use_calib_mode=False,
         )
+        config.enable_tensorrt_memory_optim(True, 1)
         config.enable_tuned_tensorrt_dynamic_shape()
-        config.enable_memory_optim()
+        config.enable_new_executor()
         config.enable_low_precision_io(low_precision_io)
         config.disable_glog_info()
         predictor = create_predictor(config)
@@ -131,8 +132,9 @@ def init_predictor(self, low_precision_io: bool):
             use_static=False,
             use_calib_mode=False,
         )
+        config.enable_tensorrt_memory_optim(True, 1)
         config.enable_tuned_tensorrt_dynamic_shape()
-        config.enable_memory_optim()
+        config.enable_new_executor()
         config.enable_low_precision_io(low_precision_io)
         config.exp_disable_tensorrt_ops(["flatten_contiguous_range"])
         config.disable_glog_info()