From 533f9d0aa9a607d6312aba02a927fabd45dce92c Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Thu, 30 Mar 2023 05:35:56 +0000 Subject: [PATCH 1/6] inference predictor support paddle::Tensor --- cmake/phi_header.cmake | 31 ++-- paddle/fluid/framework/feed_fetch_method.cc | 2 +- .../fluid/inference/api/analysis_predictor.cc | 155 ++++++++++++++++-- .../fluid/inference/api/analysis_predictor.h | 42 ++++- .../inference/api/demo_ci/CMakeLists.txt | 2 +- paddle/fluid/inference/api/paddle_api.h | 10 ++ .../inference/api/paddle_inference_api.h | 11 ++ paddle/fluid/inference/api/paddle_tensor.h | 2 + paddle/fluid/pybind/inference_api.cc | 32 ++-- paddle/phi/api/include/tensor.h | 6 +- paddle/phi/api/lib/api_custom_impl.cc | 1 + 11 files changed, 249 insertions(+), 45 deletions(-) diff --git a/cmake/phi_header.cmake b/cmake/phi_header.cmake index 7596b59e71c76e..d1f062ba9fda07 100644 --- a/cmake/phi_header.cmake +++ b/cmake/phi_header.cmake @@ -17,21 +17,21 @@ set(PADDLE_INFERENCE_INSTALL_DIR function(phi_header_path_compat TARGET_PATH) message(STATUS "phi header path compat processing: ${TARGET_PATH}") - string(FIND ${TARGET_PATH} "experimental" pos) - if(pos GREATER 1) - file(GLOB HEADERS "${TARGET_PATH}/*" "*.h") - foreach(header ${HEADERS}) - if(${header} MATCHES ".*.h$") - file(READ ${header} HEADER_CONTENT) - string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/" - HEADER_CONTENT "${HEADER_CONTENT}") - string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" - HEADER_CONTENT "${HEADER_CONTENT}") - file(WRITE ${header} "${HEADER_CONTENT}") - message(STATUS "phi header path compat processing complete: ${header}") - endif() - endforeach() - endif() + # string(FIND ${TARGET_PATH} "experimental" pos) + # if(pos GREATER 1) + file(GLOB HEADERS "${TARGET_PATH}/*" "*.h") + foreach(header ${HEADERS}) + if(${header} MATCHES ".*.h$") + file(READ ${header} HEADER_CONTENT) + string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/" + HEADER_CONTENT "${HEADER_CONTENT}") + string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" + HEADER_CONTENT "${HEADER_CONTENT}") + file(WRITE ${header} "${HEADER_CONTENT}") + message(STATUS "phi header path compat processing complete: ${header}") + endif() + endforeach() + # endif() endfunction() phi_header_path_compat( @@ -46,6 +46,7 @@ phi_header_path_compat( ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common) phi_header_path_compat( ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core) +phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/) # In order to be compatible with the original behavior, the header file name needs to be changed file(RENAME diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc index f21ca0c858acc0..0294e1ca54b437 100644 --- a/paddle/fluid/framework/feed_fetch_method.cc +++ b/paddle/fluid/framework/feed_fetch_method.cc @@ -95,7 +95,7 @@ phi::DenseTensor& GetVariableTensor(const Scope& scope, PADDLE_ENFORCE_EQ(var->IsType(), true, platform::errors::InvalidArgument( - "Only support lod tensor in GetVariableTensor now.")); + "Only support DenseTensor in GetVariableTensor now.")); return *var->GetMutable(); } diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 91dee8a9ae4ee9..fb41d5fd353d3b 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -155,11 +155,10 @@ phi::Backend 
ConvertBackend(paddle_infer::PlaceType backend) {
       return phi::Backend::CPU;
   }
 }
-}  // namespace
 
-bool PaddleTensorToLoDTensor(const PaddleTensor &pt,
-                             phi::DenseTensor *t,
-                             const platform::Place &place) {
+bool PaddleTensorToDenseTensor(const PaddleTensor &pt,
+                               phi::DenseTensor *t,
+                               const platform::Place &place) {
   framework::DDim ddim = phi::make_ddim(pt.shape);
   void *input_ptr;
   if (pt.dtype == PaddleDType::INT64) {
@@ -270,6 +269,7 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt,
   t->set_lod(lod);
   return true;
 }
+}  // namespace
 
 bool AnalysisPredictor::Init(
     const std::shared_ptr<framework::Scope> &parent_scope,
@@ -923,6 +923,17 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
 #endif
 }
 
+void AnalysisPredictor::MkldnnPreSet(
+    const std::vector<paddle::Tensor> &inputs) {
+#ifdef PADDLE_WITH_MKLDNN
+  std::vector<std::vector<int>> inputs_shape;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    inputs_shape.emplace_back(phi::vectorize<int>(inputs[i].dims()));
+  }
+  MkldnnPreSet(inputs_shape);
+#endif
+}
+
 void AnalysisPredictor::MkldnnPreSet(
     const std::vector<std::vector<int>> &inputs_shape) {
 #ifdef PADDLE_WITH_MKLDNN
@@ -1037,6 +1048,73 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
   return true;
 }
 
+bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs,
+                            std::vector<paddle::Tensor> *outputs) {
+  inference::DisplayMemoryInfo(place_, "before run");
+  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) MkldnnPreSet(inputs);
+#endif
+  VLOG(3) << "predict start";
+  inference::Timer timer;
+  timer.tic();
+  // set feed variable
+  framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
+  PADDLE_ENFORCE_NOT_NULL(
+      scope,
+      platform::errors::PreconditionNotMet("The scope should not be nullptr."));
+  if (!SetFeed(inputs, scope)) {
+    LOG(ERROR) << "fail to set feed";
+    return false;
+  }
+
+#ifdef PADDLE_WITH_TENSORRT
+  if (config_.tensorrt_engine_enabled()) {
+    inference::tensorrt::TensorRTEngine::predictor_id_per_thread =
+        predictor_id_;
+    VLOG(3) << "thread_local var predictor_id in TensorRTEngine is set to: "
+            << inference::tensorrt::TensorRTEngine::predictor_id_per_thread;
+  }
+#endif
+
+  // Run the inference program
+  // if share variables, we need not create variables
+  executor_->Run();
+
+  inference::DisplayMemoryInfo(place_, "after run");
+
+  // get fetch variable
+  if (!GetFetch(outputs, scope)) {
+    LOG(ERROR) << "fail to get fetches";
+    return false;
+  }
+  VLOG(3) << "predict cost: " << timer.toc() << "ms";
+
+  // All the containers in the scope will be held in inference, but the
+  // operators assume that the container will be reset after each batch.
+  // Here is a bugfix: collect all the container variables, and reset them to a
+  // bool; the next time, the operator will call MutableData and construct a new
+  // container again, so that the container will be empty for each batch.
+  if (sub_scope_) {
+    tensor_array_batch_cleaner_.CollectNoTensorVars(sub_scope_);
+  }
+  tensor_array_batch_cleaner_.ResetNoTensorVars();
+
+  // recover the cpu_math_library_num_threads to 1, in order to avoid thread
+  // conflict when integrating it into deployment service.
+  paddle::platform::SetNumThreads(1);
+#ifdef PADDLE_WITH_MKLDNN
+  if (config_.use_mkldnn_) MkldnnPostReset();
+#endif
+#if defined(PADDLE_WITH_MKLML)
+  // Frees unused memory allocated by the Intel® MKL Memory Allocator to
+  // avoid memory leak. See:
+  // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
+  platform::dynload::MKL_Free_Buffers();
+#endif
+  return true;
+}
+
 bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                 framework::Scope *scope) {
   VLOG(3) << "Predictor::set_feed";
@@ -1051,7 +1129,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
 
   for (size_t i = 0; i < inputs.size(); ++i) {
     phi::DenseTensor *input = &feed_tensors_[i];
-    if (!PaddleTensorToLoDTensor(inputs[i], input, place_)) {
+    if (!PaddleTensorToDenseTensor(inputs[i], input, place_)) {
       return false;
     }
     int idx = -1;
@@ -1065,7 +1143,41 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     } else {
       idx = PADDLE_GET_CONST(int, feeds_[i]->GetAttr("col"));
     }
-    framework::SetFeedVariable(scope, *input, "feed", idx);
+    framework::SetFeedVariable(scope, *input, framework::kFeedOpType, idx);
+  }
+  return true;
+}
+
+bool AnalysisPredictor::SetFeed(const std::vector<paddle::Tensor> &inputs,
+                                framework::Scope *scope) {
+  VLOG(3) << "Predictor::set_feed";
+  PADDLE_ENFORCE_EQ(inputs.size(),
+                    feeds_.size(),
+                    platform::errors::InvalidArgument(
+                        "wrong feed input size, need %d but get %d.",
+                        feeds_.size(),
+                        inputs.size()));
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    PADDLE_ENFORCE_EQ(inputs[i].initialized(),
+                      true,
+                      paddle::platform::errors::InvalidArgument(
+                          "The input Tensor expected to be initialized."));
+  }
+
+  if (std::all_of(inputs.cbegin(), inputs.cend(), [&](const paddle::Tensor &t) {
+        return !t.name().empty() && feed_names_.count(t.name());
+      })) {
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      auto &t = framework::GetVariableTensor(*scope, inputs[i].name());
+      t.ShareDataWith(
+          *std::dynamic_pointer_cast<phi::DenseTensor>(inputs[i].impl()));
+    }
+  } else {
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      auto &t = framework::GetVariableTensor(*scope, idx2feeds_[i]);
+      t.ShareDataWith(
+          *std::dynamic_pointer_cast<phi::DenseTensor>(inputs[i].impl()));
+    }
   }
   return true;
 }
@@ -1104,7 +1216,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                           idx,
                           i));
     framework::FetchType &fetch_var =
-        framework::GetFetchVariable(*scope, "fetch", idx);
+        framework::GetFetchVariable(*scope, framework::kFetchOpType, idx);
     auto &fetch = PADDLE_GET(phi::DenseTensor, fetch_var);
     auto type = framework::TransToProtoVarType(fetch.dtype());
     auto output = &(outputs->at(i));
@@ -1129,6 +1241,20 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
   return true;
 }
 
+bool AnalysisPredictor::GetFetch(std::vector<paddle::Tensor> *outputs,
+                                 framework::Scope *scope) {
+  VLOG(3) << "Predictor::get_fetch";
+  outputs->resize(fetches_.size());
+  // TODO(liuyuanle): customize output Tensor's holder
+  for (size_t i = 0; i < fetches_.size(); ++i) {
+    auto const &name = idx2fetches_[i];
+    auto &t = framework::GetVariableTensor(*scope, name);
+    (*outputs)[i] =
+        std::move(paddle::Tensor(std::make_shared<phi::DenseTensor>(t), name));
+  }
+  return true;
+}
+
 void AnalysisPredictor::PrepareArgument() {
   VLOG(3) << "AnalysisPredictor::PrepareArgument";
   // Init std::unique_ptr argument_.
@@ -1583,7 +1709,7 @@ void AnalysisPredictor::PrepareFeedFetch() { "The sub_scope should not be nullptr.")); CreateFeedFetchVar(sub_scope_); for (auto *op : inference_program_->Block(0).AllOps()) { - if (op->Type() == "feed") { + if (op->Type() == framework::kFeedOpType) { int idx = PADDLE_GET_CONST(int, op->GetAttr("col")); if (feeds_.size() <= static_cast(idx)) { feeds_.resize(idx + 1); @@ -1591,7 +1717,7 @@ void AnalysisPredictor::PrepareFeedFetch() { feeds_[idx] = op; feed_names_[op->Output("Out")[0]] = idx; idx2feeds_[idx] = op->Output("Out")[0]; - } else if (op->Type() == "fetch") { + } else if (op->Type() == framework::kFetchOpType) { int idx = PADDLE_GET_CONST(int, op->GetAttr("col")); if (fetches_.size() <= static_cast(idx)) { fetches_.resize(idx + 1); @@ -1606,9 +1732,9 @@ void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) { PADDLE_ENFORCE_NOT_NULL( scope, platform::errors::InvalidArgument("The scope should not be nullptr.")); - auto *var = scope->Var("feed"); + auto *var = scope->Var(framework::kFeedOpType); var->GetMutable(); - var = scope->Var("fetch"); + var = scope->Var(framework::kFetchOpType); var->GetMutable(); } @@ -2190,7 +2316,7 @@ void AnalysisPredictor::ClearIntermediateTensor() { const std::string name = var->Name(); auto *variable = executor_->GetScope()->FindVar(name); if (variable != nullptr && variable->IsType() && - name != "feed" && name != "fetch") { + name != framework::kFeedOpType && name != framework::kFetchOpType) { VLOG(3) << "Clear Intermediate Tensor: " << name; auto *t = variable->GetMutable(); t->clear(); @@ -2656,6 +2782,11 @@ std::map Predictor::GetOutputTypes() { bool Predictor::Run() { return predictor_->ZeroCopyRun(); } +bool Predictor::Run(const std::vector &inputs, + std::vector *outputs) { + return predictor_->Run(inputs, outputs); +} + std::unique_ptr Predictor::Clone(void *stream) { auto analysis_pred = predictor_->Clone(stream); std::unique_ptr pred(new Predictor(std::move(analysis_pred))); diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 5a578a9b94fcb7..67ec3c308df839 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -31,15 +31,14 @@ #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/resource_manager.h" #include "paddle/fluid/platform/device/gpu/gpu_types.h" -#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/string/printf.h" +#include "paddle/phi/core/dense_tensor.h" #ifdef PADDLE_WITH_TESTING #include #include #endif namespace paddle_infer { -using float16 = paddle::platform::float16; namespace experimental { class InternalUtils; }; @@ -150,6 +149,16 @@ class AnalysisPredictor : public PaddlePredictor { std::vector *output_data, int batch_size = -1) override; + /// + /// \brief Run the prediction engine. 
+ /// + /// \param[in] inputs input tensors + /// \param[out] outputs output tensors + /// \return Whether the function executed successfully + /// + bool Run(const std::vector &inputs, + std::vector *outputs) override; + /// /// \brief Get the input names /// @@ -378,6 +387,17 @@ class AnalysisPredictor : public PaddlePredictor { /// bool SetFeed(const std::vector &input_datas, framework::Scope *scope); + + /// + /// \brief Prepare input data, only used in Run() + /// + /// \param[in] inputs inpute tensors + /// \param[in] scope the scope used by predictor + /// \return Whether the function executed successfully + /// + bool SetFeed(const std::vector &inputs, + framework::Scope *scope); + /// /// \brief Get the output data, only used in Run() /// @@ -387,6 +407,16 @@ class AnalysisPredictor : public PaddlePredictor { /// bool GetFetch(std::vector *output_data, framework::Scope *scope); + + /// + /// \brief Get the output data, only used in Run() + /// + /// \param[out] outputs output tensors + /// \param[in] scope the scope used by predictor + /// \return Whether the function executed successfully + /// + bool GetFetch(std::vector *outputs, framework::Scope *scope); + /// /// \brief Get the output data, only used in GetFetch() /// @@ -404,6 +434,14 @@ class AnalysisPredictor : public PaddlePredictor { /// \param[in] inputs tensors /// void MkldnnPreSet(const std::vector &inputs); + /// + /// \brief PreSet for Mkldnn multi-thread and dynamic shape input. + /// + /// Used in AnalysisPredictor::Run(). + /// + /// \param[in] inputs tensors + /// + void MkldnnPreSet(const std::vector &inputs); /// /// \brief PreSet for Mkldnn multi-thread and dynamic shape input. diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index fc23caee656380..11f214bc45d535 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -83,7 +83,7 @@ else() if(WITH_MKL) set(FLAG_OPENMP "-fopenmp") endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 ${FLAG_OPENMP}") endif() if(WITH_GPU) diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index e83c1a9f9444c9..8d7f54ea771840 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -221,6 +221,16 @@ class PD_INFER_DECL PaddlePredictor { std::vector* output_data, int batch_size = -1) = 0; + /// \brief This interface takes input and runs the network. + /// \param[in] inputs An list of Tensor as the input to the network. + /// \param[out] output_data Pointer to the tensor list, which holds the output + /// Tensor + /// \return Whether the run is successful + virtual bool Run(const std::vector& inputs, + std::vector* outputs) { + return false; + } + /// \brief Used to get the name of the network input. /// Be inherited by AnalysisPredictor, Only used in ZeroCopy scenarios. /// \return Input tensor names. diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index d7f15e0529894f..210fa81c99f988 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -128,6 +128,17 @@ class PD_INFER_DECL Predictor { /// bool Run(); + /// + /// \brief Run the prediction engine + /// + /// \param[in] inputs An list of Tensor as the input to the network. 
+ /// \param[out] outputs Pointer to the tensor list, which holds the output + /// Tensor + /// + /// \return Whether the run is successful + bool Run(const std::vector& inputs, + std::vector* outputs); + /// /// \brief Get the output names /// diff --git a/paddle/fluid/inference/api/paddle_tensor.h b/paddle/fluid/inference/api/paddle_tensor.h index 7c61d4098b44dd..5ed1ae47427f9f 100644 --- a/paddle/fluid/inference/api/paddle_tensor.h +++ b/paddle/fluid/inference/api/paddle_tensor.h @@ -21,6 +21,8 @@ #include "paddle_infer_declare.h" // NOLINT +#include "paddle/phi/api/include/tensor.h" // expose paddle::Tensor + #ifdef PADDLE_WITH_ONNXRUNTIME #include "onnxruntime_c_api.h" // NOLINT #include "onnxruntime_cxx_api.h" // NOLINT diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index afc5b73e232c78..14be816fbedeaf 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -65,7 +65,7 @@ constexpr int NPY_UINT16_ = 4; // paddle::platform::float16 as numpy.float16. // Ref: https://github.com/pybind/pybind11/issues/1776 template <> -struct npy_format_descriptor { +struct npy_format_descriptor { static py::dtype dtype() { handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_FLOAT16_); return reinterpret_borrow(ptr); @@ -180,7 +180,7 @@ py::dtype PaddleDTypeToNumpyDType(PaddleDType dtype) { dt = py::dtype::of(); break; case PaddleDType::FLOAT16: - dt = py::dtype::of(); + dt = py::dtype::of(); break; case PaddleDType::UINT8: dt = py::dtype::of(); @@ -264,7 +264,7 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT ToPaddleInferPlace(input_tensor.place().GetType())); } else if (input_tensor.dtype() == phi::DataType::FLOAT16) { tensor.ShareExternalData( - static_cast(input_tensor.data()), + static_cast(input_tensor.data()), shape, ToPaddleInferPlace(input_tensor.place().GetType())); } else if (input_tensor.dtype() == phi::DataType::INT32) { @@ -353,7 +353,7 @@ size_t PaddleGetDTypeSize(PaddleDType dt) { size = sizeof(float); break; case PaddleDType::FLOAT16: - size = sizeof(paddle_infer::float16); + size = sizeof(phi::dtype::float16); break; case PaddleDType::INT8: size = sizeof(int8_t); @@ -392,8 +392,8 @@ py::array ZeroCopyTensorToNumpy(ZeroCopyTensor &tensor) { // NOLINT tensor.copy_to_cpu(static_cast(array.mutable_data())); break; case PaddleDType::FLOAT16: - tensor.copy_to_cpu( - static_cast(array.mutable_data())); + tensor.copy_to_cpu( + static_cast(array.mutable_data())); break; case PaddleDType::UINT8: tensor.copy_to_cpu(static_cast(array.mutable_data())); @@ -432,8 +432,8 @@ py::array PaddleInferTensorToNumpy(paddle_infer::Tensor &tensor) { // NOLINT tensor.CopyToCpu(static_cast(array.mutable_data())); break; case PaddleDType::FLOAT16: - tensor.CopyToCpu( - static_cast(array.mutable_data())); + tensor.CopyToCpu( + static_cast(array.mutable_data())); break; case PaddleDType::UINT8: tensor.CopyToCpu(static_cast(array.mutable_data())); @@ -1070,6 +1070,17 @@ void BindPaddleInferPredictor(py::module *m) { #endif self.Run(); }) + .def( + "run", + [](paddle_infer::Predictor &self, py::handle py_in_tensor_list) { + auto in_tensor_list = + CastPyArg2VectorOfTensor(py_in_tensor_list.ptr(), 0); + std::vector outputs; + self.Run(in_tensor_list, &outputs); + return py::handle(ToPyObject(outputs)); + }, + py::arg("inputs"), + py::call_guard()) .def("clone", [](paddle_infer::Predictor &self) { return self.Clone(nullptr); }) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -1099,7 +1110,7 @@ 
void BindZeroCopyTensor(py::module *m) { .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) - .def("copy_from_cpu", &ZeroCopyTensorCreate) + .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyTensorCreate) .def("copy_from_cpu", &ZeroCopyStringTensorCreate) .def("copy_to_cpu", &ZeroCopyTensorToNumpy) @@ -1123,8 +1134,7 @@ void BindPaddleInferTensor(py::module *m) { .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) - .def("_copy_from_cpu_bind", - &PaddleInferTensorCreate) + .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferTensorCreate) .def("_copy_from_cpu_bind", &PaddleInferStringTensorCreate) .def("_share_external_data_bind", &PaddleInferShareExternalData) diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index d3943750fd21ef..24bcc63dbd278f 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -416,7 +416,7 @@ class PADDLE_API Tensor final { /** * @brief Return the name of Tensor. * @note Used to adapt original execution mechanism and debug analysis - * in the development of new dygraph. It may be removed in the future. + * in the development of new dygraph. * * @return const std::string& */ @@ -425,7 +425,7 @@ class PADDLE_API Tensor final { /** * @brief Set name of Tensor. * @note Used to adapt original execution mechanism and debug analysis - * in the development of new dygraph. It may be removed in the future. + * in the development of new dygraph. * * @param const std::string& name */ @@ -657,7 +657,7 @@ class PADDLE_API Tensor final { /** * Tensor name: used to adapt original execution mechanism and debug analysis - * in the development of new dygraph. It may be removed in the future. + * in the development of new dygraph. 
*/ std::string name_{""}; diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index 3cebef1588ea5a..6a409b64196239 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -136,6 +136,7 @@ Tensor add_n_impl(const std::vector& x) { Tensor copy_to_impl(const Tensor& x, Place place, bool blocking) { Tensor out; copy(x, place, blocking, &out); + out.set_name(x.name()); return out; } From d6997ff555b78e1c61021d0d5773d2cd2bcfc0b6 Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Thu, 30 Mar 2023 07:43:35 +0000 Subject: [PATCH 2/6] add unitest --- .../test_inference_double_datatype.py} | 0 .../inference/test_inference_predictor_run.py | 129 ++++++++++++++++++ 2 files changed, 129 insertions(+) rename python/paddle/fluid/tests/unittests/ir/{test_inference_datatype.py => inference/test_inference_double_datatype.py} (100%) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py diff --git a/python/paddle/fluid/tests/unittests/ir/test_inference_datatype.py b/python/paddle/fluid/tests/unittests/ir/inference/test_inference_double_datatype.py similarity index 100% rename from python/paddle/fluid/tests/unittests/ir/test_inference_datatype.py rename to python/paddle/fluid/tests/unittests/ir/inference/test_inference_double_datatype.py diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py b/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py new file mode 100644 index 00000000000000..da7144720f5f2c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py @@ -0,0 +1,129 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +import unittest + +import numpy as np + +import paddle +from paddle.inference import Config, create_predictor + + +class TestNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + self.fc1 = paddle.nn.Linear(4, 4) + self.fc2 = paddle.nn.Linear(4, 4) + + def forward(self, x1, x2): + y1 = self.fc1(x1) + y2 = self.fc2(x2) + return y1 + y2 + + +@unittest.skipIf( + not paddle.is_compiled_with_cuda(), 'should compile with cuda.' 
+) +class TestPredictorRunWithTensor(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + net = TestNet() + model = paddle.jit.to_static( + net, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 4], dtype='float32', name='input0' + ), + paddle.static.InputSpec( + shape=[None, 4], dtype='float32', name='input1' + ), + ], + ) + paddle.jit.save( + model, + os.path.join( + self.temp_dir.name, 'test_predictor_run_model/inference' + ), + ) + + def tearDown(self): + self.temp_dir.cleanup() + + def init_predictor(self): + config = Config( + os.path.join( + self.temp_dir.name, + 'test_predictor_run_model/inference.pdmodel', + ), + os.path.join( + self.temp_dir.name, + 'test_predictor_run_model/inference.pdiparams', + ), + ) + config.enable_use_gpu(256, 0) + config.enable_memory_optim() + predictor = create_predictor(config) + return predictor + + def get_inputs(self): + input0 = np.array([[1, 2, 3, 4], [2, 3, 4, 5]]).astype(np.float32) + input1 = np.array([[0.1, 0.2, 0.3, 0.4], [1.2, 1.3, 1.4, 1.5]]).astype( + np.float32 + ) + + input0_tensor = paddle.to_tensor(input0) + input1_tensor = paddle.to_tensor(input1) + + return [input0_tensor, input1_tensor] + + def get_disorder_output(self): + predictor = self.init_predictor() + + [input0_tensor, input1_tensor] = self.get_inputs() + + input_names = predictor.get_input_names() + input0_tensor.name = input_names[0] + input1_tensor.name = input_names[1] + + # disorder + inputs = [input1_tensor, input0_tensor] + outputs = predictor.run(inputs) + + return outputs[0] + + def get_inorder_output(self): + predictor = self.init_predictor() + + [input0_tensor, input1_tensor] = self.get_inputs() + + # disorder + inputs = [input0_tensor, input1_tensor] + outputs = predictor.run(inputs) + + return outputs[0] + + def test_output(self): + inorder_output = self.get_inorder_output() + disorder_output = self.get_disorder_output() + print(inorder_output) + print(disorder_output) + assert np.allclose( + inorder_output.numpy().flatten(), disorder_output.numpy().flatten() + ) + + +if __name__ == '__main__': + unittest.main() From 4f8b943316d9a7589579244f99300aae75db62d8 Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Thu, 30 Mar 2023 12:27:10 +0000 Subject: [PATCH 3/6] fix jit layer --- paddle/fluid/jit/engine/predictor_engine.cc | 135 +----------------- .../inference/test_inference_predictor_run.py | 2 +- 2 files changed, 6 insertions(+), 131 deletions(-) diff --git a/paddle/fluid/jit/engine/predictor_engine.cc b/paddle/fluid/jit/engine/predictor_engine.cc index d18f4f487dbe2e..847018e07e51cc 100644 --- a/paddle/fluid/jit/engine/predictor_engine.cc +++ b/paddle/fluid/jit/engine/predictor_engine.cc @@ -22,11 +22,6 @@ namespace paddle { namespace jit { -static PaddleTensor DenseTensorToPaddleTensor(DenseTensor *t); -static bool PaddleTensorToDenseTensor(const PaddleTensor &pt, - DenseTensor *t, - const platform::Place &place); - PredictorEngine::PredictorEngine( const std::shared_ptr &info, const std::shared_ptr ¶ms_dict, @@ -78,135 +73,15 @@ std::unique_ptr PredictorEngine::Clone(void *stream) { std::vector PredictorEngine::operator()( const std::vector &inputs) { - auto dense_tensors = utils::ToDenseTensors(inputs); - return utils::ToTensors(this->operator()(dense_tensors)); -} - -std::vector PredictorEngine::operator()( - const std::vector &inputs) { - std::vector pt_inputs; - std::vector pt_outputs; - for (auto &t : inputs) { - auto non_const_t = const_cast(&t); - pt_inputs.emplace_back(DenseTensorToPaddleTensor(non_const_t)); - } 
- - predictor_->Run(pt_inputs, &pt_outputs); - - std::vector outputs; - for (auto &pt : pt_outputs) { - DenseTensor t; - PaddleTensorToDenseTensor(pt, &t, place_); - outputs.emplace_back(t); - } + std::vector outputs; + predictor_->Run(inputs, &outputs); return outputs; } -static PaddleTensor DenseTensorToPaddleTensor(DenseTensor *t) { - PaddleTensor pt; - switch (framework::TransToProtoVarType(t->dtype())) { - case framework::proto::VarType::INT32: { - pt.data.Reset(t->data(), t->numel() * sizeof(int32_t)); - pt.dtype = PaddleDType::INT32; - } break; - case framework::proto::VarType::INT64: { - pt.data.Reset(t->data(), t->numel() * sizeof(int64_t)); - pt.dtype = PaddleDType::INT64; - } break; - case framework::proto::VarType::FP32: { - pt.data.Reset(t->data(), t->numel() * sizeof(float)); - pt.dtype = PaddleDType::FLOAT32; - } break; - default: - PADDLE_THROW( - platform::errors::Unimplemented("Unsupported tensor date type. Now " - "only supports INT64, FP32, INT32.")); - } - pt.shape = phi::vectorize(t->dims()); - return pt; -} - -static bool PaddleTensorToDenseTensor(const PaddleTensor &pt, - DenseTensor *t, - const platform::Place &place) { - framework::DDim ddim = phi::make_ddim(pt.shape); - void *input_ptr; - switch (pt.dtype) { - case PaddleDType::INT64: - input_ptr = t->mutable_data(ddim, place); - break; - case PaddleDType::FLOAT32: - input_ptr = t->mutable_data(ddim, place); - break; - case PaddleDType::INT32: - input_ptr = t->mutable_data(ddim, place); - break; - case PaddleDType::FLOAT16: - input_ptr = t->mutable_data(ddim, place); - break; - default: - LOG(ERROR) << "unsupported feed type " << pt.dtype; - return false; - } - - PADDLE_ENFORCE_NOT_NULL( - input_ptr, - paddle::platform::errors::Fatal( - "Cannot convert to LoDTensor because LoDTensor creation failed.")); - PADDLE_ENFORCE_NOT_NULL( - pt.data.data(), - paddle::platform::errors::InvalidArgument( - "The data contained in the input PaddleTensor is illegal.")); - - if (platform::is_cpu_place(place)) { - // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. 
- std::memcpy( - static_cast(input_ptr), pt.data.data(), pt.data.length()); - } else if (platform::is_ipu_place(place)) { -#ifdef PADDLE_WITH_IPU - std::memcpy( - static_cast(input_ptr), pt.data.data(), pt.data.length()); -#else - PADDLE_THROW(paddle::platform::errors::Fatal( - "Not compile with WITH_IPU, should not reach here.")); -#endif - } else if (platform::is_gpu_place(place)) { - PADDLE_ENFORCE_EQ(platform::is_xpu_place(place), - false, - platform::errors::InvalidArgument( - "Only one choice can be made between CPU and XPU.")); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto *dev_ctx = static_cast(pool.Get(place)); - auto dst_gpu_place = place; - memory::Copy(dst_gpu_place, - static_cast(input_ptr), - platform::CPUPlace(), - pt.data.data(), - pt.data.length(), - dev_ctx->stream()); -#else - PADDLE_THROW(paddle::platform::errors::Fatal( - "Not compile with CUDA, should not reach here.")); -#endif - } else if (platform::is_xpu_place(place)) { -#ifdef PADDLE_WITH_XPU - auto dst_xpu_place = place; - memory::Copy(dst_xpu_place, - static_cast(input_ptr), - platform::CPUPlace(), - pt.data.data(), - pt.data.length()); -#else - PADDLE_THROW(paddle::platform::errors::Fatal( - "Not compile with XPU, should not reach here.")); -#endif - } else { - PADDLE_THROW(paddle::platform::errors::InvalidArgument( - "The analysis predictor supports CPU, GPU and XPU now.")); - } - return true; +std::vector PredictorEngine::operator()( + const std::vector &inputs) { + return utils::ToDenseTensors(this->operator()(utils::ToTensors(inputs))); } } // namespace jit diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py b/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py index da7144720f5f2c..3dfcdaea30e097 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py @@ -109,7 +109,7 @@ def get_inorder_output(self): [input0_tensor, input1_tensor] = self.get_inputs() - # disorder + # inorder inputs = [input0_tensor, input1_tensor] outputs = predictor.run(inputs) From d2460c6928e0fd3aeabf028d92c2dc6d6714a3bc Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Fri, 31 Mar 2023 09:02:46 +0000 Subject: [PATCH 4/6] fix --- paddle/fluid/jit/engine/predictor_engine.cc | 1 + .../unittests/ir/inference/test_inference_predictor_run.py | 3 +-- ...inference_double_datatype.py => test_inference_datatype.py} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename python/paddle/fluid/tests/unittests/ir/{inference/test_inference_double_datatype.py => test_inference_datatype.py} (100%) diff --git a/paddle/fluid/jit/engine/predictor_engine.cc b/paddle/fluid/jit/engine/predictor_engine.cc index 847018e07e51cc..54e35bc0f69dd6 100644 --- a/paddle/fluid/jit/engine/predictor_engine.cc +++ b/paddle/fluid/jit/engine/predictor_engine.cc @@ -47,6 +47,7 @@ PredictorEngine::PredictorEngine( config.SetSkipLoadParams(true); config.SetApplyOptim(true); config.SwitchIrOptim(true); + config.SwitchUseFeedFetchOps(false); predictor_.reset(new AnalysisPredictor(config)); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py b/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py index 3dfcdaea30e097..99ba29956c5dad 100644 --- 
a/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_inference_predictor_run.py @@ -118,8 +118,7 @@ def get_inorder_output(self): def test_output(self): inorder_output = self.get_inorder_output() disorder_output = self.get_disorder_output() - print(inorder_output) - print(disorder_output) + assert np.allclose( inorder_output.numpy().flatten(), disorder_output.numpy().flatten() ) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_inference_double_datatype.py b/python/paddle/fluid/tests/unittests/ir/test_inference_datatype.py similarity index 100% rename from python/paddle/fluid/tests/unittests/ir/inference/test_inference_double_datatype.py rename to python/paddle/fluid/tests/unittests/ir/test_inference_datatype.py From 8175bfb3b6bc133540642b6344dabb4501d46bbe Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Mon, 10 Apr 2023 11:29:16 +0000 Subject: [PATCH 5/6] fix win bug --- paddle/fluid/inference/api/analysis_predictor.cc | 4 ---- paddle/fluid/pybind/eager_utils.cc | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 42f861c92c0a0b..2e6a59294cb604 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1052,8 +1052,6 @@ bool AnalysisPredictor::Run(const std::vector &inputs, if (config_.use_mkldnn_) MkldnnPreSet(inputs); #endif VLOG(3) << "predict start"; - inference::Timer timer; - timer.tic(); // set feed variable framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get(); PADDLE_ENFORCE_NOT_NULL( @@ -1084,7 +1082,6 @@ bool AnalysisPredictor::Run(const std::vector &inputs, LOG(ERROR) << "fail to get fetches"; return false; } - VLOG(3) << "predict cost: " << timer.toc() << "ms"; // All the containers in the scope will be hold in inference, but the // operators assume that the container will be reset after each batch. @@ -1241,7 +1238,6 @@ bool AnalysisPredictor::GetFetch(std::vector *outputs, framework::Scope *scope) { VLOG(3) << "Predictor::get_fetch"; outputs->resize(fetches_.size()); - // TODO(liuyuanle): customize output Tensor's holder for (size_t i = 0; i < fetches_.size(); ++i) { auto const &name = idx2fetches_[i]; auto &t = framework::GetVariableTensor(*scope, name); diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 1a177f59adba22..887125fda19f98 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -736,7 +736,11 @@ PyObject* ToPyObject(const std::vector>& value) { PyObject* ToPyObject(const std::vector& value, bool return_py_none_if_not_initialize) { + // NOTE(liuyuanle): I encountered a bug(access violation) in windows. 
ref to + // https://stackoverflow.com/questions/55598839/how-to-fix-access-violation-error-when-returning-pyobject-from-c-function-usin + PyGILState_STATE gstate = PyGILState_Ensure(); PyObject* result = PyList_New((Py_ssize_t)value.size()); + PyGILState_Release(gstate); for (size_t i = 0; i < value.size(); i++) { if (!value[i].initialized() && return_py_none_if_not_initialize) { From 2d546c6ed365c86f3e4ab7b4180e6b7c45bfe960 Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Mon, 10 Apr 2023 11:38:05 +0000 Subject: [PATCH 6/6] add (Recommended) --- paddle/fluid/inference/api/analysis_predictor.h | 2 +- paddle/fluid/inference/api/paddle_api.h | 2 +- paddle/fluid/inference/api/paddle_inference_api.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 67ec3c308df839..83207a8bfd654c 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -150,7 +150,7 @@ class AnalysisPredictor : public PaddlePredictor { int batch_size = -1) override; /// - /// \brief Run the prediction engine. + /// \brief Run the prediction engine (Recommended). /// /// \param[in] inputs input tensors /// \param[out] outputs output tensors diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 8d7f54ea771840..3a51f91b3afc22 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -221,7 +221,7 @@ class PD_INFER_DECL PaddlePredictor { std::vector* output_data, int batch_size = -1) = 0; - /// \brief This interface takes input and runs the network. + /// \brief This interface takes input and runs the network (Recommended). /// \param[in] inputs An list of Tensor as the input to the network. /// \param[out] output_data Pointer to the tensor list, which holds the output /// Tensor diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 210fa81c99f988..54a9d9af117caa 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -129,7 +129,7 @@ class PD_INFER_DECL Predictor { bool Run(); /// - /// \brief Run the prediction engine + /// \brief Run the prediction engine (Recommended) /// /// \param[in] inputs An list of Tensor as the input to the network. /// \param[out] outputs Pointer to the tensor list, which holds the output
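
Usage note: the sketch below shows how the new list-of-paddle.Tensor overload added in this series can be driven from Python. It is illustrative only; the model/params paths, the single float32 input, and its shape are placeholders, and the end-to-end version (two named inputs, GPU config, in-order vs. out-of-order feeding) is the test_inference_predictor_run.py unit test added in PATCH 2/6.

import numpy as np

import paddle
from paddle.inference import Config, create_predictor

# Placeholder paths; point these at any exported inference model.
config = Config("inference.pdmodel", "inference.pdiparams")
config.enable_use_gpu(256, 0)  # the bundled unit test exercises the GPU path
predictor = create_predictor(config)

# Assumed single float32 input of shape [2, 4]; adjust to the model's inputs.
x = paddle.to_tensor(np.random.rand(2, 4).astype("float32"))
# Optional: naming the tensor after a model input lets SetFeed match by name
# instead of by position (the out-of-order case covered by the unit test).
x.name = predictor.get_input_names()[0]

# New overload: feed a list of paddle.Tensor, get a list of paddle.Tensor back.
outputs = predictor.run([x])
print(outputs[0].numpy())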