[Paddle Inference] Predictor support paddle::Tensor #50445

Merged · 9 commits · Apr 11, 2023
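A minimal usage sketch of the paddle::Tensor overload of Predictor::Run that this PR adds. The model paths, input shape, and the paddle::full / set_name helpers from the experimental C++ Tensor API are illustrative assumptions, not code taken from this PR:

#include <vector>

#include "paddle_inference_api.h"  // header name depends on the install layout

int main() {
  // Configure and create the predictor (model paths are placeholders).
  paddle_infer::Config config;
  config.SetModel("model.pdmodel", "model.pdiparams");
  auto predictor = paddle_infer::CreatePredictor(config);

  // Build an input tensor; paddle::full and set_name are assumed to come from
  // the experimental C++ Tensor API shipped with the inference package.
  paddle::Tensor x = paddle::full({1, 3, 224, 224}, 1.0f,
                                  paddle::DataType::FLOAT32, paddle::CPUPlace());
  x.set_name(predictor->GetInputNames()[0]);  // enable name-matched feeding

  // New in this PR: run with paddle::Tensor inputs and outputs directly.
  std::vector<paddle::Tensor> inputs{x};
  std::vector<paddle::Tensor> outputs;
  if (!predictor->Run(inputs, &outputs)) {
    return 1;
  }
  return outputs.empty() ? 1 : 0;
}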
34 changes: 16 additions & 18 deletions cmake/phi_header.cmake
@@ -17,24 +17,21 @@ set(PADDLE_INFERENCE_INSTALL_DIR

function(phi_header_path_compat TARGET_PATH)
message(STATUS "phi header path compat processing: ${TARGET_PATH}")
string(FIND ${TARGET_PATH} "experimental" pos)
if(pos GREATER 1)
file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
foreach(header ${HEADERS})
if(${header} MATCHES ".*.h$")
file(READ ${header} HEADER_CONTENT)
string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/"
HEADER_CONTENT "${HEADER_CONTENT}")
string(REPLACE "paddle/fluid/platform/"
"paddle/include/experimental/phi/" HEADER_CONTENT
"${HEADER_CONTENT}")
string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/"
HEADER_CONTENT "${HEADER_CONTENT}")
file(WRITE ${header} "${HEADER_CONTENT}")
message(STATUS "phi header path compat processing complete: ${header}")
endif()
endforeach()
endif()
file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
foreach(header ${HEADERS})
if(${header} MATCHES ".*.h$")
file(READ ${header} HEADER_CONTENT)
string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/"
HEADER_CONTENT "${HEADER_CONTENT}")
string(REPLACE "paddle/fluid/platform/"
"paddle/include/experimental/phi/" HEADER_CONTENT
"${HEADER_CONTENT}")
string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/"
HEADER_CONTENT "${HEADER_CONTENT}")
file(WRITE ${header} "${HEADER_CONTENT}")
message(STATUS "phi header path compat processing complete: ${header}")
endif()
endforeach()
endfunction()

phi_header_path_compat(
@@ -51,6 +48,7 @@ phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common)
phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core)
phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/)

# In order to be compatible with the original behavior, the header file name needs to be changed
file(RENAME
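For context, phi_header_path_compat rewrites the include directives inside the installed headers so that they resolve against the inference package layout. The header names below are illustrative; the substitutions follow the string(REPLACE ...) rules in the function above:

// As a header appears in the Paddle source tree:
#include "paddle/phi/api/include/tensor.h"
#include "paddle/utils/optional.h"

// After phi_header_path_compat has processed the installed copy:
#include "paddle/include/experimental/phi/api/include/tensor.h"
#include "paddle/include/experimental/utils/optional.h"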
2 changes: 1 addition & 1 deletion paddle/fluid/framework/feed_fetch_method.cc
@@ -95,7 +95,7 @@ phi::DenseTensor& GetVariableTensor(const Scope& scope,
PADDLE_ENFORCE_EQ(var->IsType<phi::DenseTensor>(),
true,
platform::errors::InvalidArgument(
"Only support lod tensor in GetVariableTensor now."));
"Only support DenseTensor in GetVariableTensor now."));
return *var->GetMutable<phi::DenseTensor>();
}

151 changes: 139 additions & 12 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -155,11 +155,10 @@ phi::Backend ConvertBackend(paddle_infer::PlaceType backend) {
return phi::Backend::CPU;
}
}
} // namespace

bool PaddleTensorToLoDTensor(const PaddleTensor &pt,
phi::DenseTensor *t,
const platform::Place &place) {
bool PaddleTensorToDenseTensor(const PaddleTensor &pt,
phi::DenseTensor *t,
const platform::Place &place) {
framework::DDim ddim = phi::make_ddim(pt.shape);
void *input_ptr;
if (pt.dtype == PaddleDType::INT64) {
@@ -270,6 +269,7 @@ bool PaddleTensorToLoDTensor(const PaddleTensor &pt,
t->set_lod(lod);
return true;
}
} // namespace

bool AnalysisPredictor::Init(
const std::shared_ptr<framework::Scope> &parent_scope,
@@ -919,6 +919,17 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
#endif
}

void AnalysisPredictor::MkldnnPreSet(
const std::vector<paddle::Tensor> &inputs) {
#ifdef PADDLE_WITH_MKLDNN
std::vector<std::vector<int>> inputs_shape;
for (size_t i = 0; i < inputs.size(); ++i) {
inputs_shape.emplace_back(phi::vectorize<int>(inputs[i].dims()));
}
MkldnnPreSet(inputs_shape);
#endif
}

void AnalysisPredictor::MkldnnPreSet(
const std::vector<std::vector<int>> &inputs_shape) {
#ifdef PADDLE_WITH_MKLDNN
@@ -1033,6 +1044,70 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
return true;
}

bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs,
std::vector<paddle::Tensor> *outputs) {
Contributor:
Can the implementation that duplicates the Run interface above be factored out and reused?

Contributor (Author):
> Can the implementation that duplicates the Run interface above be factored out and reused?

I did consider extracting the common code, but the extracted code did not seem to serve a clear purpose of its own.
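A self-contained toy sketch of the kind of extraction being discussed: the shared pre-/post-run steps live in one private helper and each typed Run overload only supplies its feed/fetch logic. The class and all names below are illustrative, not Paddle's actual implementation:

#include <functional>
#include <vector>

class ToyPredictor {
 public:
  // Typed overload: supplies its own feed/fetch callbacks to the shared core.
  bool Run(const std::vector<float> &in, std::vector<float> *out) {
    return RunImpl([&] { feed_ = in; return true; },
                   [&] { *out = feed_; return true; });
  }

 private:
  // Shared skeleton: thread setup, execution and cleanup would live here once,
  // instead of being duplicated in every typed Run overload.
  bool RunImpl(const std::function<bool()> &set_feed,
               const std::function<bool()> &get_fetch) {
    if (!set_feed()) return false;   // overload-specific feeding
    // ... run the inference program ...
    if (!get_fetch()) return false;  // overload-specific fetching
    return true;
  }

  std::vector<float> feed_;
};

int main() {
  ToyPredictor p;
  std::vector<float> out;
  return p.Run({1.0f, 2.0f}, &out) ? 0 : 1;
}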

inference::DisplayMemoryInfo(place_, "before run");
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPreSet(inputs);
#endif
VLOG(3) << "predict start";
// set feed variable
framework::Scope *scope = sub_scope_ ? sub_scope_ : scope_.get();
PADDLE_ENFORCE_NOT_NULL(
scope,
platform::errors::PreconditionNotMet("The scope should not be nullptr."));
if (!SetFeed(inputs, scope)) {
LOG(ERROR) << "fail to set feed";
return false;
}

#ifdef PADDLE_WITH_TENSORRT
if (config_.tensorrt_engine_enabled()) {
inference::tensorrt::TensorRTEngine::predictor_id_per_thread =
predictor_id_;
VLOG(3) << "thread_local var predictor_id in TensorRTEngine is set to: "
<< inference::tensorrt::TensorRTEngine::predictor_id_per_thread;
}
#endif

// Run the inference program
// if share variables, we need not create variables
executor_->Run();

inference::DisplayMemoryInfo(place_, "after run");

// get fetch variable
if (!GetFetch(outputs, scope)) {
LOG(ERROR) << "fail to get fetches";
return false;
}

// All the containers in the scope will be held in inference, but the
// operators assume that the container will be reset after each batch.
// Here is a bugfix: collect all the container variables, and reset them to a
// bool; the next time, the operator will call MutableData and construct a new
// container again, so that the container will be empty for each batch.
if (sub_scope_) {
tensor_array_batch_cleaner_.CollectNoTensorVars(sub_scope_);
}
tensor_array_batch_cleaner_.ResetNoTensorVars();

// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPostReset();
#endif
#if defined(PADDLE_WITH_MKLML)
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
platform::dynload::MKL_Free_Buffers();
#endif
return true;
}

bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed";
@@ -1047,7 +1122,7 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,

for (size_t i = 0; i < inputs.size(); ++i) {
phi::DenseTensor *input = &feed_tensors_[i];
if (!PaddleTensorToLoDTensor(inputs[i], input, place_)) {
if (!PaddleTensorToDenseTensor(inputs[i], input, place_)) {
return false;
}
int idx = -1;
@@ -1061,7 +1136,41 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
} else {
idx = PADDLE_GET_CONST(int, feeds_[i]->GetAttr("col"));
}
framework::SetFeedVariable(scope, *input, "feed", idx);
framework::SetFeedVariable(scope, *input, framework::kFeedOpType, idx);
}
return true;
}

bool AnalysisPredictor::SetFeed(const std::vector<paddle::Tensor> &inputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed";
PADDLE_ENFORCE_EQ(inputs.size(),
feeds_.size(),
platform::errors::InvalidArgument(
"wrong feed input size, need %d but get %d.",
feeds_.size(),
inputs.size()));
for (size_t i = 0; i < inputs.size(); ++i) {
PADDLE_ENFORCE_EQ(inputs[i].initialized(),
true,
paddle::platform::errors::InvalidArgument(
"The input Tensor expected to be initialized."));
}

if (std::all_of(inputs.cbegin(), inputs.cend(), [&](const paddle::Tensor &t) {
return !t.name().empty() && feed_names_.count(t.name());
})) {
for (size_t i = 0; i < inputs.size(); ++i) {
auto &t = framework::GetVariableTensor(*scope, inputs[i].name());
t.ShareDataWith(
*std::dynamic_pointer_cast<phi::DenseTensor>(inputs[i].impl()));
}
} else {
for (size_t i = 0; i < inputs.size(); ++i) {
auto &t = framework::GetVariableTensor(*scope, idx2feeds_[i]);
t.ShareDataWith(
*std::dynamic_pointer_cast<phi::DenseTensor>(inputs[i].impl()));
}
}
return true;
}
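Note on the two branches in the new SetFeed overload above: when every input paddle::Tensor carries a non-empty name that is registered in feed_names_, inputs are bound to feed variables by name; otherwise they are bound positionally through idx2feeds_, that is, in the order the feed ops were declared.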
@@ -1100,7 +1209,7 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
idx,
i));
framework::FetchType &fetch_var =
framework::GetFetchVariable(*scope, "fetch", idx);
framework::GetFetchVariable(*scope, framework::kFetchOpType, idx);
auto &fetch = PADDLE_GET(phi::DenseTensor, fetch_var);
auto type = framework::TransToProtoVarType(fetch.dtype());
auto output = &(outputs->at(i));
@@ -1125,6 +1234,19 @@ bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
return true;
}

bool AnalysisPredictor::GetFetch(std::vector<paddle::Tensor> *outputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::get_fetch";
outputs->resize(fetches_.size());
for (size_t i = 0; i < fetches_.size(); ++i) {
auto const &name = idx2fetches_[i];
auto &t = framework::GetVariableTensor(*scope, name);
(*outputs)[i] =
std::move(paddle::Tensor(std::make_shared<phi::DenseTensor>(t), name));
}
return true;
}

void AnalysisPredictor::PrepareArgument() {
VLOG(3) << "AnalysisPredictor::PrepareArgument";
// Init std::unique_ptr argument_.
@@ -1579,15 +1701,15 @@ void AnalysisPredictor::PrepareFeedFetch() {
"The sub_scope should not be nullptr."));
CreateFeedFetchVar(sub_scope_);
for (auto *op : inference_program_->Block(0).AllOps()) {
if (op->Type() == "feed") {
if (op->Type() == framework::kFeedOpType) {
int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
if (feeds_.size() <= static_cast<size_t>(idx)) {
feeds_.resize(idx + 1);
}
feeds_[idx] = op;
feed_names_[op->Output("Out")[0]] = idx;
idx2feeds_[idx] = op->Output("Out")[0];
} else if (op->Type() == "fetch") {
} else if (op->Type() == framework::kFetchOpType) {
int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
if (fetches_.size() <= static_cast<size_t>(idx)) {
fetches_.resize(idx + 1);
@@ -1602,9 +1724,9 @@ void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) {
PADDLE_ENFORCE_NOT_NULL(
scope,
platform::errors::InvalidArgument("The scope should not be nullptr."));
auto *var = scope->Var("feed");
auto *var = scope->Var(framework::kFeedOpType);
var->GetMutable<framework::FeedList>();
var = scope->Var("fetch");
var = scope->Var(framework::kFetchOpType);
var->GetMutable<framework::FetchList>();
}

@@ -2186,7 +2308,7 @@ void AnalysisPredictor::ClearIntermediateTensor() {
const std::string name = var->Name();
auto *variable = executor_->GetScope()->FindVar(name);
if (variable != nullptr && variable->IsType<phi::DenseTensor>() &&
name != "feed" && name != "fetch") {
name != framework::kFeedOpType && name != framework::kFetchOpType) {
VLOG(3) << "Clear Intermediate Tensor: " << name;
auto *t = variable->GetMutable<phi::DenseTensor>();
t->clear();
@@ -2652,6 +2774,11 @@ std::map<std::string, DataType> Predictor::GetOutputTypes() {

bool Predictor::Run() { return predictor_->ZeroCopyRun(); }

bool Predictor::Run(const std::vector<paddle::Tensor> &inputs,
std::vector<paddle::Tensor> *outputs) {
return predictor_->Run(inputs, outputs);
}

std::unique_ptr<Predictor> Predictor::Clone(void *stream) {
auto analysis_pred = predictor_->Clone(stream);
std::unique_ptr<Predictor> pred(new Predictor(std::move(analysis_pred)));
42 changes: 40 additions & 2 deletions paddle/fluid/inference/api/analysis_predictor.h
@@ -31,15 +31,14 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/resource_manager.h"
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/phi/core/dense_tensor.h"
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#include <gtest/gtest_prod.h>
#endif

namespace paddle_infer {
using float16 = paddle::platform::float16;
namespace experimental {
class InternalUtils;
};
@@ -150,6 +149,16 @@ class AnalysisPredictor : public PaddlePredictor {
std::vector<PaddleTensor> *output_data,
int batch_size = -1) override;

///
/// \brief Run the prediction engine (Recommended).
///
/// \param[in] inputs input tensors
/// \param[out] outputs output tensors
/// \return Whether the function executed successfully
///
bool Run(const std::vector<paddle::Tensor> &inputs,
std::vector<paddle::Tensor> *outputs) override;

///
/// \brief Get the input names
///
@@ -378,6 +387,17 @@ class AnalysisPredictor : public PaddlePredictor {
///
bool SetFeed(const std::vector<PaddleTensor> &input_datas,
framework::Scope *scope);

///
/// \brief Prepare input data, only used in Run()
///
/// \param[in] inputs input tensors
/// \param[in] scope the scope used by predictor
/// \return Whether the function executed successfully
///
bool SetFeed(const std::vector<paddle::Tensor> &inputs,
framework::Scope *scope);

///
/// \brief Get the output data, only used in Run()
///
@@ -387,6 +407,16 @@ class AnalysisPredictor : public PaddlePredictor {
///
bool GetFetch(std::vector<PaddleTensor> *output_data,
framework::Scope *scope);

///
/// \brief Get the output data, only used in Run()
///
/// \param[out] outputs output tensors
/// \param[in] scope the scope used by predictor
/// \return Whether the function executed successfully
///
bool GetFetch(std::vector<paddle::Tensor> *outputs, framework::Scope *scope);

///
/// \brief Get the output data, only used in GetFetch()
///
@@ -404,6 +434,14 @@
/// \param[in] inputs tensors
///
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run().
///
/// \param[in] inputs tensors
///
void MkldnnPreSet(const std::vector<paddle::Tensor> &inputs);

///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
2 changes: 1 addition & 1 deletion paddle/fluid/inference/api/demo_ci/CMakeLists.txt
@@ -83,7 +83,7 @@ else()
if(WITH_MKL)
set(FLAG_OPENMP "-fopenmp")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${FLAG_OPENMP}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 ${FLAG_OPENMP}")
Contributor:
Does this upgrade have any impact elsewhere?

Contributor (Author):
> Does this upgrade have any impact elsewhere?

It affects the CMakeLists settings in paddle-inference-demo; a PR has already been submitted to update them.

endif()

if(WITH_GPU)