[cherry-pick] print output tensor hook support while op #54433

Merged 1 commit on Jun 8, 2023
10 changes: 7 additions & 3 deletions paddle/fluid/framework/naive_executor.cc
@@ -66,6 +66,10 @@ void NaiveExecutor::Run() {
                                      platform::NvtxRangeColor::Green);
 #endif
 
+    if (op->Type() == "while") {
+      op->SetOutputHooks(hookfuncs_);
+    }
+
     op->Run(*scope_, place_);
 
     // Update the shared_holder so that only records the max one.
@@ -97,8 +101,8 @@ void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePop();
 #endif
-    for (auto &func : hookfunc_) {
-      func(op.get());
+    for (auto &func : hookfuncs_) {
+      func(op.get(), scope_);
     }
   }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
@@ -178,7 +182,7 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
 }
 
 void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
-  hookfunc_.push_back(hookfunc);
+  hookfuncs_.push_back(hookfunc);
 }
 
 void NaiveExecutor::MakeReusePlan(
4 changes: 2 additions & 2 deletions paddle/fluid/framework/naive_executor.h
@@ -38,7 +38,7 @@ class Scope;
 
 class NaiveExecutor {
  public:
-  using HookFunc = std::function<void(OperatorBase*)>;
+  using HookFunc = std::function<void(OperatorBase*, Scope*)>;
 
   explicit NaiveExecutor(const platform::Place& place) : place_(place) {}
 
@@ -86,7 +86,7 @@ class NaiveExecutor {
   std::vector<std::unique_ptr<OperatorBase>> ops_;
   Scope* scope_{nullptr};
 
-  std::vector<HookFunc> hookfunc_;
+  std::vector<HookFunc> hookfuncs_;
 
   // Record information that tensor_a should ShareBufferWith tensor_b.
   std::unordered_map<OperatorBase*, std::unordered_map<phi::DenseTensor*, int>>
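
The signature change above is the core of this PR: a hook now receives the Scope the operator actually ran in, instead of relying on a scope captured at registration time. A minimal sketch of a hook written against the new HookFunc follows; the helper name and logging body are illustrative, not part of the PR:

#include <iostream>

#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"

namespace fw = paddle::framework;

// Hypothetical helper: log every op output that exists in the scope the op
// ran in. With the old one-argument HookFunc, lookups had to go through a
// fixed sub-scope, which missed variables created per while-loop iteration.
void AttachLoggingHook(fw::NaiveExecutor* executor) {
  executor->RegisterOutputHook([](fw::OperatorBase* op, fw::Scope* scope) {
    for (auto& output : op->Outputs()) {
      for (auto& var_name : output.second) {
        if (scope->FindVar(var_name) != nullptr) {
          std::cout << op->Type() << " produced " << var_name << "\n";
        }
      }
    }
  });
}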
4 changes: 4 additions & 0 deletions paddle/fluid/framework/new_executor/interpretercore.cc
@@ -949,6 +949,10 @@ void InterpreterCore::RunOperator(const Instruction& instr_node) {
 #endif
   }
 
+  for (auto& hook : hookfuncs_) {
+    hook(op, local_scope);
+  }
+
   // for debug nan/inf
   if (op_with_kernel != nullptr && FLAGS_check_nan_inf) {
     VLOG(4) << "Check nan/inf";
7 changes: 7 additions & 0 deletions paddle/fluid/framework/new_executor/interpretercore.h
@@ -77,6 +77,11 @@ class InterpreterCore {
 
   const platform::Place& GetPlace() const { return place_; }
 
+  using HookFunc = std::function<void(OperatorBase*, Scope*)>;
+  void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) {
+    hookfuncs_ = hookfuncs;
+  }
+
  private:
   DISABLE_COPY_AND_ASSIGN(InterpreterCore);
   // build graph
@@ -184,6 +189,8 @@
   std::vector<size_t> trace_execute_order_;
 
   InstructionSchedulingPriorityLess instruction_scheduling_priority_less;
+
+  std::vector<HookFunc> hookfuncs_;
 };
 
 }  // namespace framework
7 changes: 7 additions & 0 deletions paddle/fluid/framework/operator.h
@@ -371,6 +371,11 @@ class OperatorBase {
 
   void SetId(uint64_t id) { id_ = id; }
 
+  using HookFunc = std::function<void(OperatorBase*, Scope*)>;
+  void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) {
+    hookfuncs_ = hookfuncs;
+  }
+
  protected:
   std::string type_;
   // NOTE: in case of OpGrad, inputs_ contains:
@@ -399,6 +404,8 @@
   // Whether this operator executes in an Executor.
   bool run_by_executor_{true};
 
+  std::vector<HookFunc> hookfuncs_;
+
  private:
   void GenerateTemporaryNames();
   void CheckAllInputOutputSet() const;
55 changes: 15 additions & 40 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -2638,47 +2638,26 @@ void AnalysisPredictor::RegisterOutputHook(
     const OutputTensorHookFunc &hookfunc) {
   static std::once_flag register_hook_flag;
   std::call_once(register_hook_flag, [this] {
-    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
-      for (auto &output : op->Outputs()) {
-        for (auto &var_name : output.second) {
-          auto *var = this->sub_scope_->FindVar(var_name);
-          if (!var || !var->IsType<phi::DenseTensor>()) continue;
-          auto dense_tensor = var->Get<phi::DenseTensor>();
-          if (!dense_tensor.initialized()) continue;
-          auto tensor = this->GetOutputTensor(var_name);
-          for (auto &hookfunc : this->hookfuncs_) {
-            hookfunc(op->Type(), var_name, *tensor);
+    executor_->RegisterOutputHook(
+        [this](framework::OperatorBase *op, framework::Scope *scope) {
+          for (auto &output : op->Outputs()) {
+            for (auto &var_name : output.second) {
+              auto *var = scope->FindVar(var_name);
+              if (!var || !var->IsType<phi::DenseTensor>()) continue;
+              auto dense_tensor = var->Get<phi::DenseTensor>();
+              if (!dense_tensor.initialized()) continue;
+              auto tensor = paddle::Tensor(
+                  std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
+              for (auto &hookfunc : this->hookfuncs_) {
+                hookfunc(op->Type(), var_name, tensor);
+              }
+            }
           }
-        }
-      }
-    });
+        });
   });
   hookfuncs_.push_back(hookfunc);
 }
 
-void AnalysisPredictor::RegisterOutputHook(
-    const OutputTensorHookFunc_V2 &hookfunc) {
-  static std::once_flag register_hook_flag;
-  std::call_once(register_hook_flag, [this] {
-    executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
-      for (auto &output : op->Outputs()) {
-        for (auto &var_name : output.second) {
-          auto *var = this->sub_scope_->FindVar(var_name);
-          if (!var || !var->IsType<phi::DenseTensor>()) continue;
-          auto dense_tensor = var->Get<phi::DenseTensor>();
-          if (!dense_tensor.initialized()) continue;
-          auto tensor = paddle::Tensor(
-              std::make_shared<phi::DenseTensor>(dense_tensor), var_name);
-          for (auto &hookfunc : this->hookfuncs_v2_) {
-            hookfunc(op->Type(), var_name, tensor);
-          }
-        }
-      }
-    });
-  });
-  hookfuncs_v2_.push_back(hookfunc);
-}
-
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
     const AnalysisConfig &config) {
@@ -2964,10 +2943,6 @@ void Predictor::RegisterOutputHook(const OutputTensorHookFunc &hookfunc) {
   predictor_->RegisterOutputHook(hookfunc);
 }
 
-void Predictor::RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) {
-  predictor_->RegisterOutputHook(hookfunc);
-}
-
 void *Predictor::GetExecStream() const { return predictor_->GetExecStream(); }
 
 int GetNumBytesOfDataType(DataType dtype) {
13 changes: 1 addition & 12 deletions paddle/fluid/inference/api/analysis_predictor.h
@@ -318,16 +318,6 @@ class AnalysisPredictor : public PaddlePredictor {
   ///
   Argument::fusion_statis_t fusion_statis() { return fusion_statis_; }
 
-  ///
-  /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
-  /// The hook function signature is void(const std::string&, const
-  /// std::string&, const paddle_infer::Tensor&>). Here, the first parameter is
-  /// op's type, the second param is output var name of the op, and the third
-  /// parameter is output tensor with the var name.
-  ///
-  void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
-
   ///
   /// \brief Register a output hook function to operate the intermediate tensor
   /// of op output. when using this function, memory reuse should be tured off.
@@ -336,7 +326,7 @@ class AnalysisPredictor : public PaddlePredictor {
   /// type, the second param is output var name of the op, and the third
   /// parameter is output tensor with the var name.
   ///
-  void RegisterOutputHook(const OutputTensorHookFunc_V2 &hookfunc) override;
+  void RegisterOutputHook(const OutputTensorHookFunc &hookfunc) override;
 
   ///
   /// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
@@ -608,7 +598,6 @@ class AnalysisPredictor : public PaddlePredictor {
 
  private:
   std::vector<OutputTensorHookFunc> hookfuncs_;
-  std::vector<OutputTensorHookFunc_V2> hookfuncs_v2_;
 
   // Some status here that help to determine the status inside the predictor.
   bool status_is_cloned_{false};
13 changes: 1 addition & 12 deletions paddle/fluid/inference/api/paddle_api.h
@@ -39,7 +39,6 @@ using PaddleDType = paddle_infer::DataType;
 using PaddlePlace = paddle_infer::PlaceType;
 using PaddleDataLayout = paddle_infer::DataLayout;
 using paddle_infer::OutputTensorHookFunc;
-using paddle_infer::OutputTensorHookFunc_V2;
 
 /// \brief Memory manager for PaddleTensor.
 ///
@@ -314,16 +313,6 @@ class PD_INFER_DECL PaddlePredictor {
   ///
   virtual uint64_t TryShrinkMemory() { return 0; }
 
-  ///
-  /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
-  /// The hook function signature is void(const std::string&, const
-  /// std::string&, const paddle_infer::Tensor&>). Here, the first parameter is
-  /// op's type, the second param is output var name of the op, and the third
-  /// parameter is output tensor with the var name.
-  ///
-  virtual void RegisterOutputHook(const OutputTensorHookFunc& hookfunc) {}
-
   ///
   /// \brief Register a output hook function to operate the intermediate tensor
   /// of op output. when using this function, memory reuse should be tured off.
@@ -332,7 +321,7 @@ class PD_INFER_DECL PaddlePredictor {
   /// type, the second param is output var name of the op, and the third
   /// parameter is output tensor with the var name.
   ///
-  virtual void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc) {}
+  virtual void RegisterOutputHook(const OutputTensorHookFunc& hookfunc) {}
 
   /// \brief Clone an existing predictor
   /// When using clone, the same network will be created,
10 changes: 0 additions & 10 deletions paddle/fluid/inference/api/paddle_inference_api.h
@@ -199,16 +199,6 @@ class PD_INFER_DECL Predictor {
   ///
   void RegisterOutputHook(const OutputTensorHookFunc& hookfunc);
 
-  ///
-  /// \brief Register a output hook function to operate the intermediate tensor
-  /// of op output. when using this function, memory reuse should be tured off.
-  /// The hook function signature is void(const std::string&, const
-  /// std::string&, const Tensor&>). Here, the first parameter is op's
-  /// type, the second param is output var name of the op, and the third
-  /// parameter is output tensor with the var name.
-  ///
-  void RegisterOutputHook(const OutputTensorHookFunc_V2& hookfunc);
-
   ///
   /// \brief Get the execution stream on devices with a concept of stream,
   /// otherwise returns nullptr.
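
With the V2 variant folded in, OutputTensorHookFunc is the single registration point and always delivers a paddle::Tensor. A usage sketch against the public API follows; the model paths are placeholders, and per the doc comment above, memory reuse should be turned off when hooks are in use:

#include <iostream>
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Placeholder model paths; illustrative only.
void CreatePredictorWithPrintHook() {
  paddle_infer::Config config;
  config.SetModel("model/__model__", "model/__params__");
  auto predictor = paddle_infer::CreatePredictor(config);

  // Fires for each op output, including outputs produced inside a while op.
  predictor->RegisterOutputHook([](const std::string& op_type,
                                   const std::string& var_name,
                                   const paddle::Tensor& tensor) {
    std::cout << op_type << " wrote " << var_name << "\n";
  });
}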
6 changes: 1 addition & 5 deletions paddle/fluid/inference/api/paddle_tensor.h
@@ -36,11 +36,7 @@ namespace paddle_infer {
 /// Strings for text data.
 using Strings = std::vector<std::string>;
 
-class Tensor;
-using OutputTensorHookFunc =
-    std::function<void(const std::string&, const std::string&, const Tensor&)>;
-
-using OutputTensorHookFunc_V2 = std::function<void(
+using OutputTensorHookFunc = std::function<void(
     const std::string&, const std::string&, const paddle::Tensor&)>;
 
 typedef void (*CallbackFunc)(void*);
2 changes: 2 additions & 0 deletions paddle/fluid/operators/controlflow/while_op.cc
@@ -220,6 +220,8 @@ class WhileOp : public framework::OperatorBase {
                                         dev_place, *block, &placeholder, execution_config));
     }
 
+    core_->SetOutputHooks(hookfuncs_);
+
     if (!is_test) {
      while (cond_data) {
        auto &current_scope = scope.NewScope();
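
Read together, the hunks form a relay: NaiveExecutor::Run() hands its hook list to any op whose type is "while" via OperatorBase::SetOutputHooks(); WhileOp forwards that list to the InterpreterCore that executes the loop body; and InterpreterCore::RunOperator() invokes each hook with the op and the local scope of the current iteration. That per-iteration scope is exactly what the new Scope* parameter carries, which is what lets a hook observe tensors that exist only inside the loop.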
6 changes: 1 addition & 5 deletions paddle/fluid/pybind/inference_api.cc
@@ -1096,11 +1096,7 @@ void BindPaddleInferPredictor(py::module *m) {
       .def("clear_intermediate_tensor",
            &paddle_infer::Predictor::ClearIntermediateTensor)
       .def("register_output_hook",
-           py::overload_cast<const paddle_infer::OutputTensorHookFunc &>(
-               &paddle_infer::Predictor::RegisterOutputHook))
-      .def("register_output_hook_v2",
-           py::overload_cast<const paddle_infer::OutputTensorHookFunc_V2 &>(
-               &paddle_infer::Predictor::RegisterOutputHook));
+           &paddle_infer::Predictor::RegisterOutputHook);
 }
 
 void BindZeroCopyTensor(py::module *m) {
47 changes: 0 additions & 47 deletions test/cpp/inference/api/analysis_predictor_tester.cc
@@ -668,53 +668,6 @@ TEST(Predictor, Streams) {
 #endif
 
-TEST(AnalysisPredictor, OutputTensorHookFunc) {
-  auto hookfunc = [](const std::string& type,
-                     const std::string& var_name,
-                     const Tensor& tensor) { LOG(INFO) << "in hook function"; };
-
-  {
-    Config config;
-    config.SetModel(FLAGS_dirname);
-    config.EnableUseGpu(100, 0);
-
-    auto predictor = CreatePredictor(config);
-
-    predictor->RegisterOutputHook(hookfunc);
-    auto w0 = predictor->GetInputHandle("firstw");
-    auto w1 = predictor->GetInputHandle("secondw");
-    auto w2 = predictor->GetInputHandle("thirdw");
-    auto w3 = predictor->GetInputHandle("forthw");
-    w0->Reshape({4, 1});
-    w1->Reshape({4, 1});
-    w2->Reshape({4, 1});
-    w3->Reshape({4, 1});
-    auto* w0_data = w0->mutable_data<int64_t>(PlaceType::kCPU);
-    auto* w1_data = w1->mutable_data<int64_t>(PlaceType::kCPU);
-    auto* w2_data = w2->mutable_data<int64_t>(PlaceType::kCPU);
-    auto* w3_data = w3->mutable_data<int64_t>(PlaceType::kCPU);
-    for (int i = 0; i < 4; i++) {
-      w0_data[i] = i;
-      w1_data[i] = i;
-      w2_data[i] = i;
-      w3_data[i] = i;
-    }
-    predictor->Run();
-    predictor->TryShrinkMemory();
-  }
-
-  {
-    Config config;
-    config.SetModel(FLAGS_dirname);
-    config.EnableMemoryOptim();
-    config.EnableUseGpu(100, 0);
-
-    auto predictor = CreatePredictor(config);
-
-    predictor->RegisterOutputHook(hookfunc);
-  }
-}
-
 TEST(AnalysisPredictor, OutputTensorHookFunc_V2) {
   auto hookfunc = [](const std::string& type,
                      const std::string& var_name,
                      const paddle::Tensor& tensor) {