New ir support legacy kernel instruction #55880

Merged

Changes from all commits (36 commits)
06d6ede
new ir remove fetch list
phlrain Jul 28, 2023
f4d5f2b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Jul 28, 2023
774196e
fix pattern rewrite bug
phlrain Jul 29, 2023
d2f5ac7
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Jul 30, 2023
7e60294
try to remove constant fold
phlrain Jul 30, 2023
4b38bad
revert code
phlrain Jul 31, 2023
354b1f9
add pattern rewrite test flag
phlrain Jul 31, 2023
156d46e
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Jul 31, 2023
edf3ce2
fix multi fetch
phlrain Aug 1, 2023
b070b68
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 1, 2023
c7206c1
remove usless code
phlrain Aug 1, 2023
aa3af1a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 1, 2023
b80e3d4
new ir support legacy kernel instraction
phlrain Aug 1, 2023
da6b3d4
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 1, 2023
755eace
new ir support legacy kernel instruction
phlrain Aug 1, 2023
adf66ea
add scope prefix
phlrain Aug 1, 2023
95c53f3
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 1, 2023
e8f64bc
update
phlrain Aug 2, 2023
f770a71
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 2, 2023
2f8760d
Merge commit 'refs/pull/55796/head' of https://github.com/PaddlePaddl…
phlrain Aug 2, 2023
e97beed
update
phlrain Aug 2, 2023
ee3f22d
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 2, 2023
02ea55a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 4, 2023
3eeec33
update
phlrain Aug 5, 2023
8580a9d
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 5, 2023
cdf32c7
update
phlrain Aug 5, 2023
b117bd5
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 5, 2023
79a57bf
fix
phlrain Aug 6, 2023
33947b0
revert channel shuffl test
phlrain Aug 6, 2023
14c4e9b
polish code
phlrain Aug 6, 2023
55f6306
try to fix windows compile error
phlrain Aug 6, 2023
a17ab14
polish code
phlrain Aug 7, 2023
54546df
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
phlrain Aug 7, 2023
1f713ed
update
phlrain Aug 7, 2023
ca68a2b
update
phlrain Aug 8, 2023
8ec5558
revert op test
phlrain Aug 8, 2023
Diff view
@@ -1,4 +1,5 @@
cc_library(
instruction_base
SRCS instruction_base.cc phi_kernel_instruction.cc
legacy_kernel_instruction.cc instruction_util.cc
DEPS phi framework_proto)
@@ -13,9 +13,15 @@
// limitations under the License.

#include "paddle/fluid/framework/new_executor/instruction/instruction_base.h"

#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
#include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"

#include "paddle/fluid/framework/new_executor/interpreter/stream_analyzer.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/ir/core/builtin_attribute.h"

namespace paddle {
namespace framework {

@@ -93,5 +99,59 @@ void InstructionBase::SetOutputs(
output_index_ = outputs;
}

void InstructionBase::InitInputsOutputsIds(
::ir::Operation* op,
Scope* inner_scope,
const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
const std::map<std::string, int>& var_name_2_id,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name) {
auto op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
std::unordered_map<ir::Value, std::vector<int>> inputs;
for (size_t i = 0; i < op->num_operands(); i++) {
ir::Value value = op->operand_source(i);
if (value) {
PADDLE_ENFORCE_NE(
value_2_var_name.find(value),
value_2_var_name.end(),
phi::errors::PreconditionNotMet(
"input should in name map, [%d] 'th input of [%s] op",
i,
op_name));
std::vector<int> inputs_id = GetValueIds(value,
inner_scope,
value_2_var_name,
var_name_2_id,
variable_2_var_name);
inputs.emplace(value, inputs_id);
}
}
SetInputs(inputs);
VLOG(8) << "finish process inputs_index";
std::unordered_map<ir::Value, std::vector<int>> outputs;
for (size_t i = 0; i < op->num_results(); i++) {
ir::Value value = op->result(i);
if (value && value.type()) {
PADDLE_ENFORCE_NE(
value_2_var_name.find(value),
value_2_var_name.end(),
phi::errors::PreconditionNotMet(
"input should in name map, [%d] 'th input of [%s] op",
i,
op_name));
std::vector<int> outputs_id = GetValueIds(value,
inner_scope,
value_2_var_name,
var_name_2_id,
variable_2_var_name);
outputs.emplace(value, outputs_id);
}
}
SetOutputs(outputs);
VLOG(8) << "finish process outputs_index";
}

} // namespace framework
} // namespace paddle
@@ -21,6 +21,7 @@

#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/ir/core/value.h"
Contributor:

Actually there is no need to include the value.h header here; Value already has a forward declaration.
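(A minimal sketch of why the forward declaration suffices; the file and class names below are hypothetical, for illustration only. A header that only passes ir::Value by pointer or reference compiles against the forward declaration alone.)

// example_instruction.h -- hypothetical illustration
namespace ir {
class Value;  // forward declaration: sufficient for pointer/reference uses
}  // namespace ir

namespace paddle {
namespace framework {
class ExampleInstruction {
 public:
  void SetInput(const ::ir::Value& value);  // needs only the declaration
};
}  // namespace framework
}  // namespace paddle

// example_instruction.cc would include "paddle/ir/core/value.h" to get the
// full definition of ir::Value for the method body.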


namespace ir {
class Value;
@@ -137,7 +138,15 @@ class InstructionBase {

virtual const std::string& Name() const = 0;

private:
void InitInputsOutputsIds(
::ir::Operation* op,
Scope* inner_scope,
const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
const std::map<std::string, int>& var_name_2_id,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name);

protected:
size_t id_;

bool is_artificial_; // Instruction is artificial means that it is only used
175 changes: 175 additions & 0 deletions paddle/fluid/framework/new_executor/instruction/instruction_util.cc
@@ -0,0 +1,175 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
Contributor:

By convention, instruction_util.h should be the first include in this file.
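(A hypothetical sketch of that convention: a .cc file includes its own header first, which also verifies the header is self-contained.)

// instruction_util.cc -- illustrative include order only
#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"  // own header first

#include <map>  // then standard library headers
#include <string>

#include "paddle/fluid/framework/new_executor/new_executor_defs.h"  // then project headers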

Contributor:

Also, could the file simply be named utils.h? There is no need for the instruction prefix; the path is already quite long.


#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/ir/core/builtin_attribute.h"
#include "paddle/ir/core/operation.h"
#include "paddle/ir/core/value.h"

#include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h"
#include "paddle/fluid/framework/new_executor/interpreter/stream_analyzer.h"
#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h"
#include "paddle/fluid/platform/collective_helper.h"

namespace paddle {
namespace framework {

std::vector<int> GetValueIds(
ir::Value value,
Scope* inner_scope,
const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
const std::map<std::string, int>& var_name_2_id,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name) {
std::vector<int> ids;
std::string var_name = value_2_var_name.at(value);
Contributor:

Suggested change:
- std::string var_name = value_2_var_name.at(value);
+ auto& var_name = value_2_var_name.at(value);

ids.push_back(var_name_2_id.at(var_name));
// NOTE(zhangbo): Value may be a VariableRefArray
auto var = inner_scope->FindVar(var_name);
if (var->IsType<paddle::framework::VariableRefArray>()) {
auto& var_array = var->Get<paddle::framework::VariableRefArray>();
for (auto item : var_array) {
ids.push_back(var_name_2_id.at(variable_2_var_name.at(item)));
}
}
return ids;
}

platform::DeviceContext* ParseDeviceContext(
ir::Operation* op,
platform::DeviceContext* origin_dev_ctx,
const platform::Place& place,
const std::string& execution_stream,
const int stream_priority) {
auto op_attributes = op->attributes();
Contributor:

Suggested change:
- auto op_attributes = op->attributes();
+ auto& op_attributes = op->attributes();

auto and auto& are different: plain auto deduces a value type and makes a copy, while auto& binds a reference to the existing object.
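(A standalone sketch of the difference, assuming nothing from Paddle; illustrative only.)

#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, int> attrs{{"op_name", 1}};

  auto copied = attrs;    // auto deduces std::map<...>: makes a full copy
  auto& aliased = attrs;  // auto& binds a reference: no copy is made

  aliased["ring_id"] = 2;  // modifies attrs itself
  copied["unused"] = 3;    // modifies only the copy

  std::cout << attrs.size() << " " << copied.size() << "\n";  // prints "2 2"
  return 0;
}

The reviewer's point here is that binding op->attributes() with auto& avoids an implicit copy of the whole attribute map.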

auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
interpreter::ContextManager& ctx_manager =
interpreter::ContextManager::Instance();

platform::DeviceContext* dev_ctx = nullptr;

// Only GPU needs updating; XPU does not, because the XPU memcpy op kernel is
// synchronous.
if (platform::is_gpu_place(place) || platform::is_custom_place(place)) {
VLOG(6) << "Parse DeviceContext for " << op_name
<< ", execution stream = " << execution_stream;
if (execution_stream != kDefaultStream) {
dev_ctx = ctx_manager
.Get(std::string(kCustomStream) + "-" + execution_stream,
place,
stream_priority)
.get()
.get();
interpreter::SetDeviceCommContext(op, dev_ctx);
return dev_ctx;
}

if (op_name == interpreter::kMemcpyD2H) {
dev_ctx = ctx_manager.Get(std::string(kD2HStream), place, stream_priority)
.get()
.get();
interpreter::SetDeviceCommContext(op, dev_ctx);
return dev_ctx;
} else if (op_name == interpreter::kMemcpyH2D) {
dev_ctx = ctx_manager.Get(std::string(kH2DStream), place, stream_priority)
.get()
.get();
interpreter::SetDeviceCommContext(op, dev_ctx);
return dev_ctx;
}

#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
// NOTE(Ruibiao): This supports multi-stream overlap for c_allreduce_sum with
// use_calc_stream==false by returning a device context obtained from the
// global NCCLCommContext instance. When use_calc_stream==false, the OP kernel
// launches the NCCL communication on a stream obtained directly from the
// global NCCLCommContext instance rather than on the DeviceContext passed
// from the executor (see CAllReduceOpCUDAKernel in c_allreduce_op.h). For now
// this is just a temporary solution ONLY for c_allreduce_sum, which is used
// in ResNet50 distributed training.
if (op_name == "c_allreduce_sum" && op_attributes.at("use_calc_stream")
.dyn_cast<::ir::BoolAttribute>()
.data() == false) {
int ring_id =
op_attributes.at("ring_id").dyn_cast<::ir::Int32Attribute>().data();
return platform::NCCLCommContext::Instance()
.Get(ring_id, place)
->dev_context();
}
#endif
}

if (origin_dev_ctx != nullptr) {
interpreter::SetDeviceCommContext(op, origin_dev_ctx);
}
return origin_dev_ctx;
}

OpFuncType AnalyseOpFuncType(::ir::Operation* op,
const platform::Place& place) {
if (platform::is_cpu_place(place)) {
return OpFuncType::kCpuSync;
}

auto kernel_key = op->attributes()
.at("kernel_key")
.dyn_cast<dialect::KernelAttribute>()
.data();
if (phi::TransToPhiPlace(kernel_key.backend()).GetType() ==
phi::AllocationType::CPU) {
return OpFuncType::kCpuSync;
}

PADDLE_ENFORCE_EQ(interpreter::IsSupportedHeterPlace(place),
true,
phi::errors::Fatal("Unsupported current place %s", place));

// Some GPU OPs do not launch a CUDA kernel but spend a lot of time on CPU
// computation. They execute serially in the device thread and block CUDA
// kernel launches from other GPU OPs. To improve performance, set them to
// kGpuSync so that they are dispatched to the host thread.
auto op_attributes = op->attributes();
Contributor:

Suggested change:
- auto op_attributes = op->attributes();
+ auto& op_attributes = op->attributes();

This eliminates one implicit copy construction.

auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
if (op_name == kCoalesceTensor &&
(!platform::is_xpu_place(place) ||
op->attribute<ir::BoolAttribute>("persist_output").data() == false) &&
op->attribute<ir::BoolAttribute>("set_constant").data() == false &&
op->attribute<ir::BoolAttribute>("copy_data").data() == false) {
return OpFuncType::kGpuSync;
}

// For memcpy ops explicitly called by the user
if (platform::is_gpu_place(place) && op_name == interpreter::kMemcpyD2H) {
return OpFuncType::kGpuSync;
}

if (op_name == "shape") {
return OpFuncType::kGpuSync;
}
return OpFuncType::kGpuAsync;
}

} // namespace framework
} // namespace paddle
49 changes: 49 additions & 0 deletions paddle/fluid/framework/new_executor/instruction/instruction_util.h
@@ -0,0 +1,49 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/ir/core/builtin_attribute.h"
#include "paddle/ir/core/operation.h"
#include "paddle/ir/core/value.h"
namespace paddle {
namespace framework {

std::vector<int> GetValueIds(
ir::Value value,
Scope* inner_scope,
const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
const std::map<std::string, int>& var_name_2_id,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name);

platform::DeviceContext* ParseDeviceContext(
ir::Operation* op,
platform::DeviceContext* origin_dev_ctx,
const platform::Place& place,
const std::string& execution_stream,
const int stream_priority);

OpFuncType AnalyseOpFuncType(::ir::Operation* op, const platform::Place& place);

} // namespace framework
} // namespace paddle