New ir support legacy kernel instruction #55880
@@ -1,4 +1,5 @@
cc_library(
  instruction_base
  SRCS instruction_base.cc phi_kernel_instruction.cc
       legacy_kernel_instruction.cc instruction_util.cc
  DEPS phi framework_proto)
@@ -0,0 +1,175 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"
Review comment: By convention, instruction_util.h should be the first include in this file.
Review comment: Also, could the file simply be named utils.h? There is no need for the instruction prefix; the path is already quite long.
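For illustration only, a sketch of the grouping the reviewer is asking for, built from includes already present in this diff; the exact grouping rule is assumed from the comment rather than quoted from Paddle's style guide:

// The file's own header comes first ...
#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"

// ... then standard library headers ...
#include <map>
#include <string>
#include <unordered_map>
#include <vector>

// ... then the remaining project headers.
#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/device_context.h"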
#include "paddle/fluid/framework/new_executor/new_executor_defs.h" | ||||||
#include "paddle/fluid/platform/device_context.h" | ||||||
#include "paddle/fluid/platform/event.h" | ||||||
#include "paddle/ir/core/builtin_attribute.h" | ||||||
#include "paddle/ir/core/operation.h" | ||||||
#include "paddle/ir/core/value.h" | ||||||
|
||||||
#include "paddle/fluid/framework/new_executor/interpreter/interpreter_util.h" | ||||||
#include "paddle/fluid/framework/new_executor/interpreter/stream_analyzer.h" | ||||||
#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h" | ||||||
#include "paddle/fluid/platform/collective_helper.h" | ||||||
|
||||||
namespace paddle { | ||||||
namespace framework { | ||||||
|
||||||
std::vector<int> GetValueIds( | ||||||
ir::Value value, | ||||||
Scope* inner_scope, | ||||||
const std::unordered_map<::ir::Value, std::string>& value_2_var_name, | ||||||
const std::map<std::string, int>& var_name_2_id, | ||||||
const std::unordered_map<const paddle::framework::Variable*, std::string>& | ||||||
variable_2_var_name) { | ||||||
std::vector<int> ids; | ||||||
std::string var_name = value_2_var_name.at(value); | ||||||
  ids.push_back(var_name_2_id.at(var_name));
  // NOTE(zhangbo): Value maybe a VariableRefArray
  auto var = inner_scope->FindVar(var_name);
  if (var->IsType<paddle::framework::VariableRefArray>()) {
    auto& var_array = var->Get<paddle::framework::VariableRefArray>();
    for (auto item : var_array) {
      ids.push_back(var_name_2_id.at(variable_2_var_name.at(item)));
    }
  }
  return ids;
}

platform::DeviceContext* ParseDeviceContext(
    ir::Operation* op,
    platform::DeviceContext* origin_dev_ctx,
    const platform::Place& place,
    const std::string& execution_stream,
    const int stream_priority) {
  auto op_attributes = op->attributes();
Review comment (suggested change): auto and auto& are not the same thing here; binding the attribute map with a plain auto makes a copy.
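A minimal, self-contained sketch of the reviewer's point; AttributeMap and attributes() below are stand-ins (not Paddle's API) used only to show the copy-versus-reference difference:

#include <iostream>
#include <map>
#include <string>

// Stand-in for an op attribute map that is returned by const reference.
using AttributeMap = std::map<std::string, std::string>;

const AttributeMap& attributes() {
  static AttributeMap attrs{{"op_name", "pd.add"}};
  return attrs;
}

int main() {
  auto copied = attributes();          // auto deduces AttributeMap: makes a full copy
  const auto& aliased = attributes();  // const auto& binds to the existing map, no copy

  std::cout << (&copied == &attributes()) << "\n";   // prints 0: a distinct object
  std::cout << (&aliased == &attributes()) << "\n";  // prints 1: the same object
  return 0;
}

Whether the copy matters for performance in this function is a judgement call; the sketch only shows what the two spellings do.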
  auto op_name =
      op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
  interpreter::ContextManager& ctx_manager =
      interpreter::ContextManager::Instance();

  platform::DeviceContext* dev_ctx = nullptr;

  // only gpu need update. xpu not need, because xpu memcpy op kernel is
  // synchronous.
  if (platform::is_gpu_place(place) || platform::is_custom_place(place)) {
    VLOG(6) << "Parse DeviceContext for " << op_name
            << ", execution stream = " << execution_stream;
    if (execution_stream != kDefaultStream) {
      dev_ctx = ctx_manager
                    .Get(std::string(kCustomStream) + "-" + execution_stream,
                         place,
                         stream_priority)
                    .get()
                    .get();
      interpreter::SetDeviceCommContext(op, dev_ctx);
      return dev_ctx;
    }

    if (op_name == interpreter::kMemcpyD2H) {
      dev_ctx =
          ctx_manager.Get(std::string(kD2HStream), place, stream_priority)
              .get()
              .get();
      interpreter::SetDeviceCommContext(op, dev_ctx);
      return dev_ctx;
    } else if (op_name == interpreter::kMemcpyH2D) {
      dev_ctx =
          ctx_manager.Get(std::string(kH2DStream), place, stream_priority)
              .get()
              .get();
      interpreter::SetDeviceCommContext(op, dev_ctx);
      return dev_ctx;
    }

#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
    // NOTE(Ruibiao): Here supports multi-stream overlap for c_allreduce_sum
    // with use_cal_stream==false by returning a device context getting from
    // the global NCCLCommContext instance. Because when use_calc_stream==false,
    // in OP kernel, the NCCL communication will be launched to the stream
    // directly getting from the global NCCLCommContext instance rather than
    // the DeviceContext passed from executor (see CAllReduceOpCUDAKernel in
    // c_allreduce_op.h). Now it is just a temporary solution for ONLY
    // c_allreduce_sum which is used in ResNet50 distributed training.
    if (op_name == "c_allreduce_sum" && op_attributes.at("use_calc_stream")
                                                .dyn_cast<::ir::BoolAttribute>()
                                                .data() == false) {
      int ring_id =
          op_attributes.at("ring_id").dyn_cast<::ir::Int32Attribute>().data();
      return platform::NCCLCommContext::Instance()
          .Get(ring_id, place)
          ->dev_context();
    }
#endif
  }

  if (origin_dev_ctx != nullptr) {
    interpreter::SetDeviceCommContext(op, origin_dev_ctx);
  }
  return origin_dev_ctx;
}

OpFuncType AnalyseOpFuncType(::ir::Operation* op,
                             const platform::Place& place) {
  if (platform::is_cpu_place(place)) {
    return OpFuncType::kCpuSync;
  }

  auto kernel_key = op->attributes()
                        .at("kernel_key")
                        .dyn_cast<dialect::KernelAttribute>()
                        .data();
  if (phi::TransToPhiPlace(kernel_key.backend()).GetType() ==
      phi::AllocationType::CPU) {
    return OpFuncType::kCpuSync;
  }

  PADDLE_ENFORCE_EQ(interpreter::IsSupportedHeterPlace(place),
                    true,
                    phi::errors::Fatal("Unsupported current place %s", place));

  // Some GPU OPs do not launch CUDA Kernel, but spend a lot of time on CPU
  // computing. They execute serially in device thread and block CUDA kernel
  // launching in other GPU OPs. To improve performance, set them as kGpuSync
  // and so that they would be dispatched to host thread.
  auto op_attributes = op->attributes();
Review comment (suggested change): same point as the earlier auto/auto& comment in ParseDeviceContext; taking the attribute map by reference eliminates one implicit copy construction.
  auto op_name =
      op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
  if (op_name == kCoalesceTensor &&
      (!platform::is_xpu_place(place) ||
       op->attribute<ir::BoolAttribute>("persist_output").data() == false) &&
      op->attribute<ir::BoolAttribute>("set_constant").data() == false &&
      op->attribute<ir::BoolAttribute>("copy_data").data() == false) {
    return OpFuncType::kGpuSync;
  }

  // for memcpy explicitly called by user
  if (platform::is_gpu_place(place) && op_name == interpreter::kMemcpyD2H) {
    return OpFuncType::kGpuSync;
  }

  if (op_name == "shape") {
    return OpFuncType::kGpuSync;
  }
  return OpFuncType::kGpuAsync;
}

}  // namespace framework
}  // namespace paddle
@@ -0,0 +1,49 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/ir/core/builtin_attribute.h"
#include "paddle/ir/core/operation.h"
#include "paddle/ir/core/value.h"

namespace paddle {
namespace framework {

std::vector<int> GetValueIds(
    ir::Value value,
    Scope* inner_scope,
    const std::unordered_map<::ir::Value, std::string>& value_2_var_name,
    const std::map<std::string, int>& var_name_2_id,
    const std::unordered_map<const paddle::framework::Variable*, std::string>&
        variable_2_var_name);

platform::DeviceContext* ParseDeviceContext(
    ir::Operation* op,
    platform::DeviceContext* origin_dev_ctx,
    const platform::Place& place,
    const std::string& execution_stream,
    const int stream_priority);

OpFuncType AnalyseOpFuncType(::ir::Operation* op, const platform::Place& place);

}  // namespace framework
}  // namespace paddle
Review comment: The value.h header does not actually need to be included here; Value already has a forward declaration.
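A minimal sketch of the language rule this comment relies on: a forward-declared (incomplete) type can appear in a function declaration, even as a by-value parameter, and the full class definition is only needed where the function is defined or a Value is actually created. The ir namespace and signature below mirror this header but are written from scratch purely as an illustration; whether Paddle's other headers already provide the forward declaration is taken from the review comment, not verified here.

#include <vector>

namespace ir {
class Value;  // forward declaration; no need to include value.h for the declaration below
}  // namespace ir

// Declaring the function is fine with the incomplete type ir::Value.
std::vector<int> GetValueIds(ir::Value value);

// The .cc file that defines GetValueIds (or any caller constructing a Value)
// is where the full definition, i.e. "paddle/ir/core/value.h", must be included.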