Skip to content

Commit

Permalink
[NewIR]Change feed list to variable list && support GPU (PaddlePaddle…
Browse files Browse the repository at this point in the history
…#55401)

* add feed with place op

* remove useless unit test

* update mkldnn

* update

* new ir support builtin slice op

* fix phi kernel adaptor bug

* add enable_static

* remove useless test case

* change feed list to single variable

* support gpu

* fix bug

* remove template

* add more data type

* fix compile bug
  • Loading branch information
phlrain authored and wyf committed Aug 30, 2023
1 parent 0f8f68a commit 196e018
Show file tree
Hide file tree
Showing 14 changed files with 280 additions and 59 deletions.
34 changes: 25 additions & 9 deletions paddle/fluid/framework/feed_fetch_method.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ limitations under the License. */

#include "glog/logging.h"

PHI_DECLARE_bool(enable_new_ir_in_executor);

namespace phi {
class DenseTensor;
} // namespace phi
Expand All @@ -34,16 +36,30 @@ void SetFeedVariable(Scope* scope,
// If var_name Variable is not found in GlobalScope, a new variable will
// be created.
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs = *(g_feed_value->GetMutable<FeedList>());
if (index >= feed_inputs.size()) {
feed_inputs.resize(index + 1);
if (FLAGS_enable_new_ir_in_executor) {
// shared data with input tensor
auto inner_var_name = var_name + "_" + std::to_string(index);
auto feed_ele = scope->Var(inner_var_name);
if (!feed_ele->IsType<phi::DenseTensor>()) {
VLOG(3) << "Reset " << inner_var_name << " to phi::DenseTensor";
feed_ele->Clear();
}
auto val = feed_ele->GetMutable<phi::DenseTensor>();
val->ShareDataWith(input);
// set lod
val->set_lod(input.lod());
} else {
Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs = *(g_feed_value->GetMutable<FeedList>());
if (index >= feed_inputs.size()) {
feed_inputs.resize(index + 1);
}
// shared data with input tensor
auto& val = PADDLE_GET(phi::DenseTensor, feed_inputs[index]);
val.ShareDataWith(input);
// set lod
val.set_lod(input.lod());
}
// shared data with input tensor
auto& val = PADDLE_GET(phi::DenseTensor, feed_inputs[index]);
val.ShareDataWith(input);
// set lod
val.set_lod(input.lod());
}

void SetFeedVariable(Scope* scope,
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/new_executor/standalone_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
execution_config.skip_gc_vars = job->SkipGcVars();

// TODO(phlrain) we only support cpu for now
if (FLAGS_enable_new_ir_in_executor && platform::is_cpu_place(place)) {
if (FLAGS_enable_new_ir_in_executor) {
VLOG(6) << "begin to translate" << std::endl;
auto base_program = paddle::TranslateLegacyProgramToProgram(*program);
auto kernel_program =
paddle::dialect::PdOpLowerToKernelPass(base_program.get());
paddle::dialect::PdOpLowerToKernelPass(base_program.get(), place);
interpretercores_.emplace_back(std::make_shared<InterpreterCore>(
place_, std::move(kernel_program), scope_, execution_config));
} else {
Expand Down
27 changes: 27 additions & 0 deletions paddle/fluid/ir/dialect/pd_op.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,30 @@
inplace: null
view: null
backward: null


- name: shaddow_feed
inputs:
- typename: Tensor
name: x
optional: false
no_need_buffer: false
data_transform: {}
attrs: []
outputs:
- {typename: Tensor, name: out, optional: false, intermediate: false}
no_need_buffer: null
data_transform: null
infer_meta:
func: UnchangedInferMeta
param: [x]
kernel:
func: [shaddow_feed]
param: [x]
backend: null
layout: null
data_type: null
dispatch: {fetch: null}
force_backend: null
inplace: null
backward: null
21 changes: 4 additions & 17 deletions paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,26 +193,13 @@ void HandleForSpecialOp(

if (op_name == "pd.feed") {
auto value = op->result(0);
auto var = CreateVar(value,
inner_scope,
var_name_prefix,
false,
value_2_var_name,
variable_2_var_name,
var_name_2_id,
variable_list);
// TODO(phlrain): need to update here, support StringTensor
auto out_tensor = var->GetMutable<phi::DenseTensor>();
VLOG(6) << "link feed output to feed in variable" << inner_scope;

auto feed_var =
const_cast<paddle::framework::Scope*>(inner_scope->root())->Var("feed");
VLOG(6) << "Create var: feed in scope " << inner_scope->root();
int index =
op->attributes().at("col").dyn_cast<ir::Int32Attribute>().data();
auto feed_list = feed_var->Get<paddle::framework::FeedList>();
auto& in_tensor = (PADDLE_GET(phi::DenseTensor, feed_list.at(index)));
out_tensor->ShareDataWith(in_tensor);
out_tensor->set_lod(in_tensor.lod());

auto feed_var_name = "feed_" + std::to_string(index);
value_2_var_name->emplace(value, feed_var_name);
}

if (op_name == "builtin.combine") {
Expand Down
53 changes: 41 additions & 12 deletions paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ phi::KernelKey GetKernelKey(
ir::Operation* op,
const phi::Place& place,
const std::unordered_map<ir::Value, ir::OpResult>& map_value_pair,
const dialect::OpYamlInfoParser* op_info_parser = nullptr) {
std::unique_ptr<dialect::OpYamlInfoParser> op_info_parser = nullptr) {
if (op->name() == "pd.feed") {
// NOTE, for now feed op don't need a kernel, so the data type from Op
// Result the next op use base program datatype
Expand Down Expand Up @@ -223,11 +223,11 @@ phi::KernelKey GetKernelKey(
return res;
}

std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
phi::Place place) {
auto program = std::make_unique<ir::Program>(ir::IrContext::Instance());

auto block = prog->block();
phi::Place cpu_place(phi::AllocationType::CPU);

ir::IrContext* ctx = ir::IrContext::Instance();
ctx->GetOrRegisterDialect<paddle::dialect::PaddleDialect>();
Expand All @@ -244,14 +244,19 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
VLOG(6) << "op name " << (*it)->name();
paddle::dialect::OpYamlInfoInterface op_info_interface =
(*it)->dyn_cast<paddle::dialect::OpYamlInfoInterface>();
OpYamlInfoParser* op_info_parser = nullptr;
std::unique_ptr<OpYamlInfoParser> op_info_parser;
if (op_info_interface) {
op_info_parser = new OpYamlInfoParser(op_info_interface.GetOpInfo());
op_info_parser.reset(new OpYamlInfoParser(op_info_interface.GetOpInfo()));
}

std::string kernel_fn_str;
if (op_info_parser != nullptr) {
kernel_fn_str = op_info_parser->OpRuntimeInfo().kernel_func[0];
}

auto kernel_key =
GetKernelKey(*it, cpu_place, map_value_pair, op_info_parser);
GetKernelKey(*it, place, map_value_pair, std::move(op_info_parser));
VLOG(6) << "kernel type " << kernel_key;
// create new Op

// only for single output
// need update new kernel key layout and data tyep
Expand Down Expand Up @@ -305,11 +310,6 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
// constuct input
std::vector<ir::OpResult> vec_inputs;

std::string kernel_fn_str;
if (op_info_parser != nullptr) {
kernel_fn_str = op_info_parser->OpRuntimeInfo().kernel_func[0];
}

if ((*it)->num_operands() > 0) {
for (size_t i = 0; i < (*it)->num_operands(); ++i) {
auto cur_in = (*it)->operand(i);
Expand Down Expand Up @@ -404,6 +404,35 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog) {
}

program->block()->push_back(op);

if ((*it)->name() == "pd.feed" && platform::is_gpu_place(place)) {
// add shaddow feed op
phi::KernelKey shaddow_key{
phi::Backend::GPU,
phi::DataLayout::ANY,
TransToPhiDataType(
(*it)->result(0).type().dyn_cast<DenseTensorType>().dtype())};
std::unordered_map<std::string, ir::Attribute> attr_map{
{"op_name", ir::StrAttribute::get(ctx, "pd.shaddow_feed")},
{"kernel_name", ir::StrAttribute::get(ctx, "shaddow_feed")},
{"kernel_key", dialect::KernelAttribute::get(ctx, shaddow_key)}};

auto out_type = paddle::dialect::AllocatedDenseTensorType::get(
ctx,
phi::TransToPhiPlace(shaddow_key.backend()),
(*it)->result(0).type().dyn_cast<dialect::DenseTensorType>());

ir::Operation* shaddow_op =
ir::Operation::Create({op->result(0)}, attr_map, {out_type}, op_info);

map_op_pair[*it] = shaddow_op;
program->block()->push_back(shaddow_op);
if ((*it)->num_results() > 0) {
for (size_t i = 0; i < shaddow_op->num_results(); ++i) {
map_value_pair[(*it)->result(i)] = shaddow_op->result(i);
}
}
}
}

return program;
Expand Down
4 changes: 3 additions & 1 deletion paddle/fluid/ir/transforms/pd_op_to_kernel_pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
#pragma once

#include "paddle/ir/core/program.h"
#include "paddle/phi/common/place.h"

namespace paddle {
namespace dialect {

std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog);
std::unique_ptr<ir::Program> PdOpLowerToKernelPass(
ir::Program* prog, phi::Place place = phi::CPUPlace());

} // namespace dialect
} // namespace paddle
16 changes: 13 additions & 3 deletions paddle/phi/kernels/cpu/feed_with_place_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/feed_with_place_impl.h"

namespace phi {

Expand All @@ -26,11 +27,20 @@ void FeedWithPlaceKernel(const Context& ctx,
DenseTensor* out) {}

} // namespace phi
PD_REGISTER_KERNEL(feed_with_place,

PD_REGISTER_KERNEL(
feed_with_place, CPU, ALL_LAYOUT, phi::FeedWithPlaceKernel, float) {}

PD_REGISTER_KERNEL(shaddow_feed,
CPU,
ALL_LAYOUT,
phi::FeedWithPlaceKernel,
phi::ShaddowFeedKernel,
bool,
float,
int32_t,
int64_t,
double) {}
double,
phi::float16,
phi::bfloat16,
phi::complex64,
phi::complex128) {}
11 changes: 1 addition & 10 deletions paddle/phi/kernels/cpu/fetch_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,8 @@

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/fetch_impl.h"

namespace phi {

template <typename T, typename Context>
void FetchKernel(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out) {
phi::Copy(dev_ctx, x, phi::CPUPlace(), true, out);
out->set_lod(x.lod());
}
} // namespace phi
PD_REGISTER_KERNEL(fetch,
CPU,
ALL_LAYOUT,
Expand Down
5 changes: 5 additions & 0 deletions paddle/phi/kernels/feed_with_place_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,9 @@ void FeedWithPlaceKernel(const Context& ctx,
phi::DataType data_type,
DenseTensor* out);

template <typename T, typename Context>
void ShaddowFeedKernel(const Context& ctx,
const DenseTensor& x,
DenseTensor* out);

} // namespace phi
33 changes: 33 additions & 0 deletions paddle/phi/kernels/gpu/feed_with_place_kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/feed_with_place_kernel.h"

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/feed_with_place_impl.h"

PD_REGISTER_KERNEL(shaddow_feed,
GPU,
ALL_LAYOUT,
phi::ShaddowFeedKernel,
bool,
float,
int32_t,
int64_t,
double,
phi::float16,
phi::bfloat16,
phi::complex64,
phi::complex128) {}
37 changes: 37 additions & 0 deletions paddle/phi/kernels/gpu/fetch_kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/fetch_kernel.h"

#include "paddle/phi/kernels/impl/fetch_impl.h"

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"

PD_REGISTER_KERNEL(fetch,
GPU,
ALL_LAYOUT,
phi::FetchKernel,
float,
double,
int,
int64_t,
uint8_t,
int8_t,
int16_t,
phi::float16,
phi::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>,
bool) {}
Loading

0 comments on commit 196e018

Please sign in to comment.