diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc index 0294e1ca54b437..c089e4f0c13f95 100644 --- a/paddle/fluid/framework/feed_fetch_method.cc +++ b/paddle/fluid/framework/feed_fetch_method.cc @@ -18,6 +18,8 @@ limitations under the License. */ #include "glog/logging.h" +PHI_DECLARE_bool(enable_new_ir_in_executor); + namespace phi { class DenseTensor; } // namespace phi @@ -34,16 +36,30 @@ void SetFeedVariable(Scope* scope, // If var_name Variable is not found in GlobalScope, a new variable will // be created. VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index; - Variable* g_feed_value = scope->Var(var_name); - auto& feed_inputs = *(g_feed_value->GetMutable()); - if (index >= feed_inputs.size()) { - feed_inputs.resize(index + 1); + if (FLAGS_enable_new_ir_in_executor) { + // shared data with input tensor + auto inner_var_name = var_name + "_" + std::to_string(index); + auto feed_ele = scope->Var(inner_var_name); + if (!feed_ele->IsType()) { + VLOG(3) << "Reset " << inner_var_name << " to phi::DenseTensor"; + feed_ele->Clear(); + } + auto val = feed_ele->GetMutable(); + val->ShareDataWith(input); + // set lod + val->set_lod(input.lod()); + } else { + Variable* g_feed_value = scope->Var(var_name); + auto& feed_inputs = *(g_feed_value->GetMutable()); + if (index >= feed_inputs.size()) { + feed_inputs.resize(index + 1); + } + // shared data with input tensor + auto& val = PADDLE_GET(phi::DenseTensor, feed_inputs[index]); + val.ShareDataWith(input); + // set lod + val.set_lod(input.lod()); } - // shared data with input tensor - auto& val = PADDLE_GET(phi::DenseTensor, feed_inputs[index]); - val.ShareDataWith(input); - // set lod - val.set_lod(input.lod()); } void SetFeedVariable(Scope* scope, diff --git a/paddle/fluid/framework/new_executor/standalone_executor.cc b/paddle/fluid/framework/new_executor/standalone_executor.cc index f792dd40d49944..448d5712ecf3cd 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor.cc @@ -62,11 +62,11 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place, execution_config.skip_gc_vars = job->SkipGcVars(); // TODO(phlrain) we only support cpu for now - if (FLAGS_enable_new_ir_in_executor && platform::is_cpu_place(place)) { + if (FLAGS_enable_new_ir_in_executor) { VLOG(6) << "begin to translate" << std::endl; auto base_program = paddle::TranslateLegacyProgramToProgram(*program); auto kernel_program = - paddle::dialect::PdOpLowerToKernelPass(base_program.get()); + paddle::dialect::PdOpLowerToKernelPass(base_program.get(), place); interpretercores_.emplace_back(std::make_shared( place_, std::move(kernel_program), scope_, execution_config)); } else { diff --git a/paddle/fluid/ir/dialect/pd_op.yaml b/paddle/fluid/ir/dialect/pd_op.yaml index a7d1cb14e35e5c..903584d57bcc66 100644 --- a/paddle/fluid/ir/dialect/pd_op.yaml +++ b/paddle/fluid/ir/dialect/pd_op.yaml @@ -227,3 +227,30 @@ inplace: null view: null backward: null + + +- name: shaddow_feed + inputs: + - typename: Tensor + name: x + optional: false + no_need_buffer: false + data_transform: {} + attrs: [] + outputs: + - {typename: Tensor, name: out, optional: false, intermediate: false} + no_need_buffer: null + data_transform: null + infer_meta: + func: UnchangedInferMeta + param: [x] + kernel: + func: [shaddow_feed] + param: [x] + backend: null + layout: null + data_type: null + dispatch: {fetch: null} + force_backend: null + inplace: null + backward: null diff --git a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc index ad3a804eac9116..1a880210afbe10 100644 --- a/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc +++ b/paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc @@ -193,26 +193,13 @@ void HandleForSpecialOp( if (op_name == "pd.feed") { auto value = op->result(0); - auto var = CreateVar(value, - inner_scope, - var_name_prefix, - false, - value_2_var_name, - variable_2_var_name, - var_name_2_id, - variable_list); - // TODO(phlrain): need to update here, support StringTensor - auto out_tensor = var->GetMutable(); + VLOG(6) << "link feed output to feed in variable" << inner_scope; - auto feed_var = - const_cast(inner_scope->root())->Var("feed"); - VLOG(6) << "Create var: feed in scope " << inner_scope->root(); int index = op->attributes().at("col").dyn_cast().data(); - auto feed_list = feed_var->Get(); - auto& in_tensor = (PADDLE_GET(phi::DenseTensor, feed_list.at(index))); - out_tensor->ShareDataWith(in_tensor); - out_tensor->set_lod(in_tensor.lod()); + + auto feed_var_name = "feed_" + std::to_string(index); + value_2_var_name->emplace(value, feed_var_name); } if (op_name == "builtin.combine") { diff --git a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc index a7f209e7e4c319..d55ce6b24f9cf2 100644 --- a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc +++ b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc @@ -53,7 +53,7 @@ phi::KernelKey GetKernelKey( ir::Operation* op, const phi::Place& place, const std::unordered_map& map_value_pair, - const dialect::OpYamlInfoParser* op_info_parser = nullptr) { + std::unique_ptr op_info_parser = nullptr) { if (op->name() == "pd.feed") { // NOTE, for now feed op don't need a kernel, so the data type from Op // Result the next op use base program datatype @@ -223,11 +223,11 @@ phi::KernelKey GetKernelKey( return res; } -std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog) { +std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog, + phi::Place place) { auto program = std::make_unique(ir::IrContext::Instance()); auto block = prog->block(); - phi::Place cpu_place(phi::AllocationType::CPU); ir::IrContext* ctx = ir::IrContext::Instance(); ctx->GetOrRegisterDialect(); @@ -244,14 +244,19 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog) { VLOG(6) << "op name " << (*it)->name(); paddle::dialect::OpYamlInfoInterface op_info_interface = (*it)->dyn_cast(); - OpYamlInfoParser* op_info_parser = nullptr; + std::unique_ptr op_info_parser; if (op_info_interface) { - op_info_parser = new OpYamlInfoParser(op_info_interface.GetOpInfo()); + op_info_parser.reset(new OpYamlInfoParser(op_info_interface.GetOpInfo())); } + + std::string kernel_fn_str; + if (op_info_parser != nullptr) { + kernel_fn_str = op_info_parser->OpRuntimeInfo().kernel_func[0]; + } + auto kernel_key = - GetKernelKey(*it, cpu_place, map_value_pair, op_info_parser); + GetKernelKey(*it, place, map_value_pair, std::move(op_info_parser)); VLOG(6) << "kernel type " << kernel_key; - // create new Op // only for single output // need update new kernel key layout and data tyep @@ -305,11 +310,6 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog) { // constuct input std::vector vec_inputs; - std::string kernel_fn_str; - if (op_info_parser != nullptr) { - kernel_fn_str = op_info_parser->OpRuntimeInfo().kernel_func[0]; - } - if ((*it)->num_operands() > 0) { for (size_t i = 0; i < (*it)->num_operands(); ++i) { auto cur_in = (*it)->operand(i); @@ -404,6 +404,35 @@ std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog) { } program->block()->push_back(op); + + if ((*it)->name() == "pd.feed" && platform::is_gpu_place(place)) { + // add shaddow feed op + phi::KernelKey shaddow_key{ + phi::Backend::GPU, + phi::DataLayout::ANY, + TransToPhiDataType( + (*it)->result(0).type().dyn_cast().dtype())}; + std::unordered_map attr_map{ + {"op_name", ir::StrAttribute::get(ctx, "pd.shaddow_feed")}, + {"kernel_name", ir::StrAttribute::get(ctx, "shaddow_feed")}, + {"kernel_key", dialect::KernelAttribute::get(ctx, shaddow_key)}}; + + auto out_type = paddle::dialect::AllocatedDenseTensorType::get( + ctx, + phi::TransToPhiPlace(shaddow_key.backend()), + (*it)->result(0).type().dyn_cast()); + + ir::Operation* shaddow_op = + ir::Operation::Create({op->result(0)}, attr_map, {out_type}, op_info); + + map_op_pair[*it] = shaddow_op; + program->block()->push_back(shaddow_op); + if ((*it)->num_results() > 0) { + for (size_t i = 0; i < shaddow_op->num_results(); ++i) { + map_value_pair[(*it)->result(i)] = shaddow_op->result(i); + } + } + } } return program; diff --git a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.h b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.h index 415ce18bb0756a..3e4848720f4cec 100644 --- a/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.h +++ b/paddle/fluid/ir/transforms/pd_op_to_kernel_pass.h @@ -14,11 +14,13 @@ #pragma once #include "paddle/ir/core/program.h" +#include "paddle/phi/common/place.h" namespace paddle { namespace dialect { -std::unique_ptr PdOpLowerToKernelPass(ir::Program* prog); +std::unique_ptr PdOpLowerToKernelPass( + ir::Program* prog, phi::Place place = phi::CPUPlace()); } // namespace dialect } // namespace paddle diff --git a/paddle/phi/kernels/cpu/feed_with_place_kernel.cc b/paddle/phi/kernels/cpu/feed_with_place_kernel.cc index 5d7f5d747eb3fa..342ad6a334cc30 100644 --- a/paddle/phi/kernels/cpu/feed_with_place_kernel.cc +++ b/paddle/phi/kernels/cpu/feed_with_place_kernel.cc @@ -16,6 +16,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/feed_with_place_impl.h" namespace phi { @@ -26,11 +27,20 @@ void FeedWithPlaceKernel(const Context& ctx, DenseTensor* out) {} } // namespace phi -PD_REGISTER_KERNEL(feed_with_place, + +PD_REGISTER_KERNEL( + feed_with_place, CPU, ALL_LAYOUT, phi::FeedWithPlaceKernel, float) {} + +PD_REGISTER_KERNEL(shaddow_feed, CPU, ALL_LAYOUT, - phi::FeedWithPlaceKernel, + phi::ShaddowFeedKernel, + bool, float, int32_t, int64_t, - double) {} + double, + phi::float16, + phi::bfloat16, + phi::complex64, + phi::complex128) {} diff --git a/paddle/phi/kernels/cpu/fetch_kernel.cc b/paddle/phi/kernels/cpu/fetch_kernel.cc index 672ceba1b84b35..cdd42c9ef83243 100644 --- a/paddle/phi/kernels/cpu/fetch_kernel.cc +++ b/paddle/phi/kernels/cpu/fetch_kernel.cc @@ -16,17 +16,8 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/fetch_impl.h" -namespace phi { - -template -void FetchKernel(const Context& dev_ctx, - const DenseTensor& x, - DenseTensor* out) { - phi::Copy(dev_ctx, x, phi::CPUPlace(), true, out); - out->set_lod(x.lod()); -} -} // namespace phi PD_REGISTER_KERNEL(fetch, CPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/feed_with_place_kernel.h b/paddle/phi/kernels/feed_with_place_kernel.h index 624992da5432c9..4e8e9063c8d9b9 100644 --- a/paddle/phi/kernels/feed_with_place_kernel.h +++ b/paddle/phi/kernels/feed_with_place_kernel.h @@ -24,4 +24,9 @@ void FeedWithPlaceKernel(const Context& ctx, phi::DataType data_type, DenseTensor* out); +template +void ShaddowFeedKernel(const Context& ctx, + const DenseTensor& x, + DenseTensor* out); + } // namespace phi diff --git a/paddle/phi/kernels/gpu/feed_with_place_kernel.cu b/paddle/phi/kernels/gpu/feed_with_place_kernel.cu new file mode 100644 index 00000000000000..07d4c8719da2c7 --- /dev/null +++ b/paddle/phi/kernels/gpu/feed_with_place_kernel.cu @@ -0,0 +1,33 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/feed_with_place_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/feed_with_place_impl.h" + +PD_REGISTER_KERNEL(shaddow_feed, + GPU, + ALL_LAYOUT, + phi::ShaddowFeedKernel, + bool, + float, + int32_t, + int64_t, + double, + phi::float16, + phi::bfloat16, + phi::complex64, + phi::complex128) {} diff --git a/paddle/phi/kernels/gpu/fetch_kernel.cu b/paddle/phi/kernels/gpu/fetch_kernel.cu new file mode 100644 index 00000000000000..b132ae975b815a --- /dev/null +++ b/paddle/phi/kernels/gpu/fetch_kernel.cu @@ -0,0 +1,37 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/fetch_kernel.h" + +#include "paddle/phi/kernels/impl/fetch_impl.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +PD_REGISTER_KERNEL(fetch, + GPU, + ALL_LAYOUT, + phi::FetchKernel, + float, + double, + int, + int64_t, + uint8_t, + int8_t, + int16_t, + phi::float16, + phi::bfloat16, + phi::dtype::complex, + phi::dtype::complex, + bool) {} diff --git a/paddle/phi/kernels/impl/feed_with_place_impl.h b/paddle/phi/kernels/impl/feed_with_place_impl.h new file mode 100644 index 00000000000000..a7602c2d37927c --- /dev/null +++ b/paddle/phi/kernels/impl/feed_with_place_impl.h @@ -0,0 +1,35 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/tensor_utils.h" + +namespace phi { + +template +void ShaddowFeedKernel(const Context& ctx, + const DenseTensor& x, + DenseTensor* out) { + ctx.template Alloc(out); + if (x.place() == out->place()) { + out->ShareDataWith(x); + out->set_lod(x.lod()); + } else { + phi::Copy(ctx, x, ctx.GetPlace(), true, out); + } +} + +} // namespace phi diff --git a/paddle/phi/kernels/impl/fetch_impl.h b/paddle/phi/kernels/impl/fetch_impl.h new file mode 100644 index 00000000000000..d90a813e4a16b3 --- /dev/null +++ b/paddle/phi/kernels/impl/fetch_impl.h @@ -0,0 +1,27 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/tensor_utils.h" + +namespace phi { + +template +void FetchKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { + phi::Copy(ctx, x, phi::CPUPlace(), true, out); +} + +} // namespace phi diff --git a/test/ir/new_ir/test_standalone_new_ir.py b/test/ir/new_ir/test_standalone_new_ir.py index 9be4e07fddc77d..c67370b2e0a2fc 100644 --- a/test/ir/new_ir/test_standalone_new_ir.py +++ b/test/ir/new_ir/test_standalone_new_ir.py @@ -24,7 +24,11 @@ class TestNewIr(unittest.TestCase): def test_with_new_ir(self): - place = paddle.CPUPlace() + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) exe = paddle.static.Executor(place) main_program = paddle.static.Program() @@ -44,7 +48,11 @@ def test_with_new_ir(self): class TestCombineOp(unittest.TestCase): def test_with_new_ir(self): - place = paddle.CPUPlace() + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) exe = paddle.static.Executor(place) main_program = paddle.static.Program() @@ -64,7 +72,11 @@ def test_with_new_ir(self): class TestFeedOp(unittest.TestCase): def test_with_new_ir(self): - place = paddle.CPUPlace() + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) exe = paddle.static.Executor(place) main_program = paddle.static.Program() @@ -91,6 +103,8 @@ def test_with_new_ir(self): class TestSelectedRows(unittest.TestCase): def test_with_new_ir(self): + # TODO(phlrain): support selected rows in GPU + # place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace() place = paddle.CPUPlace() exe = paddle.static.Executor(place) @@ -113,7 +127,11 @@ def test_with_new_ir(self): class TestAddGradOp(unittest.TestCase): def test_with_new_ir(self): - place = paddle.CPUPlace() + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) exe = paddle.static.Executor(place) main_program = paddle.static.Program() @@ -143,7 +161,11 @@ def test_with_new_ir(self): class TestSplitOp(unittest.TestCase): def test_with_new_ir(self): - place = paddle.CPUPlace() + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) exe = paddle.static.Executor(place) main_program = paddle.static.Program()