[NewIR]Split python api and vjp #56518

Merged on Aug 28, 2023

Changes from all commits (61 commits)
4b21c66
support ir api form prim
cyber-pioneer Aug 10, 2023
37125f9
convert vector of int to intarray
cyber-pioneer Aug 10, 2023
2c0166c
add reference of lbfgs
xiaoguoguo626807 Aug 11, 2023
37883b2
add reference of lbfgs
xiaoguoguo626807 Aug 11, 2023
58dd125
Merge commit 'refs/pull/56162/head' of https://github.com/PaddlePaddl…
Charles-hit Aug 11, 2023
8834e65
support ir api for prim
cyber-pioneer Aug 10, 2023
d286e7f
Merge commit 'refs/pull/56162/head' of https://github.com/PaddlePaddl…
Charles-hit Aug 14, 2023
79aa356
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Charles-hit Aug 14, 2023
0cc4336
Add more gen api
Aug 15, 2023
0afe2ed
concat python api to concat_grad
xiaoguoguo626807 Aug 15, 2023
00379a9
fix conflict
xiaoguoguo626807 Aug 15, 2023
3537f91
fix conflict
xiaoguoguo626807 Aug 15, 2023
afd61ea
fix gen conflict
xiaoguoguo626807 Aug 15, 2023
d95467a
fix conflict
xiaoguoguo626807 Aug 15, 2023
adc040f
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Charles-hit Aug 15, 2023
2f3a72a
support vjp prim mode in new ir
Charles-hit Aug 16, 2023
b88cc54
resolve conflict
Charles-hit Aug 16, 2023
6b6fc8e
remove useless code
Charles-hit Aug 16, 2023
bfbb0e8
add vjp autogen v1.0
changeyoung98 Aug 16, 2023
c428ff6
vjp codegen resolve conflict
changeyoung98 Aug 17, 2023
ece1b32
Merge branch 'PaddlePaddle:develop' into upstream/czy-gen
changeyoung98 Aug 17, 2023
ff5aa2c
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
changeyoung98 Aug 17, 2023
065c011
add test for prim
Charles-hit Aug 17, 2023
578e08b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
changeyoung98 Aug 17, 2023
0c8db6e
resolve type conflict
changeyoung98 Aug 17, 2023
873ec07
Merge branch 'upstream/czy-gen' of github.com:changeyoung98/Paddle in…
changeyoung98 Aug 17, 2023
7939ab2
rename desctensor to lazytensor
Charles-hit Aug 17, 2023
ffc71f2
modify utils
Charles-hit Aug 17, 2023
c161003
remove useless code
Charles-hit Aug 17, 2023
95642e3
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Charles-hit Aug 17, 2023
9fcfe39
add split op and modify some bug of vectorType
xiaoguoguo626807 Aug 17, 2023
7fdb9c4
support manual vjp
Charles-hit Aug 17, 2023
e9507d4
fix conflict
xiaoguoguo626807 Aug 17, 2023
4d40cd6
fix conflcit
xiaoguoguo626807 Aug 17, 2023
e7ad743
fix conflict
xiaoguoguo626807 Aug 17, 2023
ad8ea16
fix conflict
xiaoguoguo626807 Aug 17, 2023
011c611
add concat python test
xiaoguoguo626807 Aug 18, 2023
a20be18
fix_conflict
xiaoguoguo626807 Aug 18, 2023
185d30b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
xiaoguoguo626807 Aug 18, 2023
e02f612
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
xiaoguoguo626807 Aug 18, 2023
fed29f4
Merge branch 'develop', commit 'refs/pull/56316/head' of https://gith…
xiaoguoguo626807 Aug 21, 2023
add6d90
add split python api to vjp
xiaoguoguo626807 Aug 21, 2023
87f13b9
fix conflict
xiaoguoguo626807 Aug 21, 2023
4ef0db1
modify build bug
xiaoguoguo626807 Aug 21, 2023
453deed
modify run bug
xiaoguoguo626807 Aug 22, 2023
6879097
fix conflict
xiaoguoguo626807 Aug 23, 2023
c21b912
fix conflict bug
xiaoguoguo626807 Aug 23, 2023
a3e947e
fix conflict
xiaoguoguo626807 Aug 23, 2023
98e6beb
build bug fix
xiaoguoguo626807 Aug 23, 2023
7dca72b
Merge branch 'develop', commit 'refs/pull/56518/head' of https://gith…
xiaoguoguo626807 Aug 24, 2023
f5d60fb
modify python api bug
xiaoguoguo626807 Aug 24, 2023
47097e8
modify test
xiaoguoguo626807 Aug 24, 2023
877262d
Merge branch 'split_vjp' of https://github.com/xiaoguoguo626807/Paddl…
xiaoguoguo626807 Aug 24, 2023
6054bde
fix conflict
xiaoguoguo626807 Aug 24, 2023
a6b5af3
fix conflict
xiaoguoguo626807 Aug 24, 2023
2fc525f
fluid backward recover
xiaoguoguo626807 Aug 24, 2023
1072cdd
recover conflict
xiaoguoguo626807 Aug 24, 2023
71d1c09
fix conflict
xiaoguoguo626807 Aug 25, 2023
0ded8f3
reply review comments
xiaoguoguo626807 Aug 25, 2023
d342f2e
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
xiaoguoguo626807 Aug 25, 2023
5dcecd9
modify opruntimeinfo num
xiaoguoguo626807 Aug 25, 2023
2 changes: 1 addition & 1 deletion paddle/fluid/ir/dialect/op_generator/op_gen.py
@@ -172,7 +172,7 @@ class {op_name} : public ir::Op<{op_name}{interfaces}{traits}> {{
'bool': 'ir::BoolAttribute',
}

_NO_NEED_GEN_OPS = {'add_n'}
_NO_NEED_GEN_OPS = {'add_n', 'split_grad'}


def to_phi_and_fluid_op_name(op_item):
@@ -29,5 +29,6 @@
"sum",
"add",
"concat",
"split",
]
vjp_interface_implementation_gen_op_list = ["tanh", "mean", "divide", "add"]
2 changes: 1 addition & 1 deletion paddle/fluid/ir/dialect/paddle_dialect/ir/pd_dialect.cc
@@ -48,7 +48,7 @@ void PaddleDialect::initialize() {
#define GET_OP_LIST
#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_op.h" // NOLINT
>();
RegisterOp<paddle::dialect::AddNOp>();
RegisterOps<paddle::dialect::AddNOp, paddle::dialect::SplitGradOp>();

RegisterInterfaces<ParameterConvertInterface>();
}
13 changes: 12 additions & 1 deletion paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_api.cc
@@ -18,5 +18,16 @@
#include "paddle/ir/core/builtin_op.h"

namespace paddle {
namespace dialect {} // namespace dialect
namespace dialect {
ir::OpResult split_grad(std::vector<ir::OpResult> out_grads,
Reviewer comment (Contributor):

Suggested change:
- ir::OpResult split_grad(std::vector<ir::OpResult> out_grads,
+ ir::OpResult split_grad(const std::vector<ir::OpResult>& out_grads,

@0x45f I see that the vector parameters of concat and add_n in pd_api.h are also passed by value; could those be changed to const& as well?

Reply (Contributor Author, xiaoguoguo626807, Aug 25, 2023):

Once the hand-written Build functions are complete, this can be folded into the automatic code generation and improved there.

ir::OpResult axis) {
auto combine_op =
APIBuilder::Instance().GetBuilder()->Build<ir::CombineOp>(out_grads);
paddle::dialect::SplitGradOp split_grad_op =
APIBuilder::Instance().GetBuilder()->Build<paddle::dialect::SplitGradOp>(
combine_op.out(), axis);

return split_grad_op.x_grad();
}
} // namespace dialect
} // namespace paddle
6 changes: 5 additions & 1 deletion paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_api.h
@@ -21,5 +21,9 @@
#include "paddle/phi/common/place.h"

namespace paddle {
namespace dialect {} // namespace dialect
namespace dialect {

ir::OpResult split_grad(std::vector<ir::OpResult> out_grads, ir::OpResult axis);

} // namespace dialect
} // namespace paddle
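
For context, a minimal usage sketch of the new dialect-level API (not part of this diff; the gradient results g0 and g1 and the axis value are hypothetical) could look as follows, mirroring the FullOp-based axis construction used in the float-axis Build overload below:

// Hedged sketch, not part of this PR: calling the new pd.split_grad API.
// g0 and g1 stand for ir::OpResult gradients of split's outputs.
paddle::dialect::FullOp full_axis_op =
    APIBuilder::Instance().GetBuilder()->Build<paddle::dialect::FullOp>(
        std::vector<int64_t>{1}, /*value=*/0.0f,
        phi::DataType::FLOAT32, phi::CPUPlace());
ir::OpResult axis = full_axis_op->result(0);

// split_grad combines the gradients internally via ir::CombineOp and
// returns the gradient with respect to split's input x.
ir::OpResult x_grad = paddle::dialect::split_grad({g0, g1}, axis);
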
215 changes: 215 additions & 0 deletions paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.cc
@@ -14,6 +14,7 @@

#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.h"
#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_attribute.h"
#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_op.h"
Reviewer comment (Contributor):

Could this include only the header files that are actually needed here?

Reply (Contributor Author):

split_grad calls the full op, so this header is required:

int axis = axis_.owner()
               ->dyn_cast<paddle::dialect::FullOp>()
               .attributes()
               .at("value")
               .dyn_cast<paddle::dialect::ScalarAttribute>()
               .data()
               .to<int>();

#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_type.h"
#include "paddle/ir/core/builtin_attribute.h"
#include "paddle/ir/core/builtin_op.h"
@@ -145,7 +146,221 @@ void AddNOp::InferMeta(phi::InferMetaContext *infer_meta) {
fn(infer_meta);
}

const char *SplitGradOp::attributes_name[1] = {"axis"};

OpInfoTuple SplitGradOp::GetOpInfo() {
std::vector<paddle::dialect::OpInputInfo> inputs = {
OpInputInfo("out_grad",
"ir::VectorType<paddle::dialect::DenseTensorType>",
false,
false,
false),
OpInputInfo(
"axis", "paddle::dialect::ScalarAttribute", false, false, true)};
Reviewer comment (Contributor):

Since axis is a mutable attribute, is it worth adding a Build interface that takes axis as a Scalar?

Reply (Contributor Author):

Done. Note that this Build interface has no usage example yet, so there is a coverage gap.

std::vector<paddle::dialect::OpAttributeInfo> attributes = {};
std::vector<paddle::dialect::OpOutputInfo> outputs = {
OpOutputInfo("x_grad", "paddle::dialect::DenseTensorType", false, false)};
paddle::dialect::OpRunTimeInfo run_time_info =
OpRunTimeInfo("ConcatInferMeta",
{"out_grad", "axis"},
{"concat"},
{"out_grad", "axis"},
{"out_grad"},
{},
{},
{});

return std::make_tuple(
inputs, attributes, outputs, run_time_info, "split_grad");
}

void SplitGradOp::Build(ir::Builder &builder,
ir::OperationArgument &argument,
ir::OpResult out_grad_,
float axis) {
// Generate scalar mutable attribute: axis
paddle::dialect::FullOp full_axis_op = builder.Build<paddle::dialect::FullOp>(
std::vector<int64_t>{1}, axis, phi::DataType::FLOAT32, phi::CPUPlace());
ir::OpResult axis_ = full_axis_op->result(0);

VLOG(4) << "Builder construction inputs";
std::vector<ir::OpResult> argument_inputs = {out_grad_, axis_};
argument.AddOperands(argument_inputs.begin(), argument_inputs.end());

VLOG(4) << "Builder construction attributes";

VLOG(4) << "Builder construction outputs";
ir::VectorType out_grad = out_grad_.type().dyn_cast<ir::VectorType>();
std::vector<phi::DenseTensor> vec_dense_out_grad;
for (size_t i = 0; i < static_cast<size_t>(out_grad.size()); i++) {
vec_dense_out_grad.push_back(phi::DenseTensor(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
phi::DenseTensorMeta(
paddle::dialect::TransToPhiDataType(
out_grad[i]
.dyn_cast<paddle::dialect::DenseTensorType>()
.dtype()),
out_grad[i].dyn_cast<paddle::dialect::DenseTensorType>().dims(),
out_grad[i]
.dyn_cast<paddle::dialect::DenseTensorType>()
.data_layout(),
out_grad[i].dyn_cast<paddle::dialect::DenseTensorType>().lod(),
out_grad[i]
.dyn_cast<paddle::dialect::DenseTensorType>()
.offset())));
}
std::vector<phi::MetaTensor> vec_meta_out_grad;
for (size_t i = 0; i < vec_dense_out_grad.size(); i++) {
vec_meta_out_grad.push_back(phi::MetaTensor(&vec_dense_out_grad[i]));
}

std::vector<const phi::MetaTensor *> meta_out_grad;
for (size_t i = 0; i < static_cast<size_t>(vec_meta_out_grad.size()); i++) {
meta_out_grad.push_back(&vec_meta_out_grad[i]);
}
phi::DenseTensor dense_x_grad;
phi::MetaTensor meta_x_grad(&dense_x_grad);

phi::ConcatInferMeta(meta_out_grad, axis, &meta_x_grad);

std::vector<ir::Type> argument_outputs;
ir::Type x_grad_dense_tensor_type = paddle::dialect::DenseTensorType::get(
ir::IrContext::Instance(),
paddle::dialect::TransToIrDataType(dense_x_grad.dtype()),
dense_x_grad.dims(),
dense_x_grad.layout(),
dense_x_grad.lod(),
dense_x_grad.offset());
argument_outputs.push_back(x_grad_dense_tensor_type);
argument.AddOutputs(argument_outputs.begin(), argument_outputs.end());
}

void SplitGradOp::Build(ir::Builder &builder,
ir::OperationArgument &argument,
ir::OpResult out_grad_,
ir::OpResult axis_) {
VLOG(4) << "Builder construction inputs";
std::vector<ir::OpResult> argument_inputs = {out_grad_, axis_};
argument.AddOperands(argument_inputs.begin(), argument_inputs.end());

VLOG(4) << "Builder construction attributes";

VLOG(4) << "Builder construction outputs";
ir::VectorType out_grad = out_grad_.type().dyn_cast<ir::VectorType>();
int axis = axis_.owner()
->dyn_cast<paddle::dialect::FullOp>()
.attributes()
.at("value")
.dyn_cast<paddle::dialect::ScalarAttribute>()
.data()
.to<int>();

std::vector<phi::DenseTensor> vec_dense_out_grad;
for (size_t i = 0; i < static_cast<size_t>(out_grad.size()); i++) {
vec_dense_out_grad.push_back(phi::DenseTensor(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
phi::DenseTensorMeta(
TransToPhiDataType(out_grad[i]
.dyn_cast<paddle::dialect::DenseTensorType>()
.dtype()),
out_grad[i].dyn_cast<paddle::dialect::DenseTensorType>().dims(),
out_grad[i]
.dyn_cast<paddle::dialect::DenseTensorType>()
.data_layout(),
out_grad[i].dyn_cast<paddle::dialect::DenseTensorType>().lod(),
out_grad[i]
.dyn_cast<paddle::dialect::DenseTensorType>()
.offset())));
}
std::vector<phi::MetaTensor> vec_meta_out_grad;
for (size_t i = 0; i < vec_dense_out_grad.size(); i++) {
vec_meta_out_grad.push_back(phi::MetaTensor(&vec_dense_out_grad[i]));
}

std::vector<const phi::MetaTensor *> meta_out_grad;
for (size_t i = 0; i < static_cast<size_t>(vec_meta_out_grad.size()); i++) {
meta_out_grad.push_back(&vec_meta_out_grad[i]);
}
phi::DenseTensor dense_x_grad;
phi::MetaTensor meta_x_grad(&dense_x_grad);

phi::ConcatInferMeta(meta_out_grad, axis, &meta_x_grad);

std::vector<ir::Type> argument_outputs;
ir::Type x_grad_dense_tensor_type = paddle::dialect::DenseTensorType::get(
ir::IrContext::Instance(),
TransToIrDataType(dense_x_grad.dtype()),
dense_x_grad.dims(),
dense_x_grad.layout(),
dense_x_grad.lod(),
dense_x_grad.offset());
argument_outputs.push_back(x_grad_dense_tensor_type);
argument.AddOutputs(argument_outputs.begin(), argument_outputs.end());
}

void SplitGradOp::Verify() {
VLOG(4) << "Start Verifying inputs, outputs and attributes for: SplitGradOp.";
VLOG(4) << "Verifying inputs:";
{
auto input_size = num_operands();
PADDLE_ENFORCE_EQ(
input_size,
2u,
phi::errors::PreconditionNotMet(
"The size %d of inputs must be equal to 2.", input_size));
if (auto vec_type =
(*this)->operand_source(0).type().dyn_cast<ir::VectorType>()) {
for (size_t i = 0; i < vec_type.size(); ++i) {
PADDLE_ENFORCE(vec_type[i].isa<paddle::dialect::DenseTensorType>(),
phi::errors::PreconditionNotMet(
"Type validation failed for the 0th input."));
}
} else {
PADDLE_ENFORCE((*this)
->operand_source(0)
.type()
.isa<paddle::dialect::DenseTensorType>(),
phi::errors::PreconditionNotMet(
"Type validation failed for the 0th input."));
}
PADDLE_ENFORCE((*this)
->operand_source(1)
.type()
.isa<paddle::dialect::DenseTensorType>(),
phi::errors::PreconditionNotMet(
"Type validation failed for the 1th input."));
}
VLOG(4) << "Verifying attributes:";
{
// Attributes num is 0, not need to check attributes type.
}
VLOG(4) << "Verifying outputs:";
{
auto output_size = num_results();
PADDLE_ENFORCE_EQ(
output_size,
1u,
phi::errors::PreconditionNotMet(
"The size %d of outputs must be equal to 1.", output_size));
PADDLE_ENFORCE(
(*this)->result(0).type().isa<paddle::dialect::DenseTensorType>(),
phi::errors::PreconditionNotMet(
"Type validation failed for the 0th output."));
}
VLOG(4) << "End Verifying for: SplitGradOp.";
}

void SplitGradOp::InferMeta(phi::InferMetaContext *infer_meta) {
auto fn = PD_INFER_META(phi::ConcatInferMeta);
fn(infer_meta);
}

} // namespace dialect
} // namespace paddle

IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::AddNOp)
IR_DEFINE_EXPLICIT_TYPE_ID(paddle::dialect::SplitGradOp)
26 changes: 25 additions & 1 deletion paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op.h
@@ -14,7 +14,7 @@

#ifdef GET_MANUAL_OP_LIST
#undef GET_MANUAL_OP_LIST
paddle::dialect::AddNOp
paddle::dialect::AddNOp, paddle::dialect::SplitGradOp

#else

@@ -51,9 +51,33 @@ class AddNOp : public ir::Op<AddNOp, OpYamlInfoInterface> {
static void InferMeta(phi::InferMetaContext *infer_meta);
};

class SplitGradOp : public ir::Op<SplitGradOp, OpYamlInfoInterface> {
Reviewer comment (Contributor):

Shouldn't this also have the paddle::dialect::InferMetaInterface? The other GradOps in pd_op.h all have it.

Reply (Contributor Author, xiaoguoguo626807, Aug 25, 2023):

split_grad has no infer_meta entry in its yaml; it reuses the infer_meta of the invoked concat op. This has now been added.

public:
using Op::Op;
static const char *name() { return "pd.split_grad"; }
static const char *attributes_name[1];
static constexpr uint32_t attributes_num = 1;
static OpInfoTuple GetOpInfo();
static void Build(ir::Builder &builder, // NOLINT
ir::OperationArgument &argument, // NOLINT
ir::OpResult x_,
float axis = 0);
static void Build(ir::Builder &builder, // NOLINT
ir::OperationArgument &argument, // NOLINT
ir::OpResult out_grad_,
ir::OpResult axis_);

void Verify();
ir::Value out_grad() { return operand_source(0); }
ir::Value axis() { return operand_source(1); }
ir::OpResult x_grad() { return result(0); }
static void InferMeta(phi::InferMetaContext *infer_meta);
};

} // namespace dialect
} // namespace paddle

IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::AddNOp)
IR_DECLARE_EXPLICIT_TYPE_ID(paddle::dialect::SplitGradOp)

#endif
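
To illustrate the two Build overloads and the accessors declared above, a rough builder-level sketch (not part of the diff; builder, out_grad, and axis are assumed to be an ir::Builder and previously produced ir::OpResult values) might look like:

// Hedged sketch, not in this PR: constructing SplitGradOp via ir::Builder.
// Overload 1: axis given as a float; SplitGradOp::Build materializes a FullOp
// holding the axis internally.
auto op1 = builder.Build<paddle::dialect::SplitGradOp>(out_grad, /*axis=*/0.0f);

// Overload 2: axis already available as the result of a full op.
auto op2 = builder.Build<paddle::dialect::SplitGradOp>(out_grad, axis);

// Accessors declared in pd_manual_op.h.
ir::Value grads_in = op2.out_grad();  // operand 0: vector of output grads
ir::Value axis_in = op2.axis();       // operand 1: axis tensor
ir::OpResult x_grad = op2.x_grad();   // result 0: gradient w.r.t. x
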
34 changes: 34 additions & 0 deletions paddle/fluid/ir/dialect/paddle_dialect/ir/pd_op_vjp_manual.cc
@@ -87,5 +87,39 @@ std::vector<std::vector<ir::OpResult>> SumOp::Vjp(
}
return res;
}

std::vector<std::vector<ir::OpResult>> SplitOp::Vjp(
ir::Operation* op,
const std::vector<std::vector<ir::OpResult>>& out_grads,
const std::vector<std::vector<bool>>& stop_gradients) {
SplitOp op_obj = op->dyn_cast<SplitOp>();

Tensor axis(std::make_shared<primitive::LazyTensor>(op_obj.axis()));
std::vector<Tensor> out_grads_;
for (size_t idx = 0; idx < out_grads[0].size(); idx++) {
out_grads_.emplace_back(
std::make_shared<primitive::LazyTensor>(out_grads[0][idx]));
}

std::vector<std::vector<Tensor>> tensor_res =
primitive::split_vjp(out_grads_, axis, stop_gradients);

std::vector<std::vector<ir::OpResult>> res(tensor_res.size(),
std::vector<ir::OpResult>());

for (uint64_t i = 0; i < tensor_res.size(); i++) {
res[i].resize(tensor_res[i].size());
for (uint64_t j = 0; j < tensor_res[i].size(); j++) {
if (tensor_res[i][j].defined()) {
res[i][j] = std::static_pointer_cast<primitive::LazyTensor>(
tensor_res[i][j].impl())
->getValue()
.dyn_cast<ir::OpResult>();
}
}
}
return res;
}

} // namespace dialect
} // namespace paddle
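
SplitOp::Vjp delegates to primitive::split_vjp, whose definition is not shown in this diff. Judging from the call above, its declaration would look roughly like the following sketch (an assumption, included only to clarify the call chain from the dialect-level Vjp into the primitive layer):

// Hedged sketch of the assumed primitive-layer declaration (not in this diff).
// Returns one vector of gradient Tensors per differentiable input of split
// (here the single gradient with respect to x), honoring stop_gradients.
std::vector<std::vector<Tensor>> split_vjp(
    const std::vector<Tensor>& out_grads,
    const Tensor& axis,
    const std::vector<std::vector<bool>>& stop_gradients);
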
3 changes: 3 additions & 0 deletions paddle/fluid/primitive/backend/manual/manual_backend.h
@@ -33,6 +33,9 @@ std::vector<Tensor> concat_grad(const std::vector<Tensor>& x,
const Tensor& out_grad,
const Tensor& axis);

template <typename T>
Tensor split_grad(const std::vector<Tensor>& out_grads, const Tensor& axis);

} // namespace backend
} // namespace primitive
} // namespace paddle
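
The split_grad backend template is only declared here; its LazyTensor specialization is presumably defined elsewhere in the PR. A minimal sketch of how such a specialization could forward to the dialect API from pd_manual_api.cc, following the LazyTensor unwrapping pattern used in SplitOp::Vjp above (an assumption, not the actual file contents, and it presumes primitive::LazyTensor is in scope):

// Hedged sketch (assumption): LazyTensor specialization of split_grad that
// lowers the primitive call onto the new pd.split_grad dialect API.
template <>
Tensor split_grad<LazyTensor>(const std::vector<Tensor>& out_grads,
                              const Tensor& axis) {
  std::vector<ir::OpResult> out_grads_res;
  for (const auto& grad : out_grads) {
    out_grads_res.push_back(std::static_pointer_cast<LazyTensor>(grad.impl())
                                ->getValue()
                                .dyn_cast<ir::OpResult>());
  }
  ir::OpResult axis_res = std::static_pointer_cast<LazyTensor>(axis.impl())
                              ->getValue()
                              .dyn_cast<ir::OpResult>();
  ir::OpResult x_grad_res =
      paddle::dialect::split_grad(out_grads_res, axis_res);
  return Tensor(std::make_shared<LazyTensor>(x_grad_res));
}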