[Pten] Change InferMeta for API && Remove MakePtenDenseTensor in reshape kernel #39186

Merged · 3 commits · Jan 25, 2022
2 changes: 1 addition & 1 deletion paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -556,7 +556,7 @@ class ReduceOp : public framework::OperatorWithKernel {
if (ctx.InputVar("X")->IsType<framework::LoDTensor>()) {
if (!reduce_all) {
        return framework::KernelSignature(
-           "sum", {"X"}, {"dim", "keep_dim", "out_dtype"}, {"Out"});
+           "sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"});
}
return framework::KernelSignature(
"sum_raw", {"X"}, {"dim", "keep_dim", "reduce_all", "out_dtype"},
102 changes: 17 additions & 85 deletions paddle/fluid/operators/reshape_op.cc
@@ -38,33 +38,6 @@ namespace operators {

using Tensor = framework::Tensor;

-inline std::vector<int> get_new_shape(
-    const std::vector<const Tensor *> &list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(), framework::make_ddim({1}),
-        platform::errors::InvalidArgument(
-            "If the element type of 'shape' in ReshapeOp is Tensor, "
-            "the element's shape must be [1]. But received the element's "
-            "shape is [%s]",
-            tensor->dims()));
-    if (platform::is_gpu_place(tensor->place()) ||
-        platform::is_xpu_place(tensor->place())) {
-      framework::Tensor temp;
-      paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-
-      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-    } else {
-      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
-    }
-  }
-
-  return vec_new_shape;
-}
-
class ReshapeOp : public framework::OperatorWithKernel {
public:
ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
@@ -370,30 +343,6 @@ class ReshapeKernel {
void operator()(const framework::ExecutionContext &ctx) const {
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *in = ctx.Input<framework::LoDTensor>("X");
-    // framework::DDim out_dims = out->dims();
-    auto pt_x = paddle::experimental::MakePtenDenseTensor(*in);
-
-    // we can't MakePtenDenseTensor by out, because the out of reshape may have
-    // multiple states, some can MakePtenDenseTensor but other's cannot:
-    //    1. out tensor is not initialized
-    //    2. out tensor is input (complete inplace)
-    //    3. out tensor is view of input
-    // We can't MakePtenDenseTensor for case 2, so we solve this case by
-    // creating a temporary tensor here:
-    pten::DenseTensorMeta meta{pten::TransToPtenDataType(in->type()),
-                               in->dims(), in->layout()};
-    auto pt_out_tmp = std::make_shared<pten::DenseTensor>(
-        pten::make_intrusive<paddle::experimental::SharedStorage>(
-            ctx.GetPlace()),
-        std::move(meta));
-    pten::DenseTensor *pt_out = nullptr;
-    if (in != nullptr && out != nullptr && in->Holder() != nullptr &&
-        out->Holder() != nullptr &&
-        in->Holder()->ptr() == out->Holder()->ptr()) {
-      pt_out = pt_x.get();
-    } else {
-      pt_out = pt_out_tmp.get();
-    }

auto list_new_shape_tensor =
ctx.MultiInput<framework::Tensor>("ShapeTensor");
@@ -410,55 +359,46 @@ class ReshapeKernel {
framework::Tensor temp;
paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
&temp);
-        pt_vec_shape.push_back(
-            std::move(*(paddle::experimental::MakePtenDenseTensor(temp))));
+        pt_vec_shape.push_back(std::move(temp));
} else {
-        pt_vec_shape.push_back(
-            std::move(*(paddle::experimental::MakePtenDenseTensor(*tensor))));
+        pt_vec_shape.push_back(*tensor);
}
}
pt_scalar_shape = pten::ScalarArray(pt_vec_shape);
} else if (shape_tensor) {
-      std::unique_ptr<pten::DenseTensor> pt_shape;
+      pten::DenseTensor pt_shape;
if (platform::is_gpu_place(shape_tensor->place()) ||
platform::is_xpu_place(shape_tensor->place())) {
framework::Tensor temp;
paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(),
&temp);
-        pt_shape = paddle::experimental::MakePtenDenseTensor(temp);
+        pt_shape = std::move(temp);
} else {
-        pt_shape = paddle::experimental::MakePtenDenseTensor(*shape_tensor);
+        pt_shape = *shape_tensor;
}
-      pt_scalar_shape = pten::ScalarArray(*pt_shape.get());
+      pt_scalar_shape = pten::ScalarArray(pt_shape);
} else {
auto &shape_attr = ctx.Attr<std::vector<int>>("shape");
pt_scalar_shape = pten::ScalarArray(shape_attr);
}
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeKernel(dev_ctx, *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(dev_ctx, *in, pt_scalar_shape, out);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
}
#endif
-    // non-inplace need move all result from pt_out to out, inplace need set
-    // result dims.
-    if (in != out) {
-      paddle::experimental::SharesStorage(pt_out, static_cast<Tensor *>(out));
-    } else {
-      out->Resize(pt_out->dims());
-    }
}
};
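The rewritten kernel body resolves the target shape from three sources in priority order before dispatching. Below is a minimal sketch of that order; the free function is hypothetical, and the header paths and container types are approximated from the calls visible in this hunk (all three `pten::ScalarArray` constructions mirror ones the diff actually uses):

```cpp
#include <vector>

#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"

// Hypothetical helper mirroring ReshapeKernel's branch order:
// ShapeTensor list first, then a single shape tensor, then the attribute.
pten::ScalarArray ResolveShape(
    const std::vector<pten::DenseTensor> &shape_tensor_list,
    const pten::DenseTensor *shape_tensor,
    const std::vector<int> &shape_attr) {
  if (!shape_tensor_list.empty()) {
    // Each element is a 1-D tensor holding one target dimension.
    return pten::ScalarArray(shape_tensor_list);
  }
  if (shape_tensor != nullptr) {
    // One tensor holding the whole target shape.
    return pten::ScalarArray(*shape_tensor);
  }
  // Static "shape" attribute as the fallback.
  return pten::ScalarArray(shape_attr);
}
```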

@@ -469,25 +409,22 @@ class ReshapeGradKernel {
auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_x->mutable_data(ctx.GetPlace(), d_out->type());

-    auto pt_d_x = paddle::experimental::MakePtenDenseTensor(*d_x);
-    auto pt_d_out = paddle::experimental::MakePtenDenseTensor(*d_out);
-
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
pten::ReshapeGradKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeGradKernel(dev_ctx, *pt_d_out.get(), pt_d_x.get());
+      pten::ReshapeGradKernel(dev_ctx, *d_out, d_x);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeGradKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
}
#endif
}
@@ -500,27 +437,22 @@ class ReshapeDoubleGradKernel {
auto *dd_out = ctx.Output<framework::Tensor>("DDOut");
dd_out->mutable_data(ctx.GetPlace(), dd_x->type());

-    auto pt_dd_x = paddle::experimental::MakePtenDenseTensor(*dd_x);
-    auto pt_dd_out = paddle::experimental::MakePtenDenseTensor(*dd_out);
-
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::CPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::CPUContext &>(dev_ctx), *dd_x, dd_out);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeDoubleGradKernel(dev_ctx, *pt_dd_x.get(), pt_dd_out.get());
+      pten::ReshapeDoubleGradKernel(dev_ctx, *dd_x, dd_out);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::XPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::XPUContext &>(dev_ctx), *dd_x, dd_out);
}
#endif
}
2 changes: 1 addition & 1 deletion paddle/pten/api/include/kernel_signature.h
@@ -102,8 +102,8 @@ using scale_kernel = void (*)(const DeviceContext&,
using sum_kernel = void (*)(const DeviceContext&,
const DenseTensor&,
const std::vector<int64_t>&,
-                            bool,
                             DataType,
+                            bool,
DenseTensor*);

using subtract_kernel = void (*)(const DeviceContext&,
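This reorder has to move in lockstep with the KernelSignature change in reduce_op.h: attributes are forwarded to the kernel positionally, so `{"dim", "out_dtype", "keep_dim"}` must mirror the parameter order of `sum_kernel`. A standalone sketch of that positional matching, using stub types rather than Paddle's real ones:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

enum class DataType { UNDEFINED, FLOAT32 };

// Stub with the reordered parameter order from sum_kernel above:
// dims, out_dtype, keep_dim.
void SumKernelStub(const std::vector<int64_t>& dims, DataType out_dtype,
                   bool keep_dim) {
  std::printf("ndims=%zu dtype=%d keep_dim=%d\n", dims.size(),
              static_cast<int>(out_dtype), static_cast<int>(keep_dim));
}

int main() {
  // Attributes gathered in KernelSignature order: "dim", "out_dtype",
  // "keep_dim" -- forwarded positionally, so both orders must agree.
  std::vector<int64_t> dim = {0, 1};
  SumKernelStub(dim, DataType::FLOAT32, /*keep_dim=*/false);
  return 0;
}
```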
13 changes: 13 additions & 0 deletions paddle/pten/core/dense_tensor.cc
@@ -126,6 +126,19 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
meta_ = std::move(meta);
}

+void DenseTensor::set_meta(const DenseTensorMeta& meta) {
+  PADDLE_ENFORCE(
+      meta.valid(),
+      paddle::platform::errors::InvalidArgument(
+          "Input meta is invalid, please check the meta attribute."));
+  meta_.dims = meta.dims;
+  meta_.dtype = meta.dtype;
+  meta_.is_scalar = meta.is_scalar;
+  meta_.layout = meta.layout;
+  meta_.lod = meta.lod;
+  meta_.offset = meta.offset;
+}
+
/* @jim19930609: This interface will be further modified until we finalize the
design for Allocator - Allocation
For now, we have to temporarily accommodate two independent use cases:
2 changes: 2 additions & 0 deletions paddle/pten/core/dense_tensor.h
@@ -131,6 +131,8 @@ class DenseTensor : public TensorBase,
/// \param meta The meta information of the tensor.
  void set_meta(DenseTensorMeta&& meta);
+
+  void set_meta(const DenseTensorMeta& meta);

/// \brief Test whether the metadata is valid.
/// \return Whether the metadata is valid.
bool valid() const noexcept override { return meta_.valid(); }
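A minimal usage sketch for the new overload, assuming a Paddle source tree; the wrapper function is hypothetical. Unlike the rvalue overload, the const-reference version copies field by field and leaves the source meta usable:

```cpp
#include "paddle/pten/core/dense_tensor.h"

// Hypothetical helper: stamp an existing meta onto a tensor.
void StampMeta(pten::DenseTensor* dst, const pten::DenseTensorMeta& src) {
  // New in this PR: accepts a const lvalue; the PADDLE_ENFORCE inside
  // set_meta rejects an invalid meta.
  dst->set_meta(src);

  // The pre-existing overload consumes its argument instead:
  // dst->set_meta(pten::DenseTensorMeta(src.dtype, src.dims, src.layout));
}
```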
9 changes: 7 additions & 2 deletions paddle/pten/infermeta/binary.cc
@@ -131,8 +131,13 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
}

DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis) {
+                                     const DenseTensorMeta& y_meta) {
+  return ElementwiseRawInferMeta(x_meta, y_meta, -1);
+}
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis) {
DenseTensorMeta return_meta(x_meta.dtype, x_meta.dims, x_meta.layout);
if (x_meta.dims != y_meta.dims) {
auto x_dims = x_meta.dims;
8 changes: 6 additions & 2 deletions paddle/pten/infermeta/binary.h
@@ -42,6 +42,10 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
bool trans_y);

DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis);
+                                     const DenseTensorMeta& y_meta);
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis);

} // namespace pten
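A short sketch of the intended call sites after the split, assuming a Paddle source tree: the api.yaml-facing `ElementwiseInferMeta` drops the axis argument, while `ElementwiseRawInferMeta` keeps it for callers that still pass one. The wrapper function below is hypothetical:

```cpp
#include "paddle/pten/infermeta/binary.h"

// Hypothetical illustration: both calls produce the same output meta,
// since ElementwiseInferMeta forwards axis = -1 to the Raw variant.
pten::DenseTensorMeta AddOutMeta(const pten::DenseTensorMeta& x_meta,
                                 const pten::DenseTensorMeta& y_meta) {
  auto simple = pten::ElementwiseInferMeta(x_meta, y_meta);
  auto raw = pten::ElementwiseRawInferMeta(x_meta, y_meta, /*axis=*/-1);
  (void)raw;  // identical to `simple` by construction
  return simple;
}
```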
10 changes: 10 additions & 0 deletions paddle/pten/infermeta/unary.cc
@@ -232,6 +232,16 @@ DenseTensorMeta ReshapeInferMeta(const DenseTensorMeta& x_meta,
return InferMetaFromVecValue(x_meta, shape.GetData());
}

+/* Why not use ReduceInferMeta directly?
+   Because we need to make InferMetaFunction's args follow the design of
+   api.yaml
+*/
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim) {
+  return ReduceInferMeta(x_meta, axis, keep_dim, dtype);
+}
+
DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
bool keep_dim,
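Since `SumInferMeta` is a pure argument-order adapter, the two calls below describe the same output meta. A sketch assuming a Paddle source tree, with hypothetical variable names:

```cpp
#include <cstdint>
#include <vector>

#include "paddle/pten/infermeta/unary.h"

pten::DenseTensorMeta Demo(const pten::DenseTensorMeta& x_meta) {
  const std::vector<int64_t> axis = {0};
  // api.yaml order: axis, dtype, keep_dim ...
  auto a = pten::SumInferMeta(x_meta, axis, pten::DataType::FLOAT32, false);
  // ... forwards to ReduceInferMeta's order: axis, keep_dim, dtype.
  auto b = pten::ReduceInferMeta(x_meta, axis, false, pten::DataType::FLOAT32);
  (void)b;  // same meta as `a`
  return a;
}
```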
5 changes: 5 additions & 0 deletions paddle/pten/infermeta/unary.h
@@ -58,4 +58,9 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
bool keep_dim,
                                DataType dtype = DataType::UNDEFINED);
+
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim);
} // namespace pten
2 changes: 1 addition & 1 deletion paddle/pten/kernels/math_kernel.cc
@@ -33,8 +33,8 @@ template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
DenseTensor* out) {
bool reduce_all = false;
SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
14 changes: 7 additions & 7 deletions paddle/pten/kernels/math_kernel.h
@@ -50,8 +50,8 @@ template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
DenseTensor* out);

template <typename T, typename Context>
@@ -110,7 +110,7 @@ template <typename T, typename Context>
DenseTensor Add(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
AddKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
@@ -120,7 +120,7 @@ template <typename T, typename Context>
DenseTensor Subtract(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
@@ -130,7 +130,7 @@ template <typename T, typename Context>
DenseTensor Divide(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
@@ -140,7 +140,7 @@ template <typename T, typename Context>
DenseTensor Multiply(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
@@ -163,10 +163,10 @@ DenseTensor Sum(const Context& dev_ctx,
const std::vector<int64_t>& axis,
DataType dtype,
bool keep_dim) {
-  auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim, dtype);
+  auto out_meta = SumInferMeta(x.meta(), axis, dtype, keep_dim);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));

-  SumKernel<T, Context>(dev_ctx, x, axis, keep_dim, dtype, &dense_out);
+  SumKernel<T, Context>(dev_ctx, x, axis, dtype, keep_dim, &dense_out);
return dense_out;
}

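Caller-side view of the `Sum` change: its public signature (axis, dtype, keep_dim) is untouched, but internally it now builds the output meta with `SumInferMeta` and passes dtype ahead of keep_dim to `SumKernel`. A hypothetical call, assuming a Paddle source tree:

```cpp
#include "paddle/pten/kernels/math_kernel.h"

// Hypothetical: sum a tensor over axis 0 in fp32, dropping the reduced dim.
template <typename Context>
pten::DenseTensor ColumnSums(const Context& dev_ctx,
                             const pten::DenseTensor& x) {
  return pten::Sum<float>(dev_ctx, x, /*axis=*/{0},
                          pten::DataType::FLOAT32,
                          /*keep_dim=*/false);
}
```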
5 changes: 2 additions & 3 deletions paddle/pten/kernels/reshape_kernel.cc
@@ -31,9 +31,8 @@ void ReshapeKernel(const Context& dev_ctx,
out->ResizeAndAllocate(out_meta.dims);
return;
  }
-
-  out->Resize(x.dims());
-  out->mutable_data(x.place());
+  out->set_meta(out_meta);
+  out->mutable_data(dev_ctx.GetPlace());
pten::Copy(dev_ctx, x, false, out);
out->Resize(out_meta.dims);
out->ResetLoD(x.lod());