[Pten] Change InferMeta for API && Remove MakePtenDenseTensor in reshape kernel #39186

Merged · 3 commits · Jan 25, 2022
2 changes: 1 addition & 1 deletion paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -556,7 +556,7 @@ class ReduceOp : public framework::OperatorWithKernel {
if (ctx.InputVar("X")->IsType<framework::LoDTensor>()) {
if (!reduce_all) {
        return framework::KernelSignature(
-           "sum", {"X"}, {"dim", "keep_dim", "out_dtype"}, {"Out"});
+           "sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"});
}
return framework::KernelSignature(
"sum_raw", {"X"}, {"dim", "keep_dim", "reduce_all", "out_dtype"},
102 changes: 17 additions & 85 deletions paddle/fluid/operators/reshape_op.cc
@@ -38,33 +38,6 @@ namespace operators {

using Tensor = framework::Tensor;

-inline std::vector<int> get_new_shape(
-    const std::vector<const Tensor *> &list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(), framework::make_ddim({1}),
-        platform::errors::InvalidArgument(
-            "If the element type of 'shape' in ReshapeOp is Tensor, "
-            "the element's shape must be [1]. But received the element's "
-            "shape is [%s]",
-            tensor->dims()));
-    if (platform::is_gpu_place(tensor->place()) ||
-        platform::is_xpu_place(tensor->place())) {
-      framework::Tensor temp;
-      paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-
-      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-    } else {
-      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
-    }
-  }
-
-  return vec_new_shape;
-}
-
class ReshapeOp : public framework::OperatorWithKernel {
public:
ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
@@ -370,30 +343,6 @@ class ReshapeKernel {
void operator()(const framework::ExecutionContext &ctx) const {
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *in = ctx.Input<framework::LoDTensor>("X");
-    // framework::DDim out_dims = out->dims();
-    auto pt_x = paddle::experimental::MakePtenDenseTensor(*in);
-
-    // we can't MakePtenDenseTensor by out, because the out of reshape may have
-    // multiple states, some can MakePtenDenseTensor but other's cannot:
-    //    1. out tensor is not initialized
-    //    2. out tensor is input (complete inplace)
-    //    3. out tensor is view of input
-    // We can't MakePtenDenseTensor for case 2, so we solve this case by
-    // creating a temporary tensor here:
-    pten::DenseTensorMeta meta{pten::TransToPtenDataType(in->type()),
-                               in->dims(), in->layout()};
-    auto pt_out_tmp = std::make_shared<pten::DenseTensor>(
-        pten::make_intrusive<paddle::experimental::SharedStorage>(
-            ctx.GetPlace()),
-        std::move(meta));
-    pten::DenseTensor *pt_out = nullptr;
-    if (in != nullptr && out != nullptr && in->Holder() != nullptr &&
-        out->Holder() != nullptr &&
-        in->Holder()->ptr() == out->Holder()->ptr()) {
-      pt_out = pt_x.get();
-    } else {
-      pt_out = pt_out_tmp.get();
-    }

auto list_new_shape_tensor =
ctx.MultiInput<framework::Tensor>("ShapeTensor");
@@ -410,55 +359,46 @@ class ReshapeKernel {
framework::Tensor temp;
paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
&temp);
-        pt_vec_shape.push_back(
-            std::move(*(paddle::experimental::MakePtenDenseTensor(temp))));
+        pt_vec_shape.push_back(std::move(temp));
} else {
-        pt_vec_shape.push_back(
-            std::move(*(paddle::experimental::MakePtenDenseTensor(*tensor))));
+        pt_vec_shape.push_back(*tensor);
}
}
pt_scalar_shape = pten::ScalarArray(pt_vec_shape);
} else if (shape_tensor) {
-      std::unique_ptr<pten::DenseTensor> pt_shape;
+      pten::DenseTensor pt_shape;
if (platform::is_gpu_place(shape_tensor->place()) ||
platform::is_xpu_place(shape_tensor->place())) {
framework::Tensor temp;
paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(),
&temp);
-        pt_shape = paddle::experimental::MakePtenDenseTensor(temp);
+        pt_shape = std::move(temp);
} else {
-        pt_shape = paddle::experimental::MakePtenDenseTensor(*shape_tensor);
+        pt_shape = *shape_tensor;
}
-      pt_scalar_shape = pten::ScalarArray(*pt_shape.get());
+      pt_scalar_shape = pten::ScalarArray(pt_shape);
} else {
auto &shape_attr = ctx.Attr<std::vector<int>>("shape");
pt_scalar_shape = pten::ScalarArray(shape_attr);
}
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeKernel(dev_ctx, *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(dev_ctx, *in, pt_scalar_shape, out);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
}
#endif
-    // non-inplace need move all result from pt_out to out, inplace need set
-    // result dims.
-    if (in != out) {
-      paddle::experimental::SharesStorage(pt_out, static_cast<Tensor *>(out));
-    } else {
-      out->Resize(pt_out->dims());
-    }
}
};
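The rewritten kernel body resolves the target shape from three sources in priority order before dispatching. Below is a minimal sketch of that order; the free function is hypothetical, and the header paths and container types are approximated from the calls visible in this hunk (all three `pten::ScalarArray` constructions mirror ones the diff actually uses):

```cpp
#include <vector>

#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"

// Hypothetical helper mirroring ReshapeKernel's branch order:
// ShapeTensor list first, then a single shape tensor, then the attribute.
pten::ScalarArray ResolveShape(
    const std::vector<pten::DenseTensor> &shape_tensor_list,
    const pten::DenseTensor *shape_tensor,
    const std::vector<int> &shape_attr) {
  if (!shape_tensor_list.empty()) {
    // Each element is a 1-D tensor holding one target dimension.
    return pten::ScalarArray(shape_tensor_list);
  }
  if (shape_tensor != nullptr) {
    // One tensor holding the whole target shape.
    return pten::ScalarArray(*shape_tensor);
  }
  // Static "shape" attribute as the fallback.
  return pten::ScalarArray(shape_attr);
}
```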

@@ -469,25 +409,22 @@ class ReshapeGradKernel {
auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_x->mutable_data(ctx.GetPlace(), d_out->type());

-    auto pt_d_x = paddle::experimental::MakePtenDenseTensor(*d_x);
-    auto pt_d_out = paddle::experimental::MakePtenDenseTensor(*d_out);
-
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
pten::ReshapeGradKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeGradKernel(dev_ctx, *pt_d_out.get(), pt_d_x.get());
+      pten::ReshapeGradKernel(dev_ctx, *d_out, d_x);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeGradKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
}
#endif
}
@@ -500,27 +437,22 @@ class ReshapeDoubleGradKernel {
auto *dd_out = ctx.Output<framework::Tensor>("DDOut");
dd_out->mutable_data(ctx.GetPlace(), dd_x->type());

-    auto pt_dd_x = paddle::experimental::MakePtenDenseTensor(*dd_x);
-    auto pt_dd_out = paddle::experimental::MakePtenDenseTensor(*dd_out);
-
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::CPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::CPUContext &>(dev_ctx), *dd_x, dd_out);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeDoubleGradKernel(dev_ctx, *pt_dd_x.get(), pt_dd_out.get());
+      pten::ReshapeDoubleGradKernel(dev_ctx, *dd_x, dd_out);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::XPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::XPUContext &>(dev_ctx), *dd_x, dd_out);
}
#endif
}
2 changes: 1 addition & 1 deletion paddle/pten/api/include/kernel_signature.h
@@ -102,8 +102,8 @@ using scale_kernel = void (*)(const DeviceContext&,
using sum_kernel = void (*)(const DeviceContext&,
const DenseTensor&,
const std::vector<int64_t>&,
-                            bool,
                             DataType,
+                            bool,
DenseTensor*);

using subtract_kernel = void (*)(const DeviceContext&,
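This reorder has to move in lockstep with the KernelSignature change in reduce_op.h: attributes are forwarded to the kernel positionally, so `{"dim", "out_dtype", "keep_dim"}` must mirror the parameter order of `sum_kernel`. A standalone sketch of that positional matching, using stub types rather than Paddle's real ones:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

enum class DataType { UNDEFINED, FLOAT32 };

// Stub with the reordered parameter order from sum_kernel above:
// dims, out_dtype, keep_dim.
void SumKernelStub(const std::vector<int64_t>& dims, DataType out_dtype,
                   bool keep_dim) {
  std::printf("ndims=%zu dtype=%d keep_dim=%d\n", dims.size(),
              static_cast<int>(out_dtype), static_cast<int>(keep_dim));
}

int main() {
  // Attributes gathered in KernelSignature order: "dim", "out_dtype",
  // "keep_dim" -- forwarded positionally, so both orders must agree.
  std::vector<int64_t> dim = {0, 1};
  SumKernelStub(dim, DataType::FLOAT32, /*keep_dim=*/false);
  return 0;
}
```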
13 changes: 13 additions & 0 deletions paddle/pten/core/dense_tensor.cc
@@ -126,6 +126,19 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
meta_ = std::move(meta);
}

+void DenseTensor::set_meta(const DenseTensorMeta& meta) {
+  PADDLE_ENFORCE(
+      meta.valid(),
+      paddle::platform::errors::InvalidArgument(
+          "Input meta is invalid, please check the meta attribute."));
+  meta_.dims = meta.dims;
+  meta_.dtype = meta.dtype;
+  meta_.is_scalar = meta.is_scalar;
+  meta_.layout = meta.layout;
+  meta_.lod = meta.lod;
+  meta_.offset = meta.offset;
+}
+
/* @jim19930609: This interface will be further modified until we finalize the
design for Allocator - Allocation
For now, we have to temporarily accommodate two independent use cases:
2 changes: 2 additions & 0 deletions paddle/pten/core/dense_tensor.h
@@ -131,6 +131,8 @@ class DenseTensor : public TensorBase,
/// \param meta The meta information of the tensor.
  void set_meta(DenseTensorMeta&& meta);
+
+  void set_meta(const DenseTensorMeta& meta);

/// \brief Test whether the metadata is valid.
/// \return Whether the metadata is valid.
bool valid() const noexcept override { return meta_.valid(); }
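A minimal usage sketch for the new overload, assuming a Paddle source tree; the wrapper function is hypothetical. Unlike the rvalue overload, the const-reference version copies field by field and leaves the source meta usable:

```cpp
#include "paddle/pten/core/dense_tensor.h"

// Hypothetical helper: stamp an existing meta onto a tensor.
void StampMeta(pten::DenseTensor* dst, const pten::DenseTensorMeta& src) {
  // New in this PR: accepts a const lvalue; the PADDLE_ENFORCE inside
  // set_meta rejects an invalid meta.
  dst->set_meta(src);

  // The pre-existing overload consumes its argument instead:
  // dst->set_meta(pten::DenseTensorMeta(src.dtype, src.dims, src.layout));
}
```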
9 changes: 7 additions & 2 deletions paddle/pten/infermeta/binary.cc
@@ -131,8 +131,13 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
}

DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis) {
+                                     const DenseTensorMeta& y_meta) {
+  return ElementwiseRawInferMeta(x_meta, y_meta, -1);
+}
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis) {
DenseTensorMeta return_meta(x_meta.dtype, x_meta.dims, x_meta.layout);
if (x_meta.dims != y_meta.dims) {
auto x_dims = x_meta.dims;
8 changes: 6 additions & 2 deletions paddle/pten/infermeta/binary.h
@@ -42,6 +42,10 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
bool trans_y);

DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis);
+                                     const DenseTensorMeta& y_meta);
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis);

} // namespace pten
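A short sketch of the intended call sites after the split, assuming a Paddle source tree: the api.yaml-facing `ElementwiseInferMeta` drops the axis argument, while `ElementwiseRawInferMeta` keeps it for callers that still pass one. The wrapper function below is hypothetical:

```cpp
#include "paddle/pten/infermeta/binary.h"

// Hypothetical illustration: both calls produce the same output meta,
// since ElementwiseInferMeta forwards axis = -1 to the Raw variant.
pten::DenseTensorMeta AddOutMeta(const pten::DenseTensorMeta& x_meta,
                                 const pten::DenseTensorMeta& y_meta) {
  auto simple = pten::ElementwiseInferMeta(x_meta, y_meta);
  auto raw = pten::ElementwiseRawInferMeta(x_meta, y_meta, /*axis=*/-1);
  (void)raw;  // identical to `simple` by construction
  return simple;
}
```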
10 changes: 10 additions & 0 deletions paddle/pten/infermeta/unary.cc
@@ -232,6 +232,16 @@ DenseTensorMeta ReshapeInferMeta(const DenseTensorMeta& x_meta,
return InferMetaFromVecValue(x_meta, shape.GetData());
}

+/* Why not use ReduceInferMeta directly?
+   Because we need to make InferMetaFunction's args follow the design of
+   api.yaml
+*/
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim) {
+  return ReduceInferMeta(x_meta, axis, keep_dim, dtype);
+}
+
DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
bool keep_dim,
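Since `SumInferMeta` is a pure argument-order adapter, the two calls below describe the same output meta. A sketch assuming a Paddle source tree, with hypothetical variable names:

```cpp
#include <cstdint>
#include <vector>

#include "paddle/pten/infermeta/unary.h"

pten::DenseTensorMeta Demo(const pten::DenseTensorMeta& x_meta) {
  const std::vector<int64_t> axis = {0};
  // api.yaml order: axis, dtype, keep_dim ...
  auto a = pten::SumInferMeta(x_meta, axis, pten::DataType::FLOAT32, false);
  // ... forwards to ReduceInferMeta's order: axis, keep_dim, dtype.
  auto b = pten::ReduceInferMeta(x_meta, axis, false, pten::DataType::FLOAT32);
  (void)b;  // same meta as `a`
  return a;
}
```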
5 changes: 5 additions & 0 deletions paddle/pten/infermeta/unary.h
@@ -58,4 +58,9 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
bool keep_dim,
                                DataType dtype = DataType::UNDEFINED);
+
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim);
} // namespace pten
2 changes: 1 addition & 1 deletion paddle/pten/kernels/math_kernel.cc
@@ -33,8 +33,8 @@ template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
DenseTensor* out) {
bool reduce_all = false;
SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
14 changes: 7 additions & 7 deletions paddle/pten/kernels/math_kernel.h
@@ -50,8 +50,8 @@ template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
DenseTensor* out);

template <typename T, typename Context>
@@ -110,7 +110,7 @@ template <typename T, typename Context>
DenseTensor Add(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
AddKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
@@ -120,7 +120,7 @@ template <typename T, typename Context>
DenseTensor Subtract(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
@@ -130,7 +130,7 @@ template <typename T, typename Context>
DenseTensor Divide(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
@@ -140,7 +140,7 @@ template <typename T, typename Context>
DenseTensor Multiply(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
@@ -163,10 +163,10 @@ DenseTensor Sum(const Context& dev_ctx,
const std::vector<int64_t>& axis,
DataType dtype,
bool keep_dim) {
-  auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim, dtype);
+  auto out_meta = SumInferMeta(x.meta(), axis, dtype, keep_dim);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));

-  SumKernel<T, Context>(dev_ctx, x, axis, keep_dim, dtype, &dense_out);
+  SumKernel<T, Context>(dev_ctx, x, axis, dtype, keep_dim, &dense_out);
return dense_out;
}

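Caller-side view of the `Sum` change: its public signature (axis, dtype, keep_dim) is untouched, but internally it now builds the output meta with `SumInferMeta` and passes dtype ahead of keep_dim to `SumKernel`. A hypothetical call, assuming a Paddle source tree:

```cpp
#include "paddle/pten/kernels/math_kernel.h"

// Hypothetical: sum a tensor over axis 0 in fp32, dropping the reduced dim.
template <typename Context>
pten::DenseTensor ColumnSums(const Context& dev_ctx,
                             const pten::DenseTensor& x) {
  return pten::Sum<float>(dev_ctx, x, /*axis=*/{0},
                          pten::DataType::FLOAT32,
                          /*keep_dim=*/false);
}
```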
5 changes: 2 additions & 3 deletions paddle/pten/kernels/reshape_kernel.cc
@@ -31,9 +31,8 @@ void ReshapeKernel(const Context& dev_ctx,
out->ResizeAndAllocate(out_meta.dims);
return;
  }
-
-  out->Resize(x.dims());
-  out->mutable_data(x.place());
+  out->set_meta(out_meta);
+  out->mutable_data(dev_ctx.GetPlace());
pten::Copy(dev_ctx, x, false, out);
out->Resize(out_meta.dims);
out->ResetLoD(x.lod());