PaddlePaddle · CtfGo · Jul 26, 2022 · Jul 21, 2022
diff --git a/paddle/fluid/operators/bmm_op.cc b/paddle/fluid/operators/bmm_op.cc
@@ -16,6 +16,11 @@
 
 #include <vector>
 
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/backward.h"
+#include "paddle/phi/infermeta/binary.h"
+
 namespace paddle {
 namespace operators {
 
@@ -24,62 +29,6 @@ class BmmOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
  protected:
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("X"),
-        true,
-        platform::errors::NotFound("Input(X) of BmmOp should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("Y"),
-        true,
-        platform::errors::NotFound("Input(Y) of BmmOp should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasOutput("Out"),
-        true,
-        platform::errors::NotFound("Output(Out) of BmmOp should not be null."));
-
-    auto x_dims = ctx->GetInputDim("X");
-    auto y_dims = ctx->GetInputDim("Y");
-
-    PADDLE_ENFORCE_EQ(x_dims.size(),
-                      3,
-                      platform::errors::InvalidArgument(
-                          "Input(X) of BmmOp must be 3-dimensional in BmmOp, "
-                          "but received X's shape: [%s].",
-                          x_dims));
-    PADDLE_ENFORCE_EQ(y_dims.size(),
-                      3,
-                      platform::errors::InvalidArgument(
-                          "Input(Y) of BmmOp must be 3-dimensional in BmmOp, "
-                          "but received Y's shape: [%s].",
-                          y_dims));
-    PADDLE_ENFORCE_EQ(
-        x_dims[0],
-        y_dims[0],
-        platform::errors::InvalidArgument(
-            "Input(X) and Input(Y) must have the same batch size in BmmOp, "
-            "but received X's batch size: [%s],"
-            "Y's batch size [%s]",
-            x_dims[0],
-            y_dims[0]));
-    PADDLE_ENFORCE_EQ(
-        x_dims[2],
-        y_dims[1],
-        platform::errors::InvalidArgument(
-            "Input(X)'s width must be equal with Input(Y)'s height in BmmOp,"
-            "but receive X's width: [%s],"
-            "Y's height: [%s].",
-            x_dims[2],
-            y_dims[1]));
-
-    std::vector<int64_t> dim_out;
-    dim_out.push_back(x_dims[0]);
-    dim_out.push_back(x_dims[1]);
-    dim_out.push_back(y_dims[2]);
-    ctx->SetOutputDim("Out", phi::make_ddim(dim_out));
-    ctx->ShareLoD("X", /*->*/ "Out");
-  }
-
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
@@ -110,33 +59,6 @@ class BmmOpGrad : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
  protected:
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("X"),
-        true,
-        platform::errors::NotFound("Input(X) of BmmOp should not be null"));
-    PADDLE_ENFORCE_EQ(
-        ctx->HasInput("Y"),
-        true,
-        platform::errors::NotFound("Input(Y) of BmmOp should not be null"));
-    PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")),
-                      true,
-                      platform::errors::NotFound(
-                          "Output(Out@GRAD) of BmmOp should not be null."));
-
-    auto x_dims = ctx->GetInputDim("X");
-    auto y_dims = ctx->GetInputDim("Y");
-
-    auto x_grad_name = framework::GradVarName("X");
-    auto y_grad_name = framework::GradVarName("Y");
-
-    if (ctx->HasOutput(x_grad_name)) {
-      ctx->SetOutputDim(x_grad_name, x_dims);
-    }
-    if (ctx->HasOutput(y_grad_name)) {
-      ctx->SetOutputDim(y_grad_name, y_dims);
-    }
-  }
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
@@ -166,15 +88,16 @@ class BmmOpGradMaker : public framework::SingleGradOpMaker<T> {
 
 namespace ops = paddle::operators;
 
+DECLARE_INFER_SHAPE_FUNCTOR(bmm,
+                            BmmInferShapeFunctor,
+                            PD_INFER_META(phi::BmmInferMeta));
+DECLARE_INFER_SHAPE_FUNCTOR(bmm_grad,
+                            BmmGradInferShapeFunctor,
+                            PD_INFER_META(phi::BmmGradInferMeta));
 REGISTER_OPERATOR(bmm,
                   ops::BmmOp,
                   ops::BmmOpMaker,
                   ops::BmmOpGradMaker<paddle::framework::OpDesc>,
-                  ops::BmmOpGradMaker<paddle::imperative::OpBase>);
-REGISTER_OPERATOR(bmm_grad, ops::BmmOpGrad);
-REGISTER_OP_CPU_KERNEL(bmm,
-                       ops::BmmKernel<phi::CPUContext, float>,
-                       ops::BmmKernel<phi::CPUContext, double>);
-REGISTER_OP_CPU_KERNEL(bmm_grad,
-                       ops::BmmGradKernel<phi::CPUContext, float>,
-                       ops::BmmGradKernel<phi::CPUContext, double>);
+                  ops::BmmOpGradMaker<paddle::imperative::OpBase>,
+                  BmmInferShapeFunctor);
+REGISTER_OPERATOR(bmm_grad, ops::BmmOpGrad, BmmGradInferShapeFunctor);
diff --git a/paddle/fluid/operators/bmm_op.cu b/paddle/fluid/operators/bmm_op.cu
diff --git a/paddle/fluid/operators/bmm_op.h b/paddle/fluid/operators/bmm_op.h
@@ -58,95 +58,6 @@ static void ReshapeXYOutIntoMatrixSequence(framework::Tensor *x,
   ReshapeTensorIntoMatrixSequence(y, mat_dim_y);
 }
 
-template <typename DeviceContext, typename T>
-class BmmKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &context) const override {
-    const Tensor &x = *context.Input<Tensor>("X");
-    const Tensor &y = *context.Input<Tensor>("Y");
-    Tensor *out = context.Output<Tensor>("Out");
-    out->mutable_data<T>(context.GetPlace());
-
-    if (x.numel() == 0 || y.numel() == 0) {
-      return;
-    }
-
-    auto blas = phi::funcs::GetBlas<DeviceContext, T>(context);
-
-    auto mat_dim_a = phi::funcs::CreateMatrixDescriptor(x.dims(), 0, false);
-    auto mat_dim_b = phi::funcs::CreateMatrixDescriptor(y.dims(), 0, false);
-
-    // auto scale = static_cast<T>(context.Attr<float>("alpha"));
-    blas.MatMul(x, mat_dim_a, y, mat_dim_b, T(1), out, T(0));
-  }
-};
-
-template <typename DeviceContext, typename T>
-class BmmGradKernel : public framework::OpKernel<T> {
- public:
-  void MatMul(const framework::ExecutionContext &context,
-              const framework::Tensor &a,
-              bool trans_a,
-              const framework::Tensor &b,
-              bool trans_b,
-              framework::Tensor *out) const {
-    out->mutable_data<T>(context.GetPlace());
-    auto blas = phi::funcs::GetBlas<DeviceContext, T>(context);
-    auto mat_dim_a = phi::funcs::CreateMatrixDescriptor(a.dims(), 0, trans_a);
-    auto mat_dim_b = phi::funcs::CreateMatrixDescriptor(b.dims(), 0, trans_b);
-
-    blas.MatMul(a, mat_dim_a, b, mat_dim_b, T(1), out, T(0));
-  }
-  void CalcInputGrad(const framework::ExecutionContext &context,
-                     const framework::Tensor &a,
-                     bool trans_a,
-                     const framework::Tensor &b,
-                     bool trans_b,
-                     framework::Tensor *out) const {
-    if (out == nullptr) return;
-    MatMul(context, a, trans_a, b, trans_b, out);
-  }
-  void Compute(const framework::ExecutionContext &context) const override {
-    auto x = *context.Input<framework::Tensor>("X");
-    auto y = *context.Input<framework::Tensor>("Y");
-    auto dout =
-        *context.Input<framework::Tensor>(framework::GradVarName("Out"));
-    auto *dx = context.Output<framework::Tensor>(framework::GradVarName("X"));
-    auto *dy = context.Output<framework::Tensor>(framework::GradVarName("Y"));
-
-    ReshapeXYOutIntoMatrixSequence(&x, &y, &dout, false, false);
-    framework::DDim dx_dims;
-    if (dx) {
-      dx_dims = dx->dims();
-      if (dx_dims != x.dims()) {
-        dx->Resize(x.dims());
-      }
-    }
-
-    framework::DDim dy_dims;
-    if (dy) {
-      dy_dims = dy->dims();
-      if (dy_dims != y.dims()) {
-        dy->Resize(y.dims());
-      }
-    }
-
-    CalcInputGrad(context, dout, false, y, true, dx);
-    CalcInputGrad(context, x, true, dout, false, dy);
-
-    if (dx) {
-      if (dx_dims != x.dims()) {
-        dx->Resize(dx_dims);
-      }
-    }
-    if (dy) {
-      if (dy_dims != y.dims()) {
-        dy->Resize(dy_dims);
-      }
-    }
-  }
-};
-
 }  // namespace operators
 }  // namespace paddle
 #endif  // PADDLE_FLUID_OPERATORS_BMM_OP_H_
diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc
@@ -73,6 +73,32 @@ void BilinearTensorProductGradInferMeta(const MetaTensor& x,
   }
 }
 
+// Infers the metadata of bmm's input gradients: each gradient takes the
+// dims and dtype of the forward input it corresponds to.
+//
+// x, y      forward inputs of bmm.
+// out_grad  gradient w.r.t. the forward output; not read here.
+// x_grad    gradient slot for x; left untouched when null.
+// y_grad    gradient slot for y; left untouched when null.
+//
+// The null checks mirror the HasOutput() guards of the fluid
+// BmmOpGrad::InferShape this function replaces, so a missing gradient
+// output does not cause a null dereference.
+void BmmGradInferMeta(const MetaTensor& x,
+                      const MetaTensor& y,
+                      const MetaTensor& out_grad,
+                      MetaTensor* x_grad,
+                      MetaTensor* y_grad) {
+  if (x_grad != nullptr) {
+    x_grad->set_dims(x.dims());
+    x_grad->set_dtype(x.dtype());
+  }
+  if (y_grad != nullptr) {
+    y_grad->set_dims(y.dims());
+    y_grad->set_dtype(y.dtype());
+  }
+}
+
 void ChannelShuffleGradInferMeta(const MetaTensor& out_grad,
                                  int groups,
                                  const std::string& data_format,

diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h
@@ -41,6 +41,12 @@ void BilinearTensorProductGradInferMeta(const MetaTensor& x,
                                         MetaTensor* dweight,
                                         MetaTensor* dbias);
 
+void BmmGradInferMeta(const MetaTensor& x,
+                      const MetaTensor& y,
+                      const MetaTensor& out_grad,
+                      MetaTensor* x_grad,
+                      MetaTensor* y_grad);
+
 void ChannelShuffleGradInferMeta(const MetaTensor& out_grad,
                                  int groups,
                                  const std::string& data_format,

diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc
@@ -260,6 +260,60 @@ void BincountInferMeta(const MetaTensor& x,
   out->share_lod(x);
 }
 
+// Infers the output metadata of bmm (batched matrix multiply).
+//
+// x and y must both be 3-D, share the same first (batch) dimension, and
+// satisfy x.dims()[2] == y.dims()[1]; otherwise InvalidArgument is raised.
+// out gets dims [x_dims[0], x_dims[1], y_dims[2]] and takes its LoD, dtype
+// and layout from x.
+void BmmInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) {
+  std::vector<int64_t> x_dims = phi::vectorize(x.dims());
+  std::vector<int64_t> y_dims = phi::vectorize(y.dims());
+  std::size_t x_ndims = x_dims.size();
+  std::size_t y_ndims = y_dims.size();
+
+  // The rank-check messages report the full shape (DDim), not just the rank.
+  PADDLE_ENFORCE_EQ(x_ndims,
+                    3,
+                    phi::errors::InvalidArgument(
+                        "Input(X) of BmmOp must be 3-dimensional in BmmOp, "
+                        "but received X's shape: [%s].",
+                        x.dims()));
+  PADDLE_ENFORCE_EQ(y_ndims,
+                    3,
+                    phi::errors::InvalidArgument(
+                        "Input(Y) of BmmOp must be 3-dimensional in BmmOp, "
+                        "but received Y's shape: [%s].",
+                        y.dims()));
+  PADDLE_ENFORCE_EQ(
+      x_dims[0],
+      y_dims[0],
+      phi::errors::InvalidArgument(
+          "Input(X) and Input(Y) must have the same batch size in BmmOp, "
+          "but received X's batch size: [%s],"
+          "Y's batch size [%s]",
+          x_dims[0],
+          y_dims[0]));
+  PADDLE_ENFORCE_EQ(
+      x_dims[2],
+      y_dims[1],
+      phi::errors::InvalidArgument(
+          "Input(X)'s width must be equal with Input(Y)'s height in BmmOp,"
+          "but receive X's width: [%s],"
+          "Y's height: [%s].",
+          x_dims[2],
+          y_dims[1]));
+
+  std::vector<int64_t> dim_out;
+  dim_out.push_back(x_dims[0]);
+  dim_out.push_back(x_dims[1]);
+  dim_out.push_back(y_dims[2]);
+  out->set_dims(phi::make_ddim(dim_out));
+  out->share_lod(x);
+  out->set_dtype(x.dtype());
+  out->set_layout(x.layout());
+}
+
 void CholeskySolveInferMeta(const MetaTensor& x,
                             const MetaTensor& y,
                             bool upper,

diff --git a/paddle/phi/infermeta/binary.h b/paddle/phi/infermeta/binary.h
@@ -60,6 +60,8 @@ void BincountInferMeta(const MetaTensor& x,
                        int minlength,
                        MetaTensor* out);
 
+void BmmInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out);
+
 void CholeskySolveInferMeta(const MetaTensor& x,
                             const MetaTensor& y,
                             bool upper,

diff --git a/paddle/phi/kernels/bmm_grad_kernel.h b/paddle/phi/kernels/bmm_grad_kernel.h
@@ -0,0 +1,29 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void BmmGradKernel(const Context& ctx,
+                   const DenseTensor& x,
+                   const DenseTensor& y,
+                   const DenseTensor& out_grad,
+                   DenseTensor* x_grad,
+                   DenseTensor* y_grad);
+
+}  // namespace phi