Commit cfd79d4

move cpu_impl of elementwise kernel to new directory
YuanRisheng committed Dec 31, 2021
1 parent 308c279 commit cfd79d4
Showing 19 changed files with 568 additions and 772 deletions.
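
For readers skimming the diff: this commit relocates the CPU elementwise helpers out of paddle/fluid/operators and paddle/pten/kernels/hybird into paddle/pten/kernels/funcs and paddle/pten/kernels/cpu, leaving thin aliases behind so existing fluid operator code keeps compiling. A minimal sketch of the forwarding pattern, assembled from the hunks below (illustrative only, not the full header):

```cpp
// Before: fluid owned the functor definition.
// template <typename T>
// struct AddFunctor {
//   inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a + b; }
// };

// After: fluid re-exports the implementation that moved into pten.
#include "paddle/pten/kernels/funcs/elementwise_functor.h"

namespace paddle {
namespace operators {

template <typename T>
using AddFunctor = pten::funcs::AddFunctor<T>;  // forwards to the moved code

}  // namespace operators
}  // namespace paddle
```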
12 changes: 9 additions & 3 deletions paddle/fluid/framework/data_device_transform_test.cu
@@ -23,6 +23,9 @@ limitations under the License. */
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h"

#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/pten/include/core.h"

namespace paddle {
namespace framework {

@@ -73,9 +76,12 @@ class TestKernel : public OpKernel<float> {
output->Resize(input->dims());
output->mutable_data<T>(ctx.GetPlace());

-    operators::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
-        input, input, output, ctx.template device_context<DeviceContext>(),
-        AddFunctor<T>());
+    auto pt_input = paddle::experimental::MakePtenDenseTensor(*input);
+    auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
+
+    pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
+        *pt_input, *pt_input, pt_out.get(),
+        ctx.template device_context<DeviceContext>(), AddFunctor<T>());
functor.Run();
}
};
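
The updated test adapts fluid Tensors into pten DenseTensor views before invoking the relocated functor. A condensed sketch of the call pattern introduced above; the free function RunAdd and its signature are hypothetical, added here only to frame the snippet:

```cpp
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/pten/include/core.h"

// Hypothetical helper condensed from the updated TestKernel::Compute above.
template <typename DeviceContext, typename T>
void RunAdd(const paddle::framework::ExecutionContext& ctx,
            const paddle::framework::Tensor* input,
            paddle::framework::Tensor* output) {
  output->Resize(input->dims());
  output->mutable_data<T>(ctx.GetPlace());

  // Wrap the fluid tensors as pten::DenseTensor views (no data copy).
  auto pt_input = paddle::experimental::MakePtenDenseTensor(*input);
  auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);

  // The transform functor now lives in pten::funcs and takes DenseTensors.
  pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
      *pt_input, *pt_input, pt_out.get(),
      ctx.template device_context<DeviceContext>(), AddFunctor<T>());
  functor.Run();  // elementwise: output[i] = input[i] + input[i]
}
```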
52 changes: 13 additions & 39 deletions paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/pten/kernels/funcs/elementwise_functor.h"

namespace paddle {
namespace operators {
@@ -25,58 +26,31 @@ namespace operators {

// Add
template <typename T>
-struct AddFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a + b; }
-};
+using AddFunctor = pten::funcs::AddFunctor<T>;

template <typename T>
-struct InverseAddFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b + a; }
-};
+using InverseAddFunctor = pten::funcs::InverseAddFunctor<T>;

// Subtract
template <typename T>
-struct SubFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a - b; }
-};
+using SubFunctor = pten::funcs::SubtractFunctor<T>;

template <typename T>
-struct InverseSubFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; }
-};
+using InverseSubFunctor = pten::funcs::InverseSubtractFunctor<T>;

// Multiply
template <typename T>
-struct MulFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
-};
+using MulFunctor = pten::funcs::MultiplyFunctor<T>;

template <typename T>
-struct InverseMulFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b * a; }
-};
+using InverseMulFunctor = pten::funcs::InverseMultiplyFunctor<T>;

// Divide
-#define DIV_ERROR_INFO                                              \
-  "InvalidArgumentError: Integer division by zero encountered in " \
-  "(floor) divide. Please check the input value."
-
-template <typename T, typename Enable = void>
-struct DivFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
-};
-
template <typename T>
-struct DivFunctor<T,
-                  typename std::enable_if<std::is_integral<T>::value>::type> {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
-    // For int32/int64, need to check whether the divison is zero.
-    PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
-    return a / b;
-  }
-};
+using DivFunctor = pten::funcs::DivideFunctor<T>;

-template <typename T, typename Enable = void>
-struct InverseDivFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; }
-};
+template <typename T>
+using InverseDivFunctor = pten::funcs::InverseDivideFunctor<T>;

// Floor Divide
template <typename T>
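
The deleted DIV_ERROR_INFO macro and the integral specialization imply that the divide-by-zero check now lives on the pten side. A hedged reconstruction of what pten::funcs::DivideFunctor presumably looks like, pieced together from the fluid code removed above (the pten header itself is not part of this diff):

```cpp
// Presumed shape of the moved functor in
// paddle/pten/kernels/funcs/elementwise_functor.h; reconstructed from the
// deleted fluid code, not copied from the actual pten header.
namespace pten {
namespace funcs {

template <typename T, typename Enable = void>
struct DivideFunctor {
  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
};

template <typename T>
struct DivideFunctor<
    T, typename std::enable_if<std::is_integral<T>::value>::type> {
  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
    // For int32/int64, the divisor must be checked against zero.
    PADDLE_ENFORCE(b != 0,
                   "InvalidArgumentError: Integer division by zero encountered "
                   "in (floor) divide. Please check the input value.");
    return a / b;
  }
};

}  // namespace funcs
}  // namespace pten
```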
89 changes: 13 additions & 76 deletions paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -31,8 +31,7 @@ limitations under the License. */

// only can include the headers in paddle/pten/include dirs
#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/kernels/hybird/cpu/elementwise.h"
#include "paddle/pten/kernels/hybird/general/elementwise_base.h"
#include "paddle/pten/kernels/cpu/elementwise_impl.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#ifdef __NVCC__
@@ -151,9 +150,9 @@ inline void GetBroadcastDimsArrays(const framework::DDim &x_dims,
int *x_dims_array, int *y_dims_array,
int *out_dims_array, const int max_dim,
const int axis) {
-  pten::general::GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array,
-                                        y_dims_array, out_dims_array, max_dim,
-                                        axis);
+  pten::funcs::GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array,
+                                      y_dims_array, out_dims_array, max_dim,
+                                      axis);
}

template <typename Functor, typename T, typename OutType = T>
@@ -1073,71 +1072,9 @@ void CommonGradBroadcastCUDA(

inline framework::DDim trim_trailing_singular_dims(
const framework::DDim &dims) {
-  return pten::general::trim_trailing_singular_dims(dims);
+  return pten::funcs::trim_trailing_singular_dims(dims);
}

-template <typename Functor, typename T, typename DeviceContext,
-          typename OutType = T>
-class TransformFunctor {
- public:
-  TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
-                   framework::Tensor *z, const DeviceContext &ctx, Functor func,
-                   const bool is_xsize_larger = true)
-      : x_(x->data<T>()),
-        y_(y->data<T>()),
-        z_(z->mutable_data<OutType>(ctx.GetPlace())),
-        nx_(x->numel()),
-        ctx_(ctx),
-        func_(func),
-        is_xsize_larger_(is_xsize_larger) {
-    if (is_xsize_larger_ == false) {
-      nx_ = y->numel();
-    }
-  }
-
-  inline void Run() const {
-    platform::Transform<DeviceContext> trans;
-    trans(ctx_, x_, x_ + nx_, y_, z_, func_);
-  }
-
-  inline void RunRowWise(int n, int pre) const {
-    platform::Transform<DeviceContext> trans;
-    if (is_xsize_larger_) {
-      trans(ctx_, x_, x_ + nx_,
-            pten::general::RowwiseTransformIterator<T, DeviceContext>(y_, n),
-            z_, func_);
-    } else {
-      trans(ctx_, y_, y_ + nx_,
-            pten::general::RowwiseTransformIterator<T, DeviceContext>(x_, n),
-            z_, func_);
-    }
-  }
-
-  inline void RunMidWise(int n, int pre, int post) const {
-    platform::Transform<DeviceContext> trans;
-    if (is_xsize_larger_) {
-      trans(ctx_, x_, x_ + nx_,
-            pten::general::MidWiseTransformIterator<T, DeviceContext>(y_, n,
-                                                                      post),
-            z_, func_);
-    } else {
-      trans(ctx_, y_, y_ + nx_,
-            pten::general::MidWiseTransformIterator<T, DeviceContext>(x_, n,
-                                                                      post),
-            z_, func_);
-    }
-  }
-
- private:
-  const T *x_;
-  const T *y_;
-  OutType *z_;
-  int64_t nx_;
-  const DeviceContext &ctx_;
-  Functor func_;
-  bool is_xsize_larger_;
-};
-
template <typename T, typename DX_OP, typename DY_OP, typename Tout = T>
struct ElemwiseGradNoBroadcast {
const T *x_;
@@ -1457,13 +1394,13 @@ void ElemwiseGradComputeWithBroadcast(
if (is_xsize_larger) {
auto y_dims_trimed = trim_trailing_singular_dims(y_dims);
axis_trim = (y_dims_trimed.size() == 0) ? x_dims.size() : axis;
-    pten::general::get_mid_dims(x_dims, y_dims_trimed, axis_trim, &pre, &n,
-                                &post, &is_run_common_broadcast);
+    pten::funcs::get_mid_dims(x_dims, y_dims_trimed, axis_trim, &pre, &n, &post,
+                              &is_run_common_broadcast);
} else {
auto x_dims_trimed = trim_trailing_singular_dims(x_dims);
axis_trim = (x_dims_trimed.size() == 0) ? y_dims.size() : axis;
-    pten::general::get_mid_dims(y_dims, x_dims_trimed, axis_trim, &pre, &n,
-                                &post, &is_run_common_broadcast);
+    pten::funcs::get_mid_dims(y_dims, x_dims_trimed, axis_trim, &pre, &n, &post,
+                              &is_run_common_broadcast);
}
// special case for common backward implementation.
if (is_run_common_broadcast) {
@@ -1861,8 +1798,8 @@ void FusedElemwiseAndActComputeWithBroadcast(
axis = (y_dim.size() == 0) ? x_dim.size() : axis;

int pre, n, post, is_run_common_broadcast;
-  pten::general::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
-                              &is_run_common_broadcast);
+  pten::funcs::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
+                            &is_run_common_broadcast);
if (post == 1) {
int h = pre;
int w = n;
@@ -2409,8 +2346,8 @@ void FusedElemwiseAndActGradComputeWithBroadcast(
axis = (y_dim.size() == 0) ? x_dim.size() : axis;

int pre, n, post, is_run_common_broadcast;
-  pten::general::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
-                              &is_run_common_broadcast);
+  pten::funcs::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
+                            &is_run_common_broadcast);
const T *x_data = nullptr;
const T *y_data = nullptr;
if (x->IsInitialized()) x_data = x->data<T>();
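
Most of the forwarded calls above end in pten::funcs::get_mid_dims, which collapses a trimmed broadcast into (pre, n, post) extents. A hedged usage sketch with hypothetical shapes; the expected values below follow the usual Paddle broadcast semantics, since the pten implementation itself is not shown in this diff:

```cpp
// Hedged sketch; shapes are hypothetical and checks are omitted.
int pre, n, post, is_run_common_broadcast;
auto x_dims = paddle::framework::make_ddim({2, 3, 4});
auto y_dims = paddle::framework::make_ddim({3, 4});
pten::funcs::get_mid_dims(x_dims, y_dims, /*axis=*/1, &pre, &n, &post,
                          &is_run_common_broadcast);
// Expected under standard broadcast semantics: pre == 2 (dims before axis),
// n == 3 * 4 == 12 (the span y covers), post == 1 (no trailing dims), and
// is_run_common_broadcast == 0, so the fast row-wise path can run.
```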
21 changes: 0 additions & 21 deletions paddle/pten/api/lib/kernel_declare.h

This file was deleted.

16 changes: 8 additions & 8 deletions paddle/pten/infermeta/binary.cc
@@ -14,7 +14,7 @@ limitations under the License. */

// See Note [ Why still include the fluid headers? ]
#include "paddle/pten/infermeta/binary.h"
#include "paddle/pten/kernels/hybird/general/elementwise_base.h"
#include "paddle/pten/kernels/funcs/elementwise_base.h"

namespace pten {

@@ -162,13 +162,13 @@ DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
std::vector<int> x_dims_array(max_dim);
std::vector<int> y_dims_array(max_dim);
std::vector<int> out_dims_array(max_dim);
-    general::GetBroadcastDimsArrays(x_dims,
-                                    y_dims,
-                                    x_dims_array.data(),
-                                    y_dims_array.data(),
-                                    out_dims_array.data(),
-                                    max_dim,
-                                    axis);
+    funcs::GetBroadcastDimsArrays(x_dims,
+                                  y_dims,
+                                  x_dims_array.data(),
+                                  y_dims_array.data(),
+                                  out_dims_array.data(),
+                                  max_dim,
+                                  axis);
return_meta.dims = paddle::framework::make_ddim(out_dims_array);
}
return_meta.lod = x_meta.lod;
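
For context on the renamed helper: under the usual broadcast rules, funcs::GetBroadcastDimsArrays aligns y's dimensions starting at axis, pads the remaining slots with ones, and takes the elementwise maximum (each pair of dims must match or be 1). A hedged sketch of the expected arrays, with hypothetical shapes not taken from this commit:

```cpp
// Hedged sketch; shapes are hypothetical and error handling is omitted.
int max_dim = 3, axis = 1;
std::vector<int> x_dims_array(max_dim), y_dims_array(max_dim),
    out_dims_array(max_dim);
funcs::GetBroadcastDimsArrays(paddle::framework::make_ddim({2, 3, 4}),  // x
                              paddle::framework::make_ddim({3, 1}),     // y
                              x_dims_array.data(), y_dims_array.data(),
                              out_dims_array.data(), max_dim, axis);
// Expected: x_dims_array = {2, 3, 4}, y_dims_array = {1, 3, 1}, and
// out_dims_array = {2, 3, 4} -- the broadcast shape that
// ElementwiseInferMeta stores via make_ddim(out_dims_array).
```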
1 comment on commit cfd79d4

@paddle-bot-old

Congratulations! Your pull request passed all required CI. You can ask the reviewer(s) to approve and merge. 🎉
