Commit cfd79d4

move cpu_impl of elementwise kernel to new directory
YuanRisheng committed Dec 31, 2021
1 parent 308c279 commit cfd79d4
Showing 19 changed files with 568 additions and 772 deletions.
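
For readers skimming the diff: this commit relocates the CPU elementwise helpers out of paddle/fluid/operators and paddle/pten/kernels/hybird into paddle/pten/kernels/funcs and paddle/pten/kernels/cpu, leaving thin aliases behind so existing fluid operator code keeps compiling. A minimal sketch of the forwarding pattern, assembled from the hunks below (illustrative only, not the full header):

```cpp
// Before: fluid owned the functor definition.
// template <typename T>
// struct AddFunctor {
//   inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a + b; }
// };

// After: fluid re-exports the implementation that moved into pten.
#include "paddle/pten/kernels/funcs/elementwise_functor.h"

namespace paddle {
namespace operators {

template <typename T>
using AddFunctor = pten::funcs::AddFunctor<T>;  // forwards to the moved code

}  // namespace operators
}  // namespace paddle
```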
12 changes: 9 additions & 3 deletions paddle/fluid/framework/data_device_transform_test.cu
@@ -23,6 +23,9 @@ limitations under the License. */
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h"

#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/pten/include/core.h"

namespace paddle {
namespace framework {

@@ -73,9 +76,12 @@ class TestKernel : public OpKernel<float> {
output->Resize(input->dims());
output->mutable_data<T>(ctx.GetPlace());

-    operators::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
-        input, input, output, ctx.template device_context<DeviceContext>(),
-        AddFunctor<T>());
+    auto pt_input = paddle::experimental::MakePtenDenseTensor(*input);
+    auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
+
+    pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
+        *pt_input, *pt_input, pt_out.get(),
+        ctx.template device_context<DeviceContext>(), AddFunctor<T>());
functor.Run();
}
};
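
The updated test adapts fluid Tensors into pten DenseTensor views before invoking the relocated functor. A condensed sketch of the call pattern introduced above; the free function RunAdd and its signature are hypothetical, added here only to frame the snippet:

```cpp
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/pten/include/core.h"

// Hypothetical helper condensed from the updated TestKernel::Compute above.
template <typename DeviceContext, typename T>
void RunAdd(const paddle::framework::ExecutionContext& ctx,
            const paddle::framework::Tensor* input,
            paddle::framework::Tensor* output) {
  output->Resize(input->dims());
  output->mutable_data<T>(ctx.GetPlace());

  // Wrap the fluid tensors as pten::DenseTensor views (no data copy).
  auto pt_input = paddle::experimental::MakePtenDenseTensor(*input);
  auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);

  // The transform functor now lives in pten::funcs and takes DenseTensors.
  pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
      *pt_input, *pt_input, pt_out.get(),
      ctx.template device_context<DeviceContext>(), AddFunctor<T>());
  functor.Run();  // elementwise: output[i] = input[i] + input[i]
}
```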
52 changes: 13 additions & 39 deletions paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/pten/kernels/funcs/elementwise_functor.h"

namespace paddle {
namespace operators {
@@ -25,58 +26,31 @@ namespace operators {

// Add
template <typename T>
-struct AddFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a + b; }
-};
+using AddFunctor = pten::funcs::AddFunctor<T>;

template <typename T>
-struct InverseAddFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b + a; }
-};
+using InverseAddFunctor = pten::funcs::InverseAddFunctor<T>;

// Subtract
template <typename T>
-struct SubFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a - b; }
-};
+using SubFunctor = pten::funcs::SubtractFunctor<T>;

template <typename T>
-struct InverseSubFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; }
-};
+using InverseSubFunctor = pten::funcs::InverseSubtractFunctor<T>;

// Multiply
template <typename T>
-struct MulFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
-};
+using MulFunctor = pten::funcs::MultiplyFunctor<T>;

template <typename T>
-struct InverseMulFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b * a; }
-};
+using InverseMulFunctor = pten::funcs::InverseMultiplyFunctor<T>;

// Divide
-#define DIV_ERROR_INFO                                              \
-  "InvalidArgumentError: Integer division by zero encountered in " \
-  "(floor) divide. Please check the input value."
-
-template <typename T, typename Enable = void>
-struct DivFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
-};
-
template <typename T>
-struct DivFunctor<T,
-                  typename std::enable_if<std::is_integral<T>::value>::type> {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
-    // For int32/int64, need to check whether the divison is zero.
-    PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
-    return a / b;
-  }
-};
+using DivFunctor = pten::funcs::DivideFunctor<T>;

-template <typename T, typename Enable = void>
-struct InverseDivFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; }
-};
+template <typename T>
+using InverseDivFunctor = pten::funcs::InverseDivideFunctor<T>;

// Floor Divide
template <typename T>
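
The deleted DIV_ERROR_INFO macro and the integral specialization imply that the divide-by-zero check now lives on the pten side. A hedged reconstruction of what pten::funcs::DivideFunctor presumably looks like, pieced together from the fluid code removed above (the pten header itself is not part of this diff):

```cpp
// Presumed shape of the moved functor in
// paddle/pten/kernels/funcs/elementwise_functor.h; reconstructed from the
// deleted fluid code, not copied from the actual pten header.
namespace pten {
namespace funcs {

template <typename T, typename Enable = void>
struct DivideFunctor {
  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
};

template <typename T>
struct DivideFunctor<
    T, typename std::enable_if<std::is_integral<T>::value>::type> {
  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
    // For int32/int64, the divisor must be checked against zero.
    PADDLE_ENFORCE(b != 0,
                   "InvalidArgumentError: Integer division by zero encountered "
                   "in (floor) divide. Please check the input value.");
    return a / b;
  }
};

}  // namespace funcs
}  // namespace pten
```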
89 changes: 13 additions & 76 deletions paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -31,8 +31,7 @@ limitations under the License. */

// only can include the headers in paddle/pten/include dirs
#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/kernels/hybird/cpu/elementwise.h"
#include "paddle/pten/kernels/hybird/general/elementwise_base.h"
#include "paddle/pten/kernels/cpu/elementwise_impl.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#ifdef __NVCC__
@@ -151,9 +150,9 @@ inline void GetBroadcastDimsArrays(const framework::DDim &x_dims,
int *x_dims_array, int *y_dims_array,
int *out_dims_array, const int max_dim,
const int axis) {
-  pten::general::GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array,
-                                        y_dims_array, out_dims_array, max_dim,
-                                        axis);
+  pten::funcs::GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array,
+                                      y_dims_array, out_dims_array, max_dim,
+                                      axis);
}

template <typename Functor, typename T, typename OutType = T>
@@ -1073,71 +1072,9 @@ void CommonGradBroadcastCUDA(

inline framework::DDim trim_trailing_singular_dims(
const framework::DDim &dims) {
-  return pten::general::trim_trailing_singular_dims(dims);
+  return pten::funcs::trim_trailing_singular_dims(dims);
}

-template <typename Functor, typename T, typename DeviceContext,
-          typename OutType = T>
-class TransformFunctor {
- public:
-  TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
-                   framework::Tensor *z, const DeviceContext &ctx, Functor func,
-                   const bool is_xsize_larger = true)
-      : x_(x->data<T>()),
-        y_(y->data<T>()),
-        z_(z->mutable_data<OutType>(ctx.GetPlace())),
-        nx_(x->numel()),
-        ctx_(ctx),
-        func_(func),
-        is_xsize_larger_(is_xsize_larger) {
-    if (is_xsize_larger_ == false) {
-      nx_ = y->numel();
-    }
-  }
-
-  inline void Run() const {
-    platform::Transform<DeviceContext> trans;
-    trans(ctx_, x_, x_ + nx_, y_, z_, func_);
-  }
-
-  inline void RunRowWise(int n, int pre) const {
-    platform::Transform<DeviceContext> trans;
-    if (is_xsize_larger_) {
-      trans(ctx_, x_, x_ + nx_,
-            pten::general::RowwiseTransformIterator<T, DeviceContext>(y_, n),
-            z_, func_);
-    } else {
-      trans(ctx_, y_, y_ + nx_,
-            pten::general::RowwiseTransformIterator<T, DeviceContext>(x_, n),
-            z_, func_);
-    }
-  }
-
-  inline void RunMidWise(int n, int pre, int post) const {
-    platform::Transform<DeviceContext> trans;
-    if (is_xsize_larger_) {
-      trans(ctx_, x_, x_ + nx_,
-            pten::general::MidWiseTransformIterator<T, DeviceContext>(y_, n,
-                                                                      post),
-            z_, func_);
-    } else {
-      trans(ctx_, y_, y_ + nx_,
-            pten::general::MidWiseTransformIterator<T, DeviceContext>(x_, n,
-                                                                      post),
-            z_, func_);
-    }
-  }
-
- private:
-  const T *x_;
-  const T *y_;
-  OutType *z_;
-  int64_t nx_;
-  const DeviceContext &ctx_;
-  Functor func_;
-  bool is_xsize_larger_;
-};
-
template <typename T, typename DX_OP, typename DY_OP, typename Tout = T>
struct ElemwiseGradNoBroadcast {
const T *x_;
@@ -1457,13 +1394,13 @@ void ElemwiseGradComputeWithBroadcast(
if (is_xsize_larger) {
auto y_dims_trimed = trim_trailing_singular_dims(y_dims);
axis_trim = (y_dims_trimed.size() == 0) ? x_dims.size() : axis;
-    pten::general::get_mid_dims(x_dims, y_dims_trimed, axis_trim, &pre, &n,
-                                &post, &is_run_common_broadcast);
+    pten::funcs::get_mid_dims(x_dims, y_dims_trimed, axis_trim, &pre, &n, &post,
+                              &is_run_common_broadcast);
} else {
auto x_dims_trimed = trim_trailing_singular_dims(x_dims);
axis_trim = (x_dims_trimed.size() == 0) ? y_dims.size() : axis;
-    pten::general::get_mid_dims(y_dims, x_dims_trimed, axis_trim, &pre, &n,
-                                &post, &is_run_common_broadcast);
+    pten::funcs::get_mid_dims(y_dims, x_dims_trimed, axis_trim, &pre, &n, &post,
+                              &is_run_common_broadcast);
}
// special case for common backward implementation.
if (is_run_common_broadcast) {
@@ -1861,8 +1798,8 @@ void FusedElemwiseAndActComputeWithBroadcast(
axis = (y_dim.size() == 0) ? x_dim.size() : axis;

int pre, n, post, is_run_common_broadcast;
-  pten::general::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
-                              &is_run_common_broadcast);
+  pten::funcs::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
+                            &is_run_common_broadcast);
if (post == 1) {
int h = pre;
int w = n;
@@ -2409,8 +2346,8 @@ void FusedElemwiseAndActGradComputeWithBroadcast(
axis = (y_dim.size() == 0) ? x_dim.size() : axis;

int pre, n, post, is_run_common_broadcast;
-  pten::general::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
-                              &is_run_common_broadcast);
+  pten::funcs::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
+                            &is_run_common_broadcast);
const T *x_data = nullptr;
const T *y_data = nullptr;
if (x->IsInitialized()) x_data = x->data<T>();
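
Most of the forwarded calls above end in pten::funcs::get_mid_dims, which collapses a trimmed broadcast into (pre, n, post) extents. A hedged usage sketch with hypothetical shapes; the expected values below follow the usual Paddle broadcast semantics, since the pten implementation itself is not shown in this diff:

```cpp
// Hedged sketch; shapes are hypothetical and checks are omitted.
int pre, n, post, is_run_common_broadcast;
auto x_dims = paddle::framework::make_ddim({2, 3, 4});
auto y_dims = paddle::framework::make_ddim({3, 4});
pten::funcs::get_mid_dims(x_dims, y_dims, /*axis=*/1, &pre, &n, &post,
                          &is_run_common_broadcast);
// Expected under standard broadcast semantics: pre == 2 (dims before axis),
// n == 3 * 4 == 12 (the span y covers), post == 1 (no trailing dims), and
// is_run_common_broadcast == 0, so the fast row-wise path can run.
```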
21 changes: 0 additions & 21 deletions paddle/pten/api/lib/kernel_declare.h

This file was deleted.

16 changes: 8 additions & 8 deletions paddle/pten/infermeta/binary.cc
@@ -14,7 +14,7 @@ limitations under the License. */

// See Note [ Why still include the fluid headers? ]
#include "paddle/pten/infermeta/binary.h"
#include "paddle/pten/kernels/hybird/general/elementwise_base.h"
#include "paddle/pten/kernels/funcs/elementwise_base.h"

namespace pten {

@@ -162,13 +162,13 @@ DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
std::vector<int> x_dims_array(max_dim);
std::vector<int> y_dims_array(max_dim);
std::vector<int> out_dims_array(max_dim);
-    general::GetBroadcastDimsArrays(x_dims,
-                                    y_dims,
-                                    x_dims_array.data(),
-                                    y_dims_array.data(),
-                                    out_dims_array.data(),
-                                    max_dim,
-                                    axis);
+    funcs::GetBroadcastDimsArrays(x_dims,
+                                  y_dims,
+                                  x_dims_array.data(),
+                                  y_dims_array.data(),
+                                  out_dims_array.data(),
+                                  max_dim,
+                                  axis);
return_meta.dims = paddle::framework::make_ddim(out_dims_array);
}
return_meta.lod = x_meta.lod;
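
For context on the renamed helper: under the usual broadcast rules, funcs::GetBroadcastDimsArrays aligns y's dimensions starting at axis, pads the remaining slots with ones, and takes the elementwise maximum (each pair of dims must match or be 1). A hedged sketch of the expected arrays, with hypothetical shapes not taken from this commit:

```cpp
// Hedged sketch; shapes are hypothetical and error handling is omitted.
int max_dim = 3, axis = 1;
std::vector<int> x_dims_array(max_dim), y_dims_array(max_dim),
    out_dims_array(max_dim);
funcs::GetBroadcastDimsArrays(paddle::framework::make_ddim({2, 3, 4}),  // x
                              paddle::framework::make_ddim({3, 1}),     // y
                              x_dims_array.data(), y_dims_array.data(),
                              out_dims_array.data(), max_dim, axis);
// Expected: x_dims_array = {2, 3, 4}, y_dims_array = {1, 3, 1}, and
// out_dims_array = {2, 3, 4} -- the broadcast shape that
// ElementwiseInferMeta stores via make_ddim(out_dims_array).
```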
1 comment on commit cfd79d4

@paddle-bot-old

Congratulations! Your pull request passed all required CI. You can ask the reviewer(s) to approve and merge. 🎉
