[Pten] Move CPU implementation of elementwise kernel to a new directory #38651

Merged
12 changes: 9 additions & 3 deletions paddle/fluid/framework/data_device_transform_test.cu
@@ -23,6 +23,9 @@ limitations under the License. */
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h"

#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/pten/include/core.h"

namespace paddle {
namespace framework {

@@ -73,9 +76,12 @@ class TestKernel : public OpKernel<float> {
output->Resize(input->dims());
output->mutable_data<T>(ctx.GetPlace());

operators::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
input, input, output, ctx.template device_context<DeviceContext>(),
AddFunctor<T>());
auto pt_input = paddle::experimental::MakePtenDenseTensor(*input);
auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);

pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
*pt_input, *pt_input, pt_out.get(),
ctx.template device_context<DeviceContext>(), AddFunctor<T>());
functor.Run();
}
};
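For context, here is a minimal sketch of the call pattern this test now exercises: the fluid tensors are wrapped as `pten::DenseTensor` via `MakePtenDenseTensor` and passed to `pten::funcs::TransformFunctor`. The helper function name and the assumption that the functor is declared in `funcs/elementwise_base.h` after this move are illustrative, not part of the PR.

```cpp
// Sketch only; assumes a Paddle build environment. The include set mirrors
// the headers used by this PR; the exact header declaring
// pten::funcs::TransformFunctor is assumed, not confirmed by this diff.
#include "paddle/fluid/framework/pten_utils.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/kernels/funcs/elementwise_base.h"
#include "paddle/pten/kernels/funcs/elementwise_functor.h"

template <typename T, typename DeviceContext>
void AddWithPtenFunctor(const paddle::framework::Tensor& x,
                        const paddle::framework::Tensor& y,
                        paddle::framework::Tensor* out,
                        const DeviceContext& dev_ctx) {
  out->Resize(x.dims());
  out->mutable_data<T>(dev_ctx.GetPlace());

  // Wrap the fluid tensors as pten::DenseTensor views (no data copy).
  auto pt_x = paddle::experimental::MakePtenDenseTensor(x);
  auto pt_y = paddle::experimental::MakePtenDenseTensor(y);
  auto pt_out = paddle::experimental::MakePtenDenseTensor(*out);

  // The transform functor now lives in pten::funcs and consumes DenseTensors.
  pten::funcs::TransformFunctor<pten::funcs::AddFunctor<T>, T, DeviceContext>
      functor(*pt_x, *pt_y, pt_out.get(), dev_ctx,
              pten::funcs::AddFunctor<T>());
  functor.Run();
}
```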
52 changes: 13 additions & 39 deletions paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/pten/kernels/funcs/elementwise_functor.h"

namespace paddle {
namespace operators {
@@ -25,58 +26,31 @@ namespace operators {

// Add
template <typename T>
struct AddFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a + b; }
};
using AddFunctor = pten::funcs::AddFunctor<T>;

template <typename T>
struct InverseAddFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b + a; }
};
using InverseAddFunctor = pten::funcs::InverseAddFunctor<T>;

// Subtract
template <typename T>
struct SubFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a - b; }
};
using SubFunctor = pten::funcs::SubtractFunctor<T>;

template <typename T>
struct InverseSubFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; }
};
using InverseSubFunctor = pten::funcs::InverseSubtractFunctor<T>;

// Multiply
template <typename T>
struct MulFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
};
using MulFunctor = pten::funcs::MultiplyFunctor<T>;

template <typename T>
struct InverseMulFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b * a; }
};
using InverseMulFunctor = pten::funcs::InverseMultiplyFunctor<T>;

// Divide
#define DIV_ERROR_INFO \
"InvalidArgumentError: Integer division by zero encountered in " \
"(floor) divide. Please check the input value."

template <typename T, typename Enable = void>
struct DivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
};

template <typename T>
struct DivFunctor<T,
typename std::enable_if<std::is_integral<T>::value>::type> {
inline HOSTDEVICE T operator()(const T& a, const T& b) const {
// For int32/int64, need to check whether the divison is zero.
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return a / b;
}
};
using DivFunctor = pten::funcs::DivideFunctor<T>;

template <typename T, typename Enable = void>
struct InverseDivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; }
};
template <typename T>
using InverseDivFunctor = pten::funcs::InverseDivideFunctor<T>;

// Floor Divide
template <typename T>
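As a hedged usage sketch: existing fluid call sites keep the same syntax, since the old functor names now simply forward to the pten::funcs implementations. The divide-by-zero behavior noted in the comments is what the removed fluid `DivFunctor` enforced and what `pten::funcs::DivideFunctor` is assumed to preserve.

```cpp
// Sketch only; assumes the Paddle headers are on the include path.
#include "paddle/fluid/operators/elementwise/elementwise_functor.h"

void FunctorAliasExample() {
  // AddFunctor<T> is now an alias of pten::funcs::AddFunctor<T>, so the call
  // syntax used throughout the fluid elementwise operators is unchanged.
  paddle::operators::AddFunctor<float> add;
  float sum = add(1.5f, 2.5f);  // 4.0f

  // SubFunctor forwards to pten::funcs::SubtractFunctor.
  paddle::operators::SubFunctor<float> sub;
  float diff = sub(1.5f, 2.5f);  // -1.0f

  // DivFunctor maps to pten::funcs::DivideFunctor; for integral types the old
  // fluid functor raised on division by zero, and the pten version is assumed
  // to keep that check.
  paddle::operators::DivFunctor<int> div;
  int quotient = div(7, 2);  // 3 (integer division)

  (void)sum;
  (void)diff;
  (void)quotient;
}
```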
89 changes: 13 additions & 76 deletions paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -31,8 +31,7 @@ limitations under the License. */

// only can include the headers in paddle/pten/include dirs
#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/kernels/hybird/cpu/elementwise.h"
#include "paddle/pten/kernels/hybird/general/elementwise_base.h"
#include "paddle/pten/kernels/cpu/elementwise_impl.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#ifdef __NVCC__
@@ -151,9 +150,9 @@ inline void GetBroadcastDimsArrays(const framework::DDim &x_dims,
int *x_dims_array, int *y_dims_array,
int *out_dims_array, const int max_dim,
const int axis) {
pten::general::GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array,
y_dims_array, out_dims_array, max_dim,
axis);
pten::funcs::GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array,
y_dims_array, out_dims_array, max_dim,
axis);
}

template <typename Functor, typename T, typename OutType = T>
@@ -1073,71 +1072,9 @@ void CommonGradBroadcastCUDA(

inline framework::DDim trim_trailing_singular_dims(
const framework::DDim &dims) {
return pten::general::trim_trailing_singular_dims(dims);
return pten::funcs::trim_trailing_singular_dims(dims);
}

template <typename Functor, typename T, typename DeviceContext,
typename OutType = T>
class TransformFunctor {
public:
TransformFunctor(const framework::Tensor *x, const framework::Tensor *y,
framework::Tensor *z, const DeviceContext &ctx, Functor func,
const bool is_xsize_larger = true)
: x_(x->data<T>()),
y_(y->data<T>()),
z_(z->mutable_data<OutType>(ctx.GetPlace())),
nx_(x->numel()),
ctx_(ctx),
func_(func),
is_xsize_larger_(is_xsize_larger) {
if (is_xsize_larger_ == false) {
nx_ = y->numel();
}
}

inline void Run() const {
platform::Transform<DeviceContext> trans;
trans(ctx_, x_, x_ + nx_, y_, z_, func_);
}

inline void RunRowWise(int n, int pre) const {
platform::Transform<DeviceContext> trans;
if (is_xsize_larger_) {
trans(ctx_, x_, x_ + nx_,
pten::general::RowwiseTransformIterator<T, DeviceContext>(y_, n),
z_, func_);
} else {
trans(ctx_, y_, y_ + nx_,
pten::general::RowwiseTransformIterator<T, DeviceContext>(x_, n),
z_, func_);
}
}

inline void RunMidWise(int n, int pre, int post) const {
platform::Transform<DeviceContext> trans;
if (is_xsize_larger_) {
trans(ctx_, x_, x_ + nx_,
pten::general::MidWiseTransformIterator<T, DeviceContext>(y_, n,
post),
z_, func_);
} else {
trans(ctx_, y_, y_ + nx_,
pten::general::MidWiseTransformIterator<T, DeviceContext>(x_, n,
post),
z_, func_);
}
}

private:
const T *x_;
const T *y_;
OutType *z_;
int64_t nx_;
const DeviceContext &ctx_;
Functor func_;
bool is_xsize_larger_;
};

template <typename T, typename DX_OP, typename DY_OP, typename Tout = T>
struct ElemwiseGradNoBroadcast {
const T *x_;
@@ -1457,13 +1394,13 @@ void ElemwiseGradComputeWithBroadcast(
if (is_xsize_larger) {
auto y_dims_trimed = trim_trailing_singular_dims(y_dims);
axis_trim = (y_dims_trimed.size() == 0) ? x_dims.size() : axis;
pten::general::get_mid_dims(x_dims, y_dims_trimed, axis_trim, &pre, &n,
&post, &is_run_common_broadcast);
pten::funcs::get_mid_dims(x_dims, y_dims_trimed, axis_trim, &pre, &n, &post,
&is_run_common_broadcast);
} else {
auto x_dims_trimed = trim_trailing_singular_dims(x_dims);
axis_trim = (x_dims_trimed.size() == 0) ? y_dims.size() : axis;
pten::general::get_mid_dims(y_dims, x_dims_trimed, axis_trim, &pre, &n,
&post, &is_run_common_broadcast);
pten::funcs::get_mid_dims(y_dims, x_dims_trimed, axis_trim, &pre, &n, &post,
&is_run_common_broadcast);
}
// special case for common backward implementation.
if (is_run_common_broadcast) {
@@ -1861,8 +1798,8 @@ void FusedElemwiseAndActComputeWithBroadcast(
axis = (y_dim.size() == 0) ? x_dim.size() : axis;

int pre, n, post, is_run_common_broadcast;
pten::general::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
&is_run_common_broadcast);
pten::funcs::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
&is_run_common_broadcast);
if (post == 1) {
int h = pre;
int w = n;
@@ -2409,8 +2346,8 @@ void FusedElemwiseAndActGradComputeWithBroadcast(
axis = (y_dim.size() == 0) ? x_dim.size() : axis;

int pre, n, post, is_run_common_broadcast;
pten::general::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
&is_run_common_broadcast);
pten::funcs::get_mid_dims(x_dim, y_dim, axis, &pre, &n, &post,
&is_run_common_broadcast);
const T *x_data = nullptr;
const T *y_data = nullptr;
if (x->IsInitialized()) x_data = x->data<T>();
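To make the delegation above concrete, here is a small sketch of the pre/n/post broadcast decomposition these call sites rely on. It assumes that `get_mid_dims` and `trim_trailing_singular_dims` are declared in `funcs/elementwise_base.h`; the standalone function is illustrative only.

```cpp
// Sketch of the pre/n/post decomposition now obtained from pten::funcs,
// mirroring the call sites in ElemwiseGradComputeWithBroadcast above.
#include "paddle/fluid/framework/ddim.h"
#include "paddle/pten/kernels/funcs/elementwise_base.h"

void BroadcastLayoutExample(const paddle::framework::DDim& x_dims,
                            const paddle::framework::DDim& y_dims, int axis) {
  // Drop trailing singleton dims from y, e.g. [4, 1, 1] -> [4].
  auto y_trimmed = pten::funcs::trim_trailing_singular_dims(y_dims);
  int axis_trim = (y_trimmed.size() == 0) ? x_dims.size() : axis;

  // Split x's shape into pre * n * post, where n lines up with y_trimmed.
  // is_run_common_broadcast signals that the fast row/mid-wise paths do not
  // apply and the generic broadcast kernel must be used instead.
  int pre, n, post, is_run_common_broadcast;
  pten::funcs::get_mid_dims(x_dims, y_trimmed, axis_trim, &pre, &n, &post,
                            &is_run_common_broadcast);
  // pre, n, post, and is_run_common_broadcast are then fed to the kernel
  // dispatch shown in the diff above.
}
```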
21 changes: 0 additions & 21 deletions paddle/pten/api/lib/kernel_declare.h

This file was deleted.

16 changes: 8 additions & 8 deletions paddle/pten/infermeta/binary.cc
@@ -14,7 +14,7 @@ limitations under the License. */

// See Note [ Why still include the fluid headers? ]
#include "paddle/pten/infermeta/binary.h"
#include "paddle/pten/kernels/hybird/general/elementwise_base.h"
#include "paddle/pten/kernels/funcs/elementwise_base.h"

namespace pten {

@@ -162,13 +162,13 @@ DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
std::vector<int> x_dims_array(max_dim);
std::vector<int> y_dims_array(max_dim);
std::vector<int> out_dims_array(max_dim);
general::GetBroadcastDimsArrays(x_dims,
y_dims,
x_dims_array.data(),
y_dims_array.data(),
out_dims_array.data(),
max_dim,
axis);
funcs::GetBroadcastDimsArrays(x_dims,
y_dims,
x_dims_array.data(),
y_dims_array.data(),
out_dims_array.data(),
max_dim,
axis);
return_meta.dims = paddle::framework::make_ddim(out_dims_array);
}
return_meta.lod = x_meta.lod;
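Finally, a short sketch of the broadcast-shape computation that `ElementwiseInferMeta` performs with the relocated helper. The standalone function name is illustrative; the include for `pten::funcs::GetBroadcastDimsArrays` is the one this PR adds to binary.cc.

```cpp
// Sketch only; mirrors the ElementwiseInferMeta call site above.
#include <algorithm>
#include <vector>

#include "paddle/fluid/framework/ddim.h"
#include "paddle/pten/kernels/funcs/elementwise_base.h"

paddle::framework::DDim BroadcastedDims(const paddle::framework::DDim& x_dims,
                                        const paddle::framework::DDim& y_dims,
                                        int axis) {
  int max_dim = std::max(x_dims.size(), y_dims.size());
  std::vector<int> x_dims_array(max_dim);
  std::vector<int> y_dims_array(max_dim);
  std::vector<int> out_dims_array(max_dim);
  // Aligns both shapes to max_dim entries and writes the broadcasted shape
  // into out_dims_array, checking compatibility along the way.
  pten::funcs::GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
                                      y_dims_array.data(),
                                      out_dims_array.data(), max_dim, axis);
  return paddle::framework::make_ddim(out_dims_array);
}
```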