[XPU][PHI Kernels] bind rsqrt, bitwise_or, arange_tensor for xpu (PaddlePaddle#58950)

* bind kernels

* add tests

* bugfix
lj970926 authored and SecretXV committed Nov 28, 2023
1 parent 6ba9386 commit b8c8410
Showing 8 changed files with 118 additions and 22 deletions.
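Note on scope: this change registers three more PHI kernels for XPU in both the KL2 and KL3 op lists (rsqrt plus its gradient, bitwise_or for bool, and arange_tensor), rewrites the XPU arange kernel to generate its sequence on-device, and fixes a dtype cast in embedding_grad. A minimal sketch of what the bindings expose at the Python level; the device setup is an assumption and needs an XPU-enabled Paddle build:

import paddle

paddle.set_device("xpu")  # assumption: XPU-enabled build

x = paddle.to_tensor([1.0, 4.0, 16.0])
print(paddle.rsqrt(x))  # 1/sqrt(x) -> [1.0, 0.5, 0.25]

a = paddle.to_tensor([True, False, True])
b = paddle.to_tensor([False, False, True])
print(paddle.bitwise_or(a, b))  # bool is the only dtype bound below

# Tensor-valued start/end/step dispatch to the arange_tensor kernel.
start, end, step = paddle.to_tensor(0), paddle.to_tensor(10), paddle.to_tensor(2)
print(paddle.arange(start, end, step))  # [0, 2, 4, 6, 8]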
7 changes: 7 additions & 0 deletions paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -43,6 +43,10 @@ XPUOpMap& get_kl2_ops() {
{"adagrad", XPUKernelSet({phi::DataType::FLOAT32})},
{"addcmul_xpu",
XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
{"arange_tensor",
XPUKernelSet({phi::DataType::FLOAT32,
phi::DataType::INT32,
phi::DataType::INT64})},
{"arg_max",
XPUKernelSet({phi::DataType::INT32,
phi::DataType::FLOAT32,
@@ -98,6 +102,7 @@ XPUOpMap& get_kl2_ops() {
{"bilinear_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"bitwise_not", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_and", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_or", XPUKernelSet({phi::DataType::BOOL})},
{"broadcast", XPUKernelSet({phi::DataType::FLOAT32})},
{"c_allgather",
XPUKernelSet({phi::DataType::FLOAT16,
@@ -717,6 +722,8 @@ XPUOpMap& get_kl2_ops() {
{"roi_align_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"roll", XPUKernelSet({phi::DataType::FLOAT32})},
{"roll_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"rsqrt", XPUKernelSet({phi::DataType::FLOAT32})},
{"rsqrt_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"scale",
XPUKernelSet({phi::DataType::FLOAT32,
phi::DataType::FLOAT16,
7 changes: 7 additions & 0 deletions paddle/phi/backends/xpu/xpu3_op_list.cc
@@ -38,6 +38,10 @@ XPUOpMap& get_kl3_ops() {
{"adagrad", XPUKernelSet({phi::DataType::FLOAT32})},
{"addcmul_xpu",
XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
{"arange_tensor",
XPUKernelSet({phi::DataType::FLOAT32,
phi::DataType::INT32,
phi::DataType::INT64})},
{"arg_max",
XPUKernelSet({phi::DataType::INT32,
phi::DataType::FLOAT32,
@@ -92,6 +96,7 @@ XPUOpMap& get_kl3_ops() {
{"bilinear_interp_v2_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"bitwise_not", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_and", XPUKernelSet({phi::DataType::BOOL})},
{"bitwise_or", XPUKernelSet({phi::DataType::BOOL})},
{"broadcast", XPUKernelSet({phi::DataType::FLOAT32})},
{"c_allgather",
XPUKernelSet({phi::DataType::FLOAT16,
@@ -677,6 +682,8 @@ XPUOpMap& get_kl3_ops() {
{"roi_align_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"roll", XPUKernelSet({phi::DataType::FLOAT32})},
{"roll_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"rsqrt", XPUKernelSet({phi::DataType::FLOAT32})},
{"rsqrt_grad", XPUKernelSet({phi::DataType::FLOAT32})},
{"scale",
XPUKernelSet({phi::DataType::FLOAT32,
phi::DataType::FLOAT16,
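The same entries go into both xpu2_op_list.cc and xpu3_op_list.cc because each XPU generation (KL2, KL3) keeps its own capability table; an op is only considered for XPU placement when its name and dtype appear there. The test harness reads these tables through get_xpu_op_support_types, which the test file below uses verbatim; a sketch of that lookup, with the import path being an assumption:

from get_test_cover_info import get_xpu_op_support_types  # import path assumed

print(get_xpu_op_support_types('rsqrt'))          # expect: float32 only
print(get_xpu_op_support_types('bitwise_or'))     # expect: bool only
print(get_xpu_op_support_types('arange_tensor'))  # expect: float32/int32/int64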
17 changes: 17 additions & 0 deletions paddle/phi/kernels/xpu/activation_grad_kernel.cc
@@ -611,13 +611,29 @@ struct XPUCosGradFunctor : public funcs::BaseActivationFunctor<T> {
}
};

+template <typename T>
+struct XPURsqrtGradFunctor : public funcs::BaseActivationFunctor<T> {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  template <typename Context>
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor* x,
+                  const DenseTensor* out,
+                  const DenseTensor* dout,
+                  DenseTensor* dx) const {
+    int r = xpu_activation_backward<Context, T, XPUType>(
+        dev_ctx, x, out, dout, dx, xpu::rsqrt_grad<XPUType>);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "rsqrt_grad");
+  }
+};

DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Exp, XPUExpGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Reciprocal, XPUReciprocalGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sigmoid, XPUSigmoidGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Sqrt, XPUSqrtGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, XPUTanhGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, XPUReluGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu6, XPURelu6GradFunctor);
+DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Rsqrt, XPURsqrtGradFunctor);

DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, XPULogGradFunctor);
DEFINE_XPU_ACTIVATION_GRAD_KERNEL_DEPX(Square, XPUSquareGradFunctor);
@@ -721,5 +737,6 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(softplus_grad, SoftplusGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sin_grad, SinGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(cos_grad, CosGradKernel)
+PD_REGISTER_ACTIVATION_GRAD_KERNEL(rsqrt_grad, RsqrtGradKernel)

PD_REGISTER_KERNEL(pow_grad, XPU, ALL_LAYOUT, phi::PowGradKernel, float) {}
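Why Rsqrt uses the DEPOUT macro: like the Sqrt and Sigmoid registrations above it, its derivative can be written in terms of the forward output alone. With y = x^(-1/2),

    dy/dx = -(1/2) * x^(-3/2) = -(1/2) * y^3

so backward only needs Out and dOut (dx = -0.5 * out^3 * dout). The functor signature still carries an X pointer, but the DEPOUT variant is what marks the grad kernel as depending on Out rather than X.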
15 changes: 15 additions & 0 deletions paddle/phi/kernels/xpu/activation_kernel.cc
@@ -514,6 +514,19 @@ struct XPUCosFunctor : public funcs::BaseActivationFunctor<T> {
}
};

+template <typename T>
+struct XPURsqrtFunctor : public funcs::BaseActivationFunctor<T> {
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  template <typename Context>
+  void operator()(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  DenseTensor* out) const {
+    int ret = xpu_activation_func<Context, T, XPUType>(
+        dev_ctx, x, out, xpu::rsqrt<XPUType>);
+    PADDLE_ENFORCE_XDNN_SUCCESS(ret, "rsqrt");
+  }
+};

DEFINE_XPU_ACTIVATION_KERNEL(Exp, XPUExpFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Floor, XPUFloorFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Log, XPULogFunctor)
@@ -526,6 +539,7 @@ DEFINE_XPU_ACTIVATION_KERNEL(Tanh, XPUTanhFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Silu, XPUSiluFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Sin, XPUSinFunctor)
DEFINE_XPU_ACTIVATION_KERNEL(Cos, XPUCosFunctor)
+DEFINE_XPU_ACTIVATION_KERNEL(Rsqrt, XPURsqrtFunctor)

DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(Mish, XPUMishFunctor, threshold)
DEFINE_XPU_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu,
@@ -617,3 +631,4 @@ PD_REGISTER_ACTIVATION_KERNEL(reciprocal, ReciprocalKernel)
PD_REGISTER_ACTIVATION_KERNEL(softplus, SoftplusKernel)
PD_REGISTER_ACTIVATION_KERNEL(sin, SinKernel)
PD_REGISTER_ACTIVATION_KERNEL(cos, CosKernel)
+PD_REGISTER_ACTIVATION_KERNEL(rsqrt, RsqrtKernel)
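A quick numerical check of the new forward/backward pair, as a sketch that assumes an XPU device is visible (the same assertions pass on CPU, which is a convenient way to validate the math first):

import numpy as np
import paddle

x_np = np.random.uniform(0.01, 4, [11, 17]).astype("float32")
x = paddle.to_tensor(x_np, stop_gradient=False)

y = paddle.rsqrt(x)
y.sum().backward()

np.testing.assert_allclose(y.numpy(), x_np ** -0.5, rtol=1e-5)
np.testing.assert_allclose(x.grad.numpy(), -0.5 * x_np ** -1.5, rtol=1e-4)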
20 changes: 6 additions & 14 deletions paddle/phi/kernels/xpu/arange_kernel.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/kernels/arange_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"

#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/range_function.h"
@@ -32,19 +33,11 @@ void ArangeTensorKernel(const Context& dev_ctx,
int64_t size = 0;
phi::funcs::GetSize(start_value, end_value, step_value, &size);
out->Resize(phi::make_ddim({size}));
-dev_ctx.template Alloc<T>(out);
-
-DenseTensor out_cpu;
-out_cpu.Resize({out->numel()});
-dev_ctx.template HostAlloc<T>(&out_cpu);
-T* out_cpu_data = out_cpu.data<T>();
-
-T value = start_value;
-for (int64_t i = 0; i < size; ++i) {
-  out_cpu_data[i] = value;
-  value += step_value;
-}
-phi::Copy(dev_ctx, out_cpu, out->place(), true, out);
+auto* out_data = dev_ctx.template Alloc<T>(out);
+
+int ret = xpu::range<T>(
+    dev_ctx.x_context(), out_data, start_value, step_value, size);
+PADDLE_ENFORCE_XDNN_SUCCESS(ret, "range");
}

} // namespace phi
@@ -54,7 +47,6 @@ PD_REGISTER_KERNEL(arange_tensor,
ALL_LAYOUT,
phi::ArangeTensorKernel,
float,
-double,
int,
int64_t) {
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
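Two changes land in this file: the kernel now writes the sequence directly on the device with a single xpu::range call instead of filling a host buffer element by element and copying it over (saving a host-to-device round trip per call), and double is dropped from the registration to match the dtypes declared in the op lists. The semantics are unchanged; a reference model of what xpu::range produces:

import numpy as np

def range_ref(start, step, size):
    # out[i] = start + i * step, the same sequence the removed CPU loop built
    return start + step * np.arange(size)

print(range_ref(0.0, 2.0, 5))  # [0. 2. 4. 6. 8.]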
10 changes: 10 additions & 0 deletions paddle/phi/kernels/xpu/bitwise.cc
@@ -44,7 +44,17 @@ void BitwiseAndKernel(const Context& ctx,
// counterpart. Need to be changed when adding support to other types.
LogicalAndKernel<T, Context>(ctx, x, y, out);
}

+template <typename T, typename Context>
+void BitwiseOrKernel(const Context& ctx,
+                     const DenseTensor& x,
+                     const DenseTensor& y,
+                     DenseTensor* out) {
+  // Same reasoning as bitwise_and: for bool, bitwise OR coincides with
+  // logical OR, so the logical kernel can be reused.
+  LogicalOrKernel<T, Context>(ctx, x, y, out);
+}
} // namespace phi

PD_REGISTER_KERNEL(bitwise_not, XPU, ALL_LAYOUT, phi::BitwiseNotKernel, bool) {}
PD_REGISTER_KERNEL(bitwise_and, XPU, ALL_LAYOUT, phi::BitwiseAndKernel, bool) {}
+PD_REGISTER_KERNEL(bitwise_or, XPU, ALL_LAYOUT, phi::BitwiseOrKernel, bool) {}
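Since these kernels are registered for bool only, bitwise OR and logical OR are the same element-wise operation, which is why BitwiseOrKernel forwards to LogicalOrKernel exactly as bitwise_and already does. A sketch of that equivalence at the API level (XPU device assumed):

import paddle

paddle.set_device("xpu")  # assumption: XPU-enabled build
a = paddle.to_tensor([True, True, False, False])
b = paddle.to_tensor([True, False, True, False])

# Element-wise identical for bool inputs.
assert bool((paddle.bitwise_or(a, b) == paddle.logical_or(a, b)).all())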
18 changes: 10 additions & 8 deletions paddle/phi/kernels/xpu/embedding_grad_kernel.cc
@@ -28,6 +28,7 @@ void EmbeddingGradKernel(const Context& ctx,
const DenseTensor& out_grad,
int64_t padding_idx,
DenseTensor* weight_grad) {
+using XPUT = typename XPUTypeTrait<T>::Type;
DDim table_dim;
table_dim = weight.dims();

@@ -62,14 +63,15 @@
int ym = static_cast<int>(ids_numel);
int n = d_table_t->dims()[1];

-int r = xpu::embedding_grad<T, int64_t>(dev_ctx.x_context(),
-                                        d_output_data,
-                                        ids_data,
-                                        d_table_data,
-                                        xm,
-                                        n,
-                                        ym,
-                                        padding_idx);
+int r = xpu::embedding_grad<XPUT, int64_t>(
+    dev_ctx.x_context(),
+    reinterpret_cast<const XPUT*>(d_output_data),
+    ids_data,
+    reinterpret_cast<XPUT*>(d_table_data),
+    xm,
+    n,
+    ym,
+    padding_idx);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "embedding_grad");
}

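This hunk is the likely home of the "bugfix" bullet from the commit message: the call previously instantiated xpu::embedding_grad<T, int64_t> with the framework-side type T, which breaks for types whose on-device representation differs from the framework type (float16 is the usual case); routing through XPUTypeTrait<T>::Type with reinterpret_cast fixes the instantiation and is a no-op for float32. A sketch of the path this code serves (whether float16 is registered for this kernel is not visible in this hunk, so the example sticks to the default dtype):

import paddle

paddle.set_device("xpu")  # assumption: XPU-enabled build
emb = paddle.nn.Embedding(10, 4)
ids = paddle.to_tensor([1, 3, 3, 7])

emb(ids).sum().backward()     # backward dispatches to the kernel above
print(emb.weight.grad.shape)  # [10, 4]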
46 changes: 46 additions & 0 deletions test/xpu/test_activation_op_xpu.py
@@ -1373,5 +1373,51 @@ def init_config(self):
for stype in support_types:
create_test_class(globals(), XPUTestCosOP, stype)


+class XPUTestRsqrtOP(XPUOpTestWrapper):
+    def __init__(self):
+        self.op_name = "rsqrt"
+        self.use_dynamic_create_class = False
+
+    class XPUTestRsqrtBase(TestActivationOPBase):
+        def set_case(self):
+            self.op_type = "rsqrt"
+            self.dtype = self.in_type
+            self.init_config()
+            out = np.reciprocal(np.sqrt(self.x))
+
+            self.inputs = {'X': self.x}
+            self.outputs = {'Out': out}
+            self.attrs = {'use_xpu': True}
+
+        def init_config(self):
+            self.x = np.random.uniform(0.01, 4, [11, 17]).astype(self.dtype)
+
+    class XPUTestRsqrt_ZeroDim(XPUTestRsqrtBase):
+        def init_config(self):
+            self.x = np.random.uniform(0.01, 4, []).astype(self.dtype)
+
+    class XPUTestRsqrt2(XPUTestRsqrtBase):
+        def init_config(self):
+            self.x = np.random.uniform(0.01, 4, [1024, 8]).astype(self.dtype)
+
+    class XPUTestRsqrt3(XPUTestRsqrtBase):
+        def init_config(self):
+            self.x = np.random.uniform(0.01, 4, [4, 512, 15, 15]).astype(
+                self.dtype
+            )
+
+    class XPUTestRsqrt4(XPUTestRsqrtBase):
+        def init_config(self):
+            self.x = np.random.uniform(0.01, 4, [4, 256, 22, 22]).astype(
+                self.dtype
+            )
+
+
+support_types = get_xpu_op_support_types('rsqrt')
+for stype in support_types:
+    create_test_class(globals(), XPUTestRsqrtOP, stype)


if __name__ == "__main__":
unittest.main()

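The tests follow the stock XPU activation-test pattern: create_test_class stamps out one concrete test class per dtype returned by get_xpu_op_support_types('rsqrt') (float32, per the op lists above), covering 0-d through 4-D inputs. Inputs are drawn from [0.01, 4) to stay clear of x <= 0, where rsqrt is undefined, and the reference np.reciprocal(np.sqrt(x)) is exactly x ** -0.5; a one-line check of that identity:

import numpy as np

x = np.random.uniform(0.01, 4, [11, 17]).astype("float32")
np.testing.assert_allclose(np.reciprocal(np.sqrt(x)), x ** -0.5, rtol=1e-5)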