From d05b73e4ca99df5a7531f283ab09fd2aeadb10ec Mon Sep 17 00:00:00 2001 From: huangjiyi <43315610+huangjiyi@users.noreply.github.com> Date: Fri, 31 Mar 2023 19:43:25 +0800 Subject: [PATCH] register fluid kerenls to phi [part 2] (#52044) * update bipartite_match * update * fix bug * fix test * fix bug * fix Kunlun-KP-Build * Revert "fix Kunlun-KP-Build" This reverts commit ceab63cc23079fd6839c826bb52db893fb056355. * update --- paddle/fluid/operators/bpr_loss_op.cc | 16 +++++++----- paddle/fluid/operators/bpr_loss_op.h | 4 +-- .../operators/collective/c_allgather_op.cc | 21 ++++++++------- .../operators/collective/c_allgather_op.cu.cc | 26 +++++++++++-------- .../operators/collective/c_allgather_op.h | 2 +- .../collective/c_allreduce_max_op.cc | 18 ++++++++----- .../collective/c_allreduce_max_op.cu.cc | 22 +++++++++++----- .../collective/c_allreduce_min_op.cc | 17 +++++++----- .../collective/c_allreduce_min_op.cu.cc | 22 +++++++++++----- .../operators/collective/c_allreduce_op.h | 8 ++++++ .../collective/c_allreduce_prod_op.cc | 17 +++++++----- .../collective/c_allreduce_prod_op.cu.cc | 22 +++++++++++----- .../collective/c_allreduce_sum_op.cc | 17 +++++++----- .../collective/c_allreduce_sum_op.cu.cc | 25 ++++++++++++------ .../operators/detection/bipartite_match_op.cc | 12 ++++++--- .../fluid/operators/detection/box_clip_op.cc | 6 ++--- .../fluid/operators/detection/box_clip_op.cu | 8 +++--- .../fluid/operators/detection/box_clip_op.h | 2 +- .../detection/box_decoder_and_assign_op.cc | 9 ++++--- .../detection/box_decoder_and_assign_op.cu | 13 ++++++---- .../detection/box_decoder_and_assign_op.h | 2 +- .../unittests/test_bipartite_match_op.py | 10 +++---- .../fluid/tests/unittests/test_box_clip_op.py | 2 +- 23 files changed, 190 insertions(+), 111 deletions(-) diff --git a/paddle/fluid/operators/bpr_loss_op.cc b/paddle/fluid/operators/bpr_loss_op.cc index 47aea124430373..4a2928338251e1 100644 --- a/paddle/fluid/operators/bpr_loss_op.cc +++ b/paddle/fluid/operators/bpr_loss_op.cc @@ -174,7 +174,6 @@ class BprLossGradMaker : public framework::SingleGradOpMaker { } // namespace paddle namespace ops = paddle::operators; -using CPUCtx = phi::CPUContext; REGISTER_OPERATOR(bpr_loss, ops::BprLossOp, @@ -182,9 +181,12 @@ REGISTER_OPERATOR(bpr_loss, ops::BprLossGradMaker, ops::BprLossGradMaker); REGISTER_OPERATOR(bpr_loss_grad, ops::BprLossGradientOp); -REGISTER_OP_CPU_KERNEL(bpr_loss, - ops::BprLossOpKernel, - ops::BprLossOpKernel); -REGISTER_OP_CPU_KERNEL(bpr_loss_grad, - ops::BprLossGradientOpKernel, - ops::BprLossGradientOpKernel); + +PD_REGISTER_STRUCT_KERNEL( + bpr_loss, CPU, ALL_LAYOUT, ops::BprLossOpKernel, float, double) {} +PD_REGISTER_STRUCT_KERNEL(bpr_loss_grad, + CPU, + ALL_LAYOUT, + ops::BprLossGradientOpKernel, + float, + double) {} diff --git a/paddle/fluid/operators/bpr_loss_op.h b/paddle/fluid/operators/bpr_loss_op.h index 679f2687849c38..1a1bddc17c2e59 100644 --- a/paddle/fluid/operators/bpr_loss_op.h +++ b/paddle/fluid/operators/bpr_loss_op.h @@ -35,7 +35,7 @@ struct TolerableValue { } }; -template +template class BprLossOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -83,7 +83,7 @@ class BprLossOpKernel : public framework::OpKernel { } }; -template +template class BprLossGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/collective/c_allgather_op.cc b/paddle/fluid/operators/collective/c_allgather_op.cc index 83449de91017db..4e9cadbddf4865 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cc @@ -73,12 +73,15 @@ REGISTER_OP_WITHOUT_GRADIENT(c_allgather, ops::CAllGatherOp, ops::CAllGatherOpMaker); -REGISTER_OP_CPU_KERNEL(c_allgather, - ops::CAllGatherOpCPUKernel, - ops::CAllGatherOpCPUKernel, - ops::CAllGatherOpCPUKernel, - ops::CAllGatherOpCPUKernel, - ops::CAllGatherOpCPUKernel, - ops::CAllGatherOpCPUKernel, - ops::CAllGatherOpCPUKernel, - ops::CAllGatherOpCPUKernel); +PD_REGISTER_STRUCT_KERNEL(c_allgather, + CPU, + ALL_LAYOUT, + ops::CAllGatherOpCPUKernel, + float, + double, + int, + int8_t, + int64_t, + uint8_t, + bool, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index 075abc49a72baa..93be43a1a324a6 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -25,7 +25,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class CAllGatherOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -93,15 +93,19 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL(c_allgather, - ops::CAllGatherOpCUDAKernel, - ops::CAllGatherOpCUDAKernel, +PD_REGISTER_STRUCT_KERNEL(c_allgather, + GPU, + ALL_LAYOUT, + ops::CAllGatherOpCUDAKernel, + float, + double, #if NCCL_VERSION_CODE >= 21000 - ops::CAllGatherOpCUDAKernel, + plat::bfloat16, #endif - ops::CAllGatherOpCUDAKernel, - ops::CAllGatherOpCUDAKernel, - ops::CAllGatherOpCUDAKernel, - ops::CAllGatherOpCUDAKernel, - ops::CAllGatherOpCUDAKernel, - ops::CAllGatherOpCUDAKernel); + int, + uint8_t, + int8_t, + int64_t, + bool, + plat::float16) { +} diff --git a/paddle/fluid/operators/collective/c_allgather_op.h b/paddle/fluid/operators/collective/c_allgather_op.h index 198ec4009f4d37..e896f96ead5329 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.h +++ b/paddle/fluid/operators/collective/c_allgather_op.h @@ -32,7 +32,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class CAllGatherOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cc index d339a51368a1af..c47bf7025e1fd0 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cc @@ -35,6 +35,8 @@ class CAllReduceMaxOpMaker : public CAllReduceOpMaker { DECLARE_INPLACE_OP_INFERER(AllreduceMaxInplaceInferer, {"X", "Out"}); +DEFINE_C_ALLREDUCE_CPU_KERNEL(CAllReduceMax, kRedMax) + } // namespace operators } // namespace paddle @@ -45,10 +47,12 @@ REGISTER_OP_WITHOUT_GRADIENT(c_allreduce_max, ops::CAllReduceOp, ops::CAllReduceMaxOpMaker, ops::AllreduceMaxInplaceInferer) - -REGISTER_OP_CPU_KERNEL(c_allreduce_max, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel); +PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, + CPU, + ALL_LAYOUT, + ops::CAllReduceMaxCPUKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc index fb073c8173d30a..6ac228ca53dd51 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc @@ -14,13 +14,21 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace operators { +DEFINE_C_ALLREDUCE_CUDA_KERNEL(CAllReduceMax, kRedMax) +} // namespace operators +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL( - c_allreduce_max, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel) +PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, + GPU, + ALL_LAYOUT, + ops::CAllReduceMaxCUDAKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op.cc b/paddle/fluid/operators/collective/c_allreduce_min_op.cc index 8c1dd0172a808a..c21337a27202e1 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op.cc @@ -35,6 +35,8 @@ class CAllReduceMinOpMaker : public CAllReduceOpMaker { DECLARE_INPLACE_OP_INFERER(AllreduceMinInplaceInferer, {"X", "Out"}); +DEFINE_C_ALLREDUCE_CPU_KERNEL(CAllReduceMin, kRedMin) + } // namespace operators } // namespace paddle @@ -46,9 +48,12 @@ REGISTER_OP_WITHOUT_GRADIENT(c_allreduce_min, ops::CAllReduceMinOpMaker, ops::AllreduceMinInplaceInferer) -REGISTER_OP_CPU_KERNEL(c_allreduce_min, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel); +PD_REGISTER_STRUCT_KERNEL(c_allreduce_min, + CPU, + ALL_LAYOUT, + ops::CAllReduceMinCPUKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc index c80a224f460dc9..4475abdef281b7 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc @@ -14,13 +14,21 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace operators { +DEFINE_C_ALLREDUCE_CUDA_KERNEL(CAllReduceMin, kRedMin) +} // namespace operators +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL( - c_allreduce_min, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel) +PD_REGISTER_STRUCT_KERNEL(c_allreduce_min, + GPU, + ALL_LAYOUT, + ops::CAllReduceMinCUDAKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index f63c4a9abcdc6e..e292864a8e7748 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -148,6 +148,10 @@ class CAllReduceOpCPUKernel : public framework::OpKernel { } }; +#define DEFINE_C_ALLREDUCE_CPU_KERNEL(op_name, red_type) \ + template \ + class op_name##CPUKernel : public CAllReduceOpCPUKernel {}; + #if defined(PADDLE_WITH_ASCEND_CL) // return true if found_nan or return false; inline bool ContainsNan(const paddle::platform::NPUDeviceContext& dev_ctx, @@ -527,6 +531,10 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { } }; +#define DEFINE_C_ALLREDUCE_CUDA_KERNEL(op_name, red_type) \ + template \ + class op_name##CUDAKernel : public CAllReduceOpCUDAKernel {}; + template class CAllReduceOpMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc index 1d07b8226ab86d..ee40f29d789e15 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc @@ -35,6 +35,8 @@ class CAllReduceProdOpMaker : public CAllReduceOpMaker { DECLARE_INPLACE_OP_INFERER(AllreduceProdInplaceInferer, {"X", "Out"}); +DEFINE_C_ALLREDUCE_CPU_KERNEL(CAllReduceProd, kRedProd) + } // namespace operators } // namespace paddle @@ -46,9 +48,12 @@ REGISTER_OP_WITHOUT_GRADIENT(c_allreduce_prod, ops::CAllReduceProdOpMaker, ops::AllreduceProdInplaceInferer) -REGISTER_OP_CPU_KERNEL(c_allreduce_prod, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel) +PD_REGISTER_STRUCT_KERNEL(c_allreduce_prod, + CPU, + ALL_LAYOUT, + ops::CAllReduceProdCPUKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc index 1ca4b6ecc18952..c63a1d2182678b 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc @@ -14,13 +14,21 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace operators { +DEFINE_C_ALLREDUCE_CUDA_KERNEL(CAllReduceProd, kRedProd) +} // namespace operators +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL( - c_allreduce_prod, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel) +PD_REGISTER_STRUCT_KERNEL(c_allreduce_prod, + GPU, + ALL_LAYOUT, + ops::CAllReduceProdCUDAKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cc index 1fd77d3ab96d6c..79e70757fbcfdd 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cc @@ -56,6 +56,8 @@ class CAllReduceSumOpMaker : public CAllReduceOpMaker { DECLARE_INPLACE_OP_INFERER(AllreduceSumInplaceInferer, {"X", "Out"}); +DEFINE_C_ALLREDUCE_CPU_KERNEL(CAllReduceSum, kRedSum) + } // namespace operators } // namespace paddle @@ -67,9 +69,12 @@ REGISTER_OP_WITHOUT_GRADIENT(c_allreduce_sum, ops::CAllReduceSumOpMaker, ops::AllreduceSumInplaceInferer) -REGISTER_OP_CPU_KERNEL(c_allreduce_sum, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel, - ops::CAllReduceOpCPUKernel) +PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, + CPU, + ALL_LAYOUT, + ops::CAllReduceSumCPUKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc index 5e00c3bdba3fdc..8e45b7e43b2ed1 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc @@ -14,16 +14,25 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace operators { +DEFINE_C_ALLREDUCE_CUDA_KERNEL(CAllReduceSum, kRedSum) +} // namespace operators +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL( - c_allreduce_sum, - ops::CAllReduceOpCUDAKernel, +PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, + GPU, + ALL_LAYOUT, + ops::CAllReduceSumCUDAKernel, + float, #if NCCL_VERSION_CODE >= 21000 - ops::CAllReduceOpCUDAKernel, + plat::bfloat16, #endif - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel, - ops::CAllReduceOpCUDAKernel) + double, + int, + int64_t, + plat::float16) { +} diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc index 8bf542e17caed0..29b732536437d2 100644 --- a/paddle/fluid/operators/detection/bipartite_match_op.cc +++ b/paddle/fluid/operators/detection/bipartite_match_op.cc @@ -64,7 +64,7 @@ bool DistPairDescend(std::tuple pair1, return std::get<2>(pair1) > std::get<2>(pair2); } -template +template class BipartiteMatchKernel : public framework::OpKernel { public: // The match_indices must be initialized to -1 at first. @@ -318,6 +318,10 @@ REGISTER_OPERATOR( ops::BipartiteMatchOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); -REGISTER_OP_CPU_KERNEL(bipartite_match, - ops::BipartiteMatchKernel, - ops::BipartiteMatchKernel); + +PD_REGISTER_STRUCT_KERNEL(bipartite_match, + CPU, + ALL_LAYOUT, + ops::BipartiteMatchKernel, + float, + double) {} diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index 46363ceb5355ea..5af100b8f64071 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -104,6 +104,6 @@ REGISTER_OPERATOR( ops::BoxClipOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); -REGISTER_OP_CPU_KERNEL(box_clip, - ops::BoxClipKernel, - ops::BoxClipKernel); + +PD_REGISTER_STRUCT_KERNEL( + box_clip, CPU, ALL_LAYOUT, ops::BoxClipKernel, float, double) {} diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu index 4a1558de906b3c..ef0f2439c5ee1c 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cu +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -44,7 +44,7 @@ static __global__ void GPUBoxClip(const T *input, } } -template +template class GPUBoxClipKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { @@ -74,6 +74,6 @@ class GPUBoxClipKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(box_clip, - ops::GPUBoxClipKernel, - ops::GPUBoxClipKernel); + +PD_REGISTER_STRUCT_KERNEL( + box_clip, GPU, ALL_LAYOUT, ops::GPUBoxClipKernel, float, double) {} diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h index cb067f91662ed0..c07185dec167c5 100644 --- a/paddle/fluid/operators/detection/box_clip_op.h +++ b/paddle/fluid/operators/detection/box_clip_op.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class BoxClipKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc index 7de0f05e0d3ba4..8c97523559de64 100644 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc @@ -225,6 +225,9 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); -REGISTER_OP_CPU_KERNEL(box_decoder_and_assign, - ops::BoxDecoderAndAssignKernel, - ops::BoxDecoderAndAssignKernel); +PD_REGISTER_STRUCT_KERNEL(box_decoder_and_assign, + CPU, + ALL_LAYOUT, + ops::BoxDecoderAndAssignKernel, + float, + double) {} diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu index daae995de0d5ea..a956a58ac75f7c 100644 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu @@ -95,7 +95,7 @@ __global__ void AssignBoxKernel(const T* prior_box_data, } } -template +template class BoxDecoderAndAssignCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -150,7 +150,10 @@ class BoxDecoderAndAssignCUDAKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL( - box_decoder_and_assign, - ops::BoxDecoderAndAssignCUDAKernel, - ops::BoxDecoderAndAssignCUDAKernel); + +PD_REGISTER_STRUCT_KERNEL(box_decoder_and_assign, + GPU, + ALL_LAYOUT, + ops::BoxDecoderAndAssignCUDAKernel, + float, + double) {} diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h index 31f03a76ba2451..5a191ffaf44746 100644 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class BoxDecoderAndAssignKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { diff --git a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py index 5dc8f8f5d691c0..b42750d84b9a9b 100644 --- a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py +++ b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py @@ -105,7 +105,7 @@ def setUp(self): } def test_check_output(self): - self.check_output() + self.check_output(check_dygraph=False) class TestBipartiteMatchOpWithoutLoD(OpTest): @@ -122,7 +122,7 @@ def setUp(self): } def test_check_output(self): - self.check_output() + self.check_output(check_dygraph=False) class TestBipartiteMatchOpWithoutLoDLargeScaleInput(OpTest): @@ -139,7 +139,7 @@ def setUp(self): } def test_check_output(self): - self.check_output() + self.check_output(check_dygraph=False) class TestBipartiteMatchOpWithPerPredictionType(OpTest): @@ -162,7 +162,7 @@ def setUp(self): } def test_check_output(self): - self.check_output() + self.check_output(check_dygraph=False) class TestBipartiteMatchOpWithEmptyLoD(OpTest): @@ -179,7 +179,7 @@ def setUp(self): } def test_check_output(self): - self.check_output() + self.check_output(check_dygraph=False) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_box_clip_op.py b/python/paddle/fluid/tests/unittests/test_box_clip_op.py index b785a800458ddd..1f7f3e9672541a 100644 --- a/python/paddle/fluid/tests/unittests/test_box_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_clip_op.py @@ -52,7 +52,7 @@ def batch_box_clip(input_boxes, im_info, lod): class TestBoxClipOp(OpTest): def test_check_output(self): - self.check_output() + self.check_output(check_dygraph=False) def setUp(self): self.op_type = "box_clip"