Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

register fluid kernels to phi [part 6] #52500

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions paddle/fluid/operators/amp/get_float_status_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class GetFloatStatusMaker : public framework::OpProtoAndCheckerMaker {
}
};

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class GetFloatStatusKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand All @@ -75,4 +75,5 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);

REGISTER_OP_CPU_KERNEL(get_float_status, ops::GetFloatStatusKernel<CPU, float>);
PD_REGISTER_STRUCT_KERNEL(
get_float_status, CPU, ALL_LAYOUT, ops::GetFloatStatusKernel, float) {}
15 changes: 9 additions & 6 deletions paddle/fluid/operators/collective/global_gather_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,12 @@ REGISTER_OPERATOR(global_gather,
ops::GlobalGatherOpGradMaker<paddle::framework::OpDesc>,
ops::GlobalGatherOpGradMaker<paddle::imperative::OpBase>)

REGISTER_OP_CPU_KERNEL(global_gather,
ops::GlobalGatherOpCPUKernel<float>,
ops::GlobalGatherOpCPUKernel<double>,
ops::GlobalGatherOpCPUKernel<int>,
ops::GlobalGatherOpCPUKernel<int64_t>,
ops::GlobalGatherOpCPUKernel<plat::float16>);
PD_REGISTER_STRUCT_KERNEL(global_gather,
CPU,
ALL_LAYOUT,
ops::GlobalGatherOpCPUKernel,
float,
double,
int,
int64_t,
plat::float16) {}
17 changes: 10 additions & 7 deletions paddle/fluid/operators/collective/global_gather_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ struct GlobalGatherProcessGroupFunctor<phi::GPUContext, T> {
}
};

template <typename T>
template <typename T, typename DeivceContext>
class GlobalGatherOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand All @@ -283,9 +283,12 @@ class GlobalGatherOpCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(global_gather,
ops::GlobalGatherOpCUDAKernel<float>,
ops::GlobalGatherOpCUDAKernel<double>,
ops::GlobalGatherOpCUDAKernel<int>,
ops::GlobalGatherOpCUDAKernel<int64_t>,
ops::GlobalGatherOpCUDAKernel<plat::float16>);
PD_REGISTER_STRUCT_KERNEL(global_gather,
GPU,
ALL_LAYOUT,
ops::GlobalGatherOpCUDAKernel,
float,
double,
int,
int64_t,
plat::float16) {}
2 changes: 1 addition & 1 deletion paddle/fluid/operators/collective/global_gather_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ limitations under the License. */
namespace paddle {
namespace operators {

template <typename T>
template <typename T, typename DeviceContext>
class GlobalGatherOpCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down
15 changes: 9 additions & 6 deletions paddle/fluid/operators/collective/global_scatter_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,12 @@ REGISTER_OPERATOR(global_scatter,
ops::GlobalScatterOpGradMaker<paddle::framework::OpDesc>,
ops::GlobalScatterOpGradMaker<paddle::imperative::OpBase>)

REGISTER_OP_CPU_KERNEL(global_scatter,
ops::GlobalScatterOpCPUKernel<float>,
ops::GlobalScatterOpCPUKernel<double>,
ops::GlobalScatterOpCPUKernel<int>,
ops::GlobalScatterOpCPUKernel<int64_t>,
ops::GlobalScatterOpCPUKernel<plat::float16>);
PD_REGISTER_STRUCT_KERNEL(global_scatter,
CPU,
ALL_LAYOUT,
ops::GlobalScatterOpCPUKernel,
float,
double,
int,
int64_t,
plat::float16) {}
17 changes: 10 additions & 7 deletions paddle/fluid/operators/collective/global_scatter_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ struct GlobalScatterProcessGroupFunctor<phi::GPUContext, T> {
}
};

template <typename T>
template <typename T, typename DeviceContext>
class GlobalScatterOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand All @@ -281,9 +281,12 @@ class GlobalScatterOpCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(global_scatter,
ops::GlobalScatterOpCUDAKernel<float>,
ops::GlobalScatterOpCUDAKernel<double>,
ops::GlobalScatterOpCUDAKernel<int>,
ops::GlobalScatterOpCUDAKernel<int64_t>,
ops::GlobalScatterOpCUDAKernel<plat::float16>);
PD_REGISTER_STRUCT_KERNEL(global_scatter,
GPU,
ALL_LAYOUT,
ops::GlobalScatterOpCUDAKernel,
float,
double,
int,
int64_t,
plat::float16) {}
2 changes: 1 addition & 1 deletion paddle/fluid/operators/collective/global_scatter_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ limitations under the License. */
namespace paddle {
namespace operators {

template <typename T>
template <typename T, typename DeviceContext>
class GlobalScatterOpCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down
17 changes: 11 additions & 6 deletions paddle/fluid/operators/collective/mp_allreduce_sum_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ class MpAllReduceSumOpGradMaker : public framework::SingleGradOpMaker<T> {

DECLARE_INPLACE_OP_INFERER(MpAllReduceSumInplaceInferer, {"X", "Out"});

DEFINE_C_ALLREDUCE_CPU_KERNEL(MpAllReduceSum, kRedSum);

} // namespace operators
} // namespace paddle

Expand All @@ -89,9 +91,12 @@ REGISTER_OPERATOR(mp_allreduce_sum,
ops::MpAllReduceSumOpMaker,
ops::MpAllReduceSumInplaceInferer);

REGISTER_OP_CPU_KERNEL(mp_allreduce_sum,
ops::CAllReduceOpCPUKernel<ops::kRedSum, float>,
ops::CAllReduceOpCPUKernel<ops::kRedSum, double>,
ops::CAllReduceOpCPUKernel<ops::kRedSum, int>,
ops::CAllReduceOpCPUKernel<ops::kRedSum, int64_t>,
ops::CAllReduceOpCPUKernel<ops::kRedSum, plat::float16>)
PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum,
CPU,
ALL_LAYOUT,
ops::MpAllReduceSumCPUKernel,
float,
double,
int,
int64_t,
plat::float16) {}
26 changes: 17 additions & 9 deletions paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,24 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"

namespace paddle {
namespace operators {
DEFINE_C_ALLREDUCE_CUDA_KERNEL(MpAllReduceSum, kRedSum)
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(
mp_allreduce_sum,
ops::CAllReduceOpCUDAKernel<ops::kRedSum, float>,
PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum,
GPU,
ALL_LAYOUT,
ops::MpAllReduceSumCUDAKernel,
float,
double,
int,
int64_t,
#if NCCL_VERSION_CODE >= 21000
ops::CAllReduceOpCUDAKernel<ops::kRedSum, plat::bfloat16>,
plat::bfloat16,
#endif
ops::CAllReduceOpCUDAKernel<ops::kRedSum, double>,
ops::CAllReduceOpCUDAKernel<ops::kRedSum, int>,
ops::CAllReduceOpCUDAKernel<ops::kRedSum, int64_t>,
ops::CAllReduceOpCUDAKernel<ops::kRedSum, plat::float16>)
plat::float16) {
}
10 changes: 5 additions & 5 deletions paddle/fluid/operators/custom_device_common_op_registry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,15 +78,15 @@ void RegisterCustomDeviceCommonKernel(const std::string& dev_type) {
load_combine,
device_type,
paddle::operators::
LoadCombineOpKernel<paddle::platform::CustomDeviceContext, float>,
LoadCombineOpKernel<float, paddle::platform::CustomDeviceContext>,
paddle::operators::
LoadCombineOpKernel<paddle::platform::CustomDeviceContext, double>,
LoadCombineOpKernel<double, paddle::platform::CustomDeviceContext>,
paddle::operators::
LoadCombineOpKernel<paddle::platform::CustomDeviceContext, int>,
LoadCombineOpKernel<int, paddle::platform::CustomDeviceContext>,
paddle::operators::
LoadCombineOpKernel<paddle::platform::CustomDeviceContext, int8_t>,
LoadCombineOpKernel<int8_t, paddle::platform::CustomDeviceContext>,
paddle::operators::
LoadCombineOpKernel<paddle::platform::CustomDeviceContext, int64_t>);
LoadCombineOpKernel<int64_t, paddle::platform::CustomDeviceContext>);
#endif
}

Expand Down
10 changes: 7 additions & 3 deletions paddle/fluid/operators/detection/generate_mask_labels_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ std::vector<phi::DenseTensor> SampleMaskForOneImage(
return res;
}

template <typename T>
template <typename T, typename DeviceContext>
class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down Expand Up @@ -533,5 +533,9 @@ REGISTER_OPERATOR(
ops::GenerateMaskLabelsOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(generate_mask_labels,
ops::GenerateMaskLabelsKernel<float>);

PD_REGISTER_STRUCT_KERNEL(generate_mask_labels,
CPU,
ALL_LAYOUT,
ops::GenerateMaskLabelsKernel,
float) {}
11 changes: 7 additions & 4 deletions paddle/fluid/operators/detection/generate_proposal_labels_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ std::vector<phi::DenseTensor> SampleRoisForOneImage(
return res;
}

template <typename T>
template <typename T, typename DeviceContext>
class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
Expand Down Expand Up @@ -811,9 +811,12 @@ REGISTER_OPERATOR(
ops::GenerateProposalLabelsOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(generate_proposal_labels,
ops::GenerateProposalLabelsKernel<float>,
ops::GenerateProposalLabelsKernel<double>);
PD_REGISTER_STRUCT_KERNEL(generate_proposal_labels,
CPU,
ALL_LAYOUT,
ops::GenerateProposalLabelsKernel,
float,
double) {}

REGISTER_OP_VERSION(generate_proposal_labels)
.AddCheckpoint(
Expand Down
5 changes: 2 additions & 3 deletions paddle/fluid/operators/detection/iou_similarity_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,5 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);

REGISTER_OP_CPU_KERNEL(iou_similarity,
ops::IOUSimilarityKernel<phi::CPUContext, float>,
ops::IOUSimilarityKernel<phi::CPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
iou_similarity, CPU, ALL_LAYOUT, ops::IOUSimilarityKernel, float, double) {}
5 changes: 2 additions & 3 deletions paddle/fluid/operators/detection/iou_similarity_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@ limitations under the License. */
#include "paddle/fluid/operators/detection/iou_similarity_op.h"

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(iou_similarity,
ops::IOUSimilarityKernel<phi::GPUContext, float>,
ops::IOUSimilarityKernel<phi::GPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
iou_similarity, GPU, ALL_LAYOUT, ops::IOUSimilarityKernel, float, double) {}
2 changes: 1 addition & 1 deletion paddle/fluid/operators/detection/iou_similarity_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ struct IOUSimilarityFunctor {
namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class IOUSimilarityKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down
11 changes: 7 additions & 4 deletions paddle/fluid/operators/detection/locality_aware_nms_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ void GetMaxScoreIndexWithLocalityAware(
}
}

template <typename T>
template <typename T, typename DeviceContext>
class LocalityAwareNMSKernel : public framework::OpKernel<T> {
public:
void LocalityAwareNMSFast(phi::DenseTensor* bbox,
Expand Down Expand Up @@ -520,6 +520,9 @@ REGISTER_OPERATOR(
ops::LocalityAwareNMSOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(locality_aware_nms,
ops::LocalityAwareNMSKernel<float>,
ops::LocalityAwareNMSKernel<double>);
PD_REGISTER_STRUCT_KERNEL(locality_aware_nms,
CPU,
ALL_LAYOUT,
ops::LocalityAwareNMSKernel,
float,
double) {}
11 changes: 7 additions & 4 deletions paddle/fluid/operators/detection/mine_hard_examples_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ inline MiningType GetMiningType(std::string str) {
}
}

template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class MineHardExamplesKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
Expand Down Expand Up @@ -403,6 +403,9 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);

REGISTER_OP_CPU_KERNEL(mine_hard_examples,
ops::MineHardExamplesKernel<phi::CPUContext, float>,
ops::MineHardExamplesKernel<phi::CPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(mine_hard_examples,
CPU,
ALL_LAYOUT,
ops::MineHardExamplesKernel,
float,
double) {}
20 changes: 13 additions & 7 deletions paddle/fluid/operators/detection/multiclass_nms_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ void SliceOneClass(const platform::DeviceContext& ctx,
}
}

template <typename T>
template <typename T, typename DeviceContext>
class MultiClassNMSKernel : public framework::OpKernel<T> {
public:
void NMSFast(const phi::DenseTensor& bbox,
Expand Down Expand Up @@ -629,6 +629,9 @@ class MultiClassNMS3OpMaker : public MultiClassNMS2OpMaker {
}
};

template <typename T, typename DeviceContext>
class MultiClassNMS2Kernel : public MultiClassNMSKernel<T, DeviceContext> {};

} // namespace operators
} // namespace paddle

Expand All @@ -643,18 +646,21 @@ REGISTER_OPERATOR(
ops::MultiClassNMSOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(multiclass_nms,
ops::MultiClassNMSKernel<float>,
ops::MultiClassNMSKernel<double>);
PD_REGISTER_STRUCT_KERNEL(
multiclass_nms, CPU, ALL_LAYOUT, ops::MultiClassNMSKernel, float, double) {}

REGISTER_OPERATOR(
multiclass_nms2,
ops::MultiClassNMS2Op,
ops::MultiClassNMS2OpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(multiclass_nms2,
ops::MultiClassNMSKernel<float>,
ops::MultiClassNMSKernel<double>);
PD_REGISTER_STRUCT_KERNEL(multiclass_nms2,
CPU,
ALL_LAYOUT,
ops::MultiClassNMS2Kernel,
float,
double) {}

REGISTER_OPERATOR(
multiclass_nms3,
Expand Down
Loading