update (#52878)

PaddlePaddle · Apr 14, 2023 · e93e8a3 · e93e8a3
1 parent aac8da9
commit e93e8a3
Show file tree

Hide file tree

Showing 19 changed files with 101 additions and 1,288 deletions.
diff --git a/paddle/fluid/operators/amp/get_float_status_op.cc b/paddle/fluid/operators/amp/get_float_status_op.cc
@@ -53,7 +53,7 @@ class GetFloatStatusMaker : public framework::OpProtoAndCheckerMaker {
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class GetFloatStatusKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -75,4 +75,5 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
 
-REGISTER_OP_CPU_KERNEL(get_float_status, ops::GetFloatStatusKernel<CPU, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    get_float_status, CPU, ALL_LAYOUT, ops::GetFloatStatusKernel, float) {}
diff --git a/paddle/fluid/operators/collective/global_gather_op.cc b/paddle/fluid/operators/collective/global_gather_op.cc
@@ -111,9 +111,12 @@ REGISTER_OPERATOR(global_gather,
                   ops::GlobalGatherOpGradMaker<paddle::framework::OpDesc>,
                   ops::GlobalGatherOpGradMaker<paddle::imperative::OpBase>)
 
-REGISTER_OP_CPU_KERNEL(global_gather,
-                       ops::GlobalGatherOpCPUKernel<float>,
-                       ops::GlobalGatherOpCPUKernel<double>,
-                       ops::GlobalGatherOpCPUKernel<int>,
-                       ops::GlobalGatherOpCPUKernel<int64_t>,
-                       ops::GlobalGatherOpCPUKernel<plat::float16>);
+PD_REGISTER_STRUCT_KERNEL(global_gather,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::GlobalGatherOpCPUKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t,
+                          plat::float16) {}
diff --git a/paddle/fluid/operators/collective/global_gather_op.cu.cc b/paddle/fluid/operators/collective/global_gather_op.cu.cc
@@ -261,7 +261,7 @@ struct GlobalGatherProcessGroupFunctor<phi::GPUContext, T> {
   }
 };
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class GlobalGatherOpCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -283,9 +283,12 @@ class GlobalGatherOpCUDAKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 
-REGISTER_OP_CUDA_KERNEL(global_gather,
-                        ops::GlobalGatherOpCUDAKernel<float>,
-                        ops::GlobalGatherOpCUDAKernel<double>,
-                        ops::GlobalGatherOpCUDAKernel<int>,
-                        ops::GlobalGatherOpCUDAKernel<int64_t>,
-                        ops::GlobalGatherOpCUDAKernel<plat::float16>);
+PD_REGISTER_STRUCT_KERNEL(global_gather,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::GlobalGatherOpCUDAKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t,
+                          plat::float16) {}
diff --git a/paddle/fluid/operators/collective/global_gather_op.h b/paddle/fluid/operators/collective/global_gather_op.h
@@ -25,7 +25,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class GlobalGatherOpCPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {

diff --git a/paddle/fluid/operators/collective/global_scatter_op.cc b/paddle/fluid/operators/collective/global_scatter_op.cc
@@ -115,9 +115,12 @@ REGISTER_OPERATOR(global_scatter,
                   ops::GlobalScatterOpGradMaker<paddle::framework::OpDesc>,
                   ops::GlobalScatterOpGradMaker<paddle::imperative::OpBase>)
 
-REGISTER_OP_CPU_KERNEL(global_scatter,
-                       ops::GlobalScatterOpCPUKernel<float>,
-                       ops::GlobalScatterOpCPUKernel<double>,
-                       ops::GlobalScatterOpCPUKernel<int>,
-                       ops::GlobalScatterOpCPUKernel<int64_t>,
-                       ops::GlobalScatterOpCPUKernel<plat::float16>);
+PD_REGISTER_STRUCT_KERNEL(global_scatter,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::GlobalScatterOpCPUKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t,
+                          plat::float16) {}
diff --git a/paddle/fluid/operators/collective/global_scatter_op.cu.cc b/paddle/fluid/operators/collective/global_scatter_op.cu.cc
@@ -259,7 +259,7 @@ struct GlobalScatterProcessGroupFunctor<phi::GPUContext, T> {
   }
 };
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class GlobalScatterOpCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -281,9 +281,12 @@ class GlobalScatterOpCUDAKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 namespace plat = paddle::platform;
 
-REGISTER_OP_CUDA_KERNEL(global_scatter,
-                        ops::GlobalScatterOpCUDAKernel<float>,
-                        ops::GlobalScatterOpCUDAKernel<double>,
-                        ops::GlobalScatterOpCUDAKernel<int>,
-                        ops::GlobalScatterOpCUDAKernel<int64_t>,
-                        ops::GlobalScatterOpCUDAKernel<plat::float16>);
+PD_REGISTER_STRUCT_KERNEL(global_scatter,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::GlobalScatterOpCUDAKernel,
+                          float,
+                          double,
+                          int,
+                          int64_t,
+                          plat::float16) {}
diff --git a/paddle/fluid/operators/collective/global_scatter_op.h b/paddle/fluid/operators/collective/global_scatter_op.h
@@ -25,7 +25,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class GlobalScatterOpCPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {

diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc
@@ -328,7 +328,7 @@ std::vector<phi::DenseTensor> SampleMaskForOneImage(
   return res;
 }
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -533,5 +533,9 @@ REGISTER_OPERATOR(
     ops::GenerateMaskLabelsOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(generate_mask_labels,
-                       ops::GenerateMaskLabelsKernel<float>);
+
+PD_REGISTER_STRUCT_KERNEL(generate_mask_labels,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::GenerateMaskLabelsKernel,
+                          float) {}
diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
@@ -510,7 +510,7 @@ std::vector<phi::DenseTensor> SampleRoisForOneImage(
   return res;
 }
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -811,9 +811,12 @@ REGISTER_OPERATOR(
     ops::GenerateProposalLabelsOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(generate_proposal_labels,
-                       ops::GenerateProposalLabelsKernel<float>,
-                       ops::GenerateProposalLabelsKernel<double>);
+PD_REGISTER_STRUCT_KERNEL(generate_proposal_labels,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::GenerateProposalLabelsKernel,
+                          float,
+                          double) {}
 
 REGISTER_OP_VERSION(generate_proposal_labels)
     .AddCheckpoint(

diff --git a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
@@ -19,7 +19,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class CPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -99,7 +99,10 @@ REGISTER_OPERATOR(
     paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
     paddle::operators::BatchSizeLikeNoNeedBufferVarsInferer);
 
-REGISTER_OP_CPU_KERNEL(
-    gaussian_random_batch_size_like,
-    paddle::operators::CPUGaussianRandomBatchSizeLikeKernel<float>,
-    paddle::operators::CPUGaussianRandomBatchSizeLikeKernel<double>);
+namespace ops = paddle::operators;
+PD_REGISTER_STRUCT_KERNEL(gaussian_random_batch_size_like,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::CPUGaussianRandomBatchSizeLikeKernel,
+                          float,
+                          double) {}
diff --git a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cu b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cu
@@ -47,7 +47,7 @@ struct GaussianGenerator {
   }
 };
 
-template <typename T>
+template <typename T, typename DeviceContext>
 class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -78,9 +78,12 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OP_CUDA_KERNEL(
-    gaussian_random_batch_size_like,
-    paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<
-        paddle::platform::float16>,
-    paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<float>,
-    paddle::operators::GPUGaussianRandomBatchSizeLikeKernel<double>);
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+PD_REGISTER_STRUCT_KERNEL(gaussian_random_batch_size_like,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::GPUGaussianRandomBatchSizeLikeKernel,
+                          float,
+                          double,
+                          plat::float16) {}
diff --git a/paddle/fluid/operators/graph_khop_sampler_op.cc b/paddle/fluid/operators/graph_khop_sampler_op.cc
@@ -136,6 +136,10 @@ using CPU = phi::CPUContext;
 REGISTER_OPERATOR(graph_khop_sampler,
                   ops::GraphKhopSamplerOP,
                   ops::GraphKhopSamplerOpMaker);
-REGISTER_OP_CPU_KERNEL(graph_khop_sampler,
-                       ops::GraphKhopSamplerOpKernel<CPU, int32_t>,
-                       ops::GraphKhopSamplerOpKernel<CPU, int64_t>);
+
+PD_REGISTER_STRUCT_KERNEL(graph_khop_sampler,
+                          CPU,
+                          ALL_LAYOUT,
+                          ops::GraphKhopSamplerOpKernel,
+                          int32_t,
+                          int64_t) {}
diff --git a/paddle/fluid/operators/graph_khop_sampler_op.cu b/paddle/fluid/operators/graph_khop_sampler_op.cu
@@ -412,7 +412,7 @@ void ReindexFunc(const framework::ExecutionContext& ctx,
                           thrust::raw_pointer_cast(values.data()));
 }
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class GraphKhopSamplerOpCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -668,6 +668,9 @@ class GraphKhopSamplerOpCUDAKernel : public framework::OpKernel<T> {
 using CUDA = phi::GPUContext;
 namespace ops = paddle::operators;
 
-REGISTER_OP_CUDA_KERNEL(graph_khop_sampler,
-                        ops::GraphKhopSamplerOpCUDAKernel<CUDA, int32_t>,
-                        ops::GraphKhopSamplerOpCUDAKernel<CUDA, int64_t>);
+PD_REGISTER_STRUCT_KERNEL(graph_khop_sampler,
+                          GPU,
+                          ALL_LAYOUT,
+                          ops::GraphKhopSamplerOpCUDAKernel,
+                          int32_t,
+                          int64_t) {}
diff --git a/paddle/fluid/operators/graph_khop_sampler_op.h b/paddle/fluid/operators/graph_khop_sampler_op.h
@@ -191,7 +191,7 @@ void SampleNeighbors(const T* src,
   }
 }
 
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class GraphKhopSamplerOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {

diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc
@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/group_norm_op.h"
-
 #include <memory>
 #include <string>
 #include <unordered_map>