Merge branch 'develop' into phi_decouple to resolve conflicts
GreatV committed Nov 17, 2022
2 parents 97185b6 + 460d504 commit 880faed
Showing 28 changed files with 240 additions and 349 deletions.
4 changes: 2 additions & 2 deletions paddle/fluid/operators/cross_entropy_op.h
@@ -15,9 +15,9 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/math.h"
#include "paddle/phi/kernels/funcs/math_function.h"

namespace paddle {
@@ -190,7 +190,7 @@ struct HardLabelCrossEntropyForwardFunctor {
label);

auto match_x = x_[idx * feature_size_ + label];
- y_[idx] = -math::TolerableValue<T>()(real_log(match_x));
+ y_[idx] = -math::TolerableValue<T>()(phi::funcs::real_log(match_x));
match_x_[idx] = match_x;
} else {
y_[idx] = 0;
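For orientation, the hard-label functor above computes, for sample i with class label y_i (my restatement of the code, not text from the patch):

    \mathrm{loss}_i = -\log\bigl(x_{i,\,y_i}\bigr) \quad \text{if } y_i \neq \text{ignore\_index}, \qquad \mathrm{loss}_i = 0 \quad \text{otherwise}

with TolerableValue<T> wrapping the log to guard against non-finite intermediate values.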
31 changes: 18 additions & 13 deletions paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu
@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/math.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"
#include "paddle/phi/core/hostdevice.h"
#include "paddle/phi/kernels/funcs/math.h"

namespace paddle {
namespace operators {
@@ -55,15 +56,16 @@ __global__ void GPUSigmoidFocalLossForward(const T *x_data,
T s_pos = alpha / fg_num;

// p = 1. / 1. + expf(-x)
- T p = 1. / (1. + real_exp(-x));
+ T p = 1. / (1. + phi::funcs::real_exp(-x));

// (1 - p)**gamma * log(p)
T term_pos = std::pow(static_cast<T>(1. - p), gamma) *
- real_log(p > FLT_MIN ? p : FLT_MIN);
+ phi::funcs::real_log(p > FLT_MIN ? p : FLT_MIN);
// p**gamma * log(1 - p)
- T term_neg =
- std::pow(p, gamma) *
- (-1. * x * (x >= 0) - real_log(1. + real_exp(x - 2. * x * (x >= 0))));
+ T term_neg = std::pow(p, gamma) *
+ (-1. * x * (x >= 0) -
+ phi::funcs::real_log(
+ 1. + phi::funcs::real_exp(x - 2. * x * (x >= 0))));

out_data[i] = 0.0;
out_data[i] += -c_pos * term_pos * s_pos;
@@ -96,17 +98,20 @@ __global__ void GPUSigmoidFocalLossBackward(const T *x_data,
T c_pos = static_cast<T>(g == (d + 1));
T c_neg = static_cast<T>((g != -1) & (g != (d + 1)));

- T p = 1. / (1. + real_exp(-x));
+ T p = 1. / (1. + phi::funcs::real_exp(-x));

// (1-p)**g * (1 - p - g*p*log(p))
- T term_pos = std::pow(static_cast<T>(1. - p), gamma) *
- (1. - p - (p * gamma * real_log(p > FLT_MIN ? p : FLT_MIN)));
+ T term_pos =
+ std::pow(static_cast<T>(1. - p), gamma) *
+ (1. - p -
+ (p * gamma * phi::funcs::real_log(p > FLT_MIN ? p : FLT_MIN)));
// (p**g) * (g*(1-p)*log(1-p) - p)
- T term_neg =
- std::pow(p, gamma) *
- ((-1. * x * (x >= 0) - real_log(1. + real_exp(x - 2. * x * (x >= 0)))) *
- (1. - p) * gamma -
- p);
+ T term_neg = std::pow(p, gamma) *
+ ((-1. * x * (x >= 0) -
+ phi::funcs::real_log(
+ 1. + phi::funcs::real_exp(x - 2. * x * (x >= 0)))) *
+ (1. - p) * gamma -
+ p);

dx_data[i] = 0.0;
dx_data[i] += -c_pos * s_pos * term_pos;
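Both focal-loss kernels share the same stabilized form of log(1 - p); a sketch of the identity behind it (my derivation, not part of the patch), writing p = 1 / (1 + e^{-x}):

    \log(1 - p) = -x - \log\bigl(1 + e^{-x}\bigr) = -\max(x, 0) - \log\bigl(1 + e^{-|x|}\bigr)

The code's x - 2x \cdot [x \ge 0] is exactly -|x|, so the argument to real_exp is never positive and the expression cannot overflow for large |x|; the FLT_MIN clamp plays the matching role for log(p).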
14 changes: 7 additions & 7 deletions paddle/fluid/operators/math/cross_entropy.cu
@@ -14,11 +14,10 @@ limitations under the License. */

#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_primitives.h"

#include "paddle/phi/kernels/funcs/math.h"
namespace paddle {
namespace operators {
namespace math {
@@ -39,9 +38,10 @@ __global__ void CrossEntropyKernel(T* Y,
D,
ignore_index,
lbl);
- Y[i] = ignore_index == lbl
- ? static_cast<T>(0)
- : -math::TolerableValue<T>()(real_log(X[i * D + lbl]));
+ Y[i] =
+ ignore_index == lbl
+ ? static_cast<T>(0)
+ : -math::TolerableValue<T>()(phi::funcs::real_log(X[i * D + lbl]));
}
}

@@ -56,7 +56,7 @@ __global__ void SoftCrossEntropyKernel(T* Y,
int idx = blockIdx.x * class_num + tid;
int end = blockIdx.x * class_num + class_num;
for (; idx < end; idx += blockDim.x) {
- val += math::TolerableValue<T>()(real_log(X[idx])) * label[idx];
+ val += math::TolerableValue<T>()(phi::funcs::real_log(X[idx])) * label[idx];
}

val = paddle::platform::reduceSum(val, tid, blockDim.x);
@@ -152,7 +152,7 @@ void CrossEntropyFunctor<DeviceContext, T>::operator()(

template class CrossEntropyFunctor<phi::GPUContext, float>;
template class CrossEntropyFunctor<phi::GPUContext, double>;
- template class CrossEntropyFunctor<phi::GPUContext, platform::float16>;
+ template class CrossEntropyFunctor<phi::GPUContext, phi::dtype::float16>;

} // namespace math
} // namespace operators
6 changes: 3 additions & 3 deletions paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu
@@ -23,8 +23,8 @@ limitations under the License. */
namespace cub = hipcub;
#endif

#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h"
#include "paddle/phi/kernels/funcs/math.h"

namespace paddle {
namespace operators {
@@ -67,7 +67,7 @@ __global__ void sequence_softmax_kernel(const T *in_data,
T sum_data = 0;
for (int tid = threadIdx.x; tid < span; tid += blockDim.x) {
T ele = in_data[start + tid];
- sum_data += real_exp(ele - shared_max_data);
+ sum_data += phi::funcs::real_exp(ele - shared_max_data);
}
sum_data =
BlockReduce<T, BlockDim>(temp_storage).Reduce(sum_data, cub::Sum());
@@ -79,7 +79,7 @@
// get final resit
for (int tid = threadIdx.x; tid < span; tid += blockDim.x) {
T ele = in_data[start + tid];
- ele = real_exp(ele - shared_max_data) / shared_sum_data;
+ ele = phi::funcs::real_exp(ele - shared_max_data) / shared_sum_data;
out_data[start + tid] = ele;
}
}
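The kernel above is the standard max-shifted softmax; as a brief gloss (mine, not from the patch), for one sequence with m = \max_k x_k:

    \mathrm{softmax}(x)_j = \frac{e^{x_j - m}}{\sum_k e^{x_k - m}}

Subtracting the per-sequence maximum leaves the result unchanged, since the e^{-m} factors cancel, while keeping every exponent non-positive — which is why shared_max_data is computed before either call to real_exp.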
3 changes: 2 additions & 1 deletion paddle/fluid/pybind/eager_functions.cc
@@ -268,7 +268,8 @@ PyObject* eager_api_get_grads_types(PyObject* self,
if (meta && grad.initialized()) {
if (grad.is_dense_tensor() &&
(tensor.dtype() == paddle::experimental::DataType::FLOAT32 ||
- tensor.dtype() == paddle::experimental::DataType::FLOAT16)) {
+ tensor.dtype() == paddle::experimental::DataType::FLOAT16 ||
+ tensor.dtype() == paddle::experimental::DataType::BFLOAT16)) {
ret.emplace_back(
paddle::framework::TransToProtoVarType(tensor.dtype()));
}
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc
@@ -16,9 +16,9 @@
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/batch_norm_kernel.h"
#include "paddle/phi/kernels/funcs/batch_norm_utils.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/gpu/batch_norm_utils.h"

namespace phi {

7 changes: 3 additions & 4 deletions paddle/phi/kernels/cpu/bce_loss_kernel.cc
@@ -16,9 +16,9 @@

#include <algorithm> // for max

#include "paddle/fluid/operators/math.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math.h"

namespace phi {

@@ -47,10 +47,9 @@ void BCELossKernel(const Context& dev_ctx,
"Illegal input, input must be less than or equal to 1"));
out_data[i] =
(label_data[i] - static_cast<T>(1)) *
- std::max(paddle::operators::real_log(static_cast<T>(1) - x_data[i]),
+ std::max(phi::funcs::real_log(static_cast<T>(1) - x_data[i]),
(T)(-100)) -
- label_data[i] *
- std::max(paddle::operators::real_log(x_data[i]), (T)(-100));
+ label_data[i] * std::max(phi::funcs::real_log(x_data[i]), (T)(-100));
}
}

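For reference, the loop body above is the clipped binary cross-entropy (my restatement, with x_i the predicted probability and y_i the label):

    \mathrm{out}_i = (y_i - 1)\,\max\bigl(\log(1 - x_i),\,-100\bigr) - y_i\,\max\bigl(\log(x_i),\,-100\bigr)

The -100 floor bounds the loss when a prediction saturates at exactly 0 or 1.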
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/nll_loss_grad_kernel.cc
@@ -17,9 +17,9 @@
#include <memory>
#include <string>

#include "paddle/fluid/operators/math.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math.h"

namespace phi {
template <typename T>
8 changes: 3 additions & 5 deletions paddle/phi/kernels/funcs/functors.h
@@ -15,7 +15,7 @@ limitations under the License. */
#pragma once

#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/math.h"
#include "paddle/phi/kernels/funcs/math.h"

namespace phi {
namespace funcs {
@@ -89,8 +89,7 @@ struct TanhFunctor {
// y = 2 / (1 + e^-2x) - 1
T t0 = static_cast<T>(2) * x;
T t1 = (t0 < kMin) ? kMin : ((t0 > kMax) ? kMax : t0);
- return static_cast<T>(2) /
- (static_cast<T>(1) + paddle::operators::real_exp(-t1)) -
+ return static_cast<T>(2) / (static_cast<T>(1) + phi::funcs::real_exp(-t1)) -
static_cast<T>(1);
}
};
@@ -111,8 +110,7 @@ struct SigmoidFunctor {
inline HOSTDEVICE T operator()(T x) {
// y = 1 / (1 + e^-x)
T tmp = (x < kMin) ? kMin : ((x > kMax) ? kMax : x);
- return static_cast<T>(1) /
- (static_cast<T>(1) + paddle::operators::real_exp(-tmp));
+ return static_cast<T>(1) / (static_cast<T>(1) + phi::funcs::real_exp(-tmp));
}
};

20 changes: 10 additions & 10 deletions paddle/fluid/operators/math.h → paddle/phi/kernels/funcs/math.h
@@ -1,4 +1,4 @@
- // Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+ // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -15,22 +15,22 @@
#pragma once

#include "math.h" // NOLINT
#include "paddle/fluid/platform/float16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/hostdevice.h"

- namespace paddle {
- namespace operators {
+ namespace phi {
+ namespace funcs {

- inline HOSTDEVICE platform::float16 real_exp(platform::float16 x) {
- return static_cast<platform::float16>(::expf(static_cast<float>(x)));
+ inline HOSTDEVICE phi::dtype::float16 real_exp(phi::dtype::float16 x) {
+ return static_cast<phi::dtype::float16>(::expf(static_cast<float>(x)));
}

inline HOSTDEVICE float real_exp(float x) { return ::expf(x); }

inline HOSTDEVICE double real_exp(double x) { return ::exp(x); }

- inline HOSTDEVICE platform::float16 real_log(platform::float16 x) {
- return static_cast<platform::float16>(::logf(static_cast<float>(x)));
+ inline HOSTDEVICE phi::dtype::float16 real_log(phi::dtype::float16 x) {
+ return static_cast<phi::dtype::float16>(::logf(static_cast<float>(x)));
}

inline HOSTDEVICE float real_log(float x) { return ::logf(x); }
@@ -41,5 +41,5 @@ inline HOSTDEVICE float real_min(float x, float y) { return ::fminf(x, y); }

inline HOSTDEVICE double real_min(double x, double y) { return ::fmin(x, y); }

- }  // namespace operators
- }  // namespace paddle
+ }  // namespace funcs
+ }  // namespace phi
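This rename is the heart of the decoupling: the real_exp / real_log / real_min helpers move from paddle::operators into phi::funcs, and every caller touched by this commit is updated to match. A minimal sketch of what a migrated call site looks like (hypothetical function name and clamp parameter; assumes only the header shown above):

    #include "paddle/phi/kernels/funcs/math.h"

    // Hypothetical caller: negative log-likelihood with a floor on the
    // probability, mirroring the FLT_MIN clamps used elsewhere in this diff.
    // Previously this would have included paddle/fluid/operators/math.h and
    // called paddle::operators::real_log.
    template <typename T>
    HOSTDEVICE T NegLogProb(T prob, T floor) {
      return -phi::funcs::real_log(prob > floor ? prob : floor);
    }

HOSTDEVICE comes along via paddle/phi/core/hostdevice.h, which the moved header already includes.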
4 changes: 3 additions & 1 deletion paddle/phi/kernels/gpu/adamw_kernel.cu
@@ -21,6 +21,7 @@
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
@@ -300,7 +301,8 @@ PD_REGISTER_KERNEL(adamw,
phi::AdamwDenseKernel,
float,
double,
- phi::dtype::float16) {
+ phi::dtype::float16,
+ phi::dtype::bfloat16) {
// Skip beta1_pow, beta2_pow, skip_update data transform
kernel->InputAt(5).SetBackend(phi::Backend::ALL_BACKEND);
kernel->InputAt(6).SetBackend(phi::Backend::ALL_BACKEND);
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -22,10 +22,10 @@
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/batch_norm_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/batch_norm_utils.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/norm_utils.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/gpu/batch_norm_utils.h"

#ifdef __HIPCC__
#define LAUNCH_BOUNDS(BlockDim) __launch_bounds__(BlockDim)
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -29,10 +29,10 @@ namespace cub = hipcub;
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/batch_norm_kernel.h"
#include "paddle/phi/kernels/funcs/batch_norm_utils.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/norm_utils.h"
#include "paddle/phi/kernels/funcs/reduce_function.h"
#include "paddle/phi/kernels/gpu/batch_norm_utils.h"

#ifdef __HIPCC__
#define LAUNCH_BOUNDS(BlockDim) __launch_bounds__(BlockDim)
(Diff for the remaining 15 of the 28 changed files was not loaded.)
