[PaddlePaddle Hackathon 4 No.53]: Support the float16 data type for Paddle label_smooth #50921

Closed
wants to merge 10 commits
paddle/phi/kernels/gpu/label_smooth_grad_kernel.cu (8 changes: 6 additions, 2 deletions)

@@ -15,6 +15,8 @@
#include "paddle/phi/kernels/label_smooth_grad_kernel.h"

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"

@@ -28,7 +30,8 @@ struct LabelSmoothGradFunctor {
}

__device__ __forceinline__ T operator()(const T x) const {
return static_cast<T>(1 - epsilon) * x;
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
Contributor: Line 34 can be removed, since it already exists above.

Contributor (author): Fixed.

return static_cast<T>((1 - static_cast<MT>(epsilon)) * static_cast<MT>(x));
Contributor: Taken together with the code above, epsilon has already been cast to FP16 at line 29, and here it is cast back to FP32.

Contributor (author): Adjusted.

}
};
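To make the two comments above concrete, here is a minimal sketch of a gradient functor that avoids both the duplicated MT alias and the FP16-to-FP32 round trip: epsilon is widened to the high-precision type once in the constructor, the arithmetic runs in MT, and the result is narrowed back to T a single time. This is an illustration of the reviewer's suggestion under that assumption, not the code that was finally merged, and the functor name is hypothetical.

template <typename T>
struct LabelSmoothGradFunctorSketch {
  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
  MT epsilon;  // kept in MT, so operator() never has to widen it again

  __forceinline__ explicit LabelSmoothGradFunctorSketch(float epsilon_data) {
    epsilon = static_cast<MT>(epsilon_data);
  }

  __device__ __forceinline__ T operator()(const T x) const {
    // dL/dx = (1 - epsilon) * dL/dy, accumulated in MT, narrowed to T once
    return static_cast<T>((static_cast<MT>(1) - epsilon) * static_cast<MT>(x));
  }
};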

@@ -52,4 +55,5 @@ PD_REGISTER_KERNEL(label_smooth_grad,
ALL_LAYOUT,
phi::LabelSmoothGradKernel,
float,
double) {}
double,
phi::dtype::float16) {}
paddle/phi/kernels/gpu/label_smooth_kernel.cu (23 changes: 17 additions, 6 deletions)

@@ -17,6 +17,8 @@
#include <vector>

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"

@@ -33,8 +35,10 @@ struct LabelSmoothFunctor {
}

__device__ __forceinline__ T operator()(const T x) const {
return (static_cast<T>(1 - epsilon) * x +
static_cast<T>(epsilon / label_dim));
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
return static_cast<T>((1 - static_cast<MT>(epsilon)) * static_cast<MT>(x) +
static_cast<MT>(epsilon) /
static_cast<MT>(label_dim));
Contributor: Same epsilon issue as above.

Contributor (author): Fixed.

}
};

@@ -46,9 +50,11 @@ __global__ void LabelSmoothRunDistKernel(const int N,
const T* dist_data,
T* dst) {
CUDA_KERNEL_LOOP(idx, N) {
using MT = typename phi::dtype::MPTypeTrait<T>::Type;
int dist_idx = idx % dist_numel;
dst[idx] = static_cast<T>(1 - epsilon) * src[idx] +
static_cast<T>(epsilon) * dist_data[dist_idx];
dst[idx] = static_cast<T>(
static_cast<MT>((1 - epsilon) * static_cast<MT>(src[idx])) +
static_cast<MT>(epsilon) * static_cast<MT>(dist_data[dist_idx]));
}
}
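The same pattern would carry over to the forward path. As a rough sketch (an illustration, not the merged code, with a hypothetical kernel name), the distribution branch can widen each FP16 operand to MT once, evaluate y = (1 - epsilon) * x + epsilon * dist in MT, and store the result with a single narrowing cast; LabelSmoothFunctor above would follow the same shape, with epsilon / label_dim as the second term.

template <typename T>
__global__ void LabelSmoothRunDistKernelSketch(const int N,
                                               const float epsilon,
                                               const int dist_numel,
                                               const T* src,
                                               const T* dist_data,
                                               T* dst) {
  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
  const MT eps = static_cast<MT>(epsilon);  // widen epsilon once per thread
  CUDA_KERNEL_LOOP(idx, N) {
    const int dist_idx = idx % dist_numel;
    const MT x = static_cast<MT>(src[idx]);                // widen input to MT
    const MT dist = static_cast<MT>(dist_data[dist_idx]);  // widen prior to MT
    // y = (1 - eps) * x + eps * dist, accumulated in MT, stored as T
    dst[idx] = static_cast<T>((static_cast<MT>(1) - eps) * x + eps * dist);
  }
}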

@@ -83,5 +89,10 @@ void LabelSmoothKernel(const Context& ctx,

} // namespace phi

PD_REGISTER_KERNEL(
label_smooth, GPU, ALL_LAYOUT, phi::LabelSmoothKernel, float, double) {}
PD_REGISTER_KERNEL(label_smooth,
GPU,
ALL_LAYOUT,
phi::LabelSmoothKernel,
float,
double,
phi::dtype::float16) {}
paddle/phi/kernels/gpu/prelu_funcs.h (3 changes: 3 additions, 0 deletions)

@@ -137,6 +137,9 @@ void PreluChannelWiseDirectCUDAFunctor<T>::operator()(gpuStream_t stream,
stream>>>(
input, alpha, output, channel, numel);
} else {
printf("debug: spatial: %d, ch_num: %d\n",
static_cast<int>(numel / batch_size / channel),
static_cast<int>(channel));
Contributor: This seems unrelated to the functionality of the PR?

Contributor (author): Removed.

PReluChannelFirstWiseKernel<<<PADDLE_GET_BLOCKS(numel),
CUDA_NUM_THREADS,
0,
python/paddle/fluid/tests/unittests/test_label_smooth_op.py (25 changes: 25 additions, 0 deletions)

@@ -78,6 +78,31 @@ def setUp(self):
)


class TestLabelSmoothFP16(unittest.TestCase):
def check_main(self, x_np, dtype):
paddle.disable_static()
x = paddle.to_tensor(x_np.astype(dtype))
x.stop_gradient = False
y = paddle.nn.functional.label_smooth(x, epsilon=0.1)
x_g = paddle.grad(y, [x])
y_np = y.numpy().astype('float32')
x_g_np = x_g[0].numpy().astype('float32')
paddle.enable_static()
return y_np, x_g_np

def test_main(self):
if not paddle.is_compiled_with_cuda():
return

np.random.seed(20)
x_np = np.random.random([10, 12])
y_np_1, x_g_np_1 = self.check_main(x_np, 'float16')
y_np_2, x_g_np_2 = self.check_main(x_np, 'float32')

np.testing.assert_allclose(y_np_1, y_np_2, rtol=1e-03)
np.testing.assert_allclose(x_g_np_1, x_g_np_2, rtol=1e-03)


Contributor: Please add a unit test for the op following the low-precision unit-test specification. Also, once the OpTest is added, this case can be removed. https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/dev_guides/amp_precision/amp_test_dev_guide_cn.html#step2

Contributor (author): Fixed.

if __name__ == '__main__':
paddle.enable_static()
unittest.main()
python/paddle/nn/functional/common.py (5 changes: 3 additions, 2 deletions)

@@ -1922,7 +1922,8 @@ def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
label(Tensor): The input variable containing the label data. The
label data should use one-hot representation. It's
a multidimensional tensor with a shape of
:math:`[N_1, ..., Depth]`, where Depth is class number. The dtype can be "float32" and "float64".
:math:`[N_1, ..., Depth]`, where Depth is class number.
The dtype can be "float16", "float32" and "float64".
prior_dist(Tensor, optional): The prior distribution to be used to smooth
labels. If not provided, an uniform distribution
is used. It's a multidimensional tensor with a shape of
@@ -1964,7 +1965,7 @@ def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
)

check_variable_and_dtype(
label, 'label', ['float32', 'float64'], 'label_smooth'
label, 'label', ['float16', 'float32', 'float64'], 'label_smooth'
)

helper = LayerHelper("label_smooth", **locals())