diff --git a/paddle/phi/kernels/gpu/label_smooth_grad_kernel.cu b/paddle/phi/kernels/gpu/label_smooth_grad_kernel.cu
index 2ac6442967b38e..1e5b07d621b429 100644
--- a/paddle/phi/kernels/gpu/label_smooth_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/label_smooth_grad_kernel.cu
@@ -15,20 +15,23 @@
 #include "paddle/phi/kernels/label_smooth_grad_kernel.h"
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/amp_type_traits.h"
+#include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/elementwise_base.h"
 
 namespace phi {
 template <typename T>
 struct LabelSmoothGradFunctor {
-  T epsilon;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
+  MT epsilon;
 
   __forceinline__ LabelSmoothGradFunctor(float epsilon_data) {
-    epsilon = static_cast<T>(epsilon_data);
+    epsilon = static_cast<MT>(epsilon_data);
   }
 
   __device__ __forceinline__ T operator()(const T x) const {
-    return static_cast<T>(1 - epsilon) * x;
+    return static_cast<T>((1 - epsilon) * static_cast<MT>(x));
   }
 };
 
@@ -52,4 +55,5 @@ PD_REGISTER_KERNEL(label_smooth_grad,
                    ALL_LAYOUT,
                    phi::LabelSmoothGradKernel,
                    float,
-                   double) {}
+                   double,
+                   phi::dtype::float16) {}
diff --git a/paddle/phi/kernels/gpu/label_smooth_kernel.cu b/paddle/phi/kernels/gpu/label_smooth_kernel.cu
index ff2fff4445174a..b56276dcd76542 100644
--- a/paddle/phi/kernels/gpu/label_smooth_kernel.cu
+++ b/paddle/phi/kernels/gpu/label_smooth_kernel.cu
@@ -17,6 +17,8 @@
 #include <vector>
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/amp_type_traits.h"
+#include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/elementwise_base.h"
@@ -24,17 +26,18 @@ namespace phi {
 
 template <typename T>
 struct LabelSmoothFunctor {
-  T epsilon;
-  T label_dim;
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
+  MT epsilon;
+  MT label_dim;
 
   __forceinline__ LabelSmoothFunctor(float epsilon_data, int label_dim_data) {
-    epsilon = static_cast<T>(epsilon_data);
-    label_dim = static_cast<T>(label_dim_data);
+    epsilon = static_cast<MT>(epsilon_data);
+    label_dim = static_cast<MT>(label_dim_data);
   }
 
   __device__ __forceinline__ T operator()(const T x) const {
-    return (static_cast<T>(1 - epsilon) * x +
-            static_cast<T>(epsilon / label_dim));
+    return static_cast<T>((1 - epsilon) * static_cast<MT>(x) +
+                          epsilon / label_dim);
   }
 };
 
@@ -46,9 +49,11 @@ __global__ void LabelSmoothRunDistKernel(const int N,
                                          const T* dist_data,
                                          T* dst) {
   CUDA_KERNEL_LOOP(idx, N) {
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
     int dist_idx = idx % dist_numel;
-    dst[idx] = static_cast<T>(1 - epsilon) * src[idx] +
-               static_cast<T>(epsilon) * dist_data[dist_idx];
+    dst[idx] = static_cast<T>(
+        static_cast<MT>((1 - epsilon) * static_cast<MT>(src[idx])) +
+        static_cast<MT>(epsilon) * static_cast<MT>(dist_data[dist_idx]));
   }
 }
 
@@ -83,5 +88,10 @@ void LabelSmoothKernel(const Context& ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(
-    label_smooth, GPU, ALL_LAYOUT, phi::LabelSmoothKernel, float, double) {}
+PD_REGISTER_KERNEL(label_smooth,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::LabelSmoothKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {}
diff --git a/python/paddle/fluid/tests/unittests/test_label_smooth_op.py b/python/paddle/fluid/tests/unittests/test_label_smooth_op.py
index b62a75438a7fd4..8cff0bfa3a455b 100644
--- a/python/paddle/fluid/tests/unittests/test_label_smooth_op.py
+++ b/python/paddle/fluid/tests/unittests/test_label_smooth_op.py
@@ -18,15 +18,19 @@
 from op_test import OpTest
 
 import paddle
+import paddle.fluid.core as core
 
 
 class TestLabelSmoothOp(OpTest):
     def config(self):
+        self.init_data_type()
         self.op_type = "label_smooth"
         self.python_api = paddle.nn.functional.label_smooth
         self.epsilon = 0.1
         batch_size, self.label_dim = 10, 12
-        self.label = np.zeros((batch_size, self.label_dim)).astype("float64")
+        self.label = np.zeros((batch_size, self.label_dim)).astype(
+            self.data_type
+        )
         nonzero_index = np.random.randint(self.label_dim, size=(batch_size))
         self.label[np.arange(batch_size), nonzero_index] = 1
 
@@ -39,6 +43,9 @@ def setUp(self):
         self.attrs = {'epsilon': self.epsilon}
         self.outputs = {'Out': smoothed_label}
 
+    def init_data_type(self):
+        self.data_type = "float64"
+
     def test_check_output(self):
         self.check_output(check_eager=True)
 
@@ -78,6 +85,42 @@ def setUp(self):
         )
 
 
+class TestLabelSmoothFP16(TestLabelSmoothOp):
+    def init_data_type(self):
+        self.data_type = "float16"
+
+    def test_check_grad(self):
+        place = core.CUDAPlace(0)
+        self.check_grad_with_place(place, ['X'], 'Out', max_relative_error=1e-3)
+
+
+class TestLabelSmoothStaticFP16(unittest.TestCase):
+    def test_fp16(self):
+        paddle.enable_static()
+
+        epsilon = 0.1
+        shape = [10, 12]
+        batch_size, label_dim = shape
+        label = np.zeros((batch_size, label_dim)).astype("float16")
+        nonzero_index = np.random.randint(label_dim, size=(batch_size))
+        label[np.arange(batch_size), nonzero_index] = 1
+        output_np = (1 - epsilon) * label + epsilon / label_dim
+
+        with paddle.static.program_guard(paddle.static.Program()):
+            x_data = paddle.static.data(shape=shape, name='x', dtype='float16')
+            out = paddle.nn.functional.label_smooth(x_data, epsilon=epsilon)
+            if core.is_compiled_with_cuda():
+                place = paddle.CUDAPlace(0)
+                exe = paddle.static.Executor(place)
+                exe.run(paddle.static.default_startup_program())
+                output_pd = exe.run(feed={'x': label}, fetch_list=[out])[0]
+                np.testing.assert_allclose(
+                    output_pd, output_np, rtol=1e-3, atol=1e-3
+                )
+
+        paddle.disable_static()
+
+
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py
index b957eb369ad8ab..7fb130ab8cdbbc 100644
--- a/python/paddle/nn/functional/common.py
+++ b/python/paddle/nn/functional/common.py
@@ -1923,7 +1923,8 @@ def label_smooth(label, prior_dist=None, epsilon=0.1, name=None):
         label(Tensor): The input variable containing the label data. The
                         label data should use one-hot representation. It's
                         a multidimensional tensor with a shape of
-                        :math:`[N_1, ..., Depth]`, where Depth is class number. The dtype can be "float32" and "float64".
+                        :math:`[N_1, ..., Depth]`, where Depth is class number.
+                        The dtype can be "float16", "float32" and "float64".
         prior_dist(Tensor, optional): The prior distribution to be used to smooth
                         labels. If not provided, an uniform distribution
                         is used. It's a multidimensional tensor with a shape of
@@ -1965,7 +1966,7 @@
     )
 
     check_variable_and_dtype(
-        label, 'label', ['float32', 'float64'], 'label_smooth'
+        label, 'label', ['float16', 'float32', 'float64'], 'label_smooth'
     )
 
     helper = LayerHelper("label_smooth", **locals())
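
For anyone who wants to exercise the new dtype end to end, here is a minimal dygraph smoke test. It is not part of this PR and assumes a CUDA build of Paddle, since the float16 kernels above are registered for GPU only:

```python
# Minimal sketch (not in this PR): exercise the new fp16 label_smooth path.
import numpy as np
import paddle

paddle.set_device('gpu')  # the float16 kernel is GPU-only

epsilon, label_dim = 0.1, 12
# One-hot float16 labels, mirroring the unit tests above.
label = np.zeros((10, label_dim)).astype('float16')
label[np.arange(10), np.random.randint(label_dim, size=10)] = 1

out = paddle.nn.functional.label_smooth(
    paddle.to_tensor(label), epsilon=epsilon
)

# Reference result in numpy: (1 - eps) * y + eps / num_classes.
expected = (1 - epsilon) * label + epsilon / label_dim
np.testing.assert_allclose(out.numpy(), expected, rtol=1e-3, atol=1e-3)
```

Note that `MPTypeTrait<phi::dtype::float16>::Type` resolves to `float`, so the functors above perform the smoothing arithmetic in fp32 and cast back to fp16 once per element; the looser `1e-3` tolerances in the tests reflect that single rounding step.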