PaddlePaddle · luotao1 · Apr 17, 2023 · Feb 20, 2023 · Feb 23, 2023 · Feb 23, 2023
diff --git a/paddle/phi/kernels/gpu/bce_loss_grad_kernel.cu b/paddle/phi/kernels/gpu/bce_loss_grad_kernel.cu
@@ -18,6 +18,8 @@
 #include <vector>
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/amp_type_traits.h"
+#include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/hostdevice.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/elementwise_base.h"
@@ -27,16 +29,16 @@ namespace phi {
+// Element-wise functor that evaluates
+//   dout * (x - label) / max((1 - x) * x, eps)
+// i.e. dout times the derivative of
+//   -(label * log(x) + (1 - label) * log(1 - x))
+// with respect to x, with the denominator clamped from below by eps.
 template <typename T>
 struct BCELossGradFunctor {
   T one;
-  T eps;
 
-  HOSTDEVICE inline BCELossGradFunctor() {
-    one = static_cast<T>(1.0f);
-    eps = static_cast<T>(1e-12);
-  }
+  HOSTDEVICE inline BCELossGradFunctor() { one = static_cast<T>(1.0f); }
 
+  // NOTE(review): presumably x/label are the forward op's inputs and dout is
+  // the gradient flowing in from the output -- confirm against the kernel.
   HOSTDEVICE inline T operator()(const T x, const T label, const T dout) const {
-    T term1 = max((one - x) * x, eps);
-    return (dout * (x - label) / term1);
+    // All arithmetic is done in MT, the type MPTypeTrait selects for T; only
+    // the final result is cast back to T.
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
+    MT x_mt = static_cast<MT>(x);
+    // eps is formed directly in MT rather than in T. For T = float16 a value
+    // of 1e-12 is below the smallest positive float16 and would round to 0,
+    // which would disable the clamp below.
+    MT eps_mt = static_cast<MT>(1e-12);
+    // Lower-bound the denominator so the division cannot be by zero when x
+    // is exactly 0 or 1.
+    MT term1 = max((static_cast<MT>(one) - x_mt) * x_mt, eps_mt);
+    return static_cast<T>(static_cast<MT>(dout) *
+                          (x_mt - static_cast<MT>(label)) / term1);
   }
 };
 
@@ -55,5 +57,10 @@ void BCELossGradKernel(const Context& dev_ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(
-    bce_loss_grad, GPU, ALL_LAYOUT, phi::BCELossGradKernel, float, double) {}
+// Registers phi::BCELossGradKernel as the GPU "bce_loss_grad" kernel for the
+// float, double and float16 data types.
+PD_REGISTER_KERNEL(bce_loss_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::BCELossGradKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {}
diff --git a/paddle/phi/kernels/gpu/bce_loss_kernel.cu b/paddle/phi/kernels/gpu/bce_loss_kernel.cu
@@ -18,6 +18,8 @@
 #include <vector>
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/amp_type_traits.h"
+#include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/hostdevice.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/elementwise_base.h"
@@ -40,9 +42,15 @@ struct BCELossFunctor {
         (x >= static_cast<T>(0)) && (x <= one),
         "Input is expected to be within the interval [0, 1], but received %f.",
         x);
-    T term1 = max(phi::kps::details::Log(x), neg_100);
-    T term2 = max(phi::kps::details::Log(one - x), neg_100);
-    return (((label - one) * term2) - (label * term1));
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
+    MT term1 = max(phi::kps::details::Log(static_cast<MT>(x)),
+                   static_cast<MT>(neg_100));
+    MT term2 =
+        max(phi::kps::details::Log(static_cast<MT>(one) - static_cast<MT>(x)),
+            static_cast<MT>(neg_100));
+    return static_cast<T>(
+        ((static_cast<MT>(label) - static_cast<MT>(one)) * term2) -
+        (static_cast<MT>(label) * term1));
   }
 };
 
@@ -60,5 +68,10 @@ void BCELossKernel(const Context& dev_ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(
-    bce_loss, GPU, ALL_LAYOUT, phi::BCELossKernel, float, double) {}
+// Registers phi::BCELossKernel as the GPU "bce_loss" kernel for the float,
+// double and float16 data types.
+PD_REGISTER_KERNEL(bce_loss,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::BCELossKernel,
+                   float,
+                   double,
+                   phi::dtype::float16) {}
diff --git a/python/paddle/fluid/tests/unittests/test_bce_loss.py b/python/paddle/fluid/tests/unittests/test_bce_loss.py
@@ -19,6 +19,7 @@
 
 import paddle
 import paddle.fluid as fluid
+import paddle.fluid.core as core
 
 
 def test_static_layer(
@@ -279,6 +280,68 @@ def init_test_cast(self):
         self.shape = [2, 3, 20]
 
 
+class TestBceLossOpFP16(TestBceLossOp):
+    """Runs the bce_loss operator checks with float16 inputs.
+
+    Both checks need a CUDA build and a device that reports float16 support;
+    when either is missing they return without asserting anything.
+    """
+
+    def setUp(self):
+        self.init_test_case()
+        self.op_type = "bce_loss"
+        self.python_api = bce_wrapper
+        # Inputs are drawn from [0.1, 0.8); labels are 0 or 1.
+        input_np = np.random.uniform(0.1, 0.8, self.shape).astype("float16")
+        label_np = np.random.randint(0, 2, self.shape).astype("float16")
+        output_np = bce_loss(input_np, label_np)
+
+        self.inputs = {'X': input_np, 'Label': label_np}
+        self.outputs = {'Out': output_np}
+
+    def _fp16_place(self):
+        """Return CUDAPlace(0) if float16 can be tested on it, else None."""
+        # The build check must come first: core.CUDAPlace(0) must not be
+        # constructed when Paddle was compiled without CUDA.
+        if not core.is_compiled_with_cuda():
+            return None
+        place = core.CUDAPlace(0)
+        return place if core.is_float16_supported(place) else None
+
+    def test_check_output(self):
+        """Compare the float16 forward result with the NumPy reference."""
+        place = self._fp16_place()
+        if place is not None:
+            self.check_output_with_place(place, atol=1e-3)
+
+    def test_check_grad(self):
+        """Check the float16 gradient with respect to X."""
+        place = self._fp16_place()
+        if place is not None:
+            # NOTE(review): max_relative_error=1 is a very loose bound;
+            # kept as-is, tighten once float16 accuracy is characterised.
+            self.check_grad_with_place(
+                place, ['X'], 'Out', max_relative_error=1
+            )
+
+
+class TestBceLossOpFP16Case1(TestBceLossOpFP16):
+    """Repeats the float16 bce_loss checks with a 4-D input shape."""
+
+    def init_test_case(self):
+        self.shape = [2, 3, 4, 5]
+
+
+class TestBceLossOpFP16Case2(TestBceLossOpFP16):
+    """Repeats the float16 bce_loss checks with a 3-D input shape."""
+
+    def init_test_case(self):
+        self.shape = [2, 3, 20]
+
+
+class TestBceLossOpStaticFP16(unittest.TestCase):
+    """Static-graph check of binary_cross_entropy with float16 inputs."""
+
+    def test_fp16(self):
+        """Build the float16 program and, on a capable GPU, compare with NumPy.
+
+        The program is always built, so the float16 dtype check in
+        binary_cross_entropy is exercised on every build. It is executed only
+        on a CUDA build whose device 0 reports float16 support.
+        """
+        paddle.enable_static()
+        # Leave static mode even if building, running or comparing raises, so
+        # a failure here cannot leak the mode switch out of this test.
+        try:
+            shape = [2, 3, 20]
+            x_data = np.random.uniform(0.1, 0.8, shape).astype("float16")
+            y_data = np.random.randint(0, 2, shape).astype("float16")
+            output_np = bce_loss(x_data, y_data)
+            with paddle.static.program_guard(paddle.static.Program()):
+                x = paddle.static.data(shape=shape, name='x', dtype='float16')
+                y = paddle.static.data(shape=shape, name='y', dtype='float16')
+                out = paddle.nn.functional.binary_cross_entropy(
+                    x, y, reduction="none"
+                )
+                if core.is_compiled_with_cuda():
+                    place = paddle.CUDAPlace(0)
+                    # Same device guard as TestBceLossOpFP16.test_check_output.
+                    if core.is_float16_supported(place):
+                        exe = paddle.static.Executor(place)
+                        exe.run(paddle.static.default_startup_program())
+                        output_pd = exe.run(
+                            feed={'x': x_data, 'y': y_data}, fetch_list=[out]
+                        )[0]
+                        np.testing.assert_allclose(
+                            output_pd, output_np, rtol=1e-3, atol=1e-3
+                        )
+        finally:
+            paddle.disable_static()
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
@@ -633,10 +633,10 @@ def binary_cross_entropy(
     Parameters:
         input (Tensor): The input predications tensor. 2-D tensor with shape: [N, *],
             N is batch_size, `*` means number of additional dimensions. The ``input``
-            should always be the output of sigmod.  Available dtype is float32, float64.
+            should always be the output of sigmoid.  Available dtype is float16, float32, float64.
         label (Tensor): The target labels tensor. 2-D tensor with the same shape as
             ``input``. The target labels which values should be numbers between 0 and 1.
-            Available dtype is float32, float64.
+            Available dtype is float16, float32, float64.
         weight (Tensor, optional): A manual rescaling weight given to the loss of each
             batch element. If given, has to be a Tensor of size nbatch and the data type
             is float32, float64. Default is ``'None'``.
@@ -686,10 +686,16 @@ def binary_cross_entropy(
             return out
     else:
         check_variable_and_dtype(
-            input, 'input', ['float32', 'float64'], 'binary_cross_entropy'
+            input,
+            'input',
+            ['float16', 'float32', 'float64'],
+            'binary_cross_entropy',
         )
         check_variable_and_dtype(
-            label, 'label', ['float32', 'float64'], 'binary_cross_entropy'
+            label,
+            'label',
+            ['float16', 'float32', 'float64'],
+            'binary_cross_entropy',
         )
 
         sub_name = name if weight is None and reduction == 'none' else None

diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
@@ -732,8 +732,8 @@ class BCELoss(Layer):
             For more information, please refer to :ref:`api_guide_Name`.
 
     Shape:
-        - input (Tensor): 2-D tensor with shape: ``[N, *]``, N is batch_size, `*` means number of additional dimensions. The input ``input`` should always be the output of sigmod. Available dtype is float32, float64.
-        - label (Tensor): 2-D tensor with the same shape as ``input``. The target labels which values should be numbers between 0 and 1. Available dtype is float32, float64.
+        - input (Tensor): 2-D tensor with shape: ``[N, *]``, N is batch_size, `*` means number of additional dimensions. The input ``input`` should always be the output of sigmoid. Available dtype is float16, float32, float64.
+        - label (Tensor): 2-D tensor with the same shape as ``input``. The target labels, whose values should be numbers between 0 and 1. Available dtype is float16, float32, float64.
         - output (Tensor): If ``reduction`` is ``'none'``, the shape of output is same as ``input`` , else the shape of output is scalar.
 
     Returns: