PaddlePaddle · ggggkkkknnnn · Mar 14, 2023 · Mar 14, 2023 · Mar 15, 2023 · Mar 15, 2023
diff --git a/paddle/phi/kernels/addmm_grad_kernel.h b/paddle/phi/kernels/addmm_grad_kernel.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
 
 namespace phi {
 

diff --git a/paddle/phi/kernels/addmm_kernel.h b/paddle/phi/kernels/addmm_kernel.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
+#include "paddle/phi/common/scalar.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
 
 namespace phi {
 

diff --git a/paddle/phi/kernels/angle_grad_kernel.h b/paddle/phi/kernels/angle_grad_kernel.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
 
 namespace phi {
 

diff --git a/paddle/phi/kernels/angle_kernel.h b/paddle/phi/kernels/angle_kernel.h
@@ -18,7 +18,9 @@
 #endif
 #include <math.h>
 
+#include "paddle/phi/common/scalar.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
 
 namespace phi {
 

diff --git a/paddle/phi/kernels/bmm_kernel.h b/paddle/phi/kernels/bmm_kernel.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/common/scalar.h"
 
 namespace phi {
 

diff --git a/paddle/phi/kernels/cpu/addmm_kernel.cc b/paddle/phi/kernels/cpu/addmm_kernel.cc
@@ -18,4 +18,11 @@ limitations under the License. */
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/addmm_kernel_impl.h"
 
-PD_REGISTER_KERNEL(addmm, CPU, ALL_LAYOUT, phi::AddmmKernel, float, double) {}
+PD_REGISTER_KERNEL(addmm,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::AddmmKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/cpu/angle_kernel.cc b/paddle/phi/kernels/cpu/angle_kernel.cc
@@ -25,7 +25,9 @@ PD_REGISTER_KERNEL(angle,
                    phi::AngleKernel,
                    float,
                    double,
                    phi::dtype::complex<float>,
-                   phi::dtype::complex<double>) {
+                   phi::dtype::complex<double>,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {
   kernel->OutputAt(0).SetDataType(phi::DataType::UNDEFINED);
 }
diff --git a/paddle/phi/kernels/cpu/bmm_kernel.cc b/paddle/phi/kernels/cpu/bmm_kernel.cc
@@ -17,5 +17,16 @@
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/bmm_kernel_impl.h"
+#include "paddle/phi/core/device_context.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/bfloat16.h"
 
-PD_REGISTER_KERNEL(bmm, CPU, ALL_LAYOUT, phi::BmmKernel, float, double) {}
+PD_REGISTER_KERNEL(bmm,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::BmmKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/cpu/index_add_kernel.cc b/paddle/phi/kernels/cpu/index_add_kernel.cc
@@ -40,5 +40,6 @@ PD_REGISTER_KERNEL(index_add,
                    float,
                    double,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    int,
                    int64_t) {}
diff --git a/paddle/phi/kernels/cpu/index_sample_kernel.cc b/paddle/phi/kernels/cpu/index_sample_kernel.cc
@@ -113,4 +113,6 @@ PD_REGISTER_KERNEL(index_sample,
                    float,
                    double,
                    int,
-                   int64_t) {}
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/cpu/put_along_axis_kernel.cc b/paddle/phi/kernels/cpu/put_along_axis_kernel.cc
@@ -83,4 +83,6 @@ PD_REGISTER_KERNEL(put_along_axis,
                    double,
                    int,
                    uint8_t,
-                   int64_t) {}
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/cpu/take_along_axis_kernel.cc b/paddle/phi/kernels/cpu/take_along_axis_kernel.cc
@@ -54,4 +54,6 @@ PD_REGISTER_KERNEL(take_along_axis,
                    double,
                    int,
                    uint8_t,
-                   int64_t) {}
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/addmm_grad_kernel.cu b/paddle/phi/kernels/gpu/addmm_grad_kernel.cu
@@ -17,6 +17,13 @@ limitations under the License. */
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/addmm_grad_kernel_impl.h"
+#include "paddle/phi/common/data_type.h"
 
-PD_REGISTER_KERNEL(
-    addmm_grad, GPU, ALL_LAYOUT, phi::AddmmGradKernel, float, double) {}
+PD_REGISTER_KERNEL(addmm_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::AddmmGradKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/addmm_kernel.cu b/paddle/phi/kernels/gpu/addmm_kernel.cu
@@ -18,4 +18,16 @@ limitations under the License. */
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/addmm_kernel_impl.h"
 
-PD_REGISTER_KERNEL(addmm, GPU, ALL_LAYOUT, phi::AddmmKernel, float, double) {}
+#include "paddle/phi/core/device_context.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/bfloat16.h"
+
+PD_REGISTER_KERNEL(addmm,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::AddmmKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/angle_grad_kernel.cu b/paddle/phi/kernels/gpu/angle_grad_kernel.cu
@@ -17,6 +17,7 @@
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/common/data_type.h"
 
 PD_REGISTER_KERNEL(angle_grad,
                    GPU,

diff --git a/paddle/phi/kernels/gpu/angle_kernel.cu b/paddle/phi/kernels/gpu/angle_kernel.cu
@@ -19,13 +19,20 @@
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 
+#include "paddle/phi/core/device_context.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/bfloat16.h"
+
 PD_REGISTER_KERNEL(angle,
                    GPU,
                    ALL_LAYOUT,
                    phi::AngleKernel,
                    float,
                    double,
                    phi::dtype::complex<float>,
-                   phi::dtype::complex<double>) {
+                   phi::dtype::complex<double>,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {
   kernel->OutputAt(0).SetDataType(phi::DataType::UNDEFINED);
 }
diff --git a/paddle/phi/kernels/gpu/bmm_kernel.cu b/paddle/phi/kernels/gpu/bmm_kernel.cu
@@ -18,5 +18,17 @@
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/bmm_kernel_impl.h"
 
+#include "paddle/phi/core/device_context.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/bfloat16.h"
+
+
-PD_REGISTER_KERNEL(
-    bmm, GPU, ALL_LAYOUT, phi::BmmKernel, float, double, phi::dtype::float16) {}
+PD_REGISTER_KERNEL(bmm,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::BmmKernel,
+                   float,
+                   double,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/index_add_kernel.cu b/paddle/phi/kernels/gpu/index_add_kernel.cu
@@ -18,7 +18,12 @@
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/core/utils/data_type.h"
+// #include "paddle/phi/core/utils/data_type.h"
+
+#include "paddle/phi/common/bfloat16.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/core/device_context.h"
 
 DECLARE_bool(cudnn_deterministic);
 
@@ -122,4 +127,5 @@ PD_REGISTER_KERNEL(index_add,
                    double,
                    phi::dtype::float16,
                    int,
-                   int64_t) {}
+                   int64_t,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/index_sample_kernel.cu b/paddle/phi/kernels/gpu/index_sample_kernel.cu
@@ -23,6 +23,12 @@
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
+#include "paddle/phi/core/device_context.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/bfloat16.h"
+
+
 namespace phi {
 
 namespace {

diff --git a/paddle/phi/kernels/gpu/put_along_axis_kernel.cu b/paddle/phi/kernels/gpu/put_along_axis_kernel.cu
@@ -21,6 +21,12 @@
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/funcs/gather_scatter_functor.h"
 
+#include "paddle/phi/core/device_context.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/bfloat16.h"
+
+
 namespace phi {
 
 template <typename T, typename Context>
@@ -82,4 +88,5 @@ PD_REGISTER_KERNEL(put_along_axis,
                    double,
                    int64_t,
                    int,
-                   phi::dtype::float16) {}
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/take_along_axis_kernel.cu b/paddle/phi/kernels/gpu/take_along_axis_kernel.cu
@@ -20,6 +20,12 @@
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/funcs/gather_scatter_functor.h"
 
+#include "paddle/phi/core/device_context.h"
+#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/float16.h"
+#include "paddle/phi/common/bfloat16.h"
+
+
 namespace phi {
 
 template <typename T, typename Context>
@@ -54,4 +60,5 @@ PD_REGISTER_KERNEL(take_along_axis,
                    double,
                    int64_t,
                    int,
-                   phi::dtype::float16) {}
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/impl/addmm_kernel_impl.h b/paddle/phi/kernels/impl/addmm_kernel_impl.h
@@ -118,6 +118,7 @@ void AddmmKernel(const Context& dev_ctx,
             alpha,
             x.data<T>(),
             x_dims[1],
+            reinterpret_cast<const Eigen::half*>(y.data<float16_t>()),
             y.data<T>(),
             y_dims[1],
             beta,

diff --git a/paddle/phi/kernels/index_add_kernel.h b/paddle/phi/kernels/index_add_kernel.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/common/scalar.h"
 
 namespace phi {
 

diff --git a/paddle/phi/kernels/index_sample_kernel.h b/paddle/phi/kernels/index_sample_kernel.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/common/scalar.h"
 
 namespace phi {
 

diff --git a/paddle/phi/kernels/put_along_axis_kernel.h b/paddle/phi/kernels/put_along_axis_kernel.h
@@ -17,6 +17,7 @@
 #include <string>
 
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/common/scalar.h"
 
 namespace phi {
 

diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -1035,6 +1035,91 @@ def test_errors(self):
                 F.hardtanh(x_fp16)
 
 
+class TestHardtanhOp(OpTest):
+    """Forward and backward check of the `hardtanh` op on float32 input."""
+
+    def setUp(self):
+        self.op_type = "hardtanh"
+        self.init_input_output()
+        self.attrs = {'min': self.min, 'max': self.max}
+        self.inputs = {'X': self.x}
+        self.outputs = {'Out': self.out}
+
+    def init_input_output(self):
+        self.min = -1
+        self.max = 1
+        # Sample outside [min, max] as well so that clipping really happens.
+        self.x = np.random.uniform(-3, 3, [10, 10]).astype("float32")
+        # Keep samples off the two kinks; NOTE(review): assumes check_grad
+        # compares against a numeric gradient, unreliable there - confirm.
+        self.x[np.abs(self.x - self.min) < 0.005] = self.min + 0.02
+        self.x[np.abs(self.x - self.max) < 0.005] = self.max + 0.02
+        self.out = np.clip(self.x, self.min, self.max)
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X'], 'Out')
+
+
+class TestHardtanhFp16Op(OpTest):
+    """Forward and backward check of the `hardtanh` op on float16 input."""
+
+    def setUp(self):
+        self.op_type = "hardtanh"
+        # Set explicitly: nothing else in this class assigns `self.dtype`.
+        self.dtype = np.float16
+        self.init_input_output()
+        self.attrs = {'min': self.min, 'max': self.max}
+        self.inputs = {'X': self.x.astype(np.float16)}
+        self.outputs = {'Out': self.out.astype(np.float16)}
+
+    def init_input_output(self):
+        self.min = -1
+        self.max = 1
+        self.x = np.random.uniform(-3, 3, [10, 10]).astype(self.dtype)
+        self.x[np.abs(self.x - self.min) < 0.005] = self.min + 0.02
+        self.x[np.abs(self.x - self.max) < 0.005] = self.max + 0.02
+        self.out = np.clip(self.x, self.min, self.max)
+
+    def test_check_output(self):
+        self.check_output(rtol=1e-3)
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X'], 'Out', rtol=1e-3)
+
+
+class TestHardtanhBF16Op(OpTest):
+    """Forward and backward check of the `hardtanh` op on bfloat16 input."""
+
+    def setUp(self):
+        self.op_type = "hardtanh"
+        self.init_input_output()
+        self.attrs = {'min': self.min, 'max': self.max}
+        self.inputs = {'X': convert_float_to_uint16(self.x)}
+        self.outputs = {'Out': convert_float_to_uint16(self.out)}
+
+    def init_input_output(self):
+        self.min = -1
+        self.max = 1
+        self.x = np.random.uniform(-3, 3, [10, 10]).astype(np.float32)
+        # Wider margin than the other dtypes: the float32 samples are
+        # presumably narrowed to bfloat16 and must not round onto a kink.
+        self.x[np.abs(self.x - self.min) < 0.02] = self.min + 0.05
+        self.x[np.abs(self.x - self.max) < 0.02] = self.max + 0.05
+        self.out = np.clip(self.x, self.min, self.max)
+
+    def test_check_output(self):
+        # NOTE(review): rtol=0.8 accepts almost any result; kept as
+        # submitted, but a tighter bound should be confirmed.
+        self.check_output(atol=0.0, rtol=0.8)
+
+    def test_check_grad_normal(self):
+        self.decr_ratio = 0.8
+        self.check_grad(['X'], 'Out', atol=0.0, rtol=0.8)
+
+
 def ref_softshrink(x, threshold=0.5):
     out = np.copy(x)
     out = (out < -threshold) * (out + threshold) + (out > threshold) * (