diff --git a/paddle/phi/kernels/gpu/complex_kernel.cu b/paddle/phi/kernels/gpu/complex_kernel.cu
index 5c5bf104128d33..3b26984d87dde8 100644
--- a/paddle/phi/kernels/gpu/complex_kernel.cu
+++ b/paddle/phi/kernels/gpu/complex_kernel.cu
@@ -26,6 +26,7 @@ PD_REGISTER_KERNEL(conj,
                    ALL_LAYOUT,
                    phi::ConjKernel,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    phi::dtype::complex<float>,
                    phi::dtype::complex<double>,
                    float,
diff --git a/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu b/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
index cc5d95a12f7a3c..b50fad637d106e 100644
--- a/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
@@ -146,4 +146,5 @@ PD_REGISTER_KERNEL(temporal_shift_grad,
                    phi::TemporalShiftGradKernel,
                    float,
                    double,
-                   phi::dtype::float16) {}
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/paddle/phi/kernels/gpu/temporal_shift_kernel.cu b/paddle/phi/kernels/gpu/temporal_shift_kernel.cu
index b321fad07ac1fd..4904da296488f3 100644
--- a/paddle/phi/kernels/gpu/temporal_shift_kernel.cu
+++ b/paddle/phi/kernels/gpu/temporal_shift_kernel.cu
@@ -146,4 +146,5 @@ PD_REGISTER_KERNEL(temporal_shift,
                    phi::TemporalShiftKernel,
                    float,
                    double,
-                   phi::dtype::float16) {}
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16) {}
diff --git a/python/paddle/fluid/tests/unittests/test_conj_op.py b/python/paddle/fluid/tests/unittests/test_conj_op.py
index adb6b051839411..e356743b179db5 100644
--- a/python/paddle/fluid/tests/unittests/test_conj_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conj_op.py
@@ -20,9 +20,10 @@
 import paddle
 
 sys.path.append("..")
-from eager_op_test import OpTest
+from eager_op_test import OpTest, convert_float_to_uint16
 from numpy.random import random as rand
 
+import paddle.fluid.core as core
 import paddle.fluid.dygraph as dg
 import paddle.static as static
 
@@ -147,5 +148,43 @@ def testfp16(self):
             out = exe.run(feed={'x': input_x}, fetch_list=[out])
 
 
+class TestConjFP16OP(TestConjOp):
+    def init_dtype_type(self):
+        self.dtype = np.float16
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or the place does not support bfloat16",
+)
+class TestConjBF16(OpTest):
+    def setUp(self):
+        self.op_type = "conj"
+        self.python_api = paddle.tensor.conj
+        self.init_dtype_type()
+        self.init_input_output()
+
+    def init_dtype_type(self):
+        self.dtype = np.uint16
+
+    def init_input_output(self):
+        x = (
+            np.random.random((12, 14)) + 1j * np.random.random((12, 14))
+        ).astype(np.float32)
+        out = np.conj(x)
+
+        self.inputs = {'X': convert_float_to_uint16(x)}
+        self.outputs = {'Out': convert_float_to_uint16(out)}
+
+    def test_check_output(self):
+        place = core.CUDAPlace(0)
+        self.check_output_with_place(place)
+
+    def test_check_grad(self):
+        place = core.CUDAPlace(0)
+        self.check_grad_with_place(place, ['X'], 'Out')
+
+
 if __name__ == "__main__":
     unittest.main()
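The bfloat16 tests above feed uint16 arrays because OpTest represents a bfloat16 tensor by its raw 16-bit pattern, with convert_float_to_uint16 doing the packing. As a rough illustration of the conversion these tests rely on (the helper names below are mine, and simple truncation rather than round-to-nearest is assumed), a numpy sketch:

    import numpy as np

    def float32_to_bf16_bits(x):
        # Illustrative only: view float32 as uint32 and keep the upper 16 bits,
        # which form the bfloat16 bit pattern (truncation, no rounding).
        x = np.ascontiguousarray(x, dtype=np.float32)
        return (x.view(np.uint32) >> 16).astype(np.uint16)

    def bf16_bits_to_float32(b):
        # Inverse mapping: place the 16 bits back into the high half of a
        # float32 word; the truncated mantissa bits stay zero.
        b = np.ascontiguousarray(b, dtype=np.uint16)
        return (b.astype(np.uint32) << 16).view(np.float32)

    x = np.random.random((12, 14)).astype(np.float32)
    # bfloat16 keeps ~8 mantissa bits, so the round trip is only coarsely accurate.
    assert np.allclose(bf16_bits_to_float32(float32_to_bf16_bits(x)), x, atol=1e-2)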
diff --git a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
index 6b99e0ead08867..64f3ac0169f2c6 100644
--- a/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
+++ b/python/paddle/fluid/tests/unittests/test_temporal_shift_op.py
@@ -15,7 +15,7 @@
 import unittest
 
 import numpy as np
-from op_test import OpTest
+from op_test import OpTest, convert_float_to_uint16
 
 import paddle
 from paddle.fluid import core
@@ -44,6 +44,7 @@ def temporal_shift(x, seg_num, shift_ratio, data_format):
 class TestTemporalShift(OpTest):
     def setUp(self):
         self.initTestCase()
+        self.init_dtype()
         self.op_type = 'temporal_shift'
         self.python_api = paddle.nn.functional.temporal_shift
         x = np.random.random(self.x_shape).astype(self.dtype)
@@ -64,6 +65,9 @@ def setUp(self):
         self.outputs = {"Out": output}
         self.python_out_sig = ["Out"]
 
+    def init_dtype(self):
+        self.dtype = 'float64'
+
     def test_check_output(self):
         self.check_output(check_eager=True)
 
@@ -74,7 +78,6 @@ def initTestCase(self):
         self.x_shape = (6, 4, 4, 4)
         self.seg_num = 3
         self.shift_ratio = 0.25
-        self.dtype = 'float64'
         self.data_format = 'NCHW'
 
 
@@ -174,6 +177,56 @@ def attr_data_format():
         self.assertRaises(ValueError, attr_data_format)
 
 
+class TestTemporalShiftFP16OP(TestTemporalShift):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or the place does not support bfloat16",
+)
+class TestTemporalShiftBF16(OpTest):
+    def initTestCase(self):
+        self.x_shape = (3, 10, 5, 5)
+        self.seg_num = 1
+        self.shift_ratio = 0.3
+        self.dtype = np.uint16
+        self.data_format = 'NCHW'
+
+    def setUp(self):
+        self.initTestCase()
+        self.op_type = 'temporal_shift'
+        self.python_api = paddle.nn.functional.temporal_shift
+
+        x = np.random.random(self.x_shape).astype(np.float32)
+
+        self.attrs = {
+            "seg_num": self.seg_num,
+            "shift_ratio": self.shift_ratio,
+            "data_format": self.data_format,
+        }
+
+        self.inputs = {
+            "X": convert_float_to_uint16(x),
+        }
+
+        output = temporal_shift(
+            x, self.seg_num, self.shift_ratio, self.data_format
+        )
+        self.outputs = {"Out": convert_float_to_uint16(output)}
+        self.python_out_sig = ["Out"]
+
+    def test_check_output(self):
+        place = core.CUDAPlace(0)
+        self.check_output_with_place(place)
+
+    def test_check_grad_ignore_uv(self):
+        place = core.CUDAPlace(0)
+        self.check_grad_with_place(place, ['X'], 'Out')
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()
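With the GPU kernels above registered, the new dtype can also be exercised directly from the Python API. A minimal dygraph sketch, assuming a CUDA build and a Paddle release that accepts 'bfloat16' in Tensor.astype (the tensor shapes are arbitrary and only mirror the unit tests):

    import paddle
    import paddle.nn.functional as F

    if paddle.is_compiled_with_cuda():
        paddle.set_device('gpu')

        # conj on a real-valued bfloat16 tensor (acts as identity for real
        # dtypes, now dispatching to the GPU conj kernel).
        x = paddle.rand([12, 14]).astype('bfloat16')
        y = paddle.conj(x)

        # temporal_shift on an NCHW bfloat16 input; the batch dimension
        # must be divisible by seg_num.
        v = paddle.rand([6, 4, 4, 4]).astype('bfloat16')
        out = F.temporal_shift(v, seg_num=3, shift_ratio=0.25, data_format='NCHW')
        print(y.dtype, out.shape)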