From e64a18dba1e5e01bd73a59a9bcade2994d9446cf Mon Sep 17 00:00:00 2001
From: Charles-hit <56987902+Charles-hit@users.noreply.github.com>
Date: Thu, 8 Jun 2023 15:14:34 +0800
Subject: [PATCH] [AMP Prim OP]support some prim ops for bf16 dtype part3 (#54368)

* support some prim ops bf16 dtype

* fix cmake
---
 test/legacy_test/CMakeLists.txt                |  5 +++-
 test/legacy_test/test_assign_op.py             |  1 -
 test/legacy_test/test_erf_op.py                | 16 +++++-----
 test/legacy_test/test_fill_any_like_op.py      |  2 +-
 .../test_flatten_contiguous_range_op.py        | 29 ++++++++++++++++---
 test/legacy_test/test_index_select_op.py       | 14 +++++++--
 test/legacy_test/test_top_k_v2_op.py           | 26 ++++++++++++++---
 test/legacy_test/test_transpose_op.py          | 14 +++++++--
 8 files changed, 84 insertions(+), 23 deletions(-)

diff --git a/test/legacy_test/CMakeLists.txt b/test/legacy_test/CMakeLists.txt
index 67142d60a7266..20aedda9e82d5 100644
--- a/test/legacy_test/CMakeLists.txt
+++ b/test/legacy_test/CMakeLists.txt
@@ -1199,7 +1199,10 @@ set(TEST_CINN_OPS
   test_instance_norm_op
   test_cumsum_op
   test_pad_op
-  test_split_op)
+  test_split_op
+  test_erf_op
+  test_assign_op
+  test_flatten_contiguous_range_op)

 foreach(TEST_CINN_OPS ${TEST_CINN_OPS})
   if(WITH_CINN)
diff --git a/test/legacy_test/test_assign_op.py b/test/legacy_test/test_assign_op.py
index 22efd0ac66175..9069b11669d3e 100644
--- a/test/legacy_test/test_assign_op.py
+++ b/test/legacy_test/test_assign_op.py
@@ -80,7 +80,6 @@ def setUp(self):
         self.public_python_api = paddle.assign
         self.op_type = "assign"
         self.prim_op_type = "prim"
-        self.enable_cinn = False
         x = np.random.uniform(0, 1, [100, 10]).astype(np.float32)
         x = convert_float_to_uint16(x)
         self.inputs = {'X': x}
diff --git a/test/legacy_test/test_erf_op.py b/test/legacy_test/test_erf_op.py
index a124a6839ac55..b560859cd411d 100644
--- a/test/legacy_test/test_erf_op.py
+++ b/test/legacy_test/test_erf_op.py
@@ -57,15 +57,17 @@ def _test_case(self, place):
         np.testing.assert_allclose(y_ref, y_test, rtol=1e-05)

     def test_case(self):
-        self._test_case(fluid.CPUPlace())
-        if fluid.is_compiled_with_cuda():
-            self._test_case(fluid.CUDAPlace(0))
+        with paddle.fluid.framework._static_guard():
+            self._test_case(fluid.CPUPlace())
+            if fluid.is_compiled_with_cuda():
+                self._test_case(fluid.CUDAPlace(0))

     def test_name(self):
-        with fluid.program_guard(fluid.Program()):
-            x = paddle.static.data('x', [3, 4])
-            y = paddle.erf(x, name='erf')
-            self.assertTrue('erf' in y.name)
+        with paddle.fluid.framework._static_guard():
+            with fluid.program_guard(fluid.Program()):
+                x = paddle.static.data('x', [3, 4])
+                y = paddle.erf(x, name='erf')
+                self.assertTrue('erf' in y.name)


 class TestErfFP16OP(OpTest):
diff --git a/test/legacy_test/test_fill_any_like_op.py b/test/legacy_test/test_fill_any_like_op.py
index 754e1318788f0..36cf77195ccdb 100644
--- a/test/legacy_test/test_fill_any_like_op.py
+++ b/test/legacy_test/test_fill_any_like_op.py
@@ -88,7 +88,7 @@ def test_check_output(self):
         self.check_output_with_place(place, check_prim=True)

     def if_enable_cinn(self):
-        self.enable_cinn = False
+        pass


 class TestFillAnyLikeOpValue1(TestFillAnyLikeOp):
diff --git a/test/legacy_test/test_flatten_contiguous_range_op.py b/test/legacy_test/test_flatten_contiguous_range_op.py
index ea924ce6297e6..658f03979a9c4 100644
--- a/test/legacy_test/test_flatten_contiguous_range_op.py
+++ b/test/legacy_test/test_flatten_contiguous_range_op.py
@@ -30,7 +30,7 @@ def setUp(self):
         self.prim_op_type = "comp"
         self.start_axis = 0
         self.stop_axis = -1
-        self.skip_cinn()
+        self.if_enable_cinn()
         self.init_test_case()
         self.init_test_dtype()
         self.init_input_data()
@@ -40,8 +40,8 @@ def setUp(self):
             "XShape": np.random.random(self.in_shape).astype("float32"),
         }

-    def skip_cinn(self):
-        self.enable_cinn = True
+    def if_enable_cinn(self):
+        pass

     def test_check_output(self):
         if str(self.dtype) in {"float16", "uint16"}:
@@ -104,6 +104,9 @@ def init_test_dtype(self):
     "core is not complied with CUDA and not support the bfloat16",
 )
 class TestFlattenBF16Op(TestFlattenOp):
+    def if_enable_cinn(self):
+        pass
+
     def init_test_dtype(self):
         self.dtype = "uint16"

@@ -142,6 +145,9 @@ def init_test_dtype(self):
     "core is not complied with CUDA and not support the bfloat16",
 )
 class TestFlattenBF16Op_1(TestFlattenOp_1):
+    def if_enable_cinn(self):
+        pass
+
     def init_test_dtype(self):
         self.dtype = "uint16"

@@ -180,6 +186,9 @@ def init_test_dtype(self):
     "core is not complied with CUDA and not support the bfloat16",
 )
 class TestFlattenBF16Op_2(TestFlattenOp_2):
+    def if_enable_cinn(self):
+        pass
+
     def init_test_dtype(self):
         self.dtype = "uint16"

@@ -218,6 +227,9 @@ def init_test_dtype(self):
     "core is not complied with CUDA and not support the bfloat16",
 )
 class TestFlattenBF16Op_3(TestFlattenOp_3):
+    def if_enable_cinn(self):
+        pass
+
     def init_test_dtype(self):
         self.dtype = "uint16"

@@ -256,6 +268,9 @@ def init_test_dtype(self):
     "core is not complied with CUDA and not support the bfloat16",
 )
 class TestFlattenBF16Op_4(TestFlattenOp_4):
+    def if_enable_cinn(self):
+        pass
+
     def init_test_dtype(self):
         self.dtype = "uint16"

@@ -294,6 +309,9 @@ def init_test_dtype(self):
     "core is not complied with CUDA and not support the bfloat16",
 )
 class TestFlattenBF16Op_5(TestFlattenOp_5):
+    def if_enable_cinn(self):
+        pass
+
     def init_test_dtype(self):
         self.dtype = "uint16"

@@ -305,7 +323,7 @@ def init_test_case(self):
         self.stop_axis = -1
         self.new_shape = (1,)

-    def skip_cinn(self):
+    def if_enable_cinn(self):
         self.enable_cinn = False

     def init_attrs(self):
@@ -363,6 +381,9 @@ def init_test_dtype(self):
     "core is not complied with CUDA and not support the bfloat16",
 )
 class TestFlattenBF16OpSixDims(TestFlattenOpSixDims):
+    def if_enable_cinn(self):
+        pass
+
     def init_test_dtype(self):
         self.dtype = "uint16"

diff --git a/test/legacy_test/test_index_select_op.py b/test/legacy_test/test_index_select_op.py
index 40a01aef3f630..ceb152a465fc6 100644
--- a/test/legacy_test/test_index_select_op.py
+++ b/test/legacy_test/test_index_select_op.py
@@ -19,7 +19,7 @@

 import paddle
 from paddle import fluid
-from paddle.fluid import Program, program_guard
+from paddle.fluid import Program, core, program_guard

 np.random.seed(1024)

@@ -102,8 +102,11 @@ def init_dtype_type(self):
 class TestIndexSelectBF16Op(OpTest):
     def setUp(self):
         self.python_api = paddle.index_select
+        self.public_python_api = paddle.index_select
+        self.prim_op_type = "comp"
         self.op_type = "index_select"
         self.init_dtype_type()
+        self.if_skip_cinn()
         index_np = np.random.randint(
             low=0, high=self.x_shape[self.dim], size=self.index_size
         )
@@ -124,6 +127,9 @@ def setUp(self):
         out = np.reshape(out_list, self.out_shape)
         self.outputs = {'Out': convert_float_to_uint16(out)}

+    def if_skip_cinn(self):
+        self.enable_cinn = False
+
     def init_dtype_type(self):
         self.dim = 1
         self.x_type = np.uint16
@@ -132,10 +138,12 @@ def init_dtype_type(self):
         self.index_size = 100

     def test_check_output(self):
-        self.check_output()
+        place = core.CUDAPlace(0)
+        self.check_output_with_place(place)

     def test_check_grad_normal(self):
-        self.check_grad(['X'], 'Out')
+        place = core.CUDAPlace(0)
+        self.check_grad_with_place(place, ['X'], 'Out', check_prim=True)


 class TestIndexSelectAPI(unittest.TestCase):
diff --git a/test/legacy_test/test_top_k_v2_op.py b/test/legacy_test/test_top_k_v2_op.py
index 5612703968dad..872a52e7ccc83 100644
--- a/test/legacy_test/test_top_k_v2_op.py
+++ b/test/legacy_test/test_top_k_v2_op.py
@@ -15,7 +15,11 @@
 import unittest

 import numpy as np
-from eager_op_test import OpTest, convert_float_to_uint16
+from eager_op_test import (
+    OpTest,
+    convert_float_to_uint16,
+    convert_uint16_to_float,
+)

 import paddle
 from paddle.fluid import core
@@ -51,6 +55,7 @@ def setUp(self):
         self.dtype = np.float64
         self.input_data = np.random.rand(10, 20)
         self.init_args()
+        self.if_enable_cinn()
         self.inputs = {'X': self.input_data}
         self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest}
         output, indices = numpy_topk(
@@ -58,6 +63,9 @@ def setUp(self):
         )
         self.outputs = {'Out': output, 'Indices': indices}

+    def if_enable_cinn(self):
+        pass
+
     def test_check_output(self):
         self.check_output()

@@ -115,6 +123,7 @@ def setUp(self):
         self.dtype = np.float64
         self.input_data = np.random.rand(10, 10, 5)
         self.init_args()
+        self.if_enable_cinn()
         self.inputs = {'X': self.input_data}
         self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest}
         output, indices = numpy_topk(
@@ -137,6 +146,7 @@ def setUp(self):
         self.dtype = np.float64
         self.input_data = np.random.rand(10, 10, 5)
         self.init_args()
+        self.if_enable_cinn()
         self.inputs = {'X': self.input_data}
         self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest}
         output, indices = numpy_topk(
@@ -159,6 +169,7 @@ def setUp(self):
         self.dtype = np.float32
         self.input_data = np.random.rand(10, 10, 5)
         self.init_args()
+        self.if_enable_cinn()
         self.inputs = {'X': self.input_data}
         self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest}
         output, indices = numpy_topk(
@@ -181,6 +192,7 @@ def setUp(self):
         self.dtype = np.float16
         self.input_data = np.random.rand(10, 20, 10)
         self.init_args()
+        self.if_enable_cinn()
         self.inputs = {'X': self.input_data}
         self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest}
         output, indices = numpy_topk(
@@ -198,6 +210,7 @@ def setUp(self):
         self.prim_op_type = "prim"
         self.input_data = np.random.rand(10, 20).astype(self.dtype)
         self.init_args()
+        self.if_enable_cinn()
         self.inputs = {'X': self.input_data}
         self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest}
         output, indices = numpy_topk(
@@ -218,9 +231,11 @@ def setUp(self):
         self.public_python_api = paddle.topk
         self.dtype = np.uint16
         self.prim_op_type = "prim"
-        self.input_data = np.random.rand(10, 20).astype(np.float32)
+        self.input_data = np.random.random([10, 20]).astype(np.float32)
         self.init_args()
+        self.if_enable_cinn()
         self.inputs = {'X': convert_float_to_uint16(self.input_data)}
+        self.input_data = convert_uint16_to_float(self.inputs['X'])
         self.attrs = {'k': self.k, 'axis': self.axis, 'largest': self.largest}
         output, indices = numpy_topk(
             self.input_data, axis=self.axis, k=self.k, largest=self.largest
@@ -230,13 +245,16 @@ def setUp(self):
             'Indices': indices,
         }

+    def if_enable_cinn(self):
+        self.enable_cinn = False
+
     def test_check_output(self):
         place = core.CUDAPlace(0)
-        self.check_output_with_place(place, check_eager=True)
+        self.check_output_with_place(place)

     def test_check_grad(self):
         place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, {'X'}, 'Out', check_eager=True)
+        self.check_grad_with_place(place, ['X'], 'Out', check_prim=True)


 class TestTopKAPI(unittest.TestCase):
diff --git a/test/legacy_test/test_transpose_op.py b/test/legacy_test/test_transpose_op.py
index 3865476f529b5..5bbc458799fbf 100644
--- a/test/legacy_test/test_transpose_op.py
+++ b/test/legacy_test/test_transpose_op.py
@@ -244,7 +244,7 @@ def setUp(self):
         self.python_api = paddle.transpose
         self.public_python_api = paddle.transpose
         self.prim_op_type = "prim"
-        self.enable_cinn = False
+        self.if_enable_cinn()
         x = np.random.random(self.shape).astype("float32")
         self.inputs = {'X': convert_float_to_uint16(x)}
         self.attrs = {
@@ -258,6 +258,9 @@ def setUp(self):
             'Out': self.inputs['X'].transpose(self.axis),
         }

+    def if_enable_cinn(self):
+        self.enable_cinn = False
+
     def initTestCase(self):
         fluid.core.set_autotune_range(0, 3)
         fluid.core.update_autotune_status()
@@ -283,7 +286,7 @@ def setUp(self):
         self.initTestCase()
         self.dtype = np.float16
         self.prim_op_type = "prim"
-        self.enable_cinn = False
+        self.if_enable_cinn()
         self.python_api = paddle.transpose
         self.public_python_api = paddle.transpose
         x = np.random.random(self.shape).astype(self.dtype)
@@ -298,6 +301,9 @@ def setUp(self):
             'Out': self.inputs['X'].transpose(self.axis),
         }

+    def if_enable_cinn(self):
+        pass
+
     def init_op_type(self):
         self.op_type = "transpose2"
         self.use_mkldnn = False
@@ -323,6 +329,7 @@ def setUp(self):
         self.python_api = paddle.transpose
         self.public_python_api = paddle.transpose
         x = np.random.random(self.shape).astype("float32")
+        self.if_enable_cinn()
         self.inputs = {'X': convert_float_to_uint16(x)}

         self.attrs = {
@@ -336,6 +343,9 @@ def setUp(self):
             'Out': self.inputs['X'].transpose(self.axis),
         }

+    def if_enable_cinn(self):
+        self.enable_cinn = False
+
     def init_op_type(self):
         self.op_type = "transpose2"
         self.use_mkldnn = False
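
Note (not part of the patch): every bf16 test touched above follows the same pattern — build the data in fp32, reinterpret it as bf16 with convert_float_to_uint16 so the expected output is computed from the already-rounded values, disable CINN through an if_enable_cinn hook where bf16 prim lowering is not supported, and run the checks on a CUDA place with check_prim=True. Below is a minimal sketch of that pattern modeled on the test_assign_op hunk; the class name is illustrative, and the two check methods mirror the index_select/top_k tests rather than the assign hunk, which does not show them.

import numpy as np
from eager_op_test import OpTest, convert_float_to_uint16

import paddle
from paddle.fluid import core


class SketchAssignBF16Op(OpTest):
    def setUp(self):
        self.python_api = paddle.assign
        self.public_python_api = paddle.assign
        self.op_type = "assign"
        self.prim_op_type = "prim"
        # fp32 data is converted to bf16 bit patterns (stored as uint16),
        # so input and reference output share the same rounded values.
        x = np.random.uniform(0, 1, [100, 10]).astype(np.float32)
        x = convert_float_to_uint16(x)
        self.inputs = {'X': x}
        self.outputs = {'Out': x}  # assign is the identity op

    def test_check_output(self):
        place = core.CUDAPlace(0)
        self.check_output_with_place(place)

    def test_check_grad(self):
        place = core.CUDAPlace(0)
        self.check_grad_with_place(place, ['X'], 'Out', check_prim=True)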