[BugFix] fix bugs in DCU unit tests (#54874)
* block bf16 tests on ROCm

* block more bf16 tests on ROCm

* some unittest cases don't have kernels on ROCm

* fix code style
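
All eleven files get the same treatment: Paddle's ROCm (DCU) builds also report paddle.is_compiled_with_cuda() as True, so a CUDA-only check does not exclude ROCm, and each bf16/fp16/fp64 test whose kernel is missing on ROCm now additionally checks paddle.is_compiled_with_rocm(). A minimal sketch of the pattern, with a hypothetical test class (both flag functions are real Paddle APIs):

import unittest

import numpy as np

import paddle


@unittest.skipIf(
    # ROCm builds also return True from is_compiled_with_cuda(), so both
    # checks are needed to restrict the test to genuine CUDA builds.
    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
    "BF16 test runs only on CUDA",
)
class TestSomeBF16Op(unittest.TestCase):  # hypothetical example class
    def test_cast_roundtrip(self):
        x = paddle.ones([2, 2], dtype="float32")
        # cast has bf16 kernels on CUDA; 1.0 survives the round trip exactly
        y = x.astype("bfloat16").astype("float32")
        np.testing.assert_allclose(y.numpy(), x.numpy())


if __name__ == "__main__":
    unittest.main()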
lishicheng1996 authored Jun 27, 2023
1 parent 0cdaafe commit abc1c3d
Showing 11 changed files with 72 additions and 7 deletions.
3 changes: 2 additions & 1 deletion test/legacy_test/test_assign_op.py
@@ -72,7 +72,8 @@ def test_backward(self):


@unittest.skipIf(
-    not paddle.is_compiled_with_cuda(), "BFP16 test runs only on GPU"
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
)
class TestAssignBFP16Op(eager_op_test.OpTest):
    def setUp(self):
8 changes: 8 additions & 0 deletions test/legacy_test/test_cast_op.py
@@ -95,6 +95,10 @@ def test_grad(self):
        self.check_grad(['X'], ['Out'], check_prim=True, only_check_prim=True)


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
class TestCastOpBf16ToFp32(OpTest):
    def setUp(self):
        ipt = np.array(np.random.randint(10, size=[10, 10])).astype('uint16')
@@ -120,6 +124,10 @@ def test_grad(self):
        self.check_grad(['X'], ['Out'], check_prim=True, only_check_prim=True)


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
class TestCastOpFp32ToBf16(OpTest):
    def setUp(self):
        ipt = np.random.random(size=[10, 10]).astype('float32')
4 changes: 4 additions & 0 deletions test/legacy_test/test_elementwise_mul_op.py
@@ -163,6 +163,10 @@ def init_input_output(self):
        self.out = np.multiply(self.x, self.y)


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
class TestBF16ElementwiseMulOp(OpTest):
    def setUp(self):
        self.op_type = "elementwise_mul"
4 changes: 4 additions & 0 deletions test/legacy_test/test_elementwise_pow_op.py
@@ -268,6 +268,10 @@ def test_check_grad(self):
        )


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
class TestElementwisePowBF16Op(OpTest):
    def setUp(self):
        self.op_type = "elementwise_pow"
3 changes: 2 additions & 1 deletion test/legacy_test/test_fill_any_like_op.py
@@ -64,7 +64,8 @@ def if_enable_cinn(self):


@unittest.skipIf(
-    not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
+    not core.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "core is not compiled with CUDA",
)
class TestFillAnyLikeOpBfloat16(OpTest):
    def setUp(self):
33 changes: 31 additions & 2 deletions test/legacy_test/test_layer_norm_op.py
@@ -126,6 +126,10 @@ def layer_norm_wrapper(
    )


+@unittest.skipIf(
+    paddle.is_compiled_with_rocm(),
+    "ROCm doesn't support fp64 LayerNormOpByOp currently",
+)
class TestLayerNormOpByOpTest(OpTest):
    def setUp(self):
        self.python_api = layer_norm_wrapper
@@ -164,7 +168,7 @@ def initConfig(self):
        self.cinn_rtol = 1e-5

        self.max_relative_error = 1e-5
-
+        # ROCm does not have float64 LayerNorm kernel
        self.dtype = "float64"
        self.x_shape = [2, 6, 6, 3]
        self.epsilon = 0.00001
@@ -218,6 +222,7 @@ def initTestCase(self):

@unittest.skipIf(
    not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or not support the bfloat16",
)
@@ -306,6 +311,10 @@ def initTestCase(self):
        }


+@unittest.skipIf(
+    paddle.is_compiled_with_rocm(),
+    "ROCm doesn't support fp64 LayerNormOpByOp currently",
+)
class TestLayerNormOpByOpTestFP64_case2(TestLayerNormOpByOpTest):
    def initConfig(self):
        self.rev_comp_atol = 1e-6
@@ -328,6 +337,10 @@ def initConfig(self):
        self.has_bias = False


+@unittest.skipIf(
+    paddle.is_compiled_with_rocm(),
+    "ROCm doesn't support bf16 LayerNormOpByOp currently",
+)
class TestLayerNormBF16OpByOpTest_case2(TestLayerNormBF16OpByOpTest):
    def initConfig(self):
        self.ori_atol = 1e-2
@@ -343,6 +356,10 @@ def initConfig(self):
        self.has_bias = False


+@unittest.skipIf(
+    paddle.is_compiled_with_rocm(),
+    "ROCm doesn't support fp64 LayerNormOpByOp currently",
+)
class TestLayerNormOpByOpTestFP64_case3(TestLayerNormOpByOpTest):
    def initConfig(self):
        self.rev_comp_atol = 1e-7
@@ -365,6 +382,10 @@ def initConfig(self):
        self.has_bias = False


+@unittest.skipIf(
+    paddle.is_compiled_with_rocm(),
+    "ROCm doesn't support bf16 LayerNormOpByOp currently",
+)
class TestLayerNormBF16OpByOpTest_case3(TestLayerNormBF16OpByOpTest):
    def initConfig(self):
        self.ori_atol = 1e-2
@@ -380,6 +401,10 @@ def initConfig(self):
        self.has_bias = False


+@unittest.skipIf(
+    paddle.is_compiled_with_rocm(),
+    "ROCm doesn't support fp64 LayerNormOpByOp currently",
+)
class TestLayerNormOpByOpTestFP64_case4(TestLayerNormOpByOpTest):
    def initConfig(self):
        self.rev_comp_atol = 1e-6
@@ -801,6 +826,10 @@ def assert_equal(x, y):
        assert_equal(b_g_np_1, b_g_np_2)


+@unittest.skipIf(
+    not core.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BF16 is only supported on CUDA.",
+)
class TestBF16ScaleBiasLayerNorm(unittest.TestCase):
    def check_main(self, x_np, weight_np, bias_np, dtype):
        paddle.disable_static()
@@ -934,7 +963,7 @@ def check_with_dtype(self, dtype):
        )

    def test_main(self):
-        if not paddle.is_compiled_with_cuda():
+        if not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm():
            return
        self.check_with_dtype(dtype="float32")
        self.check_with_dtype(dtype="bfloat16")
1 change: 1 addition & 0 deletions test/legacy_test/test_matmul_v2_op.py
@@ -405,6 +405,7 @@ def test_check_grad(self):
def create_test_bf16_class(parent, atol=0.01):
    @unittest.skipIf(
        not core.is_compiled_with_cuda()
+        or paddle.is_compiled_with_rocm()
        or not core.is_bfloat16_supported(core.CUDAPlace(0)),
        "core is not compiled with CUDA and not support the bfloat16",
    )
11 changes: 10 additions & 1 deletion test/legacy_test/test_reduce_op.py
@@ -198,7 +198,8 @@ def test_check_grad(self):

def create_test_bf16_class(parent):
    @unittest.skipIf(
-        not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
+        not core.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+        "core is not compiled with CUDA",
    )
    class TestSumOpBf16(parent):
        def setUp(self):
@@ -349,6 +350,7 @@ def init_dtype(self):

@unittest.skipIf(
    not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or not support the bfloat16",
)
@@ -449,6 +451,9 @@ def test_check_output(self):
reason="reduce_min is discontinuous non-derivable function,"
" its gradient check is not supported by unittest framework."
)
@unittest.skipIf(
paddle.is_compiled_with_rocm(), "ROCm doesn't have FP16 reduce_min kernel"
)
class TestMinFP16Op(OpTest):
"""Remove Min with subgradient from gradient check to confirm the success of CI."""

@@ -479,6 +484,7 @@ def test_check_output(self):

@unittest.skipIf(
    not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or not support the bfloat16",
)
@@ -541,6 +547,7 @@ def test_check_grad(self):

@unittest.skipIf(
    not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or not support the bfloat16",
)
@@ -648,6 +655,7 @@ def test_check_grad(self):

@unittest.skipIf(
    not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or not support the bfloat16",
)
@@ -721,6 +729,7 @@ def test_check_grad(self):

@unittest.skipIf(
    not core.is_compiled_with_cuda()
+    or paddle.is_compiled_with_rocm()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or not support the bfloat16",
)
4 changes: 4 additions & 0 deletions test/legacy_test/test_reshape_op.py
@@ -86,6 +86,10 @@ def init_data(self):
        self.infered_shape = ()


+@unittest.skipIf(
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
+)
class TestReshapeBF16Op(OpTest):
    def setUp(self):
        self.init_data()
3 changes: 2 additions & 1 deletion test/legacy_test/test_scale_op.py
@@ -155,7 +155,8 @@ def test_check_grad(self):


@unittest.skipIf(
-    not core.is_compiled_with_rocm(), "core is not compiled with CUDA"
+    not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm(),
+    "BFP16 test runs only on CUDA",
)
class TestScaleBF16Op(OpTest):
    def setUp(self):
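
One hunk above deserves a comment: the old guard in test_scale_op.py was inverted. not core.is_compiled_with_rocm() is True on every non-ROCm build, so TestScaleBF16Op was skipped on CUDA and ran only on ROCm, despite a message claiming a CUDA requirement. A side-by-side sketch of the two conditions (the core import path is an assumption based on how these tests usually import it):

import paddle
from paddle.fluid import core  # assumed import path, as used by these tests

# Old, inverted condition: True on every non-ROCm build, so the BF16
# scale test was skipped on CUDA and executed only on ROCm/DCU.
old_skip = not core.is_compiled_with_rocm()

# Fixed condition, matching the rest of this PR: skip unless this is a
# genuine CUDA build (ROCm builds also report is_compiled_with_cuda()).
new_skip = not paddle.is_compiled_with_cuda() or paddle.is_compiled_with_rocm()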
5 changes: 4 additions & 1 deletion test/legacy_test/test_static_model_parallel_fused_multi_transformer.py
@@ -34,7 +34,10 @@ def _setup_config(self):
    def test_dist_static_model_parallel_fused_multi_transformer(self):
        from paddle import fluid

-        if fluid.core.is_compiled_with_cuda():
+        if (
+            fluid.core.is_compiled_with_cuda()
+            and not paddle.is_compiled_with_rocm()
+        ):
            self.check_with_place(
                "static_model_parallel_fused_multi_transformer.py",
                delta=1e-5,
