Skip to content

Commit

Permalink
fix a100 cuda 12 ut (PaddlePaddle#54542)
Browse files Browse the repository at this point in the history
  • Loading branch information
FeixLiu committed Jun 12, 2023
1 parent 83e1a07 commit 8a2a55d
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 12 deletions.
16 changes: 12 additions & 4 deletions test/collective/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_reduce_api MODULES test_collective_reduce_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
test_collective_reduce_api
MODULES
test_collective_reduce_api
ENVS
"NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_collective_reduce_api
PROPERTIES TIMEOUT "500" LABELS "RUN_TYPE=DIST")
endif()
Expand Down Expand Up @@ -272,8 +276,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
test_collective_split_col_linear MODULES test_collective_split_col_linear
ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
test_collective_split_col_linear
MODULES
test_collective_split_col_linear
ENVS
"NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_collective_split_col_linear
PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
endif()
Expand Down
19 changes: 13 additions & 6 deletions test/collective/fleet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
LABELS
"RUN_TYPE=DIST"
ENVS
"PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
"NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT
"120")
Expand All @@ -351,8 +351,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX OR WIN32))
endif()
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
py_test_modules(
test_recv_save_op MODULES test_recv_save_op ENVS
"http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
test_recv_save_op
MODULES
test_recv_save_op
ENVS
"NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
)
endif()
if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
py_test_modules(
Expand Down Expand Up @@ -696,7 +700,7 @@ if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
LABELS
"RUN_TYPE=DIST"
ENVS
"PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
"NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT "200")
endif()
Expand Down Expand Up @@ -922,9 +926,12 @@ if((WITH_GPU) AND (LINUX))
endif()
if((WITH_GPU) AND (LINUX))
py_test_modules(
test_dygraph_save_for_auto_infer MODULES test_dygraph_save_for_auto_infer
test_dygraph_save_for_auto_infer
MODULES
test_dygraph_save_for_auto_infer
ENVS
"http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
"NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
)
set_tests_properties(test_dygraph_save_for_auto_infer
PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
endif()
2 changes: 1 addition & 1 deletion test/collective/fleet/hybrid_parallel_mp_layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def test_row_parallel_layer(self):
optimizer_b.step()

np.testing.assert_allclose(
loss_a.numpy(), loss_b.numpy(), rtol=5e-6
loss_a.numpy(), loss_b.numpy(), rtol=5e-5
)

def test_parallel_embedding(self):
Expand Down
2 changes: 1 addition & 1 deletion test/distributed_passes/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ if(NOT ((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6)))
endif()

foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS "NVIDIA_TF32_OVERRIDE=0")
list(APPEND DIST_TEST_OPS ${TEST_OP})
set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200)
set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST")
Expand Down

0 comments on commit 8a2a55d

Please sign in to comment.