PaddlePaddle · FeixLiu · Jun 12, 2023 · Jun 12, 2023 · Jun 12, 2023
diff --git a/test/collective/CMakeLists.txt b/test/collective/CMakeLists.txt
--- a/test/collective/CMakeLists.txt
+++ b/test/collective/CMakeLists.txt
@@ -200,8 +200,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
 endif()
 if((WITH_GPU OR WITH_ROCM) AND (LINUX))
   py_test_modules(
-    test_collective_reduce_api MODULES test_collective_reduce_api ENVS
-    "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
+    test_collective_reduce_api
+    MODULES
+    test_collective_reduce_api
+    ENVS
+    "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
+  )
   set_tests_properties(test_collective_reduce_api
                        PROPERTIES TIMEOUT "500" LABELS "RUN_TYPE=DIST")
 endif()
@@ -272,8 +276,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
 endif()
 if((WITH_GPU OR WITH_ROCM) AND (LINUX))
   py_test_modules(
-    test_collective_split_col_linear MODULES test_collective_split_col_linear
-    ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
+    test_collective_split_col_linear
+    MODULES
+    test_collective_split_col_linear
+    ENVS
+    "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python"
+  )
   set_tests_properties(test_collective_split_col_linear
                        PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
 endif()

diff --git a/test/collective/fleet/CMakeLists.txt b/test/collective/fleet/CMakeLists.txt
--- a/test/collective/fleet/CMakeLists.txt
+++ b/test/collective/fleet/CMakeLists.txt
@@ -332,7 +332,7 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
     LABELS
     "RUN_TYPE=DIST"
     ENVS
-    "PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
+    "NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21234;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
   )
   set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT
                                                                         "120")
@@ -351,8 +351,12 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX OR WIN32))
 endif()
 if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
   py_test_modules(
-    test_recv_save_op MODULES test_recv_save_op ENVS
-    "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
+    test_recv_save_op
+    MODULES
+    test_recv_save_op
+    ENVS
+    "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
+  )
 endif()
 if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
   py_test_modules(
@@ -696,7 +700,7 @@ if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
     LABELS
     "RUN_TYPE=DIST"
     ENVS
-    "PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
+    "NVIDIA_TF32_OVERRIDE=0;PADDLE_DIST_UT_PORT=21274;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
   )
   set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT "200")
 endif()
@@ -922,9 +926,12 @@ if((WITH_GPU) AND (LINUX))
 endif()
 if((WITH_GPU) AND (LINUX))
   py_test_modules(
-    test_dygraph_save_for_auto_infer MODULES test_dygraph_save_for_auto_infer
+    test_dygraph_save_for_auto_infer
+    MODULES
+    test_dygraph_save_for_auto_infer
     ENVS
-    "http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
+    "NVIDIA_TF32_OVERRIDE=0;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
+  )
   set_tests_properties(test_dygraph_save_for_auto_infer
                        PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
 endif()
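diff --git a/test/collective/fleet/hybrid_parallel_mp_layers.py b/test/collective/fleet/hybrid_parallel_mp_layers.py
--- a/test/collective/fleet/hybrid_parallel_mp_layers.py
+++ b/test/collective/fleet/hybrid_parallel_mp_layers.py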
@@ -221,7 +221,7 @@ def test_row_parallel_layer(self):
             optimizer_b.step()
 
             np.testing.assert_allclose(
-                loss_a.numpy(), loss_b.numpy(), rtol=5e-6
+                loss_a.numpy(), loss_b.numpy(), rtol=5e-5
             )
 
     def test_parallel_embedding(self):

diff --git a/test/distributed_passes/CMakeLists.txt b/test/distributed_passes/CMakeLists.txt
--- a/test/distributed_passes/CMakeLists.txt
+++ b/test/distributed_passes/CMakeLists.txt
@@ -27,7 +27,7 @@ if(NOT ((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6)))
 endif()
 
 foreach(TEST_OP ${TEST_OPS})
-  py_test_modules(${TEST_OP} MODULES ${TEST_OP})
+  py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS "NVIDIA_TF32_OVERRIDE=0")
   list(APPEND DIST_TEST_OPS ${TEST_OP})
   set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200)
   set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST")