diff --git a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py
index 7626c36b3a6334..7d899cff418710 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py
@@ -165,7 +165,7 @@ def _insert_allreduce_ops_for_gm(self, gm_block):
         block = self.main_program.global_block()
 
         first_optimize_op_idx = None
-        for i, op in reversed(enumerate(list(gm_block.ops))):
+        for i, op in reversed(list(enumerate(gm_block.ops))):
             if is_backward_op(op) and first_optimize_op_idx is None:
                 first_optimize_op_idx = i + 1
                 break
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 44fd4c04e20955..6da4f6f64e4d4d 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -778,7 +778,7 @@ endif()
 if (WITH_DISTRIBUTE AND NOT APPLE)
     if(WITH_GPU OR WITH_ROCM)
         set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120)
-        set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 120)
+        set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 160)
     endif()
 endif()
 
diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py b/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py
index ce8b6dec3c1b00..733c4267db6134 100644
--- a/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/dist_mnist_gradient_merge_raw_optimizer.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 import paddle
 import paddle.nn as nn
 import paddle.fluid as fluid
@@ -44,9 +45,10 @@ def get_model(self, batch_size=2, single_device=False):
         strategy.build_strategy = build_strategy
 
         strategy.gradient_merge = True
+        avg = os.environ['enable_gm_avg'] == "True"
         strategy.gradient_merge_configs = {
             "k_steps": 2,
-            "avg": False,
+            "avg": avg,
         }
         strategy.without_graph_optimization = True
 
@@ -65,9 +67,25 @@ def get_model(self, batch_size=2, single_device=False):
                 optimizer,
                 k_steps=strategy.gradient_merge_configs["k_steps"],
                 avg=strategy.gradient_merge_configs["avg"])
+            world_size = 1
         else:
             optimizer = fleet.distributed_optimizer(optimizer)
+            world_size = fleet.world_size()
         optimizer.minimize(cost)
+        if world_size > 1:
+            assert paddle.static.default_main_program().num_blocks == 2
+            gm_block = paddle.static.default_main_program().block(1)
+            start_allreduce_idx = None
+            for i, op in enumerate(gm_block.ops):
+                if op.type == "c_allreduce_sum":
+                    start_allreduce_idx = i
+                    break
+            # the magic number 1 below means skip the c_sync_calc_stream op
+            if avg:
+                assert start_allreduce_idx > 1
+            else:
+                assert start_allreduce_idx == 1
+
         train_reader = paddle.batch(
             paddle.dataset.mnist.test(), batch_size=batch_size)
         test_reader = paddle.batch(
diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py
index e10e4fd09e4a9a..8056ab86333809 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_gradient_merge.py
@@ -52,21 +52,35 @@ def test_dist_train(self):
             log_name=flag_name + "_no_fuse")
 
 
-class TestDistMnistGradMergeRawOptimizer(TestDistBase):
+class TestDistMnistGradMergeRawOptimizerBase(TestDistBase):
     def _setup_config(self):
         self._use_reader_alloc = False
         self._nccl2_mode = True
         self._use_fleet_api = True
         self._use_fleet_api_20 = True
 
+    def enable_avg(self):
+        return False
+
     def test_dist_train(self):
         if fluid.core.is_compiled_with_cuda():
+            avg = str(self.enable_avg())
+            log_name = flag_name + "_raw_optimizer_gm_avg_" + avg
             self.check_with_place(
                 "dist_mnist_gradient_merge_raw_optimizer.py",
                 delta=1e-5,
                 check_error_log=True,
-                log_name=flag_name + "_raw_optimizer",
-                need_envs={'FLAGS_apply_pass_to_program': '1'})
+                log_name=log_name,
+                need_envs={
+                    'FLAGS_apply_pass_to_program': '1',
+                    'enable_gm_avg': avg,
+                })
+
+
+class TestDistMnistGradMergeRawOptimizerAvg(
+        TestDistMnistGradMergeRawOptimizerBase):
+    def enable_avg(self):
+        return True
 
 
 if __name__ == "__main__":
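Note on the first hunk: in Python 3, enumerate() returns a one-shot iterator, while reversed() requires a sequence, so the old expression reversed(enumerate(list(gm_block.ops))) raises a TypeError at runtime. Moving list() outside enumerate() materializes the (index, op) pairs into a reversible sequence. A minimal standalone sketch (the op names are made up for illustration, not taken from the patched file):

    ops = ["fill_constant", "elementwise_add", "scale"]

    try:
        reversed(enumerate(ops))  # old code path: TypeError, an enumerate object is not reversible
    except TypeError as exc:
        print(exc)

    # fixed code path: list() turns the iterator into a reversible sequence
    for i, op in reversed(list(enumerate(ops))):
        print(i, op)  # prints 2 scale, 1 elementwise_add, 0 fill_constant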
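Note on the new assertions in dist_mnist_gradient_merge_raw_optimizer.py: with gradient merge enabled, the distributed program gains a second block (the gradient-merge block), and the index of the first c_allreduce_sum op in it tells whether the accumulated gradients are scaled before communication; the extra scale op is why start_allreduce_idx > 1 when avg is True. A sketch of the semantics the avg flag toggles, assuming (as Paddle's gradient-merge docs describe) that avg=True averages rather than sums the k_steps gradients; merged_gradient is a hypothetical helper, not a Paddle API:

    import numpy as np

    def merged_gradient(micro_grads, avg):
        """Combine per-micro-step gradients the way gradient merge does."""
        acc = np.zeros_like(micro_grads[0])
        for g in micro_grads:  # one entry per micro step, k_steps in total
            acc += g           # accumulate across micro steps
        if avg:
            acc /= len(micro_grads)  # avg=True: extra division by k_steps
        return acc

    grads = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]  # k_steps = 2
    print(merged_gradient(grads, avg=False))  # [4. 6.], plain sum
    print(merged_gradient(grads, avg=True))   # [2. 3.], scaled before use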