part-2 cherry from: [Distributed] Opt nccl connection by lazy initialization (PaddlePaddle#55005)
ForFishes authored and wentaoyu committed Nov 23, 2023
1 parent 48fe614 commit 17197e3
Showing 1 changed file with 13 additions and 0 deletions: python/paddle/distributed/fleet/base/topology.py
@@ -212,6 +212,19 @@ def __init__(self, topology):
            group=self._pp_comm_group,
        )

        # create comm group for pipe parallel
        self._pp_group, self._pp_comm_group = self._set_comm_group("pipe")
        # NOTE(shenliang03): In pipeline parallel, we use batch_isend_irecv.
        # if batch_isend_irecv is the first collective operation, all ranks of
        # the pipeline group must participate in this call. In order to avoid
        # this situation, we perform a collective communication in advance and
        # create a communicator.
        paddle.distributed.all_reduce(
            paddle.zeros([1], dtype="int32"),
            op=paddle.distributed.ReduceOp.SUM,
            group=self._pp_comm_group,
        )

        # create comm group for data parallel
        self._dp_group, self._dp_comm_group = self._set_comm_group("data")
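The NOTE in the diff captures the key constraint: NCCL communicators are created lazily on a group's first collective, and batch_isend_irecv requires every rank of the group to join that first call, which point-to-point pipeline traffic does not guarantee. Running a dummy all_reduce on all ranks right after group creation establishes the communicator up front. Below is a minimal standalone sketch of the same warm-up pattern, not taken from the commit: it assumes two trainers launched with paddle.distributed.launch and a Paddle build that provides paddle.distributed.P2POp and batch_isend_irecv; the group, tensor shapes, and peer logic are illustrative.

    import paddle
    import paddle.distributed as dist

    dist.init_parallel_env()
    rank = dist.get_rank()

    # Illustrative two-rank group; in topology.py this would be the pipe group.
    pp_group = dist.new_group(ranks=[0, 1])

    # Warm-up: a trivial all_reduce in which every rank of the group
    # participates, so the NCCL communicator is created before any
    # point-to-point traffic.
    dist.all_reduce(
        paddle.zeros([1], dtype="int32"),
        op=dist.ReduceOp.SUM,
        group=pp_group,
    )

    # Now batch_isend_irecv is no longer the group's first collective.
    peer = 1 - rank  # assumes exactly ranks 0 and 1
    send_buf = paddle.full([4], float(rank), dtype="float32")
    recv_buf = paddle.zeros([4], dtype="float32")
    ops = [
        dist.P2POp(dist.isend, send_buf, peer, group=pp_group),
        dist.P2POp(dist.irecv, recv_buf, peer, group=pp_group),
    ]
    for task in dist.batch_isend_irecv(ops):
        task.wait()

The design choice is to pay the communicator setup cost once, at a point where all ranks of the group are known to be executing the same line, rather than risk a hang when only a subset of ranks reaches the first point-to-point exchange.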
