Refactor ProcessGroup to support comm context migration & clang compilation #49451

Merged Jan 5, 2023 (6 commits)
8 changes: 2 additions & 6 deletions paddle/fluid/distributed/collective/CMakeLists.txt

@@ -2,14 +2,11 @@ cc_library(
   process_group
   SRCS process_group.cc
   DEPS dense_tensor)
-cc_library(
-  process_group_stream
-  SRCS process_group_stream.cc
-  DEPS dense_tensor)
 
 cc_library(
   eager_reducer
   SRCS reducer.cc
-  DEPS eager_api process_group process_group_stream phi_api string_helper)
+  DEPS eager_api process_group phi_api string_helper)
@@ -23,7 +20,6 @@ if(WITH_NCCL OR WITH_RCCL)
     process_group_nccl
     SRCS process_group_nccl.cc nccl_tools.cc common.cc check.cc
     DEPS process_group
-         process_group_stream
          place
          enforce
          collective_helper
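Note on this build change: the standalone process_group_stream library is deleted outright, and both eager_reducer and process_group_nccl drop it from their DEPS. This implies the stream-aware API formerly split out into a separate ProcessGroupStream target now lives on ProcessGroup itself. Below is a minimal compilable sketch of that merged shape; the stand-in types and the exact signature are illustrative assumptions, not the actual Paddle header.

#include <memory>

// Stand-ins for the real Paddle types (phi::DenseTensor etc.); illustration only.
struct DenseTensor {};
struct AllreduceOptions {};
struct Task {};

// Sketch: with process_group_stream folded in, the base class itself carries
// the stream-aware overloads, so backends such as the NCCL one need only
// depend on process_group.
class ProcessGroup {
 public:
  virtual ~ProcessGroup() = default;
  virtual std::shared_ptr<Task> AllReduce(
      DenseTensor* out,
      const DenseTensor& in,
      const AllreduceOptions& opts,
      bool sync_op,               // wait semantics requested by the caller
      bool use_calc_stream) = 0;  // run on the compute stream, not a comm stream
};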
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/collective/check.cc

@@ -15,9 +15,9 @@
 #include "paddle/fluid/distributed/collective/check.h"
 
 #include "paddle/fluid/distributed/collective/nccl_tools.h"
-#include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/errors.h"
 
 #ifdef PADDLE_WITH_HIP
9 changes: 6 additions & 3 deletions paddle/fluid/distributed/collective/nccl_tools.cc

@@ -14,13 +14,16 @@
 #include "paddle/fluid/distributed/collective/nccl_tools.h"
 
-#include "paddle/fluid/platform/enforce.h"
+#include <unordered_map>
+
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace distributed {
 
 ncclRedOp_t ToNCCLRedType(ReduceOp reduction) {
-  static const std::map<ReduceOp, ncclRedOp_t> red_type = {
+  static const std::unordered_map<ReduceOp, ncclRedOp_t> red_type = {
       {ReduceOp::MIN, ncclMin},
       {ReduceOp::MAX, ncclMax},
       {ReduceOp::SUM, ncclSum},
@@ -29,7 +32,7 @@ ncclRedOp_t ToNCCLRedType(ReduceOp reduction) {
   auto it = red_type.find(reduction);
   PADDLE_ENFORCE_EQ(it != red_type.end(),
                     true,
-                    platform::errors::InvalidArgument(
+                    phi::errors::InvalidArgument(
                         "Invalid nccl reduction. Must be ncclMin | ncclMax | "
                         "ncclProd | ncclSum"));
   return it->second;
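Two things happen here besides the enforce migration: the lookup table switches from std::map to std::unordered_map (ordering buys nothing for a four-entry enum table, and hashing avoids tree comparisons), and the container header is now included explicitly rather than, apparently, picked up transitively, the kind of hidden dependency that stricter clang builds surface. A standalone sketch of the same find-then-check pattern, with stand-in enums replacing the NCCL types:

#include <cstdio>
#include <stdexcept>
#include <unordered_map>

// Stand-ins for ReduceOp and ncclRedOp_t; illustration only.
enum class ReduceOp { SUM, MAX, MIN, PRODUCT };
enum RedType { kSum, kProd, kMax, kMin };

RedType ToRedType(ReduceOp op) {
  // Function-local static: the table is built once, on first call.
  static const std::unordered_map<ReduceOp, RedType> table = {
      {ReduceOp::MIN, kMin},
      {ReduceOp::MAX, kMax},
      {ReduceOp::SUM, kSum},
      {ReduceOp::PRODUCT, kProd},
  };
  // find() plus an explicit check, mirroring the PADDLE_ENFORCE_EQ above;
  // operator[] would silently insert a default value for an unknown key.
  auto it = table.find(op);
  if (it == table.end()) {
    throw std::invalid_argument("invalid reduction op");
  }
  return it->second;
}

int main() { std::printf("%d\n", static_cast<int>(ToRedType(ReduceOp::SUM))); }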
9 changes: 2 additions & 7 deletions paddle/fluid/distributed/collective/nccl_tools.h

@@ -14,20 +14,15 @@
 #pragma once
 
-#ifdef PADDLE_WITH_CUDA
-#include <cuda_runtime.h>
-#endif
-#ifdef PADDLE_WITH_HIP
-#include <hip/hip_runtime.h>
-#endif
-
 #include <string>
 
 #include "paddle/fluid/distributed/collective/types.h"
 
 #ifdef PADDLE_WITH_RCCL
+#include <hip/hip_runtime.h>
 #include "paddle/phi/backends/dynload/rccl.h"
 #else
+#include <cuda_runtime.h>
 #include "paddle/phi/backends/dynload/nccl.h"
 #endif
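The point of this reshuffle: previously the GPU runtime header was chosen by PADDLE_WITH_CUDA / PADDLE_WITH_HIP while the dynload wrapper below was chosen by PADDLE_WITH_RCCL, so two independent macros had to agree for the file to compile. Keying both includes on the single PADDLE_WITH_RCCL switch makes the pair consistent by construction. The same pattern in isolation (hypothetical USE_HIP macro; this only compiles where the corresponding SDK headers exist):

#ifdef USE_HIP
#include <hip/hip_runtime.h>
using gpuStream = hipStream_t;  // runtime type and wrapper chosen by one switch
#else
#include <cuda_runtime.h>
using gpuStream = cudaStream_t;
#endif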
26 changes: 1 addition & 25 deletions paddle/fluid/distributed/collective/process_group.cc

@@ -17,44 +17,20 @@
 namespace paddle {
 namespace distributed {
 
-ProcessGroup::Task::Task(int rank, CommType comm_type, bool sync_op)
-    : rank_(rank), comm_type_(comm_type), sync_op_(sync_op) {}
-
-ProcessGroup::Task::~Task() = default;
-
-bool ProcessGroup::Task::IsCompleted() {
-  std::lock_guard<std::mutex> lock(mutex_);
-  return is_completed_;
-}
-
-bool ProcessGroup::Task::Wait(std::chrono::milliseconds timeout) {
-  return false;
-}
-
-void ProcessGroup::Task::Synchronize() {}
-
-void ProcessGroup::Task::UpdateWaitChain(const phi::DeviceContext& ctx) {}
-
 ProcessGroup::ProcessGroup(int rank, int size, int gid)
     : rank_(rank), size_(size), gid_(gid) {
-  if (gid != IGNORE_ID) {
+  if (gid != kIgnoreId) {
     auto map = ProcessGroupMapFromGid::getInstance();
     map->insert(gid_, this);
   }
 }
 
-// TODO(sunyilun): methods below will be removed later
-ProcessGroup::Task::Task(int rank,
-                         const std::vector<phi::DenseTensor>& inputs,
-                         CommType comm_type)
-    : rank_(rank), comm_type_(comm_type) {}
-
-ProcessGroup::Task::Task(int rank,
-                         const std::vector<phi::DenseTensor>& inputs,
-                         CommType comm_type,
-                         bool sync_op)
-    : rank_(rank), comm_type_(comm_type), sync_op_(sync_op) {}
-
 ProcessGroupIdMap& ProcessGroupIdMap::GetInstance() {
   static ProcessGroupIdMap instance;
   return instance;
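The Task definitions removed above (constructors, destructor, IsCompleted, Wait, Synchronize, UpdateWaitChain) do not reappear in this hunk, so their new home is not visible here; presumably they become inline defaults in the header as part of the comm-context migration. The sketch below reconstructs the base-class shape those deletions describe; the bodies are copied from the deleted lines, but the inline placement is an assumption. Note also the sentinel rename from IGNORE_ID to the style-conforming kIgnoreId.

#include <chrono>
#include <mutex>

// Reconstruction for illustration: bodies match the deleted definitions,
// declared inline on the class.
class Task {
 public:
  Task(int rank, bool sync_op) : rank_(rank), sync_op_(sync_op) {}
  virtual ~Task() = default;

  // Thread-safe completion flag, as in the removed IsCompleted().
  virtual bool IsCompleted() {
    std::lock_guard<std::mutex> lock(mutex_);
    return is_completed_;
  }

  // The base class never blocks; concrete backends override with a real wait.
  virtual bool Wait(std::chrono::milliseconds /*timeout*/) { return false; }
  virtual void Synchronize() {}

 protected:
  const int rank_;
  bool sync_op_;
  std::mutex mutex_;
  bool is_completed_ = false;
};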