From 9ba08b10c85a86cf2c1bae1d0447c0f066f00084 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 16 Nov 2021 11:28:38 +0000 Subject: [PATCH 1/9] rename TensorBase interface data_type() to dtype() --- paddle/pten/api/lib/tensor.cc | 4 ++-- paddle/pten/api/lib/utils/tensor_utils.cc | 2 +- paddle/pten/core/compat_utils.h | 2 +- paddle/pten/core/dense_tensor.cc | 16 +++++++--------- paddle/pten/core/dense_tensor.h | 2 +- paddle/pten/core/tensor_base.h | 2 +- paddle/pten/kernels/cpu/utils.cc | 4 ++-- paddle/pten/kernels/cuda/math.cu | 2 +- paddle/pten/kernels/cuda/utils.cu | 4 ++-- paddle/pten/kernels/xpu/utils.cc | 4 ++-- paddle/pten/tests/api/test_tensor_utils.cc | 11 +++++------ paddle/pten/tests/core/test_dense_tensor.cc | 2 +- 12 files changed, 26 insertions(+), 29 deletions(-) diff --git a/paddle/pten/api/lib/tensor.cc b/paddle/pten/api/lib/tensor.cc index 5d10c7209e2b86..bb3fba885862b9 100644 --- a/paddle/pten/api/lib/tensor.cc +++ b/paddle/pten/api/lib/tensor.cc @@ -109,9 +109,9 @@ void Tensor::reshape(const std::vector &shape) { "and it will be implemented by calling the reshape kernel later.")); } -DataType Tensor::dtype() const { return impl_->data_type(); } +DataType Tensor::dtype() const { return impl_->dtype(); } -DataType Tensor::type() const { return impl_->data_type(); } +DataType Tensor::type() const { return impl_->dtype(); } DataLayout Tensor::layout() const { return impl_->layout(); } diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index 2c362a11b11d9c..b02392e5763be0 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -125,7 +125,7 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { auto storage = src->release(); std::shared_ptr holder( new TensorStorage(std::move(storage))); - dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->data_type())); + dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype())); } void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { diff --git a/paddle/pten/core/compat_utils.h b/paddle/pten/core/compat_utils.h index dc65e04b3ae734..a0602f33e3de22 100644 --- a/paddle/pten/core/compat_utils.h +++ b/paddle/pten/core/compat_utils.h @@ -74,7 +74,7 @@ class CompatibleDenseTensorUtils { ret.meta_.dims[0] = end_idx - begin_idx; ret.meta_.offset = tensor->meta_.offset + begin_idx * (tensor->numel() / tensor->dims()[0]) * - paddle::experimental::SizeOf(tensor->data_type()); + paddle::experimental::SizeOf(tensor->dtype()); } return ret; } diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index fbc2a24312941e..bb38c53ada04e8 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -24,14 +24,12 @@ namespace pten { DenseTensor::DenseTensor(const std::shared_ptr& a, const DenseTensorMeta& meta) : meta_(meta), - storage_( - make_intrusive(a, SizeOf(data_type()) * numel())) {} + storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} DenseTensor::DenseTensor(const std::shared_ptr& a, DenseTensorMeta&& meta) : meta_(std::move(meta)), - storage_( - make_intrusive(a, SizeOf(data_type()) * numel())) {} + storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} DenseTensor::DenseTensor(intrusive_ptr storage, const DenseTensorMeta& meta) @@ -60,7 +58,7 @@ void* DenseTensor::mutable_data(size_t request_bytes) { storage_, paddle::platform::errors::PreconditionNotMet( "The storage must be valid when call the mutable data 
function.")); - size_t bytes = numel() * SizeOf(data_type()); + size_t bytes = numel() * SizeOf(dtype()); if (request_bytes) { PADDLE_ENFORCE_GE(request_bytes, bytes, @@ -87,19 +85,19 @@ T* DenseTensor::mutable_data() { paddle::experimental::CppTypeToDataType::Type(); } PADDLE_ENFORCE( - (data_type() == paddle::experimental::CppTypeToDataType::Type()), + (dtype() == paddle::experimental::CppTypeToDataType::Type()), paddle::platform::errors::InvalidArgument( "The type of data (%d) we are trying to retrieve does not match the " "type of data currently contained in the container (%d).", static_cast(paddle::experimental::CppTypeToDataType::Type()), - static_cast(data_type()))); + static_cast(dtype()))); return static_cast(mutable_data()); } template const T* DenseTensor::data() const { PADDLE_ENFORCE( - (data_type() == paddle::experimental::CppTypeToDataType::Type()), + (dtype() == paddle::experimental::CppTypeToDataType::Type()), paddle::platform::errors::InvalidArgument( "The type of data we are trying to retrieve does not match the " "type of data currently contained in the container.")); @@ -115,7 +113,7 @@ const void* DenseTensor::data() const { } void DenseTensor::check_memory_size() const { - size_t bytes = numel() * SizeOf(data_type()); + size_t bytes = numel() * SizeOf(dtype()); PADDLE_ENFORCE_GE(memory_size(), bytes, paddle::platform::errors::InvalidArgument( diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index b0a4195bc6cec3..8ece80f529161a 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -93,7 +93,7 @@ class DenseTensor : public TensorBase, /// \brief Returns the data type of the tensor. /// \return The data type of the tensor. - DataType data_type() const noexcept override { return meta_.type; } + DataType dtype() const noexcept override { return meta_.type; } /// \brief Returns the data layout of the tensor. /// \return The data layout of the tensor. diff --git a/paddle/pten/core/tensor_base.h b/paddle/pten/core/tensor_base.h index 79fd742aea10b6..528a52cee8da42 100644 --- a/paddle/pten/core/tensor_base.h +++ b/paddle/pten/core/tensor_base.h @@ -43,7 +43,7 @@ class TensorBase { /// \brief Returns the data type of the tensor. /// \return The data type of the tensor. - virtual DataType data_type() const = 0; + virtual DataType dtype() const = 0; /// \brief Returns the data layout of the tensor. /// \return The data layout of the tensor. 
diff --git a/paddle/pten/kernels/cpu/utils.cc b/paddle/pten/kernels/cpu/utils.cc index 3e0bfccb1ec726..e089eabb0e5334 100644 --- a/paddle/pten/kernels/cpu/utils.cc +++ b/paddle/pten/kernels/cpu/utils.cc @@ -38,8 +38,8 @@ void Copy(const CPUContext& dev_ctx, const DenseTensor& src, DenseTensor* dst) { VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; CHECK(dst->layout() == src.layout()); - auto size = src.numel() * paddle::framework::SizeOfType( - TransToProtoVarType(src.data_type())); + auto size = src.numel() * + paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); if (paddle::platform::is_cpu_place(src_place) && paddle::platform::is_cpu_place(dst_place)) { diff --git a/paddle/pten/kernels/cuda/math.cu b/paddle/pten/kernels/cuda/math.cu index 73a743d58e6a97..6a64290d398377 100644 --- a/paddle/pten/kernels/cuda/math.cu +++ b/paddle/pten/kernels/cuda/math.cu @@ -81,7 +81,7 @@ void Mean(const CUDAContext& dev_ctx, const DenseTensor& x, DenseTensor* out) { dev_ctx.GetPlace()); pten::DenseTensor tmp( alloc, - DenseTensorMeta(x.data_type(), + DenseTensorMeta(x.dtype(), paddle::framework::make_ddim( {static_cast(temp_storage_bytes)}), x.layout())); diff --git a/paddle/pten/kernels/cuda/utils.cu b/paddle/pten/kernels/cuda/utils.cu index c3940b42ca46e0..04cf1413cba683 100644 --- a/paddle/pten/kernels/cuda/utils.cu +++ b/paddle/pten/kernels/cuda/utils.cu @@ -48,8 +48,8 @@ void Copy(const CUDAContext& dev_ctx, VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; CHECK(dst->layout() == src.layout()); - auto size = src.numel() * paddle::framework::SizeOfType( - TransToProtoVarType(src.data_type())); + auto size = src.numel() * + paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); if (paddle::platform::is_cuda_pinned_place(src_place) && // NOLINT paddle::platform::is_cuda_pinned_place(dst_place)) { diff --git a/paddle/pten/kernels/xpu/utils.cc b/paddle/pten/kernels/xpu/utils.cc index 33bdc66ff01f36..9bfe493f5ff9d0 100644 --- a/paddle/pten/kernels/xpu/utils.cc +++ b/paddle/pten/kernels/xpu/utils.cc @@ -38,8 +38,8 @@ void Copy(const XPUDeviceContext& dev_ctx, << dst_place; dst->Resize(src.dims()); CHECK(dst->layout() == src.layout()); - auto size = src.numel() * paddle::framework::SizeOfType( - TransToProtoVarType(src.data_type())); + auto size = src.numel() * + paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); if (paddle::platform::is_xpu_place(src_place) && // NOLINT paddle::platform::is_cpu_place(dst_place)) { diff --git a/paddle/pten/tests/api/test_tensor_utils.cc b/paddle/pten/tests/api/test_tensor_utils.cc index fd52b96542c712..cebb0fc07ee636 100644 --- a/paddle/pten/tests/api/test_tensor_utils.cc +++ b/paddle/pten/tests/api/test_tensor_utils.cc @@ -47,8 +47,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) { CHECK(dense_tensor.lod().size() == lod_tensor.lod().size()); CHECK(dense_tensor.lod()[0] == static_cast>((lod_tensor.lod()[0]))); - CHECK(dense_tensor.data_type() == - pten::TransToPtenDataType(lod_tensor.type())); + CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(lod_tensor.type())); CHECK(dense_tensor.layout() == pten::TransToPtenDataLayout(lod_tensor.layout())); CHECK(platform::is_cpu_place(lod_tensor.place())); @@ -58,7 +57,7 @@ TEST(tensor_utils, dense_tensor_to_lod_tensor) { auto dense_tensor_1 = MakePtenDenseTensor(lod_tensor); CHECK(dense_tensor_1->dims() == dims); - CHECK(dense_tensor_1->data_type() == dtype); + CHECK(dense_tensor_1->dtype() == dtype); CHECK(dense_tensor_1->layout() == layout); CHECK(dense_tensor_1->lod().size() == 
lod.size()); CHECK(dense_tensor_1->lod()[0] == lod[0]); @@ -83,7 +82,7 @@ TEST(tensor_utils, dense_tensor_to_tensor) { framework::Tensor tensor; MovesStorage(&dense_tensor, &tensor); - CHECK(dense_tensor.data_type() == pten::TransToPtenDataType(tensor.type())); + CHECK(dense_tensor.dtype() == pten::TransToPtenDataType(tensor.type())); CHECK(dense_tensor.layout() == pten::TransToPtenDataLayout(tensor.layout())); CHECK(platform::is_cpu_place(tensor.place())); @@ -92,7 +91,7 @@ TEST(tensor_utils, dense_tensor_to_tensor) { auto dense_tensor_1 = MakePtenDenseTensor(tensor); CHECK(dense_tensor_1->dims() == dims); - CHECK(dense_tensor_1->data_type() == dtype); + CHECK(dense_tensor_1->dtype() == dtype); CHECK(dense_tensor_1->layout() == layout); const float* data_1 = dense_tensor_1->data(); CHECK(data_1[0] == 1.0f); @@ -117,7 +116,7 @@ TEST(PtenUtils, VarToPtTensor) { // 2. test API auto tensor_x = MakePtenTensorBaseFromVar(v, tensor_def); // 3. check result - ASSERT_EQ(tensor_x->data_type(), pten::DataType::INT32); + ASSERT_EQ(tensor_x->dtype(), pten::DataType::INT32); } } // namespace tests diff --git a/paddle/pten/tests/core/test_dense_tensor.cc b/paddle/pten/tests/core/test_dense_tensor.cc index dac2575713bfb4..69c5e9b12606b4 100644 --- a/paddle/pten/tests/core/test_dense_tensor.cc +++ b/paddle/pten/tests/core/test_dense_tensor.cc @@ -82,7 +82,7 @@ TEST(dense_tensor, ctor) { bool r{true}; r = r && (t.numel() == product(m.dims)); r = r && (t.dims() == m.dims); - r = r && (t.data_type() == m.type); + r = r && (t.dtype() == m.type); r = r && (t.layout() == m.layout); r = r && (t.place() == paddle::platform::CPUPlace()); r = r && t.initialized(); From 3c1afc0598df2ca15dc6b40f0484a56c92ac97ba Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 17 Nov 2021 05:12:57 +0000 Subject: [PATCH 2/9] rename type to dtype of TensorMeta --- paddle/pten/api/lib/utils/tensor_utils.cc | 4 ++-- paddle/pten/core/dense_tensor.cc | 4 ++-- paddle/pten/core/dense_tensor.h | 2 +- paddle/pten/core/tensor_meta.h | 24 +++++++++---------- paddle/pten/infermeta/binary.cc | 6 ++--- paddle/pten/infermeta/unary.cc | 8 +++---- paddle/pten/tests/core/test_dense_tensor.cc | 12 +++++----- paddle/pten/tests/kernels/test_dot_dev_api.cc | 2 +- .../tests/kernels/test_elementwise_dev_api.cc | 2 +- .../pten/tests/kernels/test_fill_dev_api.cc | 2 +- .../tests/kernels/test_flatten_dev_api.cc | 2 +- .../pten/tests/kernels/test_mean_dev_api.cc | 2 +- .../tests/kernels/test_reshape_dev_api.cc | 2 +- .../pten/tests/kernels/test_scale_dev_api.cc | 4 ++-- 14 files changed, 38 insertions(+), 38 deletions(-) diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index b02392e5763be0..4936006d26f8aa 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -146,7 +146,7 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); meta->dims = src.dims(); // Since the type of DenseTensorMeta is const, const_cast must be used - const_cast(meta->type) = pten::TransToPtenDataType(src.type()); + const_cast(meta->dtype) = pten::TransToPtenDataType(src.type()); // Since the type of DenseTensorMeta is const, const_cast must be used const_cast(meta->layout) = pten::TransToPtenDataLayout(src.layout()); @@ -164,7 +164,7 @@ void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); meta->dims = src.dims(); // Since the type 
of DenseTensorMeta is const, const_cast must be used - const_cast(meta->type) = pten::TransToPtenDataType(src.type()); + const_cast(meta->dtype) = pten::TransToPtenDataType(src.type()); // Since the type of DenseTensorMeta is const, const_cast must be used const_cast(meta->layout) = pten::TransToPtenDataLayout(src.layout()); diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index bb38c53ada04e8..701ccb509f15c9 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -80,8 +80,8 @@ T* DenseTensor::mutable_data() { // In order to be compatible with the original Tensor design and // execution system, we have to reset the datatype in mutable_data. // When the compatibility phase is over in the future, we can delete it - if (meta_.type == DataType::UNDEFINED) { - const_cast(meta_.type) = + if (meta_.dtype == DataType::UNDEFINED) { + const_cast(meta_.dtype) = paddle::experimental::CppTypeToDataType::Type(); } PADDLE_ENFORCE( diff --git a/paddle/pten/core/dense_tensor.h b/paddle/pten/core/dense_tensor.h index 8ece80f529161a..6d938d8ab57904 100644 --- a/paddle/pten/core/dense_tensor.h +++ b/paddle/pten/core/dense_tensor.h @@ -93,7 +93,7 @@ class DenseTensor : public TensorBase, /// \brief Returns the data type of the tensor. /// \return The data type of the tensor. - DataType dtype() const noexcept override { return meta_.type; } + DataType dtype() const noexcept override { return meta_.dtype; } /// \brief Returns the data layout of the tensor. /// \return The data layout of the tensor. diff --git a/paddle/pten/core/tensor_meta.h b/paddle/pten/core/tensor_meta.h index eae270171d88e6..7cbc919dab985d 100644 --- a/paddle/pten/core/tensor_meta.h +++ b/paddle/pten/core/tensor_meta.h @@ -39,9 +39,9 @@ struct DenseTensorMeta { using DataLayout = paddle::experimental::DataLayout; DenseTensorMeta() = default; - DenseTensorMeta(DataType type, const DDim& dims); - DenseTensorMeta(DataType type, const DDim& dims, DataLayout layout); - DenseTensorMeta(DataType type, + DenseTensorMeta(DataType dtype, const DDim& dims); + DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout); + DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout, const std::vector>& lod); @@ -54,30 +54,30 @@ struct DenseTensorMeta { /// marked with `const` are expected to remain unchanged. 
const bool is_scalar{false}; DDim dims; - const DataType type{DataType::UNDEFINED}; + const DataType dtype{DataType::UNDEFINED}; const DataLayout layout{DataLayout::NCHW}; LoD lod; size_t offset{0}; }; -inline DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims) - : dims(dims), type(type) {} +inline DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims) + : dims(dims), dtype(dtype) {} -inline DenseTensorMeta::DenseTensorMeta(DataType type, +inline DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout) - : dims(dims), type(type), layout(layout) {} + : dims(dims), dtype(dtype), layout(layout) {} inline DenseTensorMeta::DenseTensorMeta( - DataType type, + DataType dtype, const DDim& dims, DataLayout layout, const std::vector>& lod) - : dims(dims), type(type), layout(layout), lod(lod) {} + : dims(dims), dtype(dtype), layout(layout), lod(lod) {} inline bool DenseTensorMeta::valid() const noexcept { bool valid{true}; - valid = valid && (type != DataType::UNDEFINED); + valid = valid && (dtype != DataType::UNDEFINED); valid = valid && (layout != DataLayout::UNDEFINED); valid = valid && (is_scalar || product(dims) >= 0); return valid; @@ -86,7 +86,7 @@ inline bool DenseTensorMeta::valid() const noexcept { inline bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) { bool ret = true; return ret && (lhs.is_scalar == rhs.is_scalar) && (lhs.dims == rhs.dims) && - (lhs.type == rhs.type) && (lhs.layout == rhs.layout) && + (lhs.dtype == rhs.dtype) && (lhs.layout == rhs.layout) && (lhs.lod == rhs.lod) && (lhs.offset == rhs.offset); } diff --git a/paddle/pten/infermeta/binary.cc b/paddle/pten/infermeta/binary.cc index e124466a6d33af..838e450007fcd4 100644 --- a/paddle/pten/infermeta/binary.cc +++ b/paddle/pten/infermeta/binary.cc @@ -56,7 +56,7 @@ DenseTensorMeta DotInferShape(const DenseTensorMeta& x_meta, y_dims.to_str())); x_dims[x_dims.size() - 1] = 1; - DenseTensorMeta return_meta(x_meta.type, x_dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, x_dims, x_meta.layout); return return_meta; } @@ -127,13 +127,13 @@ DenseTensorMeta MatmulInferShape(const DenseTensorMeta& x_meta, auto ddim_out = paddle::framework::make_ddim(new_dims); - return {x_meta.type, ddim_out, x_meta.layout}; + return {x_meta.dtype, ddim_out, x_meta.layout}; } DenseTensorMeta ElementwiseInferShape(const DenseTensorMeta& x_meta, const DenseTensorMeta& y_meta, int axis) { - DenseTensorMeta return_meta(x_meta.type, x_meta.dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, x_meta.dims, x_meta.layout); if (x_meta.dims != y_meta.dims) { auto x_dims = x_meta.dims; auto y_dims = y_meta.dims; diff --git a/paddle/pten/infermeta/unary.cc b/paddle/pten/infermeta/unary.cc index 5099984886cce5..ea6e97db3460d9 100644 --- a/paddle/pten/infermeta/unary.cc +++ b/paddle/pten/infermeta/unary.cc @@ -23,7 +23,7 @@ DenseTensorMeta UnchangedInferShape(const DenseTensorMeta& x_meta) { DenseTensorMeta ReductionInferShape(const DenseTensorMeta& x_meta) { const auto& out_dims = paddle::framework::make_ddim({1}); - DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, out_dims, x_meta.layout); return return_meta; } @@ -63,7 +63,7 @@ DenseTensorMeta FlattenInferShape(const DenseTensorMeta& x_meta, out_shape.push_back(x_dims[i]); } const auto& out_dims = paddle::framework::make_ddim(out_shape); - DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, 
out_dims, x_meta.layout); if (x_dims[0] == return_meta.dims[0]) { // Only pass LoD when the first dimension of output and Input(X) @@ -77,7 +77,7 @@ DenseTensorMeta FlattenInferShape(const DenseTensorMeta& x_meta, DenseTensorMeta FullLikeInferShape(const DenseTensorMeta& x_meta, DataType dtype, DataLayout layout) { - return {dtype == DataType::UNDEFINED ? x_meta.type : dtype, + return {dtype == DataType::UNDEFINED ? x_meta.dtype : dtype, x_meta.dims, layout == DataLayout::UNDEFINED ? x_meta.layout : layout}; } @@ -211,7 +211,7 @@ DenseTensorMeta InferShapeFromVecValue(const DenseTensorMeta& x_meta, "But received 'shape' is empty.")); auto x_dims = x_meta.dims; auto out_dims = ValidateShape(shape, x_dims); - DenseTensorMeta return_meta(x_meta.type, out_dims, x_meta.layout); + DenseTensorMeta return_meta(x_meta.dtype, out_dims, x_meta.layout); if (x_dims[0] == return_meta.dims[0]) { // Only pass LoD when the first dimension of output and Input(X) // are the same. diff --git a/paddle/pten/tests/core/test_dense_tensor.cc b/paddle/pten/tests/core/test_dense_tensor.cc index 69c5e9b12606b4..fae4a5415a3729 100644 --- a/paddle/pten/tests/core/test_dense_tensor.cc +++ b/paddle/pten/tests/core/test_dense_tensor.cc @@ -31,32 +31,32 @@ TEST(dense_tensor, meta) { CHECK(!meta_0.valid()); DenseTensorMeta meta_1(dtype, dims); - CHECK(meta_1.type == dtype); + CHECK(meta_1.dtype == dtype); CHECK(meta_1.dims == dims); CHECK(meta_1.valid()); DenseTensorMeta meta_2(dtype, dims, layout); - CHECK(meta_2.type == dtype); + CHECK(meta_2.dtype == dtype); CHECK(meta_2.dims == dims); CHECK(meta_2.layout == layout); CHECK(meta_2.valid()); DenseTensorMeta meta_3(dtype, dims, layout, lod); - CHECK(meta_3.type == dtype); + CHECK(meta_3.dtype == dtype); CHECK(meta_3.dims == dims); CHECK(meta_3.layout == layout); CHECK(meta_3.lod == lod); CHECK(meta_3.valid()); DenseTensorMeta meta_4(meta_3); - CHECK(meta_4.type == dtype); + CHECK(meta_4.dtype == dtype); CHECK(meta_4.dims == dims); CHECK(meta_4.layout == layout); CHECK(meta_4.lod == lod); CHECK(meta_4.valid()); DenseTensorMeta meta_5(std::move(meta_4)); - CHECK(meta_5.type == dtype); + CHECK(meta_5.dtype == dtype); CHECK(meta_5.dims == dims); CHECK(meta_5.layout == layout); CHECK(meta_5.lod == lod); @@ -82,7 +82,7 @@ TEST(dense_tensor, ctor) { bool r{true}; r = r && (t.numel() == product(m.dims)); r = r && (t.dims() == m.dims); - r = r && (t.dtype() == m.type); + r = r && (t.dtype() == m.dtype); r = r && (t.layout() == m.layout); r = r && (t.place() == paddle::platform::CPUPlace()); r = r && t.initialized(); diff --git a/paddle/pten/tests/kernels/test_dot_dev_api.cc b/paddle/pten/tests/kernels/test_dot_dev_api.cc index 2276d49590a701..5485ef2843c2ca 100644 --- a/paddle/pten/tests/kernels/test_dot_dev_api.cc +++ b/paddle/pten/tests/kernels/test_dot_dev_api.cc @@ -62,7 +62,7 @@ TEST(DEV_API, dot) { // 3. check result ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.dims()[0], 3); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto expect_result = sum; diff --git a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc index f6b93b731865c3..f2525ae800accd 100644 --- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc @@ -65,7 +65,7 @@ TEST(DEV_API, elementwise_add) { // 3. 
check result ASSERT_EQ(dense_out.dims().size(), 2); ASSERT_EQ(dense_out.dims()[0], 3); - ASSERT_EQ(dense_out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(dense_out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(dense_out.meta().layout, pten::DataLayout::NCHW); auto expect_result = sum; diff --git a/paddle/pten/tests/kernels/test_fill_dev_api.cc b/paddle/pten/tests/kernels/test_fill_dev_api.cc index 6e6af22f6de890..aa66877881b66c 100644 --- a/paddle/pten/tests/kernels/test_fill_dev_api.cc +++ b/paddle/pten/tests/kernels/test_fill_dev_api.cc @@ -50,7 +50,7 @@ TEST(DEV_API, fill_any_like) { ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.dims()[0], 3); ASSERT_EQ(out.numel(), 6); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto* actual_result = out.data(); diff --git a/paddle/pten/tests/kernels/test_flatten_dev_api.cc b/paddle/pten/tests/kernels/test_flatten_dev_api.cc index b027c75a37b310..a9be6108d24b61 100644 --- a/paddle/pten/tests/kernels/test_flatten_dev_api.cc +++ b/paddle/pten/tests/kernels/test_flatten_dev_api.cc @@ -56,7 +56,7 @@ TEST(DEV_API, flatten) { ASSERT_EQ(out.dims()[1], expect_shape[1]); ASSERT_EQ(out.dims()[2], expect_shape[2]); ASSERT_EQ(out.numel(), 36); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); bool value_equal = true; diff --git a/paddle/pten/tests/kernels/test_mean_dev_api.cc b/paddle/pten/tests/kernels/test_mean_dev_api.cc index 1ae59ff8034f58..b16d339e18af34 100644 --- a/paddle/pten/tests/kernels/test_mean_dev_api.cc +++ b/paddle/pten/tests/kernels/test_mean_dev_api.cc @@ -49,7 +49,7 @@ TEST(DEV_API, mean) { // 3. check result ASSERT_EQ(out.dims().size(), 1); ASSERT_EQ(out.numel(), 1); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto expect_result = sum / 12; diff --git a/paddle/pten/tests/kernels/test_reshape_dev_api.cc b/paddle/pten/tests/kernels/test_reshape_dev_api.cc index c06cc8a8a406bd..b227d3b009e89a 100644 --- a/paddle/pten/tests/kernels/test_reshape_dev_api.cc +++ b/paddle/pten/tests/kernels/test_reshape_dev_api.cc @@ -54,7 +54,7 @@ TEST(DEV_API, reshape) { ASSERT_EQ(out.dims()[0], expect_shape[0]); ASSERT_EQ(out.dims()[1], expect_shape[1]); ASSERT_EQ(out.numel(), 36); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); bool value_equal = true; diff --git a/paddle/pten/tests/kernels/test_scale_dev_api.cc b/paddle/pten/tests/kernels/test_scale_dev_api.cc index b057821e6cf81a..b87692137251a9 100644 --- a/paddle/pten/tests/kernels/test_scale_dev_api.cc +++ b/paddle/pten/tests/kernels/test_scale_dev_api.cc @@ -56,7 +56,7 @@ TEST(DEV_API, scale) { // 3. check result ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.numel(), 12); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto expect_result = 23; @@ -101,7 +101,7 @@ TEST(DEV_API, scale_host) { // 3. 
check result ASSERT_EQ(out.dims().size(), 2); ASSERT_EQ(out.numel(), 12); - ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32); ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); auto expect_result = 23; From 7bc3cbb5431a2b37ee52e91314df407894718457 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 17 Nov 2021 13:27:33 +0000 Subject: [PATCH 3/9] merge the code --- paddle/pten/core/tensor_meta.h | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/paddle/pten/core/tensor_meta.h b/paddle/pten/core/tensor_meta.h index b311589d792eb7..cc02c57a48ba13 100644 --- a/paddle/pten/core/tensor_meta.h +++ b/paddle/pten/core/tensor_meta.h @@ -60,37 +60,4 @@ struct DenseTensorMeta { size_t offset{0}; }; -<<<<<<< HEAD -inline DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims) - : dims(dims), dtype(dtype) {} - -inline DenseTensorMeta::DenseTensorMeta(DataType dtype, - const DDim& dims, - DataLayout layout) - : dims(dims), dtype(dtype), layout(layout) {} - -inline DenseTensorMeta::DenseTensorMeta( - DataType dtype, - const DDim& dims, - DataLayout layout, - const std::vector>& lod) - : dims(dims), dtype(dtype), layout(layout), lod(lod) {} - -inline bool DenseTensorMeta::valid() const noexcept { - bool valid{true}; - valid = valid && (dtype != DataType::UNDEFINED); - valid = valid && (layout != DataLayout::UNDEFINED); - valid = valid && (is_scalar || product(dims) >= 0); - return valid; -} - -inline bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) { - bool ret = true; - return ret && (lhs.is_scalar == rhs.is_scalar) && (lhs.dims == rhs.dims) && - (lhs.dtype == rhs.dtype) && (lhs.layout == rhs.layout) && - (lhs.lod == rhs.lod) && (lhs.offset == rhs.offset); -} - -======= ->>>>>>> d08753df36986f5a5a7384f092c578c296e5150b } // namespace pten From 7b79b03e0196ad72420a54efddf8244c7dc4271a Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 17 Nov 2021 13:57:02 +0000 Subject: [PATCH 4/9] merge the code --- paddle/pten/core/tensor_meta.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/pten/core/tensor_meta.cc b/paddle/pten/core/tensor_meta.cc index ebdcd9b5f250b8..6bc28908a25701 100644 --- a/paddle/pten/core/tensor_meta.cc +++ b/paddle/pten/core/tensor_meta.cc @@ -16,23 +16,23 @@ limitations under the License. 
*/ namespace pten { -DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims) - : dims(dims), type(type) {} +DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims) + : dims(dims), dtype(dtype) {} -DenseTensorMeta::DenseTensorMeta(DataType type, +DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout) - : dims(dims), type(type), layout(layout) {} + : dims(dims), dtype(dtype), layout(layout) {} DenseTensorMeta::DenseTensorMeta(DataType type, const DDim& dims, DataLayout layout, const std::vector>& lod) - : dims(dims), type(type), layout(layout), lod(lod) {} + : dims(dims), dtype(dtype), layout(layout), lod(lod) {} bool DenseTensorMeta::valid() const noexcept { bool valid{true}; - valid = valid && (type != DataType::UNDEFINED); + valid = valid && (dtype != DataType::UNDEFINED); valid = valid && (layout != DataLayout::UNDEFINED); valid = valid && (is_scalar || product(dims) >= 0); return valid; @@ -41,7 +41,7 @@ bool DenseTensorMeta::valid() const noexcept { bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) { bool ret = true; return ret && (lhs.is_scalar == rhs.is_scalar) && (lhs.dims == rhs.dims) && - (lhs.type == rhs.type) && (lhs.layout == rhs.layout) && + (lhs.dtype == rhs.dtype) && (lhs.layout == rhs.layout) && (lhs.lod == rhs.lod) && (lhs.offset == rhs.offset); } } // namespace pten From 471a1bf7de8a0874f5a2254859545c03758450cb Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 18 Nov 2021 02:48:40 +0000 Subject: [PATCH 5/9] fix the problem when merge conflict --- paddle/pten/core/tensor_meta.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/pten/core/tensor_meta.cc b/paddle/pten/core/tensor_meta.cc index 6bc28908a25701..3e06508be69d65 100644 --- a/paddle/pten/core/tensor_meta.cc +++ b/paddle/pten/core/tensor_meta.cc @@ -24,7 +24,7 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype, DataLayout layout) : dims(dims), dtype(dtype), layout(layout) {} -DenseTensorMeta::DenseTensorMeta(DataType type, +DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout, const std::vector>& lod) From 835e4156785eb799a84757f12c5687065cb24119 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Fri, 19 Nov 2021 07:22:00 +0000 Subject: [PATCH 6/9] fix bug of ci caused by type of tensor_meta --- paddle/pten/tests/kernels/test_elementwise_dev_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc index c6e0d339915447..8dafce1fba7d89 100644 --- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc @@ -118,7 +118,7 @@ TEST(DEV_API, subtract) { // 3. 
check result ASSERT_EQ(dense_out.dims().size(), 2); ASSERT_EQ(dense_out.dims()[0], 3); - ASSERT_EQ(dense_out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(dense_out.dtype(), pten::DataType::FLOAT32); ASSERT_EQ(dense_out.meta().layout, pten::DataLayout::NCHW); auto expect_result = sub; From 3629f71d719bb8fac0f54a367453c364a3c84c25 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Fri, 4 Mar 2022 13:02:52 +0000 Subject: [PATCH 7/9] update --- .../collective/ProcessGroupNCCL.cc | 45 ++++------ .../distributed/collective/ProcessGroupNCCL.h | 5 +- paddle/fluid/distributed/store/store.h | 23 +++-- paddle/fluid/pybind/CMakeLists.txt | 1 - paddle/fluid/pybind/communication.cc | 48 ----------- paddle/fluid/pybind/communication.h | 31 ------- paddle/fluid/pybind/distributed_py.cc | 86 +++++++++---------- paddle/fluid/pybind/pybind.cc | 2 - .../tests/unittests/process_group_nccl.py | 19 ++-- 9 files changed, 86 insertions(+), 174 deletions(-) delete mode 100644 paddle/fluid/pybind/communication.cc delete mode 100644 paddle/fluid/pybind/communication.h diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc index 88d8fb69eb6980..67715f410d443c 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc @@ -156,36 +156,27 @@ bool ProcessGroupNCCL::NCCLTask::Wait(std::chrono::milliseconds timeout) { // Same as Wait void ProcessGroupNCCL::NCCLTask::Synchronize() { Wait(kWaitTimeout); } -ProcessGroupNCCL::ProcessGroupNCCL(const ProcessGroupStrategy& strategy, +ProcessGroupNCCL::ProcessGroupNCCL(const std::shared_ptr& store, int rank, int size) - : ProcessGroup(rank, size), strategy_(strategy) {} - -void ProcessGroupNCCL::BcastNCCLId( - std::vector& nccl_ids, // NOLINT - int root, int server_fd) { - if (strategy_.local_rank_ == root) { - std::vector other_trainers; - for (auto& ep : strategy_.trainer_endpoints_) { - if (ep != strategy_.current_endpoint_) { - other_trainers.push_back(ep); - } - } - platform::SendBroadCastCommID(other_trainers, &nccl_ids); - } else { - platform::RecvBroadCastCommID(server_fd, strategy_.current_endpoint_, - &nccl_ids); - } -} + : ProcessGroup(rank, size), store_(store) {} void ProcessGroupNCCL::BroadcastUniqueNCCLID( std::vector& nccl_ids) { // NOLINT - - int server_fd = -1; - if (rank_ != 0) { - server_fd = platform::SocketServer::GetInstance(strategy_.current_endpoint_) - .socket(); + if (rank_ == 0) { + for (size_t i = 0; i < nccl_ids.size(); i++) { + auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(i); + auto nccl_id = std::vector( + reinterpret_cast(&nccl_ids[i]), + reinterpret_cast(&nccl_ids[i]) + NCCL_UNIQUE_ID_BYTES); + store_->set(key, nccl_id); + } + } else { + for (size_t i = 0; i < nccl_ids.size(); i++) { + auto key = "ProcessGroupNCCL/nccl_ids/" + std::to_string(i); + auto ret = store_->get(key); + std::memcpy(&nccl_ids[i], ret.data(), ret.size()); + } } - BcastNCCLId(nccl_ids, 0, server_fd); } // create NCCLManager cache for places_key @@ -213,8 +204,8 @@ void ProcessGroupNCCL::CreateNCCLManagerCache( } BroadcastUniqueNCCLID(nccl_ids); - VLOG(3) << "init nccl rank: " << strategy_.local_rank_ - << ", nranks: " << strategy_.nranks_ << ", place: " << places_key + VLOG(3) << "init nccl rank: " << rank_ << ", nranks: " << size_ + << ", place: " << places_key << ", nccl uniqueid: " << SerializeNCCLUniqueId(nccl_id); std::vector> dev_ctx; diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.h 
b/paddle/fluid/distributed/collective/ProcessGroupNCCL.h index d63a5e768382c6..aa2a2b8fa2088c 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.h +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.h @@ -25,6 +25,7 @@ #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/distributed/store/store.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" #include "paddle/fluid/platform/place.h" @@ -75,7 +76,7 @@ class ProcessGroupNCCL : public ProcessGroup { private: }; - ProcessGroupNCCL(const ProcessGroupStrategy& strategy, int rank, int size); + ProcessGroupNCCL(const std::shared_ptr& store, int rank, int size); const std::string GetBackendName() const override { return std::string(NCCL_BACKEND_NAME); @@ -118,7 +119,7 @@ class ProcessGroupNCCL : public ProcessGroup { const std::vector& inputs); protected: - ProcessGroupStrategy strategy_; + std::shared_ptr store_; std::shared_ptr nccl_comm_; std::mutex mutex_; std::unordered_map>> diff --git a/paddle/fluid/distributed/store/store.h b/paddle/fluid/distributed/store/store.h index 2581a74d7e8187..7b4ae7e70ff6f0 100644 --- a/paddle/fluid/distributed/store/store.h +++ b/paddle/fluid/distributed/store/store.h @@ -25,15 +25,26 @@ namespace distributed { class Store { public: - Store() = delete; + Store() : _timeout(tcputils::kNoTimeout) {} explicit Store(const std::chrono::seconds& timeout) : _timeout(timeout) {} virtual ~Store() = default; - virtual int64_t add(const std::string& key, int64_t value) = 0; - virtual std::vector get(const std::string& key) = 0; - virtual void wait(const std::string& key) = 0; - virtual void set(const std::string& key, - const std::vector& value) = 0; + virtual int64_t add(const std::string& key, int64_t value) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Implement the add method in the subclass.")); + } + virtual std::vector get(const std::string& key) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Implement the add method in the subclass.")); + } + virtual void wait(const std::string& key) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Implement the add method in the subclass.")); + } + virtual void set(const std::string& key, const std::vector& value) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Implement the add method in the subclass.")); + } virtual const std::chrono::seconds& timeout() const { return _timeout; } diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 5e61133510d6a2..2edfc60d5b0e87 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -77,7 +77,6 @@ set(PYBIND_SRCS compatible.cc io.cc generator_py.cc - communication.cc cuda_streams_py.cc) if(NOT ON_INFER) diff --git a/paddle/fluid/pybind/communication.cc b/paddle/fluid/pybind/communication.cc deleted file mode 100644 index c01accaf598aa8..00000000000000 --- a/paddle/fluid/pybind/communication.cc +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/distributed/store/tcp_store.h" -#include "paddle/fluid/pybind/communication.h" - -namespace py = pybind11; - -namespace paddle { -namespace pybind { - -using TCPStore = paddle::distributed::TCPStore; - -void BindTCPStore(py::module* m) { - py::class_>(*m, "TCPStore") - .def(py::init([](std::string hostname, uint16_t port, bool is_master, - size_t world_size, std::chrono::seconds timeout) { - return std::make_shared(hostname, port, is_master, - world_size, timeout); - }), - py::arg("hostname"), py::arg("port"), py::arg("is_master"), - py::arg("world_size"), py::arg("timeout"), - py::call_guard()) - .def("add", &TCPStore::add) - .def("get", &TCPStore::get); -} - -} // namespace pybind -} // namespace paddle diff --git a/paddle/fluid/pybind/communication.h b/paddle/fluid/pybind/communication.h deleted file mode 100644 index 17045ccfe65cae..00000000000000 --- a/paddle/fluid/pybind/communication.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include "pybind11/chrono.h" -#include "pybind11/complex.h" -#include "pybind11/functional.h" -#include "pybind11/pybind11.h" -#include "pybind11/stl.h" - -namespace paddle { -namespace pybind { - -void BindTCPStore(pybind11::module* m); - -} // namespace pybind -} // namespace paddle diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc index 17512863357d8d..a51384d8fc65d7 100644 --- a/paddle/fluid/pybind/distributed_py.cc +++ b/paddle/fluid/pybind/distributed_py.cc @@ -31,13 +31,15 @@ limitations under the License. 
*/ #include "paddle/fluid/pybind/eager_utils.h" #include "paddle/phi/api/all.h" +#include "paddle/fluid/distributed/store/tcp_store.h" +#include "paddle/fluid/distributed/store/tcp_utils.h" + #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" #endif #if defined(PADDLE_WITH_GLOO) #include "paddle/fluid/distributed/collective/ProcessGroupGloo.h" -#include "paddle/fluid/distributed/store/tcp_store.h" #endif namespace py = pybind11; @@ -53,6 +55,8 @@ using GlooStore = paddle::distributed::ProcessGroupGloo::GlooStore; using GlooOptions = paddle::distributed::ProcessGroupGloo::GlooOptions; #endif +using TCPStore = paddle::distributed::TCPStore; + static std::string GLOO_SOCKET_IFNAME_ENV = "GLOO_SOCKET_IFNAME"; // NOLINT void BindDistributed(py::module *m) { @@ -82,6 +86,42 @@ void BindDistributed(py::module *m) { .def_readwrite("reduce_op", &distributed::ReduceOptions::reduce_op) .def_readwrite("source_root", &distributed::ReduceOptions::root_rank); + auto Store = + py::class_>( + *m, "Store") + .def(py::init<>()) + .def("set", + [](distributed::Store &self, const std::string &key, + const std::string &value) { + std::vector data(value.begin(), value.end()); + self.set(key, data); + }, + py::arg("key"), py::arg("value"), + py::call_guard()) + .def("get", + [](distributed::Store &self, + const std::string &key) -> py::bytes { + auto data = self.get(key); + return py::bytes(reinterpret_cast(data.data()), + data.size()); + }, + py::arg("key"), py::call_guard()) + .def("add", &distributed::Store::add, + py::call_guard()) + .def("wait", &distributed::Store::wait, + py::call_guard()); + + py::class_>(*m, "TCPStore", Store) + .def(py::init([](std::string hostname, uint16_t port, bool is_master, + size_t world_size, std::chrono::seconds timeout) { + return std::make_shared(hostname, port, is_master, + world_size, timeout); + }), + py::arg("hostname"), py::arg("port"), py::arg("is_master"), + py::arg("world_size"), + py::arg("timeout") = distributed::tcputils::kNoTimeout, + py::call_guard()); + auto ProcessGroup = py::class_>(*m, "ProcessGroup") @@ -197,7 +237,7 @@ void BindDistributed(py::module *m) { py::class_>( *m, "ProcessGroupNCCL", ProcessGroup) - .def(py::init(), + .def(py::init &, int, int>(), py::call_guard()); #endif @@ -210,44 +250,6 @@ void BindDistributed(py::module *m) { .def("synchronize", &distributed::ProcessGroup::Task::Synchronize, py::call_guard()); - // define parallel strategy, it will be removed - py::class_ pg_strategy( - *m, "ProcessGroupStrategy", ""); - pg_strategy.def(py::init()) - .def_property("nranks", - [](const distributed::ProcessGroupStrategy &self) { - return self.nranks_; - }, - [](distributed::ProcessGroupStrategy &self, int nranks) { - self.nranks_ = nranks; - }) - .def_property("local_rank", - [](const distributed::ProcessGroupStrategy &self) { - return self.local_rank_; - }, - [](distributed::ProcessGroupStrategy &self, - int local_rank) { self.local_rank_ = local_rank; }) - .def_property( - "trainer_endpoints", - [](const distributed::ProcessGroupStrategy &self) { - return self.trainer_endpoints_; - }, - [](distributed::ProcessGroupStrategy &self, - std::vector eps) { self.trainer_endpoints_ = eps; }) - .def_property("current_endpoint", - [](const distributed::ProcessGroupStrategy &self) { - return self.current_endpoint_; - }, - [](distributed::ProcessGroupStrategy &self, - const std::string &ep) { self.current_endpoint_ = ep; }) - .def_property("nrings", - [](const distributed::ProcessGroupStrategy &self) { - return 
self.nrings_; - }, - [](distributed::ProcessGroupStrategy &self, int nrings) { - self.nrings_ = nrings; - }); - #if defined(PADDLE_WITH_GLOO) py::class_(*m, "GlooOptions") .def(py::init<>()) @@ -279,9 +281,7 @@ void BindDistributed(py::module *m) { return std::make_shared(store, rank, world_size, opts); }), - py::arg("store"), py::arg("rank"), - py::arg("world_size"), // py::arg("timeout") = - // kProcessGroupDefaultTimeout, + py::arg("store"), py::arg("rank"), py::arg("world_size"), py::call_guard()) .def_static("create_default_device", &ProcessGroupGloo::createDefaultDevice); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index c016321ef802a1..7a1397c76d0470 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -91,7 +91,6 @@ limitations under the License. */ #include "paddle/fluid/pybind/bind_cost_model.h" #include "paddle/fluid/pybind/bind_fleet_executor.h" #include "paddle/fluid/pybind/box_helper_py.h" -#include "paddle/fluid/pybind/communication.h" #include "paddle/fluid/pybind/compatible.h" #include "paddle/fluid/pybind/const_value.h" #include "paddle/fluid/pybind/data_set_py.h" @@ -2618,7 +2617,6 @@ All parameter, weight, gradient are variables in Paddle. BindGlobalValueGetterSetter(&m); BindProcessMeshDesc(&m); BindFleetExecutor(&m); - BindTCPStore(&m); py::class_(m, "LodRankTable") .def("items", [](framework::LoDRankTable &table) { diff --git a/python/paddle/fluid/tests/unittests/process_group_nccl.py b/python/paddle/fluid/tests/unittests/process_group_nccl.py index 4833cea9a8d1ab..b1da0777feb3de 100644 --- a/python/paddle/fluid/tests/unittests/process_group_nccl.py +++ b/python/paddle/fluid/tests/unittests/process_group_nccl.py @@ -27,22 +27,13 @@ from paddle.fluid.framework import _test_eager_guard from paddle.fluid.dygraph.parallel import ParallelEnv -ProcessGroupStrategy = core.ProcessGroupStrategy - def init_process_group(strategy=None): - # this will remove - if strategy is None: - strategy = ProcessGroupStrategy() - strategy.nranks = ParallelEnv().nranks - strategy.local_rank = ParallelEnv().local_rank - strategy.trainer_endpoints = ParallelEnv().trainer_endpoints - strategy.current_endpoint = ParallelEnv().current_endpoint - if strategy.nranks < 2: - return - - pg_group = core.ProcessGroupNCCL(strategy, strategy.local_rank, - strategy.nranks) + nranks = ParallelEnv().nranks + rank = ParallelEnv().local_rank + is_master = True if rank == 0 else False + store = paddle.fluid.core.TCPStore("127.0.0.1", 6173, is_master, nranks) + pg_group = core.ProcessGroupNCCL(store, rank, nranks) return pg_group From 735eb3698adf2bafaa29a2194e7fd37e124dc798 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Fri, 4 Mar 2022 15:43:33 +0000 Subject: [PATCH 8/9] update --- paddle/fluid/pybind/distributed_py.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc index a51384d8fc65d7..3b48837c0009dc 100644 --- a/paddle/fluid/pybind/distributed_py.cc +++ b/paddle/fluid/pybind/distributed_py.cc @@ -24,6 +24,8 @@ limitations under the License. 
*/ #include "paddle/fluid/distributed/collective/ProcessGroup.h" #include "paddle/fluid/distributed/collective/Types.h" #include "paddle/fluid/distributed/collective/reducer.h" +#include "paddle/fluid/distributed/store/tcp_store.h" +#include "paddle/fluid/distributed/store/tcp_utils.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/imperative/layer.h" @@ -31,9 +33,6 @@ limitations under the License. */ #include "paddle/fluid/pybind/eager_utils.h" #include "paddle/phi/api/all.h" -#include "paddle/fluid/distributed/store/tcp_store.h" -#include "paddle/fluid/distributed/store/tcp_utils.h" - #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" #endif From 5b4de6490298c3678f871f6400816df6647bb372 Mon Sep 17 00:00:00 2001 From: sandyhouse Date: Sat, 5 Mar 2022 16:08:22 +0000 Subject: [PATCH 9/9] update --- paddle/fluid/pybind/CMakeLists.txt | 1 + paddle/fluid/pybind/communication.cc | 72 +++++++++++++++++++++++++++ paddle/fluid/pybind/communication.h | 31 ++++++++++++ paddle/fluid/pybind/distributed_py.cc | 41 +-------------- paddle/fluid/pybind/pybind.cc | 2 + 5 files changed, 107 insertions(+), 40 deletions(-) create mode 100644 paddle/fluid/pybind/communication.cc create mode 100644 paddle/fluid/pybind/communication.h diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 2edfc60d5b0e87..5e61133510d6a2 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -77,6 +77,7 @@ set(PYBIND_SRCS compatible.cc io.cc generator_py.cc + communication.cc cuda_streams_py.cc) if(NOT ON_INFER) diff --git a/paddle/fluid/pybind/communication.cc b/paddle/fluid/pybind/communication.cc new file mode 100644 index 00000000000000..1a6a395545a96b --- /dev/null +++ b/paddle/fluid/pybind/communication.cc @@ -0,0 +1,72 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "paddle/fluid/distributed/store/tcp_store.h" +#include "paddle/fluid/pybind/communication.h" + +namespace py = pybind11; + +namespace paddle { +namespace pybind { + +using TCPStore = paddle::distributed::TCPStore; + +void BindTCPStore(py::module *m) { + auto Store = + py::class_>( + *m, "Store") + .def(py::init<>()) + .def("set", + [](distributed::Store &self, const std::string &key, + const std::string &value) { + std::vector data(value.begin(), value.end()); + self.set(key, data); + }, + py::arg("key"), py::arg("value"), + py::call_guard()) + .def("get", + [](distributed::Store &self, + const std::string &key) -> py::bytes { + auto data = self.get(key); + return py::bytes(reinterpret_cast(data.data()), + data.size()); + }, + py::arg("key"), py::call_guard()) + .def("add", &distributed::Store::add, + py::call_guard()) + .def("wait", &distributed::Store::wait, + py::call_guard()); + + py::class_>(*m, "TCPStore", Store) + .def(py::init([](std::string hostname, uint16_t port, bool is_master, + size_t world_size, std::chrono::seconds timeout) { + return std::make_shared(hostname, port, is_master, + world_size, timeout); + }), + py::arg("hostname"), py::arg("port"), py::arg("is_master"), + py::arg("world_size"), + py::arg("timeout") = distributed::tcputils::kNoTimeout, + py::call_guard()); +} + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/communication.h b/paddle/fluid/pybind/communication.h new file mode 100644 index 00000000000000..17045ccfe65cae --- /dev/null +++ b/paddle/fluid/pybind/communication.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +#include "pybind11/chrono.h" +#include "pybind11/complex.h" +#include "pybind11/functional.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace paddle { +namespace pybind { + +void BindTCPStore(pybind11::module* m); + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc index 3b48837c0009dc..9870eab8da9023 100644 --- a/paddle/fluid/pybind/distributed_py.cc +++ b/paddle/fluid/pybind/distributed_py.cc @@ -24,8 +24,6 @@ limitations under the License. */ #include "paddle/fluid/distributed/collective/ProcessGroup.h" #include "paddle/fluid/distributed/collective/Types.h" #include "paddle/fluid/distributed/collective/reducer.h" -#include "paddle/fluid/distributed/store/tcp_store.h" -#include "paddle/fluid/distributed/store/tcp_utils.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/imperative/layer.h" @@ -39,6 +37,7 @@ limitations under the License. 
*/ #if defined(PADDLE_WITH_GLOO) #include "paddle/fluid/distributed/collective/ProcessGroupGloo.h" +#include "paddle/fluid/distributed/store/tcp_store.h" #endif namespace py = pybind11; @@ -54,8 +53,6 @@ using GlooStore = paddle::distributed::ProcessGroupGloo::GlooStore; using GlooOptions = paddle::distributed::ProcessGroupGloo::GlooOptions; #endif -using TCPStore = paddle::distributed::TCPStore; - static std::string GLOO_SOCKET_IFNAME_ENV = "GLOO_SOCKET_IFNAME"; // NOLINT void BindDistributed(py::module *m) { @@ -85,42 +82,6 @@ void BindDistributed(py::module *m) { .def_readwrite("reduce_op", &distributed::ReduceOptions::reduce_op) .def_readwrite("source_root", &distributed::ReduceOptions::root_rank); - auto Store = - py::class_>( - *m, "Store") - .def(py::init<>()) - .def("set", - [](distributed::Store &self, const std::string &key, - const std::string &value) { - std::vector data(value.begin(), value.end()); - self.set(key, data); - }, - py::arg("key"), py::arg("value"), - py::call_guard()) - .def("get", - [](distributed::Store &self, - const std::string &key) -> py::bytes { - auto data = self.get(key); - return py::bytes(reinterpret_cast(data.data()), - data.size()); - }, - py::arg("key"), py::call_guard()) - .def("add", &distributed::Store::add, - py::call_guard()) - .def("wait", &distributed::Store::wait, - py::call_guard()); - - py::class_>(*m, "TCPStore", Store) - .def(py::init([](std::string hostname, uint16_t port, bool is_master, - size_t world_size, std::chrono::seconds timeout) { - return std::make_shared(hostname, port, is_master, - world_size, timeout); - }), - py::arg("hostname"), py::arg("port"), py::arg("is_master"), - py::arg("world_size"), - py::arg("timeout") = distributed::tcputils::kNoTimeout, - py::call_guard()); - auto ProcessGroup = py::class_>(*m, "ProcessGroup") diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 7a1397c76d0470..c016321ef802a1 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -91,6 +91,7 @@ limitations under the License. */ #include "paddle/fluid/pybind/bind_cost_model.h" #include "paddle/fluid/pybind/bind_fleet_executor.h" #include "paddle/fluid/pybind/box_helper_py.h" +#include "paddle/fluid/pybind/communication.h" #include "paddle/fluid/pybind/compatible.h" #include "paddle/fluid/pybind/const_value.h" #include "paddle/fluid/pybind/data_set_py.h" @@ -2617,6 +2618,7 @@ All parameter, weight, gradient are variables in Paddle. BindGlobalValueGetterSetter(&m); BindProcessMeshDesc(&m); BindFleetExecutor(&m); + BindTCPStore(&m); py::class_(m, "LodRankTable") .def("items", [](framework::LoDRankTable &table) {