Commit 64cbc6d: resolve conflict
chenwhql committed Jan 11, 2022
2 parents: 3331f1e + 8cc0955
Showing 109 changed files with 4,810 additions and 3,123 deletions.
4 changes: 4 additions & 0 deletions cmake/external/protobuf.cmake
@@ -207,6 +207,10 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
elseif(WITH_IPU)
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
SET(PROTOBUF_TAG d750fbf648256c7c631f51ffdbf67d7c18b0114e)
elseif(WIN32)
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
# Change the tag to support building with vs2019
SET(PROTOBUF_TAG 01a05a53f40ca2ac5f0af10c6cc0810bee39b792)
else()
SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
SET(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546)
3 changes: 3 additions & 0 deletions cmake/pten_kernel.cmake
@@ -79,6 +79,9 @@ function(kernel_library TARGET)
endif()

list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.h)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h)
list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h)
endif()
list(APPEND all_srcs ${common_srcs})
list(APPEND all_srcs ${cpu_srcs})
list(APPEND all_srcs ${gpu_srcs})
3 changes: 3 additions & 0 deletions paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -28,6 +28,9 @@

static void CopyOrAddTensor(egr::EagerTensor* tensor,
const egr::EagerTensor& t) {
if (t.Var().IsInitialized()) {
const_cast<egr::EagerTensor*>(&t)->SyncToTensor();
}
if (!tensor->defined() || !tensor->initialized()) {
// Simply copy tensor->impl
*tensor = t;
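The hunk above makes CopyOrAddTensor sync the underlying framework::Variable into the pten tensor before the gradient is copied or accumulated. A minimal standalone sketch of the copy-or-add pattern, with a plain double standing in for egr::EagerTensor (the FakeGrad type and names below are illustrative, not part of the commit):

    #include <optional>

    // Stand-in for egr::EagerTensor: an empty optional means "not defined / not initialized".
    struct FakeGrad {
      std::optional<double> value;
    };

    static void CopyOrAdd(FakeGrad* accumulated, const FakeGrad& incoming) {
      if (!incoming.value) return;                // nothing to accumulate
      if (!accumulated->value) {
        *accumulated = incoming;                  // first gradient: simply copy
      } else {
        *accumulated->value += *incoming.value;   // later gradients: add in place
      }
    }

    int main() {
      FakeGrad acc, g1{1.0}, g2{2.0};
      CopyOrAdd(&acc, g1);  // copy
      CopyOrAdd(&acc, g2);  // add -> 3.0
      return 0;
    }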
2 changes: 1 addition & 1 deletion paddle/fluid/eager/accumulation/accumulation_node.h
@@ -32,7 +32,7 @@ class GradNodeAccumulation : public GradNodeBase {
void RetainGrad(
const std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook);

- egr::EagerTensor Grad() { return accumulated_grad; }
+ egr::EagerTensor* Grad() { return &accumulated_grad; }

private:
egr::EagerTensor accumulated_grad;
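Grad() now returns a pointer to the stored accumulated_grad instead of a copy, so callers can update the accumulated gradient in place. A hedged analogue with a plain double (the Accumulator class below is illustrative only):

    #include <cassert>

    class Accumulator {
     public:
      double* Grad() { return &accumulated_grad_; }  // was: double Grad() (returned a copy)
     private:
      double accumulated_grad_ = 0.0;
    };

    int main() {
      Accumulator acc;
      *acc.Grad() += 1.5;          // mutates the stored value directly
      assert(*acc.Grad() == 1.5);  // a copy-returning Grad() could not be updated this way
      return 0;
    }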
50 changes: 27 additions & 23 deletions paddle/fluid/eager/eager_tensor.h
@@ -239,8 +239,8 @@ class EagerTensor final {
auto tensor_dense =
std::dynamic_pointer_cast<pten::DenseTensor>(tensor_->impl());
if (tensor_dense) {
- paddle::experimental::MovesStorage(tensor_dense.get(),
- framework_tensor);
+ paddle::experimental::SharesStorage(tensor_dense.get(),
+ framework_tensor);
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Unrecognized egr::EagerTensor type, only "
@@ -258,27 +258,23 @@
/** Part 11: Sync paddle::framework::Variable with pten::Tensor **/
void SyncToTensor() {
// Synchronize allocation only once.
- if (!this->defined() || !this->initialized()) {
- // TODO(jiabin): Support selected rows later.
- if (var_.IsInitialized()) {
- if (var_.IsType<paddle::framework::LoDTensor>()) {
- SetImplWithLegacyTensor<paddle::framework::LoDTensor,
- pten::DenseTensor>();
- } else if (var_.IsType<paddle::framework::Tensor>()) {
- SetImplWithLegacyTensor<paddle::framework::Tensor,
- pten::DenseTensor>();
- } else {
- PADDLE_THROW(paddle::platform::errors::Fatal(
- "Unable to fetch underlying tensor "
- "from VarBase, only LoDTensor and "
- "Tensor are supported for now"));
- }
+ if (var_.IsInitialized()) {
+ if (var_.IsType<paddle::framework::LoDTensor>()) {
+ SetImplWithLegacyTensor<paddle::framework::LoDTensor,
+ pten::DenseTensor>();
+ } else if (var_.IsType<paddle::framework::Tensor>()) {
+ SetImplWithLegacyTensor<paddle::framework::Tensor, pten::DenseTensor>();
} else {
- PADDLE_THROW(paddle::platform::errors::Fatal(
- "Can not Sync EagerTensor %s whose paddle::framework::Variable is "
- "not initialized!",
- name()));
+ PADDLE_THROW(
+ paddle::platform::errors::Fatal("Unable to fetch underlying tensor "
+ "from VarBase, only LoDTensor and "
+ "Tensor are supported for now"));
}
+ } else {
+ PADDLE_THROW(paddle::platform::errors::Fatal(
+ "Can not Sync EagerTensor %s whose paddle::framework::Variable is "
+ "not initialized!",
+ name()));
}
}

@@ -296,8 +296,16 @@
template <typename LEGACY_TYPE, typename TYPE>
void SetImplWithLegacyTensor() {
const auto& framework_tensor = var_.Get<LEGACY_TYPE>();
- this->set_impl(
- std::move(paddle::experimental::MakePtenDenseTensor(framework_tensor)));
+ if (this->initialized()) {
+ VLOG(8) << "Sync Var to initialized tensor for: " << name();
+ paddle::experimental::ReMakePtenDenseTensor(
+ framework_tensor,
+ static_cast<pten::DenseTensor*>(this->impl().get()));
+ } else {
+ VLOG(8) << "Sync Var to uninitialized tensor for: " << name();
+ this->set_impl(std::move(
+ paddle::experimental::MakePtenDenseTensor(framework_tensor)));
+ }
var_.Clear();
}
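SetImplWithLegacyTensor now reuses the existing pten::DenseTensor when the EagerTensor is already initialized (ReMakePtenDenseTensor) and only allocates a fresh impl otherwise (MakePtenDenseTensor). A small standalone analogue of that reuse-or-create pattern, with std::vector<float> standing in for the dense tensor (the Holder type below is illustrative, not part of the commit):

    #include <memory>
    #include <vector>

    class Holder {
     public:
      void SyncFrom(const std::vector<float>& src) {
        if (impl_) {
          *impl_ = src;                                       // "ReMake": refill the existing buffer
        } else {
          impl_ = std::make_shared<std::vector<float>>(src);  // "Make": first allocation
        }
      }
      bool initialized() const { return impl_ != nullptr; }
     private:
      std::shared_ptr<std::vector<float>> impl_;
    };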

@@ -118,7 +118,7 @@ TEST(EagerTensor, MemberFunction) {
CHECK_EQ(et3.Var().Get<paddle::framework::LoDTensor>().data<float>()[1],
10.0f);
VLOG(6) << "SyncToTensor";
- CHECK(et3.initialized() == false);
+ CHECK(et3.initialized() == true);
et3.SyncToTensor();
CHECK(et3.initialized() == true);
VLOG(6) << "Check Tensor";
8 changes: 4 additions & 4 deletions paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc
@@ -87,8 +87,8 @@ void benchmark_eager_intermediate_matmul(const EagerTensor& X,
// Examine Forward Grad (w.r.t max_num_runs = 2)
eager_test::CompareVariableWithValue<float>(input_tensor0, 16);
// Examine Backward Grad (w.r.t max_num_runs = 2)
- eager_test::CompareGradVariableWithValue<float>(X, 16);
- eager_test::CompareGradVariableWithValue<float>(Y, 16);
+ eager_test::CompareGradTensorWithValue<float>(X, 16);
+ eager_test::CompareGradTensorWithValue<float>(Y, 16);
}
}

@@ -121,8 +121,8 @@ void benchmark_eager_intermediate_mlp(const EagerTensor& X,
eager_test::CompareVariableWithValue<float>(Out, result["Out"]);

// Examine Backward Grad (w.r.t max_num_runs = 2)
- eager_test::CompareGradVariableWithValue<float>(X, result["GradX"]);
- eager_test::CompareGradVariableWithValue<float>(Ws[0], result["GradW"]);
+ eager_test::CompareGradTensorWithValue<float>(X, result["GradX"]);
+ eager_test::CompareGradTensorWithValue<float>(Ws[0], result["GradW"]);
}
}

10 changes: 5 additions & 5 deletions paddle/fluid/eager/tests/task_tests/generated_test.cc
@@ -54,7 +54,7 @@ TEST(Generated, Sigmoid) {
RunBackward(target_tensors, {});

VLOG(6) << "Finish Backward";
- eager_test::CompareGradVariableWithValue<float>(tensor, 0.25);
+ eager_test::CompareGradTensorWithValue<float>(tensor, 0.25);
}

TEST(Generated, Matmul_v2) {
@@ -85,8 +85,8 @@ TEST(Generated, Matmul_v2) {
std::vector<egr::EagerTensor> target_tensors = {output_tensor};
RunBackward(target_tensors, {});

- eager_test::CompareGradVariableWithValue<float>(X, 2.0 * 20);
- eager_test::CompareGradVariableWithValue<float>(Y, 3.0 * 4);
+ eager_test::CompareGradTensorWithValue<float>(X, 2.0 * 20);
+ eager_test::CompareGradTensorWithValue<float>(Y, 3.0 * 4);
}

TEST(Generated, ElementwiseAdd) {
@@ -116,8 +116,8 @@ TEST(Generated, ElementwiseAdd) {
std::vector<egr::EagerTensor> target_tensors = {output_tensor};
RunBackward(target_tensors, {});

- eager_test::CompareGradVariableWithValue<float>(X, 1.0);
- eager_test::CompareGradVariableWithValue<float>(Y, 1.0);
+ eager_test::CompareGradTensorWithValue<float>(X, 1.0);
+ eager_test::CompareGradTensorWithValue<float>(Y, 1.0);
}

} // namespace egr
56 changes: 53 additions & 3 deletions paddle/fluid/framework/fleet/fleet_wrapper.cc
@@ -632,6 +632,7 @@ void FleetWrapper::PullSparseToTensorSync(const uint64_t table_id, int fea_dim,
if (ret != 0) {
LOG(ERROR) << "fleet pull sparse failed, status[" << ret << "]";
sleep(sleep_seconds_before_fail_exit_);
exit(-1);
}
#else
for (size_t index = 0; index < inputs->size(); ++index) {
@@ -685,9 +686,36 @@ void FleetWrapper::PullDenseVarsSync(
paddle::ps::Region reg(w, tensor->numel());
regions.emplace_back(std::move(reg));
}
- auto status =
- pslib_ptr_->_worker_ptr->pull_dense(regions.data(), regions.size(), tid);
- status.wait();
+ int32_t status = -1;
+ int32_t cnt = 0;
+ while (true) {
+ auto tt = pslib_ptr_->_worker_ptr->pull_dense(regions.data(),
+ regions.size(), tid);
+ bool flag = true;
+
+ tt.wait();
+
+ try {
+ status = tt.get();
+ } catch (const std::future_error& e) {
+ VLOG(0) << "Caught a future_error with code" << e.code()
+ << ", Message:" << e.what();
+ }
+ if (status != 0) {
+ VLOG(0) << "fleet pull dense sync failed, status[" << status << "]";
+ sleep(sleep_seconds_before_fail_exit_);
+ flag = false;
+ cnt++;
+ }
+ if (cnt > 3) {
+ VLOG(0) << "fleet pull dense sync failed, retry 3 times";
+ exit(-1);
+ }
+
+ if (flag) {
+ break;
+ }
+ }
#endif
}
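PullDenseVarsSync now wraps the pslib pull_dense call in a bounded retry loop: wait on the future, read its status, sleep and retry on failure, and abort the process after three failed attempts. A runnable sketch of that retry pattern with the pull call stubbed out (the stub lambda and log text are illustrative, not the real pslib API):

    #include <cstdlib>
    #include <iostream>

    int main() {
      auto pull_dense_once = [] { return 0; };  // stub; the real code waits on a pslib future
      int cnt = 0;
      while (true) {
        int status = pull_dense_once();
        if (status == 0) break;                 // success: stop retrying
        std::cerr << "fleet pull dense sync failed, status[" << status << "]\n";
        // the real code also sleeps here: sleep(sleep_seconds_before_fail_exit_);
        if (++cnt > 3) {                        // as in the hunk: give up after 3 retries
          std::cerr << "fleet pull dense sync failed, retry 3 times\n";
          std::exit(-1);
        }
      }
      return 0;
    }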

@@ -1248,6 +1276,7 @@ void FleetWrapper::LoadModelOneTable(const uint64_t table_id,
if (ret.get() != 0) {
LOG(ERROR) << "load model of table id: " << table_id
<< ", from path: " << path << " failed";
exit(-1);
}
#else
VLOG(0) << "FleetWrapper::LoadModel does nothing when no pslib";
@@ -1263,6 +1292,7 @@ void FleetWrapper::LoadWithWhitelist(const uint64_t table_id,
if (ret.get() != 0) {
LOG(ERROR) << "load model of table id: " << table_id
<< ", from path: " << path << " failed";
exit(-1);
}
#else
VLOG(0) << "FleetWrapper::LoadWhitelist does nothing when no pslib";
@@ -1311,6 +1341,7 @@ void FleetWrapper::SaveModelOneTable(const uint64_t table_id,
if (ret.get() != 0) {
LOG(ERROR) << "save model of table id: " << table_id
<< ", to path: " << path << " failed";
exit(-1);
}
#else
VLOG(0) << "FleetWrapper::SaveModelOneTable does nothing when no pslib";
@@ -1328,6 +1359,7 @@ void FleetWrapper::SaveModelOneTablePrefix(const uint64_t table_id,
if (ret.get() != 0) {
LOG(ERROR) << "save model (with prefix) of table id: " << table_id
<< ", to path: " << path << " failed";
exit(-1);
}
#else
VLOG(0) << "FleetWrapper::SaveModelOneTablePrefix does nothing when no pslib";
@@ -1351,6 +1383,7 @@ void FleetWrapper::SetDate(const uint64_t table_id, const std::string& date) {
ret.wait();
if (ret.get() != 0) {
LOG(ERROR) << "setdate : " << date << " failed";
exit(-1);
}
#else
VLOG(0) << "FleetWrapper::SetDate does nothing when no pslib-gpu";
@@ -1463,6 +1496,11 @@ void FleetWrapper::ShrinkSparseTable(int table_id) {
#ifdef PADDLE_WITH_PSLIB
auto ret = pslib_ptr_->_worker_ptr->shrink(table_id);
ret.wait();
int32_t err_code = ret.get();
if (err_code == -1) {
LOG(ERROR) << "Shrink Sparse Table failed";
exit(-1);
}
#else
VLOG(0) << "FleetWrapper::ShrinkSparseTable does nothing when no pslib";
#endif
@@ -1472,6 +1510,10 @@ void FleetWrapper::ClearModel() {
#ifdef PADDLE_WITH_PSLIB
auto ret = pslib_ptr_->_worker_ptr->clear();
ret.wait();
int32_t err_code = ret.get();
if (err_code == -1) {
LOG(ERROR) << "Clear Model failed";
}
#else
VLOG(0) << "FleetWrapper::ClearModel does nothing when no pslib";
#endif
@@ -1481,6 +1523,10 @@ void FleetWrapper::ClearOneTable(const uint64_t table_id) {
#ifdef PADDLE_WITH_PSLIB
auto ret = pslib_ptr_->_worker_ptr->clear(table_id);
ret.wait();
int32_t err_code = ret.get();
if (err_code == -1) {
LOG(ERROR) << "Clear One Table failed table_id: " << table_id;
}
#else
VLOG(0) << "FleetWrapper::ClearOneTable does nothing when no pslib";
#endif
@@ -1541,6 +1587,10 @@ void FleetWrapper::ClientFlush() {
#ifdef PADDLE_WITH_PSLIB
auto ret = pslib_ptr_->_worker_ptr->flush();
ret.wait();
int32_t err_code = ret.get();
if (err_code == -1) {
LOG(ERROR) << "Client Flush failed";
}
#else
VLOG(0) << "FleetWrapper::ServerFlush does nothing when no pslib";
#endif
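Several of the pslib calls above (shrink, clear, flush) now read the future's return code after wait() and log or abort on failure. A compact sketch of that wait/get/check pattern, using std::async as a stand-in for the pslib worker futures:

    #include <cstdint>
    #include <future>
    #include <iostream>

    int main() {
      std::future<int32_t> ret = std::async(std::launch::async, [] { return int32_t{0}; });
      ret.wait();
      int32_t err_code = ret.get();
      if (err_code == -1) {
        std::cerr << "ClientFlush failed\n";  // the load/save paths additionally exit(-1)
      }
      return 0;
    }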
1 change: 0 additions & 1 deletion paddle/fluid/framework/fleet/heter_context.h
@@ -235,7 +235,6 @@ class HeterContext {
}
VLOG(3) << "heter_context unique keys with dynamic mf dimention";
}

for (std::thread& t : threads) {
t.join();
}
26 changes: 22 additions & 4 deletions paddle/fluid/framework/operator.cc
@@ -1880,16 +1880,32 @@ void OperatorWithKernel::BuildPtenKernelContext(
// Otherwise,we will create new storage.
for (size_t offset = 0; offset < outs_vector.size(); ++offset) {
if (current_vector_size > start_idx + offset) {
- experimental::ReMakePtenDenseTensorFromVar(
- outs_vector[offset], out_def,
+ auto* buffer_tensor =
pt_kernel_context_->MutableOutputAt<pten::DenseTensor>(start_idx +
- offset));
+ offset);
+ if (buffer_tensor) {
+ experimental::ReMakePtenDenseTensorFromVar(outs_vector[offset],
+ out_def, buffer_tensor);
+ }
} else {
pt_kernel_context_->EmplaceBackOutputWithoutSetRange(
experimental::MakePtenTensorBaseFromVar(outs_vector[offset],
out_def));
}
}

+ // Deal with the case that some outputs are NULL when run the kernel.
+ // For example : the outputs of matmul_grad are dx and dy,
+ // sometimes dx or dy may be NULL.
+ if (outs_vector.empty()) {
+ if (current_vector_size > start_idx) {
+ pt_kernel_context_->SetOutputWithoutSetRange(start_idx, {nullptr});
+ } else {
+ pt_kernel_context_->EmplaceBackOutputWithoutSetRange({nullptr});
+ }
+ end_idx = start_idx + 1;
+ }

pt_kernel_context_->AssignOutputRange(std::make_pair(start_idx, end_idx),
i);
}
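The new branch above records a single nullptr placeholder when an output slot has no variables (for example matmul_grad when only one of dx/dy is needed), so later output slots keep a consistent index range. A self-contained sketch of that bookkeeping with simplified types (the vector-of-shared_ptr layout below is illustrative, not the real kernel context):

    #include <cstddef>
    #include <memory>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::shared_ptr<int>> outputs;      // stands in for the kernel context outputs
      std::vector<std::pair<size_t, size_t>> ranges;  // [start, end) recorded per output slot

      auto add_slot = [&](const std::vector<int>& values) {
        size_t start = outputs.size();
        for (int v : values) outputs.push_back(std::make_shared<int>(v));
        size_t end = outputs.size();
        if (values.empty()) {           // empty slot: keep one nullptr placeholder
          outputs.push_back(nullptr);
          end = start + 1;
        }
        ranges.emplace_back(start, end);
      };

      add_slot({1, 2});  // a normal two-output slot -> range [0, 2)
      add_slot({});      // an empty slot still occupies one nullptr entry -> range [2, 3)
      add_slot({3});     // unaffected by the empty slot -> range [3, 4)
      return 0;
    }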
@@ -2002,7 +2018,9 @@ void OperatorWithKernel::WriteBackToOutputs(RuntimeContext* ctx) const {
range_pair.first, range_pair.second);

for (size_t j = 0; j < pten_outs.size(); ++j) {
- experimental::MakeVariableFromPtenTensor(pten_outs[j], outs_vector[j]);
+ if (pten_outs[j]) {
+ experimental::MakeVariableFromPtenTensor(pten_outs[j], outs_vector[j]);
+ }
}
}
}
2 changes: 1 addition & 1 deletion paddle/fluid/framework/pten_utils.cc
@@ -99,7 +99,7 @@ KernelSignatureMap& KernelSignatureMap::Instance() {
const auto& op_type = pair.first;
const auto* op_proto = pair.second.proto_;
if (pten::KernelFactory::Instance().HasCompatiblePtenKernel(op_type) &&
- op_proto != nullptr) {
+ op_proto) {
KernelArgsNameMakerByOpProto maker(op_proto);
VLOG(10) << "Register kernel signature for " << op_type;
auto success = kernel_signature_map_->map_
(The remaining changed files in this commit are not shown above.)