Skip to content

Commit

Permalink
[AutoParallel] convert distensor for eager custom op (PaddlePaddle#59137)
Browse files Browse the repository at this point in the history

* convert distensor for eager custom op
  • Loading branch information
wanghuancoder authored and SecretXV committed Nov 28, 2023
1 parent ba2e1e2 commit a79421c
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 50 deletions.
41 changes: 0 additions & 41 deletions paddle/fluid/eager/custom_operator/custom_operator_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -472,47 +472,6 @@ std::tuple<bool, bool, phi::distributed::ProcessMesh> PrepareCtxForAutoParallel(
x.emplace_back(t);
}
}
const phi::distributed::ProcessMesh* mesh = nullptr;
for (auto& input : x) {
if (input.is_dist_tensor()) {
mesh = &(
std::dynamic_pointer_cast<phi::distributed::DistTensor>(input.impl())
->dist_attr()
.process_mesh());
break;
}
}

if (mesh) {
for (auto& input : x) {
if (input.is_dist_tensor()) {
PADDLE_ENFORCE_EQ(
std::dynamic_pointer_cast<phi::distributed::DistTensor>(
input.impl())
->dist_attr()
.process_mesh(),
*mesh,
phi::errors::InvalidArgument(
"Input %s has different mesh. However all inputs should "
"have the same mesh.",
input.name()));
} else {
PADDLE_ENFORCE_EQ(
phi::DenseTensor::classof(input.impl().get()),
true,
phi::errors::InvalidArgument("Failed to convert input %s impl "
"to phi::distributed::DistTensor "
"as it's not phi::DenseTensor.",
input.name()));
phi::distributed::TensorDistAttr dist_attr(
phi::vectorize(input.impl()->dims()));
dist_attr.set_process_mesh(*mesh);
auto dense_t = std::static_pointer_cast<phi::DenseTensor>(input.impl());
input.set_impl(
std::make_shared<phi::distributed::DistTensor>(dense_t, dist_attr));
}
}
}

run_auto_parallel = paddle::experimental::AllInputsAreDistTensor(x);
rank_is_in_current_mesh = true;
Expand Down
39 changes: 36 additions & 3 deletions paddle/fluid/pybind/eager_functions.cc
Original file line number Diff line number Diff line change
Expand Up @@ -578,14 +578,47 @@ static PyObject* eager_api_run_custom_op(PyObject* self,
<< " to CustomOpKernelContext. Add vector<Tensor> size = "
<< ctx.InputRangeAt(i).second - ctx.InputRangeAt(i).first;
} else {
paddle::Tensor tensor =
std::move(CastPyArg2Tensor(obj, i + 1)); // NOLINT
ctx.EmplaceBackInput(std::move(tensor));
const paddle::Tensor& tensor = CastPyArg2Tensor(obj, i + 1); // NOLINT
ctx.EmplaceBackInput(tensor);
VLOG(7) << "Custom operator add input " << input
<< " to CustomOpKernelContext. Add Tensor for general case.";
}
}

const phi::distributed::ProcessMesh* mesh = nullptr;
if (InputsContainDistTensor(&mesh, *(ctx.AllMutableInput()))) {
ctx.AllMutableInput()->clear();
for (size_t i = 0; i < inputs.size(); ++i) {
const auto& input = inputs.at(i);
// Parse op_type first, so that use i + 1
PyObject* obj = PyTuple_GET_ITEM(args, i + 1);
// Emplace Py_None from python, this means optional inputs passed to C++,
// use one un-initialized tensor to indicate both Tensor and
// vector<Tensor> inputs.
if (obj == Py_None) {
VLOG(7) << "Custom operator add input " << input
<< " to CustomOpKernelContext. Add un-initialized tensor "
"because the optional input is None";
ctx.EmplaceBackInput(std::move(paddle::Tensor()));
continue;
}
if (paddle::framework::detail::IsDuplicableVar(input)) {
std::vector<paddle::Tensor> tensors =
std::move(CastPyArg2VectorOfTensor(obj, i + 1, mesh)); // NOLINT
ctx.EmplaceBackInputs(std::move(tensors));
VLOG(7) << "Custom operator add input " << input
<< " to CustomOpKernelContext. Add vector<Tensor> size = "
<< ctx.InputRangeAt(i).second - ctx.InputRangeAt(i).first;
} else {
const paddle::Tensor& tensor = CastPyArg2Tensor(obj, i + 1); // NOLINT
ConvertAllInputsToDistTensor(mesh, tensor);
ctx.EmplaceBackInput(tensor);
VLOG(7) << "Custom operator add input " << input
<< " to CustomOpKernelContext. Add Tensor for general case.";
}
}
}

// Parse op_type and inputs first, so that use 1 + inputs.size() + i
int attr_start_idx = static_cast<int>(1 + inputs.size());
for (size_t i = 0; i < attrs.size(); ++i) {
Expand Down
7 changes: 3 additions & 4 deletions paddle/fluid/pybind/inference_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,8 @@ void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
}
}

void PaddleTensorShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
paddle::Tensor &&paddle_tensor) {
void PaddleTensorShareExternalData(paddle_infer::Tensor &tensor, // NOLINT
paddle::Tensor &paddle_tensor) { // NOLINT
std::vector<int> shape;
for (int i = 0; i < paddle_tensor.dims().size(); ++i) {
shape.push_back(paddle_tensor.dims()[i]); // NOLINT
Expand Down Expand Up @@ -1245,8 +1245,7 @@ void BindPaddleInferTensor(py::module *m) {
.def("_share_external_data_paddle_tensor_bind",
[](paddle_infer::Tensor &self, const py::handle &input) {
PyObject *obj = input.ptr();
PaddleTensorShareExternalData(self,
std::move(CastPyArg2Tensor(obj, 0)));
PaddleTensorShareExternalData(self, CastPyArg2Tensor(obj, 0));
})
.def("copy_to_cpu", &PaddleInferTensorToNumpy)
.def("shape", &paddle_infer::Tensor::shape)
Expand Down
1 change: 1 addition & 0 deletions paddle/phi/api/ext/op_meta_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ class PADDLE_API CustomOpKernelContext {
CustomOpKernelContext() = default;

void EmplaceBackInput(Tensor&& input);
void EmplaceBackInput(const Tensor& input);
void EmplaceBackInputs(const std::vector<Tensor>& inputs);
void EmplaceBackOutput(Tensor&& output);
void EmplaceBackOutputs(const std::vector<Tensor>& outputs);
Expand Down
6 changes: 6 additions & 0 deletions paddle/phi/api/lib/op_meta_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ void CustomOpKernelContext::EmplaceBackInput(Tensor&& input) {
input_range_.emplace_back(index, index + 1);
}

// Append a copy of `input` to the kernel context and register it as a
// single-element input range [pos, pos + 1), matching the rvalue overload.
void CustomOpKernelContext::EmplaceBackInput(const Tensor& input) {
  const size_t pos = inputs_.size();
  inputs_.push_back(input);
  input_range_.emplace_back(pos, pos + 1);
}

void CustomOpKernelContext::EmplaceBackInputs(
const std::vector<Tensor>& inputs) {
size_t index = inputs_.size();
Expand Down
2 changes: 1 addition & 1 deletion paddle/utils/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void ShareTensor(PyObject* src, PyObject* dst) {
}
}

paddle::Tensor CastPyArg2Tensor(PyObject* obj, Py_ssize_t arg_pos) {
paddle::Tensor& CastPyArg2Tensor(PyObject* obj, Py_ssize_t arg_pos) {
if (PyObject_TypeCheck(obj, p_tensor_type) ||
PyObject_TypeCheck(obj, p_string_tensor_type)) {
return reinterpret_cast<TensorObject*>(obj)->tensor;
Expand Down
2 changes: 1 addition & 1 deletion paddle/utils/pybind.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ bool PyCheckTensor(PyObject* obj);
void ShareTensor(PyObject* src, PyObject* dst);

// Internal use only, to expose the Tensor type to Python.
paddle::Tensor CastPyArg2Tensor(PyObject* obj, Py_ssize_t arg_pos);
paddle::Tensor& CastPyArg2Tensor(PyObject* obj, Py_ssize_t arg_pos);

// Internal use only, to expose the Tensor type to Python.
PyObject* ToPyObject(const paddle::Tensor& value,
Expand Down

0 comments on commit a79421c

Please sign in to comment.