
Commit

Merge branch 'develop' of github.com:PaddlePaddle/Paddle into add_new_quant_format
yghstill committed Apr 4, 2022
2 parents 324e04c + 119816f commit a3aeed9
Showing 143 changed files with 2,464 additions and 1,041 deletions.
@@ -88,9 +88,9 @@ def ParseArguments():

CLEAR_VECTOR_TENSOR_WRAPPERS_TEMPLATE = \
"""
-  for (auto tw: {}) {
+  for (auto& tw : {}) {{
     tw.clear();
-  };
+  }}
"""

SET_ATTR_METHOD_TEMPLATE = \
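The CLEAR_VECTOR_TENSOR_WRAPPERS_TEMPLATE above is rendered with Python's str.format, so literal C++ braces must be doubled ({{ and }}) while bare {} stays a placeholder; the old single braces broke the rendering. With a hypothetical wrapper list named tensor_wrappers_, the fixed template emits C++ like:

  for (auto& tw : tensor_wrappers_) {
    tw.clear();
  }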
@@ -359,6 +359,12 @@ class {} : public egr::GradNodeBase {{
if({}.initialized()) {}_optional = paddle::make_optional<const paddle::experimental::Tensor&>({});
"""

+CREATE_RECOVER_OPTIONAL_TENSOR_TEMPLATE = \
+"""
+  paddle::optional<const paddle::experimental::Tensor&> {}_optional = paddle::none;
+  if( {}.impl() ) {}_optional = paddle::make_optional<const paddle::experimental::Tensor&>({});
+"""


#######################
## Generator Helpers ##
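Rendered output of the new CREATE_RECOVER_OPTIONAL_TENSOR_TEMPLATE, assuming a recovered tensor variable named x (all four {} placeholders receive the same transformed tensor name):

  paddle::optional<const paddle::experimental::Tensor&> x_optional = paddle::none;
  if( x.impl() ) x_optional = paddle::make_optional<const paddle::experimental::Tensor&>(x);

The optional is populated only when the recovered tensor holds an implementation; together with the generator change below, this replaces the RecoverOptionalTensorWrapper helper that this commit deletes from utils.cc.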
@@ -1248,11 +1254,18 @@ def GenerateNodeDefinition(self, grad_node_creation_str):
name)

is_optional = (name in self.optional_inputs)
-            tensor_wrapper_recover_str = f"{indent}auto {transformed_tensor_name} = egr::EagerUtils::RecoverTensorWrapper(&this->{tensor_wrapper_name}, this->shared_from_this());"
             if is_optional:
-                tensor_wrapper_recover_str = f"{indent}auto {transformed_tensor_name} = egr::EagerUtils::RecoverOptionalTensorWrapper(&this->{tensor_wrapper_name}, this->shared_from_this());"
+                tensor_wrapper_recover_str += "\n" + CREATE_RECOVER_OPTIONAL_TENSOR_TEMPLATE.format(
+                    transformed_tensor_name, transformed_tensor_name,
+                    transformed_tensor_name, transformed_tensor_name)
+
+                grad_api_args[
+                    grad_api_position] = transformed_tensor_name + "_optional"
+
+            else:
+                tensor_wrapper_recover_str = f"{indent}auto {transformed_tensor_name} = egr::EagerUtils::RecoverTensorWrapper(&this->{tensor_wrapper_name}, this->shared_from_this());"
-            grad_api_args[grad_api_position] = transformed_tensor_name
+                grad_api_args[grad_api_position] = transformed_tensor_name

get_grad_in_args_list.append(tensor_wrapper_recover_str)

# Grad Ins from grads
@@ -23,7 +23,7 @@
###########################
## Global Configurations ##
###########################
-skipped_forward_api_names = set(["scale"])
+skipped_forward_api_names = set([])


def SkipAPIGeneration(forward_api_name):
5 changes: 3 additions & 2 deletions paddle/fluid/eager/grad_tensor_holder.cc
@@ -64,8 +64,9 @@ void GradTensorHolder::CopyValueFromTensor(
} else {
// Create new tensor->impl and fill it with 1.0
if (t.defined()) {
-      // Fill 1.0
-      buffer_[slot_id][rank] = paddle::experimental::ones_like(t, t.dtype());
+      // Fill with 1.0; use full() because ones_like() does not support
+      // complex dtypes.
+      buffer_[slot_id][rank] =
+          paddle::experimental::full(t.shape(), 1, t.dtype(), t.inner_place());
}
}
}
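The switch from ones_like to full keeps the same result for real dtypes while also covering complex ones (per the new comment, taken here as the stated rationale). full builds the tensor from an explicit shape, scalar value, dtype, and place:

  // Sketch mirroring the new call: a tensor of ones with t's shape/dtype/place,
  // valid for complex dtypes as well (the scalar 1 becomes 1+0i there).
  auto ones = paddle::experimental::full(t.shape(), 1, t.dtype(), t.inner_place());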
16 changes: 0 additions & 16 deletions paddle/fluid/eager/utils.cc
@@ -364,22 +364,6 @@ paddle::experimental::Tensor EagerUtils::RecoverTensorWrapper(
return tw->recover(grad_node);
}

-paddle::optional<const paddle::experimental::Tensor&>
-EagerUtils::RecoverOptionalTensorWrapper(
-    TensorWrapper* tw, const std::shared_ptr<GradNodeBase>& grad_node) {
-  PADDLE_ENFORCE_NOT_NULL(
-      tw, phi::errors::InvalidArgument("TensorWrapper in "
-                                       "RecoverOptionalTensorWrapper function "
-                                       "should not be null"));
-  auto tmp = tw->recover(grad_node);
-
-  paddle::optional<const paddle::experimental::Tensor&> res{paddle::none};
-  if (tmp.initialized()) {
-    res = tmp;
-  }
-  return res;
-}

std::vector<paddle::experimental::Tensor> EagerUtils::RecoverTensorWrapper(
std::vector<TensorWrapper>* tw,
const std::shared_ptr<GradNodeBase>& grad_node) {
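A plausible reading of this deletion (the commit itself gives no rationale): res binds a reference to the local tmp and the optional is returned by value, so callers could end up holding a reference to a Tensor that has already been destroyed. The generated replacement instead keeps the owning tensor alive in the caller's scope; a minimal sketch, with x, tensor_wrapper, and grad_node standing in for the generated names:

  // x owns the recovered tensor and outlives every use of x_optional.
  auto x = egr::EagerUtils::RecoverTensorWrapper(&tensor_wrapper, grad_node);
  paddle::optional<const paddle::experimental::Tensor&> x_optional = paddle::none;
  if (x.impl()) {
    x_optional = paddle::make_optional<const paddle::experimental::Tensor&>(x);
  }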
3 changes: 0 additions & 3 deletions paddle/fluid/eager/utils.h
@@ -179,9 +179,6 @@ class EagerUtils {
static std::vector<paddle::experimental::Tensor> RecoverTensorWrapper(
std::vector<TensorWrapper>* tw,
const std::shared_ptr<GradNodeBase>& grad_node);
-  static paddle::optional<const paddle::experimental::Tensor&>
-  RecoverOptionalTensorWrapper(TensorWrapper* tw,
-                               const std::shared_ptr<GradNodeBase>& grad_node);

  // Intermediate: needed for now; remove this once we don't need the legacy
  // inner method
8 changes: 6 additions & 2 deletions paddle/fluid/framework/new_executor/interpretercore.cc
@@ -516,6 +516,12 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) {

void InterpreterCore::ExecuteInstructionList(
const std::vector<Instruction>& vec_instr) {
+  unfinished_op_numer_ = vec_instr.size();
+  if (unfinished_op_numer_ == 0) {
+    VLOG(4) << "No op to run, return";
+    return;
+  }

// NOTE(zhiqiu): get the prepared deps from std::future, and async prepare
// those for the next step
auto atomic_deps = async_work_queue_->AtomicDeps();
@@ -524,8 +530,6 @@
async_work_queue_->PrepareAtomicDeps(dependecy_count_);
async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo());

-  unfinished_op_numer_ = vec_instr.size();

exception_holder_.Clear();

for (size_t i = 0; i < dependecy_count_.size(); ++i) {
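The reordering makes the zero-op case a cheap early exit: the counter is set before any asynchronous preparation, and an empty instruction list returns before the atomic-dependency buffers are prepared or anything is dispatched. In isolation, the guard looks like:

  // Sketch of the guard now at the top of ExecuteInstructionList:
  // count first, return before any async setup runs for an empty list.
  unfinished_op_numer_ = vec_instr.size();
  if (unfinished_op_numer_ == 0) {
    VLOG(4) << "No op to run, return";
    return;
  }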
@@ -46,7 +46,7 @@ USE_OP_ITSELF(elementwise_add_grad);
USE_OP_ITSELF(matmul_grad);
USE_OP_ITSELF(square);
USE_OP_ITSELF(transpose2_grad);
-USE_OP(concat_grad);
+USE_OP_ITSELF(concat_grad);
USE_OP_ITSELF(elementwise_mul_grad);
USE_OP_ITSELF(sigmoid_grad);
USE_OP_ITSELF(tanh_grad);
@@ -67,6 +67,7 @@ PD_DECLARE_KERNEL(transpose, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(reshape, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(split, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(concat, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(concat_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_raw, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
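These two changes work as a pair once a kernel migrates from fluid to phi (a reading of the visible diff): USE_OP(concat_grad) would require a fluid kernel registration that this commit deletes, while USE_OP_ITSELF pulls in only the operator definition and PD_DECLARE_KERNEL makes the phi kernel visible to the linker:

  USE_OP_ITSELF(concat_grad);                       // operator definition only
  PD_DECLARE_KERNEL(concat_grad, GPU, ALL_LAYOUT);  // phi kernel declaration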
15 changes: 0 additions & 15 deletions paddle/fluid/operators/concat_op.cc
@@ -216,18 +216,3 @@ REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad,
ops::ConcatDoubleGradOpMaker<paddle::framework::OpDesc>,
ops::ConcatDoubleGradOpMaker<paddle::imperative::OpBase>,
ops::ConcatOpGradNoNeedBufferVarInferer);

-REGISTER_OP_CPU_KERNEL(
-    concat_grad,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, bool>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext,
-                          paddle::platform::float16>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, int>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext, uint8_t>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext,
-                          paddle::platform::complex<float>>,
-    ops::ConcatGradKernel<paddle::platform::CPUDeviceContext,
-                          paddle::platform::complex<double>>);
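No replacement appears in this file because the kernel now lives on the phi side. As a hedged sketch only (the kernel class name and dtype list are assumptions mirroring the deleted block, not part of this diff), a phi CPU registration typically has this shape:

  PD_REGISTER_KERNEL(concat_grad, CPU, ALL_LAYOUT, phi::ConcatGradKernel,
                     float, double, bool, int64_t, int, uint8_t,
                     phi::dtype::float16, phi::dtype::complex<float>,
                     phi::dtype::complex<double>) {}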
36 changes: 0 additions & 36 deletions paddle/fluid/operators/concat_op.cu.cc

This file was deleted.

56 changes: 0 additions & 56 deletions paddle/fluid/operators/concat_op.h
@@ -39,62 +39,6 @@ static inline int64_t ComputeAxis(int64_t axis, int64_t rank) {
}
return axis > 0 ? axis : 0;
}
-template <typename DeviceContext, typename T>
-class ConcatGradKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const {
-    auto* out_grad =
-        ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
-    auto ins = ctx.MultiInput<framework::LoDTensor>("X");
-    auto out_var_names = ctx.OutputNames(framework::GradVarName("X"));
-    auto outs =
-        ctx.MultiOutput<framework::LoDTensor>(framework::GradVarName("X"));
-
-    {
-      auto dx = outs;
-      auto x = ins;
-      for (size_t i = 0; i < dx.size(); ++i) {
-        if (dx[i] != nullptr) {
-          dx[i]->set_lod(x[i]->lod());
-        }
-      }
-    }
-    PADDLE_ENFORCE_NOT_NULL(ins[0],
-                            platform::errors::NotFound(
-                                "The first input tensor is not initalized."));
-
-    auto axis = ctx.Attr<int>("axis");
-    if (ctx.HasInput("AxisTensor")) {
-      auto* axis_tensor = ctx.Input<framework::Tensor>("AxisTensor");
-      axis = GetDataFromTensor<int>(axis_tensor)[0];
-    }
-    axis = ComputeAxis(static_cast<int64_t>(axis),
-                       static_cast<int64_t>(ins[0]->dims().size()));
-    // get output tensor that the name is not kEmptyVarName
-    std::vector<framework::Tensor*> outputs;
-    for (size_t j = 0; j < outs.size(); ++j) {
-      if (out_var_names[j] != framework::kEmptyVarName &&
-          outs[j]->numel() != 0UL) {
-        outs[j]->mutable_data<T>(ctx.GetPlace());
-        outputs.push_back(outs[j]);
-      } else {
-        outputs.push_back(nullptr);
-      }
-    }
-    auto& dev_ctx = ctx.template device_context<DeviceContext>();
-
-    // Sometimes direct copies will be faster, this maybe need deeply analysis.
-    if (axis == 0 && outs.size() < 10) {
-      std::vector<const framework::Tensor*> ref_shape;
-      ref_shape.insert(ref_shape.begin(), ins.begin(), ins.end());
-      StridedMemcpyWithAxis0<T>(dev_ctx, *out_grad, ref_shape, &outputs);
-    } else {
-      math::SplitFunctor<DeviceContext, T> split_functor;
-      split_functor(dev_ctx, *out_grad, ctx.MultiInput<framework::Tensor>("X"),
-                    static_cast<int>(axis), &outputs);
-    }
-  }
-};

} // namespace operators
} // namespace paddle
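The deleted kernel's axis == 0 fast path relies on concatenation along the leading axis laying the inputs out as contiguous chunks, so splitting the gradient back is a sequence of plain copies. A self-contained sketch of that idea (hypothetical helper, not Paddle API):

  #include <cstring>
  #include <vector>

  // Split a buffer concatenated along axis 0 back into per-input chunks:
  // each output is one contiguous slice, so a memcpy per chunk suffices.
  void SplitAxis0(const float* concat_grad,
                  const std::vector<size_t>& chunk_elems,
                  std::vector<float*>* outs) {
    size_t offset = 0;
    for (size_t i = 0; i < chunk_elems.size(); ++i) {
      if ((*outs)[i] != nullptr) {  // a null slot means this grad is unwanted
        std::memcpy((*outs)[i], concat_grad + offset,
                    chunk_elems[i] * sizeof(float));
      }
      offset += chunk_elems[i];
    }
  }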
33 changes: 8 additions & 25 deletions paddle/fluid/operators/meshgrid_op.cc
@@ -19,6 +19,10 @@
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"

#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/multiary.h"

namespace paddle {
namespace operators {

@@ -28,30 +32,6 @@ class MeshgridOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

- protected:
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE_GE(
-        ctx->Inputs("X").size(), 1UL,
-        platform::errors::InvalidArgument("Input(X) should not be empty."));
-    PADDLE_ENFORCE_GE(
-        ctx->Outputs("Out").size(), 1UL,
-        platform::errors::InvalidArgument("Output(Out) should not be empty."));
-
-    auto inputs_dims = ctx->GetInputsDim("X");
-    const size_t inputs_num = inputs_dims.size();
-    auto outs_names = ctx->Outputs("Out");
-    const size_t outputs_num = outs_names.size();
-
-    auto out_shape = std::vector<int>(inputs_num);
-
-    for (size_t i = 0; i < inputs_num; i++) {
-      out_shape[i] = inputs_dims[i][0];
-    }
-    auto out_dims = phi::make_ddim(std::vector<int>(out_shape));
-    std::vector<framework::DDim> outs_dims(outputs_num, out_dims);
-    ctx->SetOutputsDim("Out", outs_dims);
-  }

protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
@@ -142,7 +122,10 @@ class MeshgridGradOpMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle

namespace ops = paddle::operators;
+DECLARE_INFER_SHAPE_FUNCTOR(meshgrid, MeshgridInferShapeFunctor,
+                            PD_INFER_META(phi::MeshgridInferMeta));
 REGISTER_OPERATOR(meshgrid, ops::MeshgridOp, ops::MeshgridOpMaker,
                   ops::MeshgridGradOpMaker<paddle::framework::OpDesc>,
-                  ops::MeshgridGradOpMaker<paddle::imperative::OpBase>);
+                  ops::MeshgridGradOpMaker<paddle::imperative::OpBase>,
+                  MeshgridInferShapeFunctor);
REGISTER_OPERATOR(meshgrid_grad, ops::MeshgridGradOp);
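This is the recurring migration pattern in this commit: shape inference moves into a phi InferMeta function shared by all execution modes, and DECLARE_INFER_SHAPE_FUNCTOR adapts it to fluid's InferShape interface so the operator class drops its own override. The skeleton, with my_op and the MyOp* names as placeholders:

  // Placeholder names; this shows the pattern, not a real operator.
  DECLARE_INFER_SHAPE_FUNCTOR(my_op, MyOpInferShapeFunctor,
                              PD_INFER_META(phi::MyOpInferMeta));
  REGISTER_OPERATOR(my_op, ops::MyOp, ops::MyOpMaker, MyOpInferShapeFunctor);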
42 changes: 8 additions & 34 deletions paddle/fluid/operators/optimizers/adagrad_op.cc
@@ -19,6 +19,10 @@ limitations under the License. */
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/math_function.h"

#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/multiary.h"

namespace paddle {
namespace operators {

@@ -27,39 +31,6 @@ class AdagradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;

-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("Param"), "Input", "Param", "Adagrad");
-    OP_INOUT_CHECK(ctx->HasInput("Grad"), "Input", "Grad", "Adagrad");
-    OP_INOUT_CHECK(ctx->HasInput("Moment"), "Input", "Moment", "Adagrad");
-    OP_INOUT_CHECK(ctx->HasInput("LearningRate"), "Input", "LearningRate",
-                   "Adagrad");
-    OP_INOUT_CHECK(ctx->HasOutput("ParamOut"), "Output", "ParamOut", "Adagrad");
-    OP_INOUT_CHECK(ctx->HasOutput("MomentOut"), "Output", "MomentOut",
-                   "Adagrad");
-
-    auto lr_dims = ctx->GetInputDim("LearningRate");
-    PADDLE_ENFORCE_NE(phi::product(lr_dims), 0,
-                      platform::errors::InvalidArgument(
-                          "Maybe the Input variable LearningRate has not "
-                          "been initialized. You may need to confirm "
-                          "if you put exe.run(startup_program) "
-                          "after optimizer.minimize function."));
-    PADDLE_ENFORCE_EQ(phi::product(lr_dims), 1,
-                      platform::errors::InvalidArgument(
-                          "LearningRate should have one element"));
-    auto param_dims = ctx->GetInputDim("Param");
-    PADDLE_ENFORCE_EQ(
-        param_dims, ctx->GetInputDim("Grad"),
-        platform::errors::InvalidArgument("Param and Grad input of AdagradOp "
-                                          "should have the same dimension."));
-    PADDLE_ENFORCE_EQ(
-        param_dims, ctx->GetInputDim("Moment"),
-        platform::errors::InvalidArgument("Param and Moment input of AdagradOp "
-                                          "should have the same dimension."));
-
-    ctx->SetOutputDim("ParamOut", param_dims);
-    ctx->SetOutputDim("MomentOut", param_dims);
-  }
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
@@ -105,4 +76,7 @@ for numerical stability to avoid the division by zero error.
} // namespace paddle

namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(adagrad, ops::AdagradOp, ops::AdagradOpMaker);
+DECLARE_INFER_SHAPE_FUNCTOR(adagrad, AdagradInferShapeFunctor,
+                            PD_INFER_META(phi::AdagradInferMeta));
+REGISTER_OP_WITHOUT_GRADIENT(adagrad, ops::AdagradOp, ops::AdagradOpMaker,
+                             AdagradInferShapeFunctor);
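The checks deleted above now live in phi::AdagradInferMeta. As an illustration only (reconstructed from the deleted fluid code; the actual phi signature and messages may differ), the same validation in InferMeta style looks roughly like:

  // Illustrative reconstruction of the relocated checks; not the phi source.
  void AdagradInferMetaSketch(const phi::MetaTensor& param,
                              const phi::MetaTensor& grad,
                              const phi::MetaTensor& moment,
                              const phi::MetaTensor& learning_rate,
                              phi::MetaTensor* param_out,
                              phi::MetaTensor* moment_out) {
    PADDLE_ENFORCE_EQ(phi::product(learning_rate.dims()), 1,
                      phi::errors::InvalidArgument(
                          "LearningRate should have one element"));
    auto param_dims = param.dims();
    PADDLE_ENFORCE_EQ(param_dims, grad.dims(),
                      phi::errors::InvalidArgument(
                          "Param and Grad should have the same dimension."));
    PADDLE_ENFORCE_EQ(param_dims, moment.dims(),
                      phi::errors::InvalidArgument(
                          "Param and Moment should have the same dimension."));
    param_out->set_dims(param_dims);
    moment_out->set_dims(param_dims);
  }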

1 comment on commit a3aeed9

@paddle-bot-old


Congratulations! Your pull request passed all required CI checks. You can ask the reviewer(s) to approve and merge. 🎉
