[CMake Governance] Move DDim etc. to common (PaddlePaddle#59105)
* fix conflict

* exception

* kunlun ci

* WIN_CI

* setup.py

* bug_fix

* hash

* auto_code_gen_WIN_CI

* inference_CI

* use_common_enforce

* delete pir_enforce

* delete_error

* change_cmake

* conflict

* cmake

* mac_CI

* inference_copy

* delete_pybind_common

* paddle_test

* split ddim constructor

* cc_test

* use cinn::common

* copy_infer

* delete_layer_test_new

* bug_fix

* infer

* fix inference bug

* conflict

---------

Co-authored-by: winter-wang <1030748926@qq.com>
2 people authored and SigureMo committed Dec 5, 2023
1 parent a3be97c commit 528faed
Showing 1,819 changed files with 8,289 additions and 8,899 deletions.
2 changes: 1 addition & 1 deletion cmake/generic.cmake
@@ -622,7 +622,7 @@ function(paddle_test_build TARGET_NAME)
if(APPLE)
target_link_libraries(
${TARGET_NAME}
"-Wl,-rpath,$<TARGET_FILE_DIR:${paddle_lib}> -Wl,-rpath,$<TARGET_FILE_DIR:phi> -Wl,-rpath,$<TARGET_FILE_DIR:pir>"
"-Wl,-rpath,$<TARGET_FILE_DIR:${paddle_lib}> -Wl,-rpath,$<TARGET_FILE_DIR:phi> -Wl,-rpath,$<TARGET_FILE_DIR:pir> -Wl,-rpath,$<TARGET_FILE_DIR:common>"
)
endif()
common_link(${TARGET_NAME})
44 changes: 15 additions & 29 deletions cmake/inference_lib.cmake
@@ -286,6 +286,10 @@ copy(
include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io)

# copy api headers for phi & custom op
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/common/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/common/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/ext/*.h
@@ -304,8 +308,17 @@ copy(
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/common/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/macros.h
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/enforce.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/string/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/string/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/string/tinyformat/tinyformat.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/string/tinyformat/
)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/visit_type.h
@@ -320,40 +333,13 @@ copy(
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/any.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/optional.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/none.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flat_hash_map.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flags.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/test_macros.h
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/)

if(NOT WITH_GFLAGS)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flags_native.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
endif()

# the include path of phi needs to be changed to adapt to inference api path
add_custom_command(
TARGET inference_lib_dist
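
For context, the net effect of the inference_lib.cmake changes above is that the installed inference package now ships the paddle/common headers, phi/core/enforce.h in place of macros.h, the paddle/utils/string headers, and all of paddle/utils/*.h through a single wildcard rather than per-file copies. Below is a minimal sketch of what a custom-op or inference consumer can then include, relative to <install_dir>/paddle/include; the individual header names ddim.h and string_helper.h are assumptions for illustration and are not confirmed by this diff.

// Sketch of includes available to an inference/custom-op build after this
// change. Headers marked "assumed" are illustrative names only.
#include "paddle/common/ddim.h"                 // assumed name; copied via paddle/common/*.h
#include "paddle/phi/core/enforce.h"            // now copied instead of phi/core/macros.h
#include "paddle/utils/string/string_helper.h"  // assumed name; copied via paddle/utils/string/*.h
#include "paddle/utils/optional.h"              // still available through the paddle/utils/*.h wildcard
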
18 changes: 10 additions & 8 deletions paddle/cinn/api/tensor_node.h
@@ -52,9 +52,10 @@ class TensorNode final {

class ConsumerOpListView {
public:
ConsumerOpListView(const std::set<common::Shared<common::GraphEdge>,
common::GraphEdgeCompare>& edges,
const hlir::framework::Graph* graph)
ConsumerOpListView(
const std::set<cinn::common::Shared<cinn::common::GraphEdge>,
cinn::common::GraphEdgeCompare>& edges,
const hlir::framework::Graph* graph)
: edges_(edges), graph_(graph) {}

ConsumerOpListView(const ConsumerOpListView& other) = delete;
@@ -64,8 +65,8 @@ class TensorNode final {

class Iterator {
public:
Iterator(std::set<common::Shared<common::GraphEdge>,
common::GraphEdgeCompare>::const_iterator it,
Iterator(std::set<cinn::common::Shared<cinn::common::GraphEdge>,
cinn::common::GraphEdgeCompare>::const_iterator it,
const hlir::framework::Graph* graph)
: iter_(it), graph_(graph) {}

@@ -89,8 +90,8 @@ class TensorNode final {
OpNode operator*() const;

private:
std::set<common::Shared<common::GraphEdge>,
common::GraphEdgeCompare>::const_iterator iter_;
std::set<cinn::common::Shared<cinn::common::GraphEdge>,
cinn::common::GraphEdgeCompare>::const_iterator iter_;
const hlir::framework::Graph* graph_;
};

@@ -101,7 +102,8 @@ class TensorNode final {
Iterator end() const { return Iterator(this->edges_.end(), graph_); }

private:
const std::set<Shared<common::GraphEdge>, common::GraphEdgeCompare>& edges_;
const std::set<Shared<cinn::common::GraphEdge>,
cinn::common::GraphEdgeCompare>& edges_;
const hlir::framework::Graph* graph_;
};

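
The tensor_node.h hunks above are representative of most of the C++ churn in this commit: every unqualified common:: inside namespace cinn is spelled out as cinn::common::. The sketch below (not repository code) illustrates why this matters once a top-level ::common namespace exists: inside namespace cinn, the qualifier common:: resolves to cinn::common and stops there, so names that now live only in ::common would not be found through it. The placement of DDim in ::common is inferred from the commit title.

// Illustrative sketch (not repository code) of the lookup issue the
// cinn::common:: qualification avoids.
namespace common {      // new top-level namespace backing paddle/common
struct DDim {};         // DDim etc. assumed to live here per the commit title
}  // namespace common

namespace cinn {
namespace common {      // CINN's pre-existing nested namespace
struct GraphEdge {};
}  // namespace common

inline void Demo() {
  // Inside namespace cinn, `common::` finds cinn::common and stops, so a
  // name that only exists in ::common is not visible through it:
  //   common::DDim d;          // would not compile: no DDim in cinn::common
  ::common::DDim d;             // root-qualified form is unambiguous
  cinn::common::GraphEdge e;    // and CINN's own types are spelled out too
  (void)d;
  (void)e;
}
}  // namespace cinn
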
6 changes: 3 additions & 3 deletions paddle/cinn/ast_gen_ius/ast_gen.cc
@@ -90,7 +90,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
std::vector<ir::Expr> iter_values;
// reduce body and reduce init schedule block should have different objects
// for same axis so we re-create objects
std::vector<Var> axis_vars = common::GenDefaultAxis(axis_len);
std::vector<Var> axis_vars = cinn::common::GenDefaultAxis(axis_len);
for (int i = 0; i < shape.size(); ++i) {
block_vars.push_back(Var(Expr(0),
shape[i],
@@ -118,7 +118,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
std::vector<ir::Expr> reduce_iter_values;
// reduce body and reduce init schedule block should have different objects
// for same axis so we re-create objects
std::vector<Var> reduce_axis_vars = common::GenDefaultAxis(axis_len);
std::vector<Var> reduce_axis_vars = cinn::common::GenDefaultAxis(axis_len);
for (int i = 0; i < shape.size(); ++i) {
reduce_block_vars.push_back(Var(Expr(0),
shape[i],
@@ -182,7 +182,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
// create schedule block itervars, i0,i1...
std::vector<ir::Var> block_vars;
std::vector<ir::Expr> iter_values;
std::vector<Var> axis_vars = common::GenDefaultAxis(axis_len);
std::vector<Var> axis_vars = cinn::common::GenDefaultAxis(axis_len);
for (int i = 0; i < shape.size(); ++i) {
block_vars.push_back(Var(
Expr(0), shape[i], cinn::UniqName("i" + std::to_string(i)), false));
4 changes: 2 additions & 2 deletions paddle/cinn/auto_schedule/analysis/analyze_ir.cc
@@ -144,7 +144,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) {
return total_unused_iter_vars >= 1;
}

ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
ir::LoweredFunc UpdateFuncWithNewBody(const cinn::common::Target& target,
const ir::LoweredFunc& old_func,
ir::Expr& body) { // NOLINT
ir::ModuleExpr mod_expr(std::vector<ir::Expr>({body}));
@@ -179,7 +179,7 @@ ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
ir::LoweredFunc new_func = ir::_LoweredFunc_::Make(
old_func->name, old_func->args, updated_body, new_temp_bufs);
#ifdef CINN_WITH_CUDA
if (target == common::DefaultNVGPUTarget()) {
if (target == cinn::common::DefaultNVGPUTarget()) {
new_func->PrepareCudaAxisInfoFromBody();
}
#endif
2 changes: 1 addition & 1 deletion paddle/cinn/auto_schedule/analysis/analyze_ir.h
@@ -44,7 +44,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize);
/**
* Update a LoweredFunc by regenerating related fields with a new function body
*/
ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
ir::LoweredFunc UpdateFuncWithNewBody(const cinn::common::Target& target,
const ir::LoweredFunc& old_func,
ir::Expr& body); // NOLINT

12 changes: 6 additions & 6 deletions paddle/cinn/auto_schedule/analysis/analyze_ir_test.cc
@@ -38,9 +38,9 @@ namespace auto_schedule {
TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_SimpleAssign) {
Context::Global().ResetNameId();
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
Target target = cinn::common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
Target target = cinn::common::DefaultHostTarget();
#endif

ir::Expr M(32);
@@ -102,9 +102,9 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_SimpleAssign) {
TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_AddDiffShape) {
Context::Global().ResetNameId();
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
Target target = cinn::common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
Target target = cinn::common::DefaultHostTarget();
#endif

ir::Expr M(32);
@@ -158,9 +158,9 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_AddDiffShape) {
TEST(AnalyzeIr, ContainsNodeType) {
Context::Global().ResetNameId();
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
Target target = cinn::common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
Target target = cinn::common::DefaultHostTarget();
#endif

ir::Expr M(32);
4 changes: 2 additions & 2 deletions paddle/cinn/auto_schedule/auto_tuner.cc
@@ -38,7 +38,7 @@
namespace cinn {
namespace auto_schedule {

AutoTuner::AutoTuner(const common::Target& target,
AutoTuner::AutoTuner(const cinn::common::Target& target,
hlir::framework::Graph* graph)
: target_(target), graph_(graph) {}

@@ -58,7 +58,7 @@ void AutoTuner::Initialize(const Config& config,
tasks_ = task_creator.CreateTuneTaskOpLevel(graph_);

const auto& dtype_dict =
graph_->GetAttrs<absl::flat_hash_map<std::string, common::Type>>(
graph_->GetAttrs<absl::flat_hash_map<std::string, cinn::common::Type>>(
"inferdtype");
const auto& shape_dict = graph_->GetAttrs<
absl::flat_hash_map<std::string, hlir::framework::shape_t>>("infershape");
4 changes: 2 additions & 2 deletions paddle/cinn/auto_schedule/auto_tuner.h
@@ -46,7 +46,7 @@ class AutoTuner {
DatabaseConfig database_config;
};

AutoTuner(const common::Target& target, hlir::framework::Graph* graph);
AutoTuner(const cinn::common::Target& target, hlir::framework::Graph* graph);

// Initialize tuner with specific config and auxiliary objects.
void Initialize(const Config& config,
@@ -56,7 +56,7 @@ class AutoTuner {
TuningResult Tune(const TuningOptions& options);

private:
const common::Target& target_;
const cinn::common::Target& target_;
hlir::framework::Graph* graph_;
std::unique_ptr<hlir::framework::OpLowerer<GroupPtr>> op_lowerer_;

4 changes: 2 additions & 2 deletions paddle/cinn/auto_schedule/auto_tuner_test.cc
@@ -48,9 +48,9 @@ using ::cinn::hlir::framework::Scope;
class TestAutoTuner : public ::testing::Test {
public:
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
Target target = cinn::common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
Target target = cinn::common::DefaultHostTarget();
#endif

std::shared_ptr<Graph> graph;
6 changes: 3 additions & 3 deletions paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc
@@ -29,7 +29,7 @@ namespace cinn {
namespace auto_schedule {

float ExprCostModel::Predict(const ir::ModuleExpr& sample,
const common::Target& target) const {
const cinn::common::Target& target) const {
if (trained_times_.load() == 0) {
return SearchState::NOT_INIT_COST;
}
@@ -42,7 +42,7 @@

void ExprCostModel::Train(const std::vector<const ir::ModuleExpr*>& samples,
const std::vector<float>& labels,
const common::Target& target) {
const cinn::common::Target& target) {
trained_times_.store(1);
size_t total_size = samples.size();
CHECK_EQ(total_size, labels.size())
@@ -60,7 +60,7 @@ void ExprCostModel::Train(const std::vector<const ir::ModuleExpr*>& samples,

void ExprCostModel::Update(const std::vector<const ir::ModuleExpr*>& samples,
const std::vector<float>& labels,
const common::Target& target) {
const cinn::common::Target& target) {
++trained_times_;
size_t total_size = samples.size();
CHECK_EQ(total_size, labels.size())
6 changes: 3 additions & 3 deletions paddle/cinn/auto_schedule/cost_model/expr_cost_model.h
@@ -30,13 +30,13 @@ namespace auto_schedule {
class ExprCostModel : public XgbCostModel {
public:
virtual float Predict(const ir::ModuleExpr& sample,
const common::Target& target) const;
const cinn::common::Target& target) const;
void Train(const std::vector<const ir::ModuleExpr*>& samples,
const std::vector<float>& labels,
const common::Target& target);
const cinn::common::Target& target);
void Update(const std::vector<const ir::ModuleExpr*>& samples,
const std::vector<float>& labels,
const common::Target& target);
const cinn::common::Target& target);

private:
std::atomic<int> trained_times_{0};
6 changes: 3 additions & 3 deletions paddle/cinn/auto_schedule/cost_model/feature.cc
@@ -37,12 +37,12 @@ namespace cinn {
namespace auto_schedule {

Feature::Feature()
: target_(common::UnkTarget()),
: target_(cinn::common::UnkTarget()),
stack_encoded_feature_(1), // initialize a LoopBlockFeature as root block
current_loop_block_index_(0),
parent_indices_(1, -1) {}

Feature::Feature(const common::Target& target)
Feature::Feature(const cinn::common::Target& target)
: target_(target),
stack_encoded_feature_(1), // initialize a LoopBlockFeature as root block
current_loop_block_index_(0),
@@ -52,7 +52,7 @@ std::vector<float> Feature::ToFixedSizeVector() {
std::vector<float> ret(LoopBlockFeature::kTotalSize + 1,
0); // LoopBlockFeature::kTotalSize plus 1 for target

if (target_ == common::DefaultNVGPUTarget()) {
if (target_ == cinn::common::DefaultNVGPUTarget()) {
ret[0] = 1;
} // else 0 for other cases

4 changes: 2 additions & 2 deletions paddle/cinn/auto_schedule/cost_model/feature.h
@@ -134,7 +134,7 @@ class Feature {
public:
Feature();

explicit Feature(const common::Target& target);
explicit Feature(const cinn::common::Target& target);

// Convert the various-length loop block features to fixed-size vector
std::vector<float> ToFixedSizeVector();
@@ -182,7 +182,7 @@ class Feature {
int current_loop_block_index_;
std::vector<int> parent_indices_;

common::Target target_;
cinn::common::Target target_;
};

} // namespace auto_schedule
16 changes: 9 additions & 7 deletions paddle/cinn/auto_schedule/cost_model/feature_extractor.cc
@@ -50,7 +50,7 @@ void FeatureExtractor::Visit(const Expr *x) {
}

Feature FeatureExtractor::Extract(const ir::ModuleExpr &mod_expr,
const common::Target &target) {
const cinn::common::Target &target) {
feature_ = Feature(target);
for (const ir::Expr &e : mod_expr.GetExprs()) {
Visit(&e);
@@ -91,8 +91,9 @@ NotVisitExprFields(_Tensor_)

#define VisitForDtypePattern(NodeType, member) \
void FeatureExtractor::Visit(const NodeType *x) { \
if (x->type() == common::F32() || x->type() == common::F16() || \
x->type() == common::F64()) { \
if (x->type() == cinn::common::F32() || \
x->type() == cinn::common::F16() || \
x->type() == cinn::common::F64()) { \
feature_.CurrentLoopBlock().float_##member += x->type().lanes(); \
} else { \
feature_.CurrentLoopBlock().int_##member += x->type().lanes(); \
@@ -125,8 +126,9 @@ VisitForDtypePattern(Let, other_call);

#define VisitForMultiOperandsDtypePattern(NodeType, member) \
void FeatureExtractor::Visit(const NodeType *x) { \
if (x->type() == common::F32() || x->type() == common::F16() || \
x->type() == common::F64()) { \
if (x->type() == cinn::common::F32() || \
x->type() == cinn::common::F16() || \
x->type() == cinn::common::F64()) { \
feature_.CurrentLoopBlock().float_##member += \
(x->operands().size() - 1); \
} else { \
@@ -231,8 +233,8 @@ void FeatureExtractor::Visit(const PolyFor *x) {
/* Visit for Reduce and Broadcast */

void FeatureExtractor::Visit(const Reduce *x) {
if (x->type() == common::F32() || x->type() == common::F16() ||
x->type() == common::F64()) {
if (x->type() == cinn::common::F32() || x->type() == cinn::common::F16() ||
x->type() == cinn::common::F64()) {
switch (x->reduce_type) {
case Reduce::ReduceType::kSum:
feature_.CurrentLoopBlock().float_reduce_sum_or_sub +=
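
To make the macro hunks above concrete, the following is roughly what VisitForDtypePattern(Let, other_call) — an instantiation visible earlier in this diff — expands to after the change. The expansion is reconstructed from the visible macro body only; any statements past the end of the shown hunk are omitted.

// Approximate expansion of VisitForDtypePattern(Let, other_call) after this
// change, reconstructed from the macro body shown above (the macro may
// contain further statements beyond the visible hunk).
void FeatureExtractor::Visit(const Let *x) {
  if (x->type() == cinn::common::F32() ||
      x->type() == cinn::common::F16() ||
      x->type() == cinn::common::F64()) {
    // floating-point dtypes: count one event per SIMD lane
    feature_.CurrentLoopBlock().float_other_call += x->type().lanes();
  } else {
    // all other dtypes are counted as integer-typed events
    feature_.CurrentLoopBlock().int_other_call += x->type().lanes();
  }
}
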