Commit 05af20a

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into pass_infra_part_5
yuanlehome committed Jun 13, 2023
2 parents f8df1a5 + 6bbe92a commit 05af20a
Showing 549 changed files with 2,580 additions and 2,141 deletions.
1 change: 0 additions & 1 deletion .cmake-format.py
@@ -16,7 +16,6 @@
# Options affecting formatting.
# -----------------------------
with section("format"):

# How wide to allow formatted cmake files
line_width = 80

2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -53,7 +53,7 @@ repos:
)$
# For Python files
- repo: https://github.com/psf/black.git
rev: 22.8.0
rev: 23.3.0
hooks:
- id: black
files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -62,6 +62,7 @@ option(WITH_IPU "Compile PaddlePaddle with Graphcore IPU" OFF)
option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" OFF)
option(WITH_CUSPARSELT "Compile PaddlePaddle with CUSPARSELT" OFF)
option(WITH_SETUP_INSTALL "Compile PaddlePaddle with setup.py" OFF)
option(WITH_SHARED_PHI "Compile PaddlePaddle with SHARED LIB of PHI" OFF)
# Note(zhouwei): It uses the options above, so it is placed here
include(init)
include(generic) # simplify cmake module
@@ -111,7 +112,7 @@ endif()

if(WIN32)
option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)

message("Build static library of PHI")
set(CMAKE_SUPPRESS_REGENERATION ON)
set(CMAKE_STATIC_LIBRARY_PREFIX lib)

2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@ English | [简体中文](./README_cn.md) | [日本語](./README_ja.md)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
[![Twitter](https://img.shields.io/badge/Twitter-1ca0f1.svg?logo=twitter&logoColor=white)](https://twitter.com/PaddlePaddle_)
[![Twitter](https://img.shields.io/badge/Twitter-1ca0f1.svg?logo=twitter&logoColor=white)](https://twitter.com/PaddlePaddle)

Welcome to the PaddlePaddle GitHub.

14 changes: 1 addition & 13 deletions cmake/configure.cmake
@@ -236,18 +236,6 @@ if(WITH_CUDNN_FRONTEND)
add_definitions(-DPADDLE_WITH_CUDNN_FRONTEND)
endif()

set(WITH_PHI_SHARED
ON
CACHE BOOL "" FORCE)
if(WIN32
OR WITH_ROCM
OR WITH_XPU_KP
OR ON_INFER)
set(WITH_PHI_SHARED
OFF
CACHE BOOL "" FORCE)
endif()

if(WITH_PHI_SHARED)
if(WITH_SHARED_PHI)
add_definitions(-DPHI_SHARED)
endif()
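
Note: the deleted block above used to force WITH_PHI_SHARED off on Windows, ROCm, XPU-KP, and inference builds; after this commit the choice is driven solely by the user-facing WITH_SHARED_PHI option added in CMakeLists.txt. A minimal sketch of how a downstream build script could reinstate the old platform defaults, if desired (illustrative only, not part of this commit):

# Illustrative guard mirroring the removed logic: keep PHI static on
# platforms where the shared build was previously force-disabled.
if(WIN32
   OR WITH_ROCM
   OR WITH_XPU_KP
   OR ON_INFER)
  set(WITH_SHARED_PHI
      OFF
      CACHE BOOL "" FORCE)
endif()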
33 changes: 27 additions & 6 deletions cmake/external/rocksdb.cmake
@@ -15,7 +15,8 @@
include(ExternalProject)

# find_package(jemalloc REQUIRED)

set(ROCKSDB_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/rocksdb)
set(ROCKSDB_TAG 6.19.fb)
set(JEMALLOC_INCLUDE_DIR ${THIRD_PARTY_PATH}/install/jemalloc/include)
set(JEMALLOC_LIBRARIES
${THIRD_PARTY_PATH}/install/jemalloc/lib/libjemalloc_pic.a)
@@ -41,16 +42,37 @@ set(ROCKSDB_CMAKE_CXX_FLAGS
set(ROCKSDB_CMAKE_C_FLAGS
"${ROCKSDB_COMMON_FLAGS} ${ROCKSDB_FLAGS} -DROCKSDB_LIBAIO_PRESENT -fPIC -I${JEMALLOC_INCLUDE_DIR}"
)

include_directories(${ROCKSDB_INCLUDE_DIR})

file(GLOB ROCKSDB_SOURCE_FILE_LIST ${ROCKSDB_SOURCE_DIR})
list(LENGTH ROCKSDB_SOURCE_FILE_LIST RES_LEN)
if(RES_LEN EQUAL 0)
execute_process(
COMMAND ${GIT_EXECUTABLE} clone -b ${ROCKSDB_TAG}
"https://github.com/Thunderbrook/rocksdb" ${ROCKSDB_SOURCE_DIR})
else()
# check git tag
execute_process(
COMMAND ${GIT_EXECUTABLE} describe --abbrev=6 --always --tags
OUTPUT_VARIABLE VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET
WORKING_DIRECTORY ${ROCKSDB_SOURCE_DIR})
if(NOT ${VERSION} STREQUAL ${ROCKSDB_TAG})
message(
WARNING "rocksdb version is not ${VERSION}, checkout to ${ROCKSDB_TAG}")
execute_process(COMMAND ${GIT_EXECUTABLE} checkout ${ROCKSDB_TAG}
WORKING_DIRECTORY ${ROCKSDB_SOURCE_DIR})
endif()
endif()
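# Note: the branch above assumes third_party/rocksdb is either empty (then
# it is freshly cloned) or a git checkout whose `git describe` output
# matches ROCKSDB_TAG; a mismatching checkout is switched to the pinned tag.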

set(CMAKE_CXX_LINK_EXECUTABLE
"${CMAKE_CXX_LINK_EXECUTABLE} -pthread -Wl,--no-as-needed -ldl -lrt -lz")
ExternalProject_Add(
extern_rocksdb
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${ROCKSDB_PREFIX_DIR}
GIT_REPOSITORY "https://github.com/Thunderbrook/rocksdb"
GIT_TAG 6.19.fb
SOURCE_DIR ${ROCKSDB_SOURCE_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
@@ -66,9 +88,8 @@ ExternalProject_Add(
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
INSTALL_COMMAND
mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ && cp
${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES}
&& cp -r ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/include
${ROCKSDB_INSTALL_DIR}/
${ROCKSDB_SOURCE_DIR}/librocksdb.a ${ROCKSDB_LIBRARIES} && cp -r
${ROCKSDB_SOURCE_DIR}/include ${ROCKSDB_INSTALL_DIR}/
BUILD_IN_SOURCE 1
BUILD_BYPRODUCTS ${ROCKSDB_LIBRARIES})

2 changes: 1 addition & 1 deletion cmake/inference_lib.cmake
@@ -269,7 +269,7 @@ else()
SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
if(WITH_PHI_SHARED)
if(WITH_SHARED_PHI)
set(paddle_phi_lib ${PADDLE_BINARY_DIR}/paddle/phi/libphi.*)
copy(
inference_lib_dist
14 changes: 13 additions & 1 deletion cmake/third_party.cmake
@@ -30,7 +30,19 @@ set(third_party_deps)
include(ProcessorCount)
ProcessorCount(NPROC)
if(NOT WITH_SETUP_INSTALL)
execute_process(COMMAND git submodule update --init --recursive)
# NOTE(risemeup1): Initialize any submodules.
message(
STATUS
"Check submodules of paddle, and run 'git submodule update --init --recursive'"
)
execute_process(
COMMAND git submodule update --init --recursive
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
RESULT_VARIABLE result_var)
if(NOT result_var EQUAL 0)
message(FATAL_ERROR "Failed to get submodules, please check your network!")
endif()

endif()
# Cache function to avoid repeatedly downloading third_party code.
# This function has 4 parameters: URL / REPOSITORY / TAG / DIR:
2 changes: 1 addition & 1 deletion paddle/fluid/eager/auto_code_generator/CMakeLists.txt
@@ -52,7 +52,7 @@ if(WIN32)
set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
endif()

if(WITH_PHI_SHARED)
if(WITH_SHARED_PHI)
message("Copied phi.dll for Eager AutoCodeGen")
add_custom_command(
OUTPUT ${eager_generator_path}/phi.dll
4 changes: 0 additions & 4 deletions paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
@@ -547,7 +547,6 @@ def GenerateCoreOpInfoDeclaration():


def GenerateCoreOpInfoDefinition():

op_args_info_list = []
for op_name, arg_list in core_ops_args_info.items():
arg_str = ",".join(["\"" + v + "\"" for v in arg_list])
@@ -803,7 +802,6 @@ def CollectBackwardInfo(self):
self.backward_returns_list = backward_returns_list_new

def CollectForwardInfoFromBackwardContents(self):

backward_forward_str = self.backward_forward_str

(
@@ -1910,7 +1908,6 @@ def GenerateHigherOrderNodeCreationCode(self):
self.grad_api_contents["backward_op"] in prim_white_list
or is_invoke_forward_api
):

next_grad_node_creation_str = f"""
if (!paddle::prim::PrimCommonUtils::IsEagerPrimEnabled()) {{
if(trace_backward) {{
@@ -2274,7 +2271,6 @@ def GenerateNodeDefinition(
egr::EagerUtils::HandleViewBetweenInputAndOutput({inplace_grad_input_str}, api_output_{out_index});
}}"""
if IsPlainTensorType(ttype):

if (
backward_inplace_map
and name in backward_inplace_map.values()
@@ -604,7 +604,6 @@ def GenerateCoreOpsInfoMap():


def GeneratePythonCWrappers(python_c_function_str, python_c_function_reg_str):

(
core_ops_infos_definition,
core_ops_infos_registry,
166 changes: 118 additions & 48 deletions paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc
@@ -24,57 +24,129 @@ class LayerNormOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
VLOG(4) << "convert a layer_norm op to tensorrt layer_norm plugin";
VLOG(4) << "convert a layer_norm op with dynamic shape to Normalization "
"layer or Static shape tensorrt layer_norm plugin";
framework::OpDesc op_desc(op, nullptr);

auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Bias_v = scope.FindVar(op_desc.Input("Bias").front());
auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front());
const int begin_norm_axis =
op_desc.HasAttr("begin_norm_axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("begin_norm_axis"))
: 1;
auto* X = engine_->GetITensor(op_desc.Input("X")[0]);
auto rank = X->getDimensions().nbDims;
std::string output_name = op_desc.Output("Y")[0];
const float eps = op_desc.HasAttr("epsilon")
? PADDLE_GET_CONST(float, op_desc.GetAttr("epsilon"))
: 1e-5f;
PADDLE_ENFORCE_NOT_NULL(
Bias_v,
platform::errors::InvalidArgument(
"Input(Bias) of layer_norm should not be null."));
PADDLE_ENFORCE_NOT_NULL(
Scale_v,
platform::errors::InvalidArgument(
"Input(Scale) of layer_norm should not be null."));
if (engine_->with_dynamic_shape()) {
auto* Scale = engine_->GetITensor(op_desc.Input("Scale")[0]);
auto* Bias = engine_->GetITensor(op_desc.Input("Bias")[0]);
int32_t begin_axis =
op_desc.HasAttr("begin_norm_axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("begin_norm_axis"))
: 1;
uint32_t axisMask{0};
for (int32_t i = begin_axis; i < rank; i++) {
axisMask |= 1 << i;
}
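// e.g. rank = 4, begin_axis = 2 -> axisMask = 0b1100: reduce over the two
// innermost axes, matching layer_norm's begin_norm_axis semantics.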
std::vector<int32_t> indice_dim_vec(rank);
std::iota(indice_dim_vec.begin(), indice_dim_vec.end(), 0);
auto p = std::remove_if(indice_dim_vec.begin(),
indice_dim_vec.end(),
[begin_axis](int x) { return x < begin_axis; });
indice_dim_vec.resize(p - indice_dim_vec.begin());
auto newDims = Gather(Shape(X), indice_dim_vec);
auto newrank = indice_dim_vec.size();
auto* one_rank_tensor =
Add1DConstantLayer(std::vector<int32_t>(rank - newrank, 1));
std::vector<nvinfer1::ITensor*> itensors;
itensors.push_back(one_rank_tensor);
itensors.push_back(newDims);
nvinfer1::ITensor* concat_shape_tensor = Concat(itensors);
auto Bias_reshape = Reshape(
Bias,
concat_shape_tensor,
("layer_norm Bias: reshape: (Output(" + output_name + ")").c_str());
auto Scale_reshape = Reshape(
Scale,
concat_shape_tensor,
("layer_norm Scale: reshape: (Output(" + output_name + ")").c_str());
#if IS_TRT_VERSION_GE(8600)
auto layer = TRT_ENGINE_ADD_LAYER(
engine_, Normalization, *X, *Scale_reshape, *Bias_reshape, axisMask);
layer->setEpsilon(eps);
RreplenishLayerAndOutput(layer, "layer_norm", {output_name}, test_mode);
#else
// μ
auto miu_layer = TRT_ENGINE_ADD_LAYER(
engine_, Reduce, *X, nvinfer1::ReduceOperation::kAVG, axisMask, true);
miu_layer->setName((output_name + "_miu").c_str());
auto miu_output = miu_layer->getOutput(0);
// x−μ
auto xsubmiu_output = Sub(X, miu_output);
// σ
// pow(x−μ,2)
auto pow_tensor = Add1DConstantLayer(static_cast<float>(2));
auto xsubmiu_pow_out = Pow(
xsubmiu_output,
BroadcastTensors(xsubmiu_output,
pow_tensor,
("layer_norm_pow: reshape_for_broadcast: (Output(" +
output_name + ")")
.c_str()));
// mean_var
auto mean_var_layer =
TRT_ENGINE_ADD_LAYER(engine_,
Reduce,
*xsubmiu_pow_out,
nvinfer1::ReduceOperation::kAVG,
axisMask,
true);
mean_var_layer->setName((output_name + "_sigma").c_str());
auto mean_var_out = mean_var_layer->getOutput(0);
// sigma
auto eps_tensor = Add1DConstantLayer(eps);
auto sum_out = Sum(
mean_var_out,
BroadcastTensors(mean_var_out,
eps_tensor,
("layer_norm_eps: reshape_for_broadcast: (Output(" +
output_name + ")")
.c_str()));
auto sigma_layer = TRT_ENGINE_ADD_LAYER(
engine_, Unary, *sum_out, nvinfer1::UnaryOperation::kSQRT);
auto sigma_output = sigma_layer->getOutput(0);
// (x−μ)/σ
auto div_out = Div(xsubmiu_output, sigma_output);
// ((x−μ)/σ)*g+b
auto scale_out = Prod(div_out, Scale_reshape);
auto layer = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*scale_out,
*Bias_reshape,
nvinfer1::ElementWiseOperation::kSUM);
RreplenishLayerAndOutput(layer, "layer_norm", {output_name}, test_mode);
#endif
} else {
auto* Bias_v = scope.FindVar(op_desc.Input("Bias")[0]);
auto* Scale_v = scope.FindVar(op_desc.Input("Scale")[0]);
PADDLE_ENFORCE_NOT_NULL(
Bias_v,
platform::errors::InvalidArgument(
"Input(Bias) of layer_norm should not be null."));
PADDLE_ENFORCE_NOT_NULL(
Scale_v,
platform::errors::InvalidArgument(
"Input(Scale) of layer_norm should not be null."));
auto* Bias_t = Bias_v->GetMutable<phi::DenseTensor>();
auto* Scale_t = Scale_v->GetMutable<phi::DenseTensor>();

auto* Bias_t = Bias_v->GetMutable<phi::DenseTensor>();
auto* Scale_t = Scale_v->GetMutable<phi::DenseTensor>();
auto bias_weight =
engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *Bias_t);
auto scale_weight =
engine_->GetFp32TrtWeight(op_desc.Input("Scale").front(), *Scale_t);

auto bias_weight =
engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *Bias_t);
auto scale_weight =
engine_->GetFp32TrtWeight(op_desc.Input("Scale").front(), *Scale_t);
const int begin_norm_axis =
op_desc.HasAttr("begin_norm_axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("begin_norm_axis"))
: 1;

nvinfer1::ILayer* layernorm_layer = nullptr;
if (engine_->with_dynamic_shape()) {
// For dynamic shape,
// the shape of mean and variance will be determined in configurePlugin.
std::vector<int64_t> mean_shape{1};
std::vector<int64_t> variance_shape{1};
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
plugin::LayerNormPluginDynamic* plugin =
new plugin::LayerNormPluginDynamic(
static_cast<const float*>(bias_weight.get().values),
bias_weight.get().count,
static_cast<const float*>(scale_weight.get().values),
scale_weight.get().count,
begin_norm_axis,
eps,
mean_shape,
variance_shape,
with_fp16);
layernorm_layer = engine_->AddDynamicPlugin(&X, 1, plugin);
} else {
int statis_num = 1;
for (int i = 1; i < begin_norm_axis; i++) {
statis_num *= X->getDimensions().d[i];
@@ -93,13 +165,11 @@ class LayerNormOpConverter : public OpConverter {
mean_shape,
variance_shape,
with_fp16);
layernorm_layer = engine_->AddPlugin(
auto* layernorm_layer = engine_->AddPlugin(
&X, 1, reinterpret_cast<plugin::PluginTensorRT*>(plugin));
RreplenishLayerAndOutput(
layernorm_layer, "layer_norm", {output_name}, test_mode);
}

auto output_name = op_desc.Output("Y").front();
RreplenishLayerAndOutput(
layernorm_layer, "layer_norm", {output_name}, test_mode);
}
};
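
For reference, the pre-8.6 fallback above assembles layer normalization from elementary TensorRT layers; the quantity it computes is the standard formula (restated here for readability, not part of the diff), with all means taken over the axes selected by axisMask:

\mu = \mathrm{mean}(x), \qquad
\sigma = \sqrt{\mathrm{mean}\bigl((x - \mu)^2\bigr) + \epsilon}, \qquad
y = \frac{x - \mu}{\sigma} \cdot \gamma + \beta

where γ is the Scale input and β is the Bias input.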
