Commit 05af20a

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into pass_infra_part_5
yuanlehome committed Jun 13, 2023
2 parents f8df1a5 + 6bbe92a commit 05af20a
Showing 549 changed files with 2,580 additions and 2,141 deletions.
1 change: 0 additions & 1 deletion .cmake-format.py
@@ -16,7 +16,6 @@
# Options affecting formatting.
# -----------------------------
with section("format"):

# How wide to allow formatted cmake files
line_width = 80

2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -53,7 +53,7 @@ repos:
)$
# For Python files
- repo: https://github.com/psf/black.git
rev: 22.8.0
rev: 23.3.0
hooks:
- id: black
files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -62,6 +62,7 @@ option(WITH_IPU "Compile PaddlePaddle with Graphcore IPU" OFF)
option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" OFF)
option(WITH_CUSPARSELT "Compile PaddlePaddle with CUSPARSELT" OFF)
option(WITH_SETUP_INSTALL "Compile PaddlePaddle with setup.py" OFF)
option(WITH_SHARED_PHI "Compile PaddlePaddle with SHARED LIB of PHI" OFF)
# Note(zhouwei): It uses the options above, so it is placed here
include(init)
include(generic) # simplify cmake module
@@ -111,7 +112,7 @@ endif()

if(WIN32)
option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)

message("Build static library of PHI")
set(CMAKE_SUPPRESS_REGENERATION ON)
set(CMAKE_STATIC_LIBRARY_PREFIX lib)

2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@ English | [简体中文](./README_cn.md) | [日本語](./README_ja.md)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
[![Twitter](https://img.shields.io/badge/Twitter-1ca0f1.svg?logo=twitter&logoColor=white)](https://twitter.com/PaddlePaddle_)
[![Twitter](https://img.shields.io/badge/Twitter-1ca0f1.svg?logo=twitter&logoColor=white)](https://twitter.com/PaddlePaddle)

Welcome to the PaddlePaddle GitHub.

14 changes: 1 addition & 13 deletions cmake/configure.cmake
@@ -236,18 +236,6 @@ if(WITH_CUDNN_FRONTEND)
add_definitions(-DPADDLE_WITH_CUDNN_FRONTEND)
endif()

set(WITH_PHI_SHARED
ON
CACHE BOOL "" FORCE)
if(WIN32
OR WITH_ROCM
OR WITH_XPU_KP
OR ON_INFER)
set(WITH_PHI_SHARED
OFF
CACHE BOOL "" FORCE)
endif()

if(WITH_PHI_SHARED)
if(WITH_SHARED_PHI)
add_definitions(-DPHI_SHARED)
endif()
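
Note: the deleted block above used to force WITH_PHI_SHARED off on Windows, ROCm, XPU-KP, and inference builds; after this commit the choice is driven solely by the user-facing WITH_SHARED_PHI option added in CMakeLists.txt. A minimal sketch of how a downstream build script could reinstate the old platform defaults, if desired (illustrative only, not part of this commit):

# Illustrative guard mirroring the removed logic: keep PHI static on
# platforms where the shared build was previously force-disabled.
if(WIN32
   OR WITH_ROCM
   OR WITH_XPU_KP
   OR ON_INFER)
  set(WITH_SHARED_PHI
      OFF
      CACHE BOOL "" FORCE)
endif()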
33 changes: 27 additions & 6 deletions cmake/external/rocksdb.cmake
@@ -15,7 +15,8 @@
include(ExternalProject)

# find_package(jemalloc REQUIRED)

set(ROCKSDB_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/rocksdb)
set(ROCKSDB_TAG 6.19.fb)
set(JEMALLOC_INCLUDE_DIR ${THIRD_PARTY_PATH}/install/jemalloc/include)
set(JEMALLOC_LIBRARIES
${THIRD_PARTY_PATH}/install/jemalloc/lib/libjemalloc_pic.a)
@@ -41,16 +42,37 @@ set(ROCKSDB_CMAKE_CXX_FLAGS
set(ROCKSDB_CMAKE_C_FLAGS
"${ROCKSDB_COMMON_FLAGS} ${ROCKSDB_FLAGS} -DROCKSDB_LIBAIO_PRESENT -fPIC -I${JEMALLOC_INCLUDE_DIR}"
)

include_directories(${ROCKSDB_INCLUDE_DIR})

file(GLOB ROCKSDB_SOURCE_FILE_LIST ${ROCKSDB_SOURCE_DIR})
list(LENGTH ROCKSDB_SOURCE_FILE_LIST RES_LEN)
if(RES_LEN EQUAL 0)
execute_process(
COMMAND ${GIT_EXECUTABLE} clone -b ${ROCKSDB_TAG}
"https://github.com/Thunderbrook/rocksdb" ${ROCKSDB_SOURCE_DIR})
else()
# check git tag
execute_process(
COMMAND ${GIT_EXECUTABLE} describe --abbrev=6 --always --tags
OUTPUT_VARIABLE VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET
WORKING_DIRECTORY ${ROCKSDB_SOURCE_DIR})
if(NOT ${VERSION} STREQUAL ${ROCKSDB_TAG})
message(
WARNING "rocksdb version is not ${VERSION}, checkout to ${ROCKSDB_TAG}")
execute_process(COMMAND ${GIT_EXECUTABLE} checkout ${ROCKSDB_TAG}
WORKING_DIRECTORY ${ROCKSDB_SOURCE_DIR})
endif()
endif()
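# Note: the branch above assumes third_party/rocksdb is either empty (then
# it is freshly cloned) or a git checkout whose `git describe` output
# matches ROCKSDB_TAG; a mismatching checkout is switched to the pinned tag.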

set(CMAKE_CXX_LINK_EXECUTABLE
"${CMAKE_CXX_LINK_EXECUTABLE} -pthread -Wl,--no-as-needed -ldl -lrt -lz")
ExternalProject_Add(
extern_rocksdb
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${ROCKSDB_PREFIX_DIR}
GIT_REPOSITORY "https://github.com/Thunderbrook/rocksdb"
GIT_TAG 6.19.fb
SOURCE_DIR ${ROCKSDB_SOURCE_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
@@ -66,9 +88,8 @@ ExternalProject_Add(
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
INSTALL_COMMAND
mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ && cp
${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES}
&& cp -r ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/include
${ROCKSDB_INSTALL_DIR}/
${ROCKSDB_SOURCE_DIR}/librocksdb.a ${ROCKSDB_LIBRARIES} && cp -r
${ROCKSDB_SOURCE_DIR}/include ${ROCKSDB_INSTALL_DIR}/
BUILD_IN_SOURCE 1
BUILD_BYPRODUCTS ${ROCKSDB_LIBRARIES})

2 changes: 1 addition & 1 deletion cmake/inference_lib.cmake
@@ -269,7 +269,7 @@ else()
SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
if(WITH_PHI_SHARED)
if(WITH_SHARED_PHI)
set(paddle_phi_lib ${PADDLE_BINARY_DIR}/paddle/phi/libphi.*)
copy(
inference_lib_dist
14 changes: 13 additions & 1 deletion cmake/third_party.cmake
@@ -30,7 +30,19 @@ set(third_party_deps)
include(ProcessorCount)
ProcessorCount(NPROC)
if(NOT WITH_SETUP_INSTALL)
execute_process(COMMAND git submodule update --init --recursive)
# NOTE(risemeup1): Initialize any submodules.
message(
STATUS
"Check submodules of paddle, and run 'git submodule update --init --recursive'"
)
execute_process(
COMMAND git submodule update --init --recursive
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
RESULT_VARIABLE result_var)
if(NOT result_var EQUAL 0)
message(FATAL_ERROR "Failed to get submodules, please check your network!")
endif()

endif()
# Cache function to avoid repeatedly downloading third_party code.
# This function has 4 parameters: URL / REPOSITORY / TAG / DIR:
2 changes: 1 addition & 1 deletion paddle/fluid/eager/auto_code_generator/CMakeLists.txt
@@ -52,7 +52,7 @@ if(WIN32)
set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
endif()

if(WITH_PHI_SHARED)
if(WITH_SHARED_PHI)
message("Copied phi.dll for Eager AutoCodeGen")
add_custom_command(
OUTPUT ${eager_generator_path}/phi.dll
4 changes: 0 additions & 4 deletions paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
@@ -547,7 +547,6 @@ def GenerateCoreOpInfoDeclaration():


def GenerateCoreOpInfoDefinition():

op_args_info_list = []
for op_name, arg_list in core_ops_args_info.items():
arg_str = ",".join(["\"" + v + "\"" for v in arg_list])
@@ -803,7 +802,6 @@ def CollectBackwardInfo(self):
self.backward_returns_list = backward_returns_list_new

def CollectForwardInfoFromBackwardContents(self):

backward_forward_str = self.backward_forward_str

(
@@ -1910,7 +1908,6 @@ def GenerateHigherOrderNodeCreationCode(self):
self.grad_api_contents["backward_op"] in prim_white_list
or is_invoke_forward_api
):

next_grad_node_creation_str = f"""
if (!paddle::prim::PrimCommonUtils::IsEagerPrimEnabled()) {{
if(trace_backward) {{
@@ -2274,7 +2271,6 @@ def GenerateNodeDefinition(
egr::EagerUtils::HandleViewBetweenInputAndOutput({inplace_grad_input_str}, api_output_{out_index});
}}"""
if IsPlainTensorType(ttype):

if (
backward_inplace_map
and name in backward_inplace_map.values()
@@ -604,7 +604,6 @@ def GenerateCoreOpsInfoMap():


def GeneratePythonCWrappers(python_c_function_str, python_c_function_reg_str):

(
core_ops_infos_definition,
core_ops_infos_registry,
166 changes: 118 additions & 48 deletions paddle/fluid/inference/tensorrt/convert/layer_norm_op.cc
@@ -24,57 +24,129 @@ class LayerNormOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
VLOG(4) << "convert a layer_norm op to tensorrt layer_norm plugin";
VLOG(4) << "convert a layer_norm op with dynamic shape to Normalization "
"layer or Static shape tensorrt layer_norm plugin";
framework::OpDesc op_desc(op, nullptr);

auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Bias_v = scope.FindVar(op_desc.Input("Bias").front());
auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front());
const int begin_norm_axis =
op_desc.HasAttr("begin_norm_axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("begin_norm_axis"))
: 1;
auto* X = engine_->GetITensor(op_desc.Input("X")[0]);
auto rank = X->getDimensions().nbDims;
std::string output_name = op_desc.Output("Y")[0];
const float eps = op_desc.HasAttr("epsilon")
? PADDLE_GET_CONST(float, op_desc.GetAttr("epsilon"))
: 1e-5f;
PADDLE_ENFORCE_NOT_NULL(
Bias_v,
platform::errors::InvalidArgument(
"Input(Bias) of layer_norm should not be null."));
PADDLE_ENFORCE_NOT_NULL(
Scale_v,
platform::errors::InvalidArgument(
"Input(Scale) of layer_norm should not be null."));
if (engine_->with_dynamic_shape()) {
auto* Scale = engine_->GetITensor(op_desc.Input("Scale")[0]);
auto* Bias = engine_->GetITensor(op_desc.Input("Bias")[0]);
int32_t begin_axis =
op_desc.HasAttr("begin_norm_axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("begin_norm_axis"))
: 1;
uint32_t axisMask{0};
for (int32_t i = begin_axis; i < rank; i++) {
axisMask |= 1 << i;
}
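// e.g. rank = 4, begin_axis = 2 -> axisMask = 0b1100: reduce over the two
// innermost axes, matching layer_norm's begin_norm_axis semantics.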
std::vector<int32_t> indice_dim_vec(rank);
std::iota(indice_dim_vec.begin(), indice_dim_vec.end(), 0);
auto p = std::remove_if(indice_dim_vec.begin(),
indice_dim_vec.end(),
[begin_axis](int x) { return x < begin_axis; });
indice_dim_vec.resize(p - indice_dim_vec.begin());
auto newDims = Gather(Shape(X), indice_dim_vec);
auto newrank = indice_dim_vec.size();
auto* one_rank_tensor =
Add1DConstantLayer(std::vector<int32_t>(rank - newrank, 1));
std::vector<nvinfer1::ITensor*> itensors;
itensors.push_back(one_rank_tensor);
itensors.push_back(newDims);
nvinfer1::ITensor* concat_shape_tensor = Concat(itensors);
auto Bias_reshape = Reshape(
Bias,
concat_shape_tensor,
("layer_norm Bias: reshape: (Output(" + output_name + ")").c_str());
auto Scale_reshape = Reshape(
Scale,
concat_shape_tensor,
("layer_norm Scale: reshape: (Output(" + output_name + ")").c_str());
#if IS_TRT_VERSION_GE(8600)
auto layer = TRT_ENGINE_ADD_LAYER(
engine_, Normalization, *X, *Scale_reshape, *Bias_reshape, axisMask);
layer->setEpsilon(eps);
RreplenishLayerAndOutput(layer, "layer_norm", {output_name}, test_mode);
#else
// μ
auto miu_layer = TRT_ENGINE_ADD_LAYER(
engine_, Reduce, *X, nvinfer1::ReduceOperation::kAVG, axisMask, true);
miu_layer->setName((output_name + "_miu").c_str());
auto miu_output = miu_layer->getOutput(0);
// x−μ
auto xsubmiu_output = Sub(X, miu_output);
// σ
// pow(x−μ,2)
auto pow_tensor = Add1DConstantLayer(static_cast<float>(2));
auto xsubmiu_pow_out = Pow(
xsubmiu_output,
BroadcastTensors(xsubmiu_output,
pow_tensor,
("layer_norm_pow: reshape_for_broadcast: (Output(" +
output_name + ")")
.c_str()));
// mean_var
auto mean_var_layer =
TRT_ENGINE_ADD_LAYER(engine_,
Reduce,
*xsubmiu_pow_out,
nvinfer1::ReduceOperation::kAVG,
axisMask,
true);
mean_var_layer->setName((output_name + "_sigma").c_str());
auto mean_var_out = mean_var_layer->getOutput(0);
// sigma
auto eps_tensor = Add1DConstantLayer(eps);
auto sum_out = Sum(
mean_var_out,
BroadcastTensors(mean_var_out,
eps_tensor,
("layer_norm_eps: reshape_for_broadcast: (Output(" +
output_name + ")")
.c_str()));
auto sigma_layer = TRT_ENGINE_ADD_LAYER(
engine_, Unary, *sum_out, nvinfer1::UnaryOperation::kSQRT);
auto sigma_output = sigma_layer->getOutput(0);
// (x−μ)/σ
auto div_out = Div(xsubmiu_output, sigma_output);
// ((x−μ)/σ)*g+b
auto scale_out = Prod(div_out, Scale_reshape);
auto layer = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*scale_out,
*Bias_reshape,
nvinfer1::ElementWiseOperation::kSUM);
RreplenishLayerAndOutput(layer, "layer_norm", {output_name}, test_mode);
#endif
} else {
auto* Bias_v = scope.FindVar(op_desc.Input("Bias")[0]);
auto* Scale_v = scope.FindVar(op_desc.Input("Scale")[0]);
PADDLE_ENFORCE_NOT_NULL(
Bias_v,
platform::errors::InvalidArgument(
"Input(Bias) of layer_norm should not be null."));
PADDLE_ENFORCE_NOT_NULL(
Scale_v,
platform::errors::InvalidArgument(
"Input(Scale) of layer_norm should not be null."));
auto* Bias_t = Bias_v->GetMutable<phi::DenseTensor>();
auto* Scale_t = Scale_v->GetMutable<phi::DenseTensor>();

auto* Bias_t = Bias_v->GetMutable<phi::DenseTensor>();
auto* Scale_t = Scale_v->GetMutable<phi::DenseTensor>();
auto bias_weight =
engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *Bias_t);
auto scale_weight =
engine_->GetFp32TrtWeight(op_desc.Input("Scale").front(), *Scale_t);

auto bias_weight =
engine_->GetFp32TrtWeight(op_desc.Input("Bias").front(), *Bias_t);
auto scale_weight =
engine_->GetFp32TrtWeight(op_desc.Input("Scale").front(), *Scale_t);
const int begin_norm_axis =
op_desc.HasAttr("begin_norm_axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("begin_norm_axis"))
: 1;

nvinfer1::ILayer* layernorm_layer = nullptr;
if (engine_->with_dynamic_shape()) {
// For dynamic shape,
// the shape of mean and variance will be determined in configurePlugin.
std::vector<int64_t> mean_shape{1};
std::vector<int64_t> variance_shape{1};
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
plugin::LayerNormPluginDynamic* plugin =
new plugin::LayerNormPluginDynamic(
static_cast<const float*>(bias_weight.get().values),
bias_weight.get().count,
static_cast<const float*>(scale_weight.get().values),
scale_weight.get().count,
begin_norm_axis,
eps,
mean_shape,
variance_shape,
with_fp16);
layernorm_layer = engine_->AddDynamicPlugin(&X, 1, plugin);
} else {
int statis_num = 1;
for (int i = 1; i < begin_norm_axis; i++) {
statis_num *= X->getDimensions().d[i];
@@ -93,13 +165,11 @@ class LayerNormOpConverter : public OpConverter {
mean_shape,
variance_shape,
with_fp16);
layernorm_layer = engine_->AddPlugin(
auto* layernorm_layer = engine_->AddPlugin(
&X, 1, reinterpret_cast<plugin::PluginTensorRT*>(plugin));
RreplenishLayerAndOutput(
layernorm_layer, "layer_norm", {output_name}, test_mode);
}

auto output_name = op_desc.Output("Y").front();
RreplenishLayerAndOutput(
layernorm_layer, "layer_norm", {output_name}, test_mode);
}
};
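
For reference, the pre-8.6 fallback above assembles layer normalization from elementary TensorRT layers; the quantity it computes is the standard formula (restated here for readability, not part of the diff), with all means taken over the axes selected by axisMask:

\mu = \mathrm{mean}(x), \qquad
\sigma = \sqrt{\mathrm{mean}\bigl((x - \mu)^2\bigr) + \epsilon}, \qquad
y = \frac{x - \mu}{\sigma} \cdot \gamma + \beta

where γ is the Scale input and β is the Bias input.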
