Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' into slice/static_get…
Browse files Browse the repository at this point in the history
…item
  • Loading branch information
hbwx24 committed Jul 26, 2021
2 parents 0a476ea + 6b20cb4 commit ae19cc0
Show file tree
Hide file tree
Showing 219 changed files with 4,327 additions and 2,224 deletions.
27 changes: 17 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,17 +119,19 @@ if(WIN32)
endforeach(flag_var)
endif()

math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")

# windows build turn off warnings, use parallel compiling.
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}")
# NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling
if(NOT WITH_GPU)

# NOTE(zhouwei25): GPU compilation has too high memory utilization when compiling in parallel.
# For Visual Studio generators, /MP should be added.
# For other generators like Ninja, there is no need to add /MP.
# BUG FIX: ${CMAKE_GENERATOR} is e.g. "Visual Studio 16 2019", so an exact
# STREQUAL against "Visual Studio" can never match and /MP was silently never
# added. Use MATCHES to detect any Visual Studio generator.
if("${CMAKE_GENERATOR}" MATCHES "Visual Studio" AND NOT WITH_GPU)
  # Use ~2/3 of the available cores to keep memory utilization manageable.
  math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")
  set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}")
endif()
endforeach(flag_var)
Expand Down Expand Up @@ -312,6 +314,17 @@ else()
endif()
endif()

# Configure options that are implied/forbidden by WITH_DISTRIBUTE before
# third_party is included, so dependency downloads see the final values.
if(WITH_DISTRIBUTE)
  if(LINUX)
    # GLOO is only supported on Linux; force it on for distributed builds.
    # NOTE: WITH_GLOO is a boolean flag, so cache it as BOOL (was STRING).
    set(WITH_GLOO ON CACHE BOOL "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE)
  endif()
  if(WITH_ASCEND_CL)
    # Disable WITH_PSCORE for NPU before including third_party.
    # Use lowercase message() for consistency with the rest of the file.
    message(WARNING "Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF.")
    set(WITH_PSCORE OFF CACHE BOOL "Disable WITH_PSCORE when compiling with NPU" FORCE)
  endif()
endif()

include(third_party) # download, build, install third_party, Contains about 20+ dependencies

include(flags) # set paddle compile flags
Expand All @@ -322,12 +335,6 @@ if(WITH_PROFILER)
add_definitions(-DWITH_GPERFTOOLS)
endif()

# When building with distributed support on Linux, force-enable GLOO
# (the CPU collective-communication backend). Note this runs after
# include(third_party) here, which is why the commit moves it earlier.
if(WITH_DISTRIBUTE)
if(LINUX)
set(WITH_GLOO ON CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE)
endif()
endif()

include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(version) # set PADDLE_VERSION
Expand Down
2 changes: 1 addition & 1 deletion cmake/ccache.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ elseif("${CMAKE_GENERATOR}" STREQUAL "Ninja")

if(SCCACHE_PATH)
execute_process(COMMAND sccache -V OUTPUT_VARIABLE sccache_version)
message(STATUS "${sccache_version} is founded, use [${SCCACHE_PATH}] to speed up compile on Windows.")
message(STATUS "sccache is founded, use [${SCCACHE_PATH}] to speed up compile on Windows.")

set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_PATH})
set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_PATH})
Expand Down
4 changes: 2 additions & 2 deletions paddle/fluid/framework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -197,14 +197,14 @@ cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op
cc_library(version SRCS version.cc)
cc_test(version_test SRCS version_test.cc DEPS version)

cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute shape_inference op_info operator glog version)

cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)

cc_library(op_call_stack SRCS op_call_stack.cc DEPS op_proto_maker enforce)
cc_test(op_call_stack_test SRCS op_call_stack_test.cc DEPS op_call_stack)

cc_library(program_processing SRCS program_processing.cc DEPS framework_proto)
cc_library(program_processing SRCS program_processing.cc DEPS boost proto_desc)
cc_test(program_processing_test SRCS program_processing_test.cc DEPS proto_desc program_processing)

if(WITH_GPU)
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/ir/fuse_pass_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ void FusePassBase::AddStatis(int count_of_fused) const {
auto& info =
graph_->Get<std::unordered_map<std::string, int>>(kFuseStatisAttr);
info[repr_] = count_of_fused;
if (count_of_fused > 0)
LOG(INFO) << "--- detected " << count_of_fused << " subgraphs";
}

FuseOptions FusePassBase::FindFuseOption(const Node& node1,
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/graph_pattern_detector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ void GraphPatternDetector::operator()(Graph *graph,
ValidateByNodeRole(&subgraphs);

if (subgraphs.empty()) return;
LOG(INFO) << "--- detected " << subgraphs.size() << " subgraphs";

int id = 0;
for (auto &g : subgraphs) {
VLOG(3) << "optimizing #" << id++ << " subgraph";
Expand Down
9 changes: 4 additions & 5 deletions paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,6 @@ void MapMatmul2MulPass::ApplyImpl(ir::Graph* graph) const {
int found_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
if (!IsCompat(subgraph, g)) {
LOG(WARNING) << "Pass in op compat failed.";
return;
}

VLOG(4) << "map matmul to mul";
GET_IR_NODE_FROM_SUBGRAPH(matmul_in_x, matmul_in_x, matmul_pattern);
GET_IR_NODE_FROM_SUBGRAPH(matmul_in_y, matmul_in_y, matmul_pattern);
Expand All @@ -221,6 +216,10 @@ void MapMatmul2MulPass::ApplyImpl(ir::Graph* graph) const {
next_ops[0]->Name() == "elementwise_add";

if (flag) {
if (!IsCompat(subgraph, g)) {
LOG(WARNING) << "Pass in op compat failed.";
return;
}
OpDesc desc;
desc.SetType("mul");
desc.SetInput("X", {matmul_in_x->Name()});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ ConvBiasFusePass::ConvBiasFusePass() {
.IsTensor()
.End()
.AddAttr("axis")
.IsIntIn({-1, 0})
.IsIntIn({1, 3})
.End();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
op->SetInput("Bias", {});
} else if (type == "elementwise_add") {
op->SetAttr("use_mkldnn", true);
op->SetAttr("axis", -1);
op->SetAttr("axis", 1);
op->SetInput("X", {inputs[0]});
op->SetInput("Y", {inputs[1]});
op->SetOutput("Out", outputs);
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,7 @@ USE_TRT_CONVERTER(nearest_interp);
USE_TRT_CONVERTER(reshape);
USE_TRT_CONVERTER(reduce_sum);
USE_TRT_CONVERTER(gather_nd);
USE_TRT_CONVERTER(reduce_mean);
#endif

namespace paddle_infer {
Expand Down
34 changes: 26 additions & 8 deletions paddle/fluid/inference/tensorrt/convert/reduce_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,18 @@ namespace paddle {
namespace inference {
namespace tensorrt {

class ReduceSumOpConverter : public OpConverter {
class ReduceOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a paddle reduce_sum op to tensorrt reduce layer";
VLOG(4) << "convert a paddle " << op_type << " op to tensorrt reduce layer";
framework::OpDesc op_desc(op, nullptr);
nvinfer1::ReduceOperation reduce_type;
if (op_type == "reduce_sum") {
reduce_type = nvinfer1::ReduceOperation::kSUM;
} else if (op_type == "reduce_mean") {
reduce_type = nvinfer1::ReduceOperation::kAVG;
}

auto* x = engine_->GetITensor(op_desc.Input("X").front());
nvinfer1::Dims input_shape = x->getDimensions();
Expand All @@ -51,15 +57,13 @@ class ReduceSumOpConverter : public OpConverter {
BOOST_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("dim"));
bool reduce_all = BOOST_GET_CONST(bool, op_desc.GetAttr("reduce_all"));

// Now we only support dynamic_shape mode.
nvinfer1::IReduceLayer* layer = nullptr;
if (reduce_all) {
uint32_t reduce_dim = 0;
for (int i = 0; i < input_dims; ++i) {
reduce_dim |= 1 << i;
}
layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x,
nvinfer1::ReduceOperation::kSUM, reduce_dim,
layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type, reduce_dim,
keep_dim);
} else {
auto CvtToBitMask = [&](const std::vector<int32_t>& dims) -> uint32_t {
Expand All @@ -68,23 +72,37 @@ class ReduceSumOpConverter : public OpConverter {
if (x < 0) {
res |= 1 << (x + input_dims);
} else {
if (!engine_->with_dynamic_shape()) x = x - 1;
res |= 1 << x;
}
}
return res;
};
layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x,
nvinfer1::ReduceOperation::kSUM,
layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type,
CvtToBitMask(dim), keep_dim);
}

auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "reduce_sum", {output_name}, test_mode);
RreplenishLayerAndOutput(layer, op_type, {output_name}, test_mode);
}

protected:
std::string op_type;
};

// Converter for the Paddle "reduce_sum" op; the base class maps this
// op_type to nvinfer1::ReduceOperation::kSUM when building the TRT layer.
class ReduceSumOpConverter : public ReduceOpConverter {
public:
ReduceSumOpConverter() { op_type = "reduce_sum"; }
};

// Converter for the Paddle "reduce_mean" op; the base class maps this
// op_type to nvinfer1::ReduceOperation::kAVG when building the TRT layer.
class ReduceMeanOpConverter : public ReduceOpConverter {
public:
ReduceMeanOpConverter() { op_type = "reduce_mean"; }
};

} // namespace tensorrt
} // namespace inference
} // namespace paddle

REGISTER_TRT_OP_CONVERTER(reduce_sum, ReduceSumOpConverter);
REGISTER_TRT_OP_CONVERTER(reduce_mean, ReduceMeanOpConverter);
11 changes: 9 additions & 2 deletions paddle/fluid/inference/tensorrt/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ namespace tensorrt {
NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD

// On TensorRT >= 8.0 the ILogger/IProfiler virtual interfaces are declared
// noexcept, so overrides (e.g. NaiveLogger::log below) must match; on older
// versions the specifier must be absent. TRT_NOEXCEPT abstracts the two.
#if IS_TRT_VERSION_GE(8000)
#define TRT_NOEXCEPT noexcept
#else
#define TRT_NOEXCEPT
#endif

namespace dy = paddle::platform::dynload;

// TensorRT data type to size
Expand Down Expand Up @@ -72,7 +78,8 @@ static int GetInferLibVersion() {
// A logger for create TensorRT infer builder.
class NaiveLogger : public nvinfer1::ILogger {
public:
void log(nvinfer1::ILogger::Severity severity, const char* msg) override {
void log(nvinfer1::ILogger::Severity severity,
const char* msg) TRT_NOEXCEPT override {
switch (severity) {
case Severity::kVERBOSE:
VLOG(3) << msg;
Expand Down Expand Up @@ -105,7 +112,7 @@ class NaiveProfiler : public nvinfer1::IProfiler {
typedef std::pair<std::string, float> Record;
std::vector<Record> mProfile;

virtual void reportLayerTime(const char* layerName, float ms) {
virtual void reportLayerTime(const char* layerName, float ms) TRT_NOEXCEPT {
auto record =
std::find_if(mProfile.begin(), mProfile.end(),
[&](const Record& r) { return r.first == layerName; });
Expand Down
21 changes: 14 additions & 7 deletions paddle/fluid/inference/tensorrt/op_teller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"nearest_interp",
"anchor_generator",
"reduce_sum",
"reduce_mean",
};
};

Expand Down Expand Up @@ -709,18 +710,24 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (!with_dynamic_shape && shape[0] == -1) return false;
}

if (op_type == "reduce_sum") {
if (!with_dynamic_shape) {
VLOG(3) << "the reduce_sum does not support static shape yet";
return false;
}

if (op_type == "reduce_sum" || op_type == "reduce_mean") {
if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") &&
desc.HasAttr("reduce_all"))) {
VLOG(3) << "the reduce_sum does not have attr (keep_dim or dim or "
VLOG(3) << "the " << op_type
<< " does not have attr (keep_dim or dim or "
"reduce_all)";
return false;
}

// The batch size dimension cannot be reduced if it's not dynamic shape.
if (!with_dynamic_shape) {
if (desc.HasAttr("reduce_all")) return false;
std::vector<int32_t> dim =
BOOST_GET_CONST(std::vector<int32_t>, desc.GetAttr("dim"));
for (auto x : dim) {
if (!x) return false;
}
}
}

if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
Expand Down
Loading

0 comments on commit ae19cc0

Please sign in to comment.