diff --git a/CMakeLists.txt b/CMakeLists.txt index 50070c7fc05133..4f6ed9de30efe4 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,8 +119,6 @@ if(WIN32) endforeach(flag_var) endif() - math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3") - # windows build turn off warnings, use parallel compiling. foreach(flag_var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") - # NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling - if(NOT WITH_GPU) + + # NOTE(zhouwei25): GPU compilation has too high memory utilization when compiling in parallel, + # For Visual Studio generators, /MP should be added. + # For other generators such as Ninja, there is no need to add /MP. + if("${CMAKE_GENERATOR}" STREQUAL "Visual Studio" AND NOT WITH_GPU) + math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3") set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}") endif() endforeach(flag_var) @@ -312,6 +314,17 @@ else() endif() endif() +if(WITH_DISTRIBUTE) + if(LINUX) + set(WITH_GLOO ON CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE) + endif() + if(WITH_ASCEND_CL) + # disable WITH_PSCORE for NPU before including third_party + MESSAGE(WARNING "Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF.") + set(WITH_PSCORE OFF CACHE BOOL "Disable WITH_PSCORE when compiling with NPU" FORCE) + endif() +endif() + include(third_party) # download, build, install third_party, Contains about 20+ dependencies include(flags) # set paddle compile flags @@ -322,12 +335,6 @@ if(WITH_PROFILER) add_definitions(-DWITH_GPERFTOOLS) endif() -if(WITH_DISTRIBUTE) - if(LINUX) - set(WITH_GLOO ON CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." 
FORCE) - endif() -endif() - include(ccache) # set ccache for compilation include(util) # set unittest and link libs include(version) # set PADDLE_VERSION diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 25798758473af5..5520720f7a6c71 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -18,7 +18,7 @@ elseif("${CMAKE_GENERATOR}" STREQUAL "Ninja") if(SCCACHE_PATH) execute_process(COMMAND sccache -V OUTPUT_VARIABLE sccache_version) - message(STATUS "${sccache_version} is founded, use [${SCCACHE_PATH}] to speed up compile on Windows.") + message(STATUS "sccache is found, use [${SCCACHE_PATH}] to speed up compilation on Windows.") set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_PATH}) set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_PATH}) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 0ed62ac93a7278..485fddff4df424 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -197,14 +197,14 @@ cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op cc_library(version SRCS version.cc) cc_test(version_test SRCS version_test.cc DEPS version) -cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version) +cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute shape_inference op_info operator glog version) cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc) cc_library(op_call_stack SRCS op_call_stack.cc DEPS op_proto_maker enforce) cc_test(op_call_stack_test SRCS op_call_stack_test.cc DEPS op_call_stack) -cc_library(program_processing SRCS program_processing.cc DEPS framework_proto) +cc_library(program_processing SRCS program_processing.cc DEPS boost proto_desc) cc_test(program_processing_test SRCS program_processing_test.cc DEPS proto_desc program_processing) if(WITH_GPU) diff --git a/paddle/fluid/framework/ir/fuse_pass_base.cc b/paddle/fluid/framework/ir/fuse_pass_base.cc index 9dfc8bf6037a74..4f89750daee16f 100644 --- a/paddle/fluid/framework/ir/fuse_pass_base.cc +++ b/paddle/fluid/framework/ir/fuse_pass_base.cc @@ -53,6 +53,8 @@ void FusePassBase::AddStatis(int count_of_fused) const { auto& info = graph_->Get>(kFuseStatisAttr); info[repr_] = count_of_fused; + if (count_of_fused > 0) + LOG(INFO) << "--- detected " << count_of_fused << " subgraphs"; } FuseOptions FusePassBase::FindFuseOption(const Node& node1, diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 7717bcfc3e9624..9d06a4de9548de 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -88,7 +88,7 @@ void GraphPatternDetector::operator()(Graph *graph, ValidateByNodeRole(&subgraphs); if (subgraphs.empty()) return; - LOG(INFO) << "--- detected " << subgraphs.size() << " subgraphs"; + int id = 0; for (auto &g : subgraphs) { VLOG(3) << "optimizing #" << id++ << " subgraph"; diff --git a/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc b/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc index 9542d3d3d43f31..613768284735c1 100644 --- a/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc +++ b/paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc @@ -191,11 +191,6 @@ void MapMatmul2MulPass::ApplyImpl(ir::Graph* graph) const { int found_count = 0; auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { - if 
(!IsCompat(subgraph, g)) { - LOG(WARNING) << "Pass in op compat failed."; - return; - } - VLOG(4) << "map matmul to mul"; GET_IR_NODE_FROM_SUBGRAPH(matmul_in_x, matmul_in_x, matmul_pattern); GET_IR_NODE_FROM_SUBGRAPH(matmul_in_y, matmul_in_y, matmul_pattern); @@ -221,6 +216,10 @@ void MapMatmul2MulPass::ApplyImpl(ir::Graph* graph) const { next_ops[0]->Name() == "elementwise_add"; if (flag) { + if (!IsCompat(subgraph, g)) { + LOG(WARNING) << "Pass in op compat failed."; + return; + } OpDesc desc; desc.SetType("mul"); desc.SetInput("X", {matmul_in_x->Name()}); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc index 74bbe24eb82f5d..a7514038d400b6 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc @@ -70,7 +70,7 @@ ConvBiasFusePass::ConvBiasFusePass() { .IsTensor() .End() .AddAttr("axis") - .IsIntIn({-1, 0}) + .IsIntIn({1, 3}) .End(); } diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc index 80a9ef7eda724a..e41c35ba33fdc9 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc @@ -52,7 +52,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name, op->SetInput("Bias", {}); } else if (type == "elementwise_add") { op->SetAttr("use_mkldnn", true); - op->SetAttr("axis", -1); + op->SetAttr("axis", 1); op->SetInput("X", {inputs[0]}); op->SetInput("Y", {inputs[1]}); op->SetOutput("Out", outputs); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index dd3a33130a3e6e..d32ec581ce94b4 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1255,6 +1255,7 @@ USE_TRT_CONVERTER(nearest_interp); USE_TRT_CONVERTER(reshape); USE_TRT_CONVERTER(reduce_sum); USE_TRT_CONVERTER(gather_nd); +USE_TRT_CONVERTER(reduce_mean); #endif namespace paddle_infer { diff --git a/paddle/fluid/inference/tensorrt/convert/reduce_op.cc b/paddle/fluid/inference/tensorrt/convert/reduce_op.cc index 66d2680fe9969c..f3c4059b8e6456 100644 --- a/paddle/fluid/inference/tensorrt/convert/reduce_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/reduce_op.cc @@ -35,12 +35,18 @@ namespace paddle { namespace inference { namespace tensorrt { -class ReduceSumOpConverter : public OpConverter { +class ReduceOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(4) << "convert a paddle reduce_sum op to tensorrt reduce layer"; + VLOG(4) << "convert a paddle " << op_type << " op to tensorrt reduce layer"; framework::OpDesc op_desc(op, nullptr); + nvinfer1::ReduceOperation reduce_type; + if (op_type == "reduce_sum") { + reduce_type = nvinfer1::ReduceOperation::kSUM; + } else if (op_type == "reduce_mean") { + reduce_type = nvinfer1::ReduceOperation::kAVG; + } auto* x = engine_->GetITensor(op_desc.Input("X").front()); nvinfer1::Dims input_shape = x->getDimensions(); @@ -51,15 +57,13 @@ class ReduceSumOpConverter : public OpConverter { BOOST_GET_CONST(std::vector, op_desc.GetAttr("dim")); bool reduce_all = BOOST_GET_CONST(bool, op_desc.GetAttr("reduce_all")); - // Now we only support dynamic_shape mode. 
nvinfer1::IReduceLayer* layer = nullptr; if (reduce_all) { uint32_t reduce_dim = 0; for (int i = 0; i < input_dims; ++i) { reduce_dim |= 1 << i; } - layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, - nvinfer1::ReduceOperation::kSUM, reduce_dim, + layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type, reduce_dim, keep_dim); } else { auto CvtToBitMask = [&](const std::vector& dims) -> uint32_t { @@ -68,19 +72,32 @@ class ReduceSumOpConverter : public OpConverter { if (x < 0) { res |= 1 << (x + input_dims); } else { + if (!engine_->with_dynamic_shape()) x = x - 1; res |= 1 << x; } } return res; }; - layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, - nvinfer1::ReduceOperation::kSUM, + layer = TRT_ENGINE_ADD_LAYER(engine_, Reduce, *x, reduce_type, CvtToBitMask(dim), keep_dim); } auto output_name = op_desc.Output("Out")[0]; - RreplenishLayerAndOutput(layer, "reduce_sum", {output_name}, test_mode); + RreplenishLayerAndOutput(layer, op_type, {output_name}, test_mode); } + + protected: + std::string op_type; +}; + +class ReduceSumOpConverter : public ReduceOpConverter { + public: + ReduceSumOpConverter() { op_type = "reduce_sum"; } +}; + +class ReduceMeanOpConverter : public ReduceOpConverter { + public: + ReduceMeanOpConverter() { op_type = "reduce_mean"; } }; } // namespace tensorrt @@ -88,3 +105,4 @@ class ReduceSumOpConverter : public OpConverter { } // namespace paddle REGISTER_TRT_OP_CONVERTER(reduce_sum, ReduceSumOpConverter); +REGISTER_TRT_OP_CONVERTER(reduce_mean, ReduceMeanOpConverter); diff --git a/paddle/fluid/inference/tensorrt/helper.h b/paddle/fluid/inference/tensorrt/helper.h index e3c7d8b10333c3..f0d585e1b4090a 100644 --- a/paddle/fluid/inference/tensorrt/helper.h +++ b/paddle/fluid/inference/tensorrt/helper.h @@ -39,6 +39,12 @@ namespace tensorrt { NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \ NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD +#if IS_TRT_VERSION_GE(8000) +#define TRT_NOEXCEPT noexcept +#else +#define TRT_NOEXCEPT +#endif + namespace dy = paddle::platform::dynload; // TensorRT data type to size @@ -72,7 +78,8 @@ static int GetInferLibVersion() { // A logger for create TensorRT infer builder. 
class NaiveLogger : public nvinfer1::ILogger { public: - void log(nvinfer1::ILogger::Severity severity, const char* msg) override { + void log(nvinfer1::ILogger::Severity severity, + const char* msg) TRT_NOEXCEPT override { switch (severity) { case Severity::kVERBOSE: VLOG(3) << msg; @@ -105,7 +112,7 @@ class NaiveProfiler : public nvinfer1::IProfiler { typedef std::pair Record; std::vector mProfile; - virtual void reportLayerTime(const char* layerName, float ms) { + virtual void reportLayerTime(const char* layerName, float ms) TRT_NOEXCEPT { auto record = std::find_if(mProfile.begin(), mProfile.end(), [&](const Record& r) { return r.first == layerName; }); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index f98b0c9ede76e2..6c6006065435f4 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -130,6 +130,7 @@ struct SimpleOpTypeSetTeller : public Teller { "nearest_interp", "anchor_generator", "reduce_sum", + "reduce_mean", }; }; @@ -709,18 +710,24 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, if (!with_dynamic_shape && shape[0] == -1) return false; } - if (op_type == "reduce_sum") { - if (!with_dynamic_shape) { - VLOG(3) << "the reduce_sum does not support static shape yet"; - return false; - } - + if (op_type == "reduce_sum" || op_type == "reduce_mean") { if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") && desc.HasAttr("reduce_all"))) { - VLOG(3) << "the reduce_sum does not have attr (keep_dim or dim or " + VLOG(3) << "the " << op_type + << " does not have attr (keep_dim or dim or " "reduce_all)"; return false; } + + // The batch size dimension cannot be reduced if it's not dynamic shape. + if (!with_dynamic_shape) { + if (desc.HasAttr("reduce_all")) return false; + std::vector dim = + BOOST_GET_CONST(std::vector, desc.GetAttr("dim")); + for (auto x : dim) { + if (!x) return false; + } + } } if ((*teller)(op_type, desc, use_no_calib_int8)) return true; diff --git a/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu index 8cf9178b6f139b..e5584f26580679 100644 --- a/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu @@ -110,16 +110,18 @@ AnchorGeneratorPlugin::AnchorGeneratorPlugin(const void* data, size_t length) { PrepareParamsOnDevice(); } -const char* AnchorGeneratorPlugin::getPluginType() const { +const char* AnchorGeneratorPlugin::getPluginType() const TRT_NOEXCEPT { return "anchor_generator_plugin"; } -const char* AnchorGeneratorPlugin::getPluginVersion() const { return "1"; } +const char* AnchorGeneratorPlugin::getPluginVersion() const TRT_NOEXCEPT { + return "1"; +} -int AnchorGeneratorPlugin::getNbOutputs() const { return 2; } +int AnchorGeneratorPlugin::getNbOutputs() const TRT_NOEXCEPT { return 2; } nvinfer1::Dims AnchorGeneratorPlugin::getOutputDimensions( - int index, const nvinfer1::Dims* inputs, int nb_input_dims) { + int index, const nvinfer1::Dims* inputs, int nb_input_dims) TRT_NOEXCEPT { nvinfer1::Dims dims{}; dims.nbDims = 4; dims.d[0] = height_; @@ -130,20 +132,21 @@ nvinfer1::Dims AnchorGeneratorPlugin::getOutputDimensions( } bool AnchorGeneratorPlugin::supportsFormat( - nvinfer1::DataType type, nvinfer1::TensorFormat format) const { + nvinfer1::DataType type, nvinfer1::TensorFormat format) const TRT_NOEXCEPT { // static shape plugin 
can't support different type between input/out // it may cause addition overhead in half mode return (type == data_type_ && format == nvinfer1::TensorFormat::kLINEAR); } -size_t AnchorGeneratorPlugin::getWorkspaceSize(int max_batch_size) const { +size_t AnchorGeneratorPlugin::getWorkspaceSize(int max_batch_size) const + TRT_NOEXCEPT { return 0; } template int AnchorGeneratorPlugin::enqueue_impl(int batch_size, const void* const* inputs, - void** outputs, void* workspace, + void* const* outputs, void* workspace, cudaStream_t stream) { const int block = 512; const int gen_anchor_grid = (box_num_ + block - 1) / block; @@ -169,15 +172,15 @@ int AnchorGeneratorPlugin::enqueue(int batch_size, const void* const* inputs, #else void* const* outputs, void* workspace, #endif - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { return enqueue_impl(batch_size, inputs, outputs, workspace, stream); } -int AnchorGeneratorPlugin::initialize() { return 0; } +int AnchorGeneratorPlugin::initialize() TRT_NOEXCEPT { return 0; } -void AnchorGeneratorPlugin::terminate() {} +void AnchorGeneratorPlugin::terminate() TRT_NOEXCEPT {} -size_t AnchorGeneratorPlugin::getSerializationSize() const { +size_t AnchorGeneratorPlugin::getSerializationSize() const TRT_NOEXCEPT { size_t serialize_size = 0; serialize_size += SerializedSize(data_type_); serialize_size += SerializedSize(anchor_sizes_); @@ -192,7 +195,7 @@ size_t AnchorGeneratorPlugin::getSerializationSize() const { return serialize_size; } -void AnchorGeneratorPlugin::serialize(void* buffer) const { +void AnchorGeneratorPlugin::serialize(void* buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, data_type_); SerializeValue(&buffer, anchor_sizes_); SerializeValue(&buffer, aspect_ratios_); @@ -205,28 +208,31 @@ void AnchorGeneratorPlugin::serialize(void* buffer) const { SerializeValue(&buffer, box_num_); } -void AnchorGeneratorPlugin::destroy() {} +void AnchorGeneratorPlugin::destroy() TRT_NOEXCEPT {} -void AnchorGeneratorPlugin::setPluginNamespace(const char* lib_namespace) { +void AnchorGeneratorPlugin::setPluginNamespace(const char* lib_namespace) + TRT_NOEXCEPT { namespace_ = std::string(lib_namespace); } -const char* AnchorGeneratorPlugin::getPluginNamespace() const { +const char* AnchorGeneratorPlugin::getPluginNamespace() const TRT_NOEXCEPT { return namespace_.c_str(); } nvinfer1::DataType AnchorGeneratorPlugin::getOutputDataType( - int index, const nvinfer1::DataType* input_type, int nb_inputs) const { + int index, const nvinfer1::DataType* input_type, + int nb_inputs) const TRT_NOEXCEPT { return input_type[0]; } bool AnchorGeneratorPlugin::isOutputBroadcastAcrossBatch( - int output_index, const bool* input_is_broadcast, int nb_inputs) const { + int output_index, const bool* input_is_broadcast, + int nb_inputs) const TRT_NOEXCEPT { return true; } -bool AnchorGeneratorPlugin::canBroadcastInputAcrossBatch( - int input_index) const { +bool AnchorGeneratorPlugin::canBroadcastInputAcrossBatch(int input_index) const + TRT_NOEXCEPT { return false; } @@ -236,9 +242,9 @@ void AnchorGeneratorPlugin::configurePlugin( const nvinfer1::DataType* input_types, const nvinfer1::DataType* output_types, const bool* input_is_broadcast, const bool* output_is_broadcast, nvinfer1::PluginFormat float_format, - int max_batct_size) {} + int max_batct_size) TRT_NOEXCEPT {} -nvinfer1::IPluginV2Ext* AnchorGeneratorPlugin::clone() const { +nvinfer1::IPluginV2Ext* AnchorGeneratorPlugin::clone() const TRT_NOEXCEPT { auto plugin = new AnchorGeneratorPlugin( data_type_, anchor_sizes_, 
aspect_ratios_, stride_, variances_, offset_, height_, width_, num_anchors_, box_num_); @@ -246,30 +252,32 @@ nvinfer1::IPluginV2Ext* AnchorGeneratorPlugin::clone() const { return plugin; } -void AnchorGeneratorPluginCreator::setPluginNamespace( - const char* lib_namespace) { +void AnchorGeneratorPluginCreator::setPluginNamespace(const char* lib_namespace) + TRT_NOEXCEPT { namespace_ = std::string(lib_namespace); } -const char* AnchorGeneratorPluginCreator::getPluginNamespace() const { +const char* AnchorGeneratorPluginCreator::getPluginNamespace() const + TRT_NOEXCEPT { return namespace_.c_str(); } -const char* AnchorGeneratorPluginCreator::getPluginName() const { +const char* AnchorGeneratorPluginCreator::getPluginName() const TRT_NOEXCEPT { return "anchor_generator_plugin"; } -const char* AnchorGeneratorPluginCreator::getPluginVersion() const { +const char* AnchorGeneratorPluginCreator::getPluginVersion() const + TRT_NOEXCEPT { return "1"; } const nvinfer1::PluginFieldCollection* -AnchorGeneratorPluginCreator::getFieldNames() { +AnchorGeneratorPluginCreator::getFieldNames() TRT_NOEXCEPT { return &field_collection_; } nvinfer1::IPluginV2Ext* AnchorGeneratorPluginCreator::createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) { + const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT { const nvinfer1::PluginField* fields = fc->fields; int type_id = -1; std::vector anchor_sizes, aspect_ratios, stride, variances; @@ -315,7 +323,8 @@ nvinfer1::IPluginV2Ext* AnchorGeneratorPluginCreator::createPlugin( } nvinfer1::IPluginV2Ext* AnchorGeneratorPluginCreator::deserializePlugin( - const char* name, const void* serial_data, size_t serial_length) { + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT { auto plugin = new AnchorGeneratorPlugin(serial_data, serial_length); plugin->setPluginNamespace(namespace_.c_str()); return plugin; @@ -374,7 +383,8 @@ AnchorGeneratorPluginDynamic::AnchorGeneratorPluginDynamic(void const* data, PrepareParamsOnDevice(); } -nvinfer1::IPluginV2DynamicExt* AnchorGeneratorPluginDynamic::clone() const { +nvinfer1::IPluginV2DynamicExt* AnchorGeneratorPluginDynamic::clone() const + TRT_NOEXCEPT { auto plugin = new AnchorGeneratorPluginDynamic( data_type_, anchor_sizes_, aspect_ratios_, stride_, variances_, offset_, num_anchors_); @@ -384,7 +394,7 @@ nvinfer1::IPluginV2DynamicExt* AnchorGeneratorPluginDynamic::clone() const { nvinfer1::DimsExprs AnchorGeneratorPluginDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) { + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT { nvinfer1::DimsExprs ret{}; ret.nbDims = 4; ret.d[0] = inputs[0].d[2]; // feature height @@ -396,7 +406,7 @@ nvinfer1::DimsExprs AnchorGeneratorPluginDynamic::getOutputDimensions( bool AnchorGeneratorPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, - int nbOutputs) { + int nbOutputs) TRT_NOEXCEPT { // input can be any, doesn't matter // anchor generator doesn't read input raw data, only need the shape info auto type = inOut[pos].type; @@ -412,11 +422,12 @@ bool AnchorGeneratorPluginDynamic::supportsFormatCombination( void AnchorGeneratorPluginDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, - const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {} + const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) TRT_NOEXCEPT {} size_t 
AnchorGeneratorPluginDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc* inputs, int nbInputs, - const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const { + const nvinfer1::PluginTensorDesc* outputs, + int nbOutputs) const TRT_NOEXCEPT { return 0; } @@ -449,7 +460,7 @@ int AnchorGeneratorPluginDynamic::enqueue_impl( int AnchorGeneratorPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, - void* const* outputs, void* workspace, cudaStream_t stream) { + void* const* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT { assert(outputDesc[0].type == nvinfer1::DataType::kFLOAT); assert(outputDesc[1].type == nvinfer1::DataType::kFLOAT); return enqueue_impl(inputDesc, outputDesc, inputs, outputs, workspace, @@ -457,21 +468,24 @@ int AnchorGeneratorPluginDynamic::enqueue( } nvinfer1::DataType AnchorGeneratorPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType* inputTypes, int nbInputs) const { + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT { return inputTypes[0]; } -const char* AnchorGeneratorPluginDynamic::getPluginType() const { +const char* AnchorGeneratorPluginDynamic::getPluginType() const TRT_NOEXCEPT { return "anchor_generator_plugin_dynamic"; } -int AnchorGeneratorPluginDynamic::getNbOutputs() const { return 2; } +int AnchorGeneratorPluginDynamic::getNbOutputs() const TRT_NOEXCEPT { + return 2; +} -int AnchorGeneratorPluginDynamic::initialize() { return 0; } +int AnchorGeneratorPluginDynamic::initialize() TRT_NOEXCEPT { return 0; } -void AnchorGeneratorPluginDynamic::terminate() {} +void AnchorGeneratorPluginDynamic::terminate() TRT_NOEXCEPT {} -size_t AnchorGeneratorPluginDynamic::getSerializationSize() const { +size_t AnchorGeneratorPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { size_t serialize_size = 0; serialize_size += SerializedSize(data_type_); serialize_size += SerializedSize(anchor_sizes_); @@ -483,7 +497,7 @@ size_t AnchorGeneratorPluginDynamic::getSerializationSize() const { return serialize_size; } -void AnchorGeneratorPluginDynamic::serialize(void* buffer) const { +void AnchorGeneratorPluginDynamic::serialize(void* buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, data_type_); SerializeValue(&buffer, anchor_sizes_); SerializeValue(&buffer, aspect_ratios_); @@ -493,32 +507,35 @@ void AnchorGeneratorPluginDynamic::serialize(void* buffer) const { SerializeValue(&buffer, num_anchors_); } -void AnchorGeneratorPluginDynamic::destroy() {} +void AnchorGeneratorPluginDynamic::destroy() TRT_NOEXCEPT {} void AnchorGeneratorPluginDynamicCreator::setPluginNamespace( - const char* lib_namespace) { + const char* lib_namespace) TRT_NOEXCEPT { namespace_ = std::string(lib_namespace); } -const char* AnchorGeneratorPluginDynamicCreator::getPluginNamespace() const { +const char* AnchorGeneratorPluginDynamicCreator::getPluginNamespace() const + TRT_NOEXCEPT { return namespace_.c_str(); } -const char* AnchorGeneratorPluginDynamicCreator::getPluginName() const { +const char* AnchorGeneratorPluginDynamicCreator::getPluginName() const + TRT_NOEXCEPT { return "anchor_generator_plugin_dynamic"; } -const char* AnchorGeneratorPluginDynamicCreator::getPluginVersion() const { +const char* AnchorGeneratorPluginDynamicCreator::getPluginVersion() const + TRT_NOEXCEPT { return "1"; } const nvinfer1::PluginFieldCollection* -AnchorGeneratorPluginDynamicCreator::getFieldNames() { 
+AnchorGeneratorPluginDynamicCreator::getFieldNames() TRT_NOEXCEPT { return &field_collection_; } nvinfer1::IPluginV2Ext* AnchorGeneratorPluginDynamicCreator::createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) { + const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT { const nvinfer1::PluginField* fields = fc->fields; int type_id = -1; std::vector anchor_sizes, aspect_ratios, stride, variances; @@ -555,7 +572,8 @@ nvinfer1::IPluginV2Ext* AnchorGeneratorPluginDynamicCreator::createPlugin( } nvinfer1::IPluginV2Ext* AnchorGeneratorPluginDynamicCreator::deserializePlugin( - const char* name, const void* serial_data, size_t serial_length) { + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT { auto plugin = new AnchorGeneratorPluginDynamic(serial_data, serial_length); plugin->setPluginNamespace(namespace_.c_str()); return plugin; diff --git a/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h index 458326d0679ca9..3d265dfb5933e9 100644 --- a/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h @@ -34,34 +34,35 @@ class AnchorGeneratorPlugin : public nvinfer1::IPluginV2Ext { const int width, const int num_anchors, const int box_num); AnchorGeneratorPlugin(const void* data, size_t length); ~AnchorGeneratorPlugin() override; - const char* getPluginType() const override; - const char* getPluginVersion() const override; - int getNbOutputs() const override; + const char* getPluginType() const TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nb_input_dims) override; - bool supportsFormat(nvinfer1::DataType type, - nvinfer1::TensorFormat format) const override; - size_t getWorkspaceSize(int max_batch_size) const override; + int nb_input_dims) TRT_NOEXCEPT override; + bool supportsFormat(nvinfer1::DataType type, nvinfer1::TensorFormat format) + const TRT_NOEXCEPT override; + size_t getWorkspaceSize(int max_batch_size) const TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batch_size, const void* const* inputs, void** outputs, #else int enqueue(int batch_size, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; - int initialize() override; - void terminate() override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - void destroy() override; - void setPluginNamespace(const char* lib_namespace) override; - const char* getPluginNamespace() const override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* input_type, - int nb_inputs) const override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; + void destroy() TRT_NOEXCEPT override; + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* input_type, + int nb_inputs) const TRT_NOEXCEPT override; bool 
isOutputBroadcastAcrossBatch(int output_index, const bool* input_is_broadcast, - int nb_inputs) const override; - bool canBroadcastInputAcrossBatch(int input_index) const override; + int nb_inputs) const TRT_NOEXCEPT override; + bool canBroadcastInputAcrossBatch(int input_index) const + TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::Dims* input_dims, int nb_inputs, const nvinfer1::Dims* output_dims, int nb_outputs, const nvinfer1::DataType* input_types, @@ -69,13 +70,13 @@ class AnchorGeneratorPlugin : public nvinfer1::IPluginV2Ext { const bool* input_is_broadcast, const bool* output_is_broadcast, nvinfer1::PluginFormat float_format, - int max_batct_size) override; - nvinfer1::IPluginV2Ext* clone() const override; + int max_batct_size) TRT_NOEXCEPT override; + nvinfer1::IPluginV2Ext* clone() const TRT_NOEXCEPT override; private: template - int enqueue_impl(int batch_size, const void* const* inputs, void** outputs, - void* workspace, cudaStream_t stream); + int enqueue_impl(int batch_size, const void* const* inputs, + void* const* outputs, void* workspace, cudaStream_t stream); nvinfer1::DataType data_type_; std::vector anchor_sizes_; std::vector aspect_ratios_; @@ -97,16 +98,17 @@ class AnchorGeneratorPluginCreator : public nvinfer1::IPluginCreator { public: AnchorGeneratorPluginCreator() = default; ~AnchorGeneratorPluginCreator() override = default; - void setPluginNamespace(const char* lib_namespace) override; - const char* getPluginNamespace() const override; - const char* getPluginName() const override; - const char* getPluginVersion() const override; - const nvinfer1::PluginFieldCollection* getFieldNames() override; + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; + const char* getPluginName() const TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; nvinfer1::IPluginV2Ext* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override; - nvinfer1::IPluginV2Ext* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override; + const char* name, + const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT override; + nvinfer1::IPluginV2Ext* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override; private: std::string namespace_; @@ -127,35 +129,36 @@ class AnchorGeneratorPluginDynamic : public DynamicPluginTensorRT { const int num_anchors); AnchorGeneratorPluginDynamic(void const* data, size_t length); ~AnchorGeneratorPluginDynamic(); - nvinfer1::IPluginV2DynamicExt* clone() const override; + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) override; + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override; + int nbOutputs) TRT_NOEXCEPT override; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int 
nbOutputs) const override; + int nbOutputs) const TRT_NOEXCEPT override; int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; - const char* getPluginType() const override; - int getNbOutputs() const override; - int initialize() override; - void terminate() override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - void destroy() override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; + const char* getPluginType() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; + void destroy() TRT_NOEXCEPT override; private: template @@ -181,16 +184,17 @@ class AnchorGeneratorPluginDynamicCreator : public nvinfer1::IPluginCreator { public: AnchorGeneratorPluginDynamicCreator() = default; ~AnchorGeneratorPluginDynamicCreator() override = default; - void setPluginNamespace(const char* lib_namespace) override; - const char* getPluginNamespace() const override; - const char* getPluginName() const override; - const char* getPluginVersion() const override; - const nvinfer1::PluginFieldCollection* getFieldNames() override; + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; + const char* getPluginName() const TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; nvinfer1::IPluginV2Ext* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override; - nvinfer1::IPluginV2Ext* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override; + const char* name, + const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT override; + nvinfer1::IPluginV2Ext* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override; private: std::string namespace_; diff --git a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu index 3338aae370e514..69e0075729b0dc 100644 --- a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu @@ -48,7 +48,7 @@ __global__ void elementwise_kernel(const size_t total, const T *x_data, } nvinfer1::Dims ElementWisePlugin::getOutputDimensions( - int index, const nvinfer1::Dims *input_dims, int num_inputs) { + int index, const nvinfer1::Dims *input_dims, int num_inputs) TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "There is only one output in TRT elementwise " "op plugin, but got output index: %d.", @@ -64,7 +64,7 @@ nvinfer1::Dims ElementWisePlugin::getOutputDimensions( return input_dims[0]; } -int ElementWisePlugin::initialize() { +int ElementWisePlugin::initialize() TRT_NOEXCEPT { PADDLE_ENFORCE_GT(dims_y_.nbDims, 0, 
platform::errors::InvalidArgument( "The dimension of input Y of TRT elementwise op plugin " @@ -120,7 +120,7 @@ int ElementWisePlugin::enqueue(int batch_size, const void *const *inputs, #else void *const *outputs, void *workspace, #endif - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { const float *x = reinterpret_cast(inputs[0]); const float *y = reinterpret_cast(inputs[1]); float *out = reinterpret_cast(outputs[0]); @@ -147,26 +147,26 @@ int ElementWisePlugin::enqueue(int batch_size, const void *const *inputs, // Dynamic Plugin below. #if IS_TRT_VERSION_GE(6000) -int ElementwisePluginDynamic::initialize() { return 0; } +int ElementwisePluginDynamic::initialize() TRT_NOEXCEPT { return 0; } -size_t ElementwisePluginDynamic::getSerializationSize() const { +size_t ElementwisePluginDynamic::getSerializationSize() const TRT_NOEXCEPT { return SerializedSize(type_.c_str()) + SerializedSize(axis_); } -void ElementwisePluginDynamic::serialize(void *buffer) const { +void ElementwisePluginDynamic::serialize(void *buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, type_.c_str()); SerializeValue(&buffer, axis_); } nvinfer1::DimsExprs ElementwisePluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) { + nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { return inputs[0]; } bool ElementwisePluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of swish plugin shoule not be nullptr.")); @@ -189,7 +189,8 @@ bool ElementwisePluginDynamic::supportsFormatCombination( } nvinfer1::DataType ElementwisePluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType *input_types, int nb_inputs) const { + int index, const nvinfer1::DataType *input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "The Elementwise Plugin only has one input, so the " @@ -201,7 +202,7 @@ nvinfer1::DataType ElementwisePluginDynamic::getOutputDataType( int ElementwisePluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, - void *const *outputs, void *workspace, cudaStream_t stream) { + void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT { auto x_dims = input_desc[0].dims; auto y_dims = input_desc[1].dims; int axis = (axis_ == -1) ? 
x_dims.nbDims - y_dims.nbDims : axis_; diff --git a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h index 5dd3142c758398..aa1ab5389a5720 100644 --- a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h @@ -48,33 +48,35 @@ class ElementWisePlugin : public PluginTensorRT { DeserializeValue(&serial_data, &serial_length, &post_size_); } - ElementWisePlugin* clone() const override { + ElementWisePlugin* clone() const TRT_NOEXCEPT override { return new ElementWisePlugin(type_, dims_x_, dims_y_, axis_); } - const char* getPluginType() const override { return "elementwise_plugin"; } + const char* getPluginType() const TRT_NOEXCEPT override { + return "elementwise_plugin"; + } nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* input_dims, - int num_inputs) override; + int num_inputs) TRT_NOEXCEPT override; - int initialize() override; + int initialize() TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batch_size, const void* const* inputs, void** outputs, #else int enqueue(int batch_size, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream); + void* workspace, cudaStream_t stream) TRT_NOEXCEPT; - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return getBaseSerializationSize() + SerializedSize(type_.c_str()) + SerializedSize(dims_x_) + SerializedSize(dims_y_) + SerializedSize(axis_) + SerializedSize(prev_size_) + SerializedSize(midd_size_) + SerializedSize(post_size_); } - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { serializeBase(buffer); SerializeValue(&buffer, type_.c_str()); SerializeValue(&buffer, dims_x_); @@ -97,13 +99,15 @@ class ElementWisePlugin : public PluginTensorRT { class ElementWisePluginCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "elementwise_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "elementwise_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new ElementWisePlugin(serial_data, serial_length); } }; @@ -120,48 +124,49 @@ class ElementwisePluginDynamic : public DynamicPluginTensorRT { type_ = std::string(elementwise_type); DeserializeValue(&serialData, &serialLength, &axis_); } - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { return new ElementwisePluginDynamic(type_, axis_); } - const char* getPluginType() const override { + const char* getPluginType() const TRT_NOEXCEPT override { return "elementwise_plugin_dynamic"; } - int getNbOutputs() const override { return 1; } - int initialize() override; + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const 
TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override {} + int nbOutputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override { + int nbOutputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } private: std::string type_; @@ -171,33 +176,34 @@ class ElementwisePluginDynamic : public DynamicPluginTensorRT { class ElementwisePluginDynamicCreator : public nvinfer1::IPluginCreator { public: ElementwisePluginDynamicCreator() {} - const char* getPluginName() const override { + const char* getPluginName() const TRT_NOEXCEPT override { return "elementwise_plugin_dynamic"; } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - const nvinfer1::PluginFieldCollection* getFieldNames() override { + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override { return &field_collection_; } - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override { + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override { return nullptr; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { auto plugin = new ElementwisePluginDynamic(serial_data, serial_length); return plugin; } - void setPluginNamespace(const char* lib_namespace) override { + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override { plugin_namespace_ = lib_namespace; } - const char* getPluginNamespace() const override { + const char* getPluginNamespace() const TRT_NOEXCEPT override { return plugin_namespace_.c_str(); } diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu index 79fc3d66bbe4dd..a8e9a94955f702 100644 --- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu @@ -128,7 +128,7 @@ template int EmbEltwiseLayernormPluginDynamicImpl::enqueue( const nvinfer1::PluginTensorDesc *input_desc, 
const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, - void *const *outputs, void *workspace, cudaStream_t stream) { + void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT { auto id_dims = input_desc[0].dims; int batch = id_dims.d[0]; int seq_len = id_dims.d[1]; @@ -181,17 +181,19 @@ template class EmbEltwiseLayernormPluginDynamicImpl; template class EmbEltwiseLayernormPluginDynamicImpl; #endif -int EmbEltwiseLayernormPluginDynamic::initialize() { +int EmbEltwiseLayernormPluginDynamic::initialize() TRT_NOEXCEPT { impl_->initialize(); return 0; } -void EmbEltwiseLayernormPluginDynamic::terminate() { impl_->terminate(); } +void EmbEltwiseLayernormPluginDynamic::terminate() TRT_NOEXCEPT { + impl_->terminate(); +} nvinfer1::DimsExprs EmbEltwiseLayernormPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) { // NOLINT + nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { // NOLINT PADDLE_ENFORCE_EQ(output_index, 0, platform::errors::InvalidArgument( "There is only one output of the EmbEltwiseLayernorm, " @@ -208,7 +210,7 @@ nvinfer1::DimsExprs EmbEltwiseLayernormPluginDynamic::getOutputDimensions( bool EmbEltwiseLayernormPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of swish plugin shoule not be nullptr.")); @@ -256,7 +258,8 @@ bool EmbEltwiseLayernormPluginDynamic::supportsFormatCombination( } nvinfer1::DataType EmbEltwiseLayernormPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType *input_types, int nb_inputs) const { + int index, const nvinfer1::DataType *input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ( index, 0, platform::errors::InvalidArgument( "The EmbEltwiseLayernorm Plugin only has one input, so the " @@ -271,7 +274,7 @@ nvinfer1::DataType EmbEltwiseLayernormPluginDynamic::getOutputDataType( int EmbEltwiseLayernormPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, - void *const *outputs, void *workspace, cudaStream_t stream) { + void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT { impl_->enqueue(input_desc, output_desc, inputs, outputs, workspace, stream); return cudaGetLastError() != cudaSuccess; } diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h index 7de84a8fc49bcc..f44391310cc219 100644 --- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h @@ -68,7 +68,7 @@ class EmbEltwiseLayernormPluginDynamicImpl int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream); + cudaStream_t stream) TRT_NOEXCEPT; void shareGPUData(const EmbEltwiseLayernormPluginDynamicImplBase* anthor); private: @@ -189,7 +189,7 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { } } - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { auto ptr = new EmbEltwiseLayernormPluginDynamic( embs_, bias_, scale_, emb_sizes_, 
bias_size_, scale_size_, hidden_size_, eps_, with_fp16_); @@ -197,14 +197,14 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { return ptr; } - const char* getPluginType() const override { + const char* getPluginType() const TRT_NOEXCEPT override { return "fused_embedding_eltwise_layernorm_plugin"; } - int getNbOutputs() const override { return 1; } - int initialize() override; - void terminate() override; + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { int sum_num = 0; sum_num += SerializedSize(emb_sizes_); @@ -223,7 +223,7 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { return sum_num; } - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { SerializeValue(&buffer, emb_sizes_); for (size_t i = 0; i < emb_sizes_.size(); i++) { auto size = emb_sizes_[i]; @@ -248,33 +248,34 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* in_out, - int nb_inputs, int nb_outputs) override; + int nb_inputs, + int nb_outputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nb_inputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nb_outputs) override {} + int nb_outputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nb_inputs, const nvinfer1::PluginTensorDesc* outputs, - int nb_outputs) const override { + int nb_outputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* input_desc, const nvinfer1::PluginTensorDesc* output_desc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* input_types, - int nb_inputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT override; - void destroy() override { + void destroy() TRT_NOEXCEPT override { if (own_host_buff_) { for (auto ptr : embs_) { delete[] ptr; @@ -310,32 +311,33 @@ class EmbEltwiseLayernormPluginDynamicCreator : public nvinfer1::IPluginCreator { public: EmbEltwiseLayernormPluginDynamicCreator() {} - const char* getPluginName() const override { + const char* getPluginName() const TRT_NOEXCEPT override { return "fused_embedding_eltwise_layernorm_plugin"; } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - const nvinfer1::PluginFieldCollection* getFieldNames() override { + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override { return &field_collection_; } - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override { + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override { return nullptr; } - nvinfer1::IPluginV2* 
deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new EmbEltwiseLayernormPluginDynamic(serial_data, serial_length); } - void setPluginNamespace(const char* lib_namespace) override { + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override { plugin_namespace_ = lib_namespace; } - const char* getPluginNamespace() const override { + const char* getPluginNamespace() const TRT_NOEXCEPT override { return plugin_namespace_.c_str(); } diff --git a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.cu index 933ca333cdbb93..4371cc69f33341 100644 --- a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.cu @@ -58,19 +58,19 @@ __global__ void GatherNdCUDAKernel(const T* input, const int32_t* input_dims, } } -int GatherNdPluginDynamic::initialize() { return 0; } +int GatherNdPluginDynamic::initialize() TRT_NOEXCEPT { return 0; } -size_t GatherNdPluginDynamic::getSerializationSize() const { +size_t GatherNdPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { return SerializedSize(with_fp16_); } -void GatherNdPluginDynamic::serialize(void* buffer) const { +void GatherNdPluginDynamic::serialize(void* buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, with_fp16_); } nvinfer1::DimsExprs GatherNdPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) { + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT { PADDLE_ENFORCE_EQ( nb_inputs, 2, platform::errors::InvalidArgument( @@ -100,7 +100,7 @@ nvinfer1::DimsExprs GatherNdPluginDynamic::getOutputDimensions( bool GatherNdPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc* in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of gather_nd plugin should not be nullptr.")); @@ -134,14 +134,15 @@ bool GatherNdPluginDynamic::supportsFormatCombination( } nvinfer1::DataType GatherNdPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType* input_types, int nb_inputs) const { + int index, const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT { return input_types[0]; } int GatherNdPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc* input_desc, const nvinfer1::PluginTensorDesc* output_desc, const void* const* inputs, - void* const* outputs, void* workspace, cudaStream_t stream) { + void* const* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; auto index_dims = input_desc[1].dims; auto input_dims_size = input_dims.nbDims; diff --git a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h index 0a242238c81fb3..841fb2f6fe399f 100644 --- a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h @@ -35,46 +35,49 @@ class GatherNdPluginDynamic : public DynamicPluginTensorRT { DeserializeValue(&serial_data, &serial_length, &with_fp16_); } - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { return 
new GatherNdPluginDynamic(with_fp16_); } - const char* getPluginType() const override { return "gather_nd_plugin"; } - int getNbOutputs() const override { return 1; } - int initialize() override; + const char* getPluginType() const TRT_NOEXCEPT override { + return "gather_nd_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) override; + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override {} + int nbOutputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override { + int nbOutputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - void destroy() override { + void destroy() TRT_NOEXCEPT override { if (input_dims_data_) { cudaFree(input_dims_data_); } @@ -88,31 +91,34 @@ class GatherNdPluginDynamic : public DynamicPluginTensorRT { class GatherNdPluginDynamicCreator : public nvinfer1::IPluginCreator { public: GatherNdPluginDynamicCreator() {} - const char* getPluginName() const override { return "gather_nd_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "gather_nd_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - const nvinfer1::PluginFieldCollection* getFieldNames() override { + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override { return &field_collection_; } - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override { + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override { return nullptr; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { auto plugin = new GatherNdPluginDynamic(serial_data, serial_length); return plugin; } - void setPluginNamespace(const char* lib_namespace) override { + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override { plugin_namespace_ = lib_namespace; } - const char* getPluginNamespace() const override 
{ + const char* getPluginNamespace() const TRT_NOEXCEPT override { return plugin_namespace_.c_str(); } diff --git a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu index 43557c341ef42e..08b259e0f952e1 100644 --- a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu @@ -30,8 +30,8 @@ static const float kAT = 0.5; static const float kBT = 0.7978845608028654; // sqrt(2.0/M_PI) static const float kCT = 0.035677408136300125; // 0.044715 * sqrt(2.0/M_PI) -bool GeluPlugin::supportsFormat(nvinfer1::DataType type, - nvinfer1::PluginFormat format) const { +bool GeluPlugin::supportsFormat( + nvinfer1::DataType type, nvinfer1::PluginFormat format) const TRT_NOEXCEPT { if (with_fp16_) { return ((type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF) && @@ -44,7 +44,7 @@ bool GeluPlugin::supportsFormat(nvinfer1::DataType type, nvinfer1::Dims GeluPlugin::getOutputDimensions(int index, const nvinfer1::Dims* in_dims, - int nb_inputs) { + int nb_inputs) TRT_NOEXCEPT { assert(nb_inputs == 1); assert(index < this->getNbOutputs()); nvinfer1::Dims const& input_dims = in_dims[0]; @@ -96,7 +96,8 @@ int GeluPlugin::enqueue(int batch_size, const void* const* inputs, #if IS_TRT_VERSION_LT(8000) void** outputs, void*, cudaStream_t stream) { #else - void* const* outputs, void*, cudaStream_t stream) { + void* const* outputs, void*, + cudaStream_t stream) TRT_NOEXCEPT { #endif const auto& input_dims = this->getInputDims(0); int num = batch_size; @@ -132,13 +133,13 @@ int GeluPlugin::enqueue(int batch_size, const void* const* inputs, nvinfer1::DimsExprs GeluPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) { + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT { return inputs[0]; } bool GeluPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc* in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of swish plugin shoule not be nullptr.")); @@ -167,7 +168,8 @@ bool GeluPluginDynamic::supportsFormatCombination( } nvinfer1::DataType GeluPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType* input_types, int nb_inputs) const { + int index, const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "The Gelu Plugin only has one input, so the " "index value should be 0, but get %d.", @@ -178,7 +180,8 @@ nvinfer1::DataType GeluPluginDynamic::getOutputDataType( int GeluPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc, const nvinfer1::PluginTensorDesc* output_desc, const void* const* inputs, void* const* outputs, - void* workspace, cudaStream_t stream) { + void* workspace, + cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; size_t num = ProductDim(input_dims); const int block_size = 256; diff --git a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h index 6fdd9791a61bdb..7efdd2798b2640 100644 --- a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h @@ -35,40 +35,48 @@ class GeluPlugin : public PluginTensorRT { } ~GeluPlugin() {} - GeluPlugin* clone() const override { return new 
GeluPlugin(with_fp16_); } + GeluPlugin* clone() const TRT_NOEXCEPT override { + return new GeluPlugin(with_fp16_); + } - const char* getPluginType() const override { return "gelu_plugin"; } - int getNbOutputs() const override { return 1; } - int initialize() override { return 0; } - bool supportsFormat(nvinfer1::DataType type, - nvinfer1::PluginFormat format) const override; + const char* getPluginType() const TRT_NOEXCEPT override { + return "gelu_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override { return 0; } + bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) + const TRT_NOEXCEPT override; nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nb_input_dims) override; + int nb_input_dims) TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batch_size, const void* const* inputs, void** outputs, #else int enqueue(int batch_size, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return getBaseSerializationSize(); } // TRT will call this func to serialize the configuration of TRT // It should not be called by users. - void serialize(void* buffer) const override { serializeBase(buffer); } + void serialize(void* buffer) const TRT_NOEXCEPT override { + serializeBase(buffer); + } }; class GeluPluginCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "gelu_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "gelu_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new GeluPlugin(serial_data, serial_length); } }; @@ -83,61 +91,66 @@ class GeluPluginDynamic : public DynamicPluginTensorRT { } ~GeluPluginDynamic() {} - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { return new GeluPluginDynamic(with_fp16_); } - const char* getPluginType() const override { return "gelu_plugin_dynamic"; } - int getNbOutputs() const override { return 1; } - int initialize() override { return 0; } + const char* getPluginType() const TRT_NOEXCEPT override { + return "gelu_plugin_dynamic"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override { return 0; } - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return SerializedSize(with_fp16_); } - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { SerializeValue(&buffer, with_fp16_); } nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* in_out, - int nb_inputs, int nb_outputs) override; + int 
nb_inputs, + int nb_outputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nb_inputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nb_outputs) override {} + int nb_outputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nb_inputs, const nvinfer1::PluginTensorDesc* outputs, - int nb_outputs) const override { + int nb_outputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* input_desc, const nvinfer1::PluginTensorDesc* output_desc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* input_types, - int nb_inputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT override; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } }; class GeluPluginDynamicCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "gelu_plugin_dynamic"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "gelu_plugin_dynamic"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { auto plugin = new GeluPluginDynamic(serial_data, serial_length); return plugin; } diff --git a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu index dab7ddac1957a1..28060bd2facbee 100644 --- a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu @@ -22,7 +22,7 @@ namespace tensorrt { namespace plugin { nvinfer1::Dims HardSwishPlugin::getOutputDimensions( - int index, const nvinfer1::Dims* in_dims, int nb_inputs) { + int index, const nvinfer1::Dims* in_dims, int nb_inputs) TRT_NOEXCEPT { assert(nb_inputs == 1); assert(index < this->getNbOutputs()); nvinfer1::Dims const& input_dims = in_dims[0]; @@ -54,7 +54,8 @@ int HardSwishPlugin::enqueue(int batch_size, const void* const* inputs, #if IS_TRT_VERSION_LT(8000) void** outputs, void*, cudaStream_t stream) { #else - void* const* outputs, void*, cudaStream_t stream) { + void* const* outputs, void*, + cudaStream_t stream) TRT_NOEXCEPT { #endif const auto& input_dims = this->getInputDims(0); int num = batch_size; diff --git a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h index 42c47959988a50..5dfa00ef1c204e 100644 --- a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h @@ -40,30 +40,32 @@ class HardSwishPlugin : public PluginTensorRT { } ~HardSwishPlugin() {} - HardSwishPlugin* clone() const override { + HardSwishPlugin* clone() const TRT_NOEXCEPT override { return new HardSwishPlugin(threshold_, scale_, offset_); } - const char* getPluginType() const override { return "hard_swish_plugin"; } - int getNbOutputs() const override { return 1; 
} - int initialize() override { return 0; } + const char* getPluginType() const TRT_NOEXCEPT override { + return "hard_swish_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override { return 0; } nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) override; + int nbInputDims) TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batchSize, const void* const* inputs, void** outputs, #else int enqueue(int batchSize, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return getBaseSerializationSize() + SerializedSize(threshold_) + SerializedSize(scale_) + SerializedSize(offset_); } // TRT will call this func to serialize the configuration of TRT // It should not be called by users. - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { serializeBase(buffer); SerializeValue(&buffer, threshold_); SerializeValue(&buffer, scale_); @@ -78,13 +80,15 @@ class HardSwishPlugin : public PluginTensorRT { class HardSwishPluginCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "hard_swish_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "hard_swish_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new HardSwishPlugin(serial_data, serial_length); } }; diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu index 13aa6df643e82a..b7c4fb7c99acfd 100644 --- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu @@ -39,10 +39,10 @@ cudnnStatus_t convert_trt2cudnn_dtype(nvinfer1::DataType trt_dtype, return CUDNN_STATUS_SUCCESS; } -int InstanceNormPlugin::initialize() { return 0; } +int InstanceNormPlugin::initialize() TRT_NOEXCEPT { return 0; } nvinfer1::Dims InstanceNormPlugin::getOutputDimensions( - int index, const nvinfer1::Dims *inputDims, int nbInputs) { + int index, const nvinfer1::Dims *inputDims, int nbInputs) TRT_NOEXCEPT { assert(nbInputs == 1); assert(index < this->getNbOutputs()); nvinfer1::Dims const &input_dims = inputDims[0]; @@ -50,8 +50,8 @@ nvinfer1::Dims InstanceNormPlugin::getOutputDimensions( return output_dims; } -bool InstanceNormPlugin::supportsFormat(nvinfer1::DataType type, - nvinfer1::PluginFormat format) const { +bool InstanceNormPlugin::supportsFormat( + nvinfer1::DataType type, nvinfer1::PluginFormat format) const TRT_NOEXCEPT { return ((type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF) && (format == nvinfer1::PluginFormat::kLINEAR)); @@ -63,7 +63,7 @@ int InstanceNormPlugin::enqueue(int batch_size, const void *const *inputs, #else void *const *outputs, void *workspace, #endif - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { const auto 
&input_dims = this->getInputDims(0); PADDLE_ENFORCE_EQ(input_dims.nbDims, 3, diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h index f9dab09beebd3a..8b1507256757fc 100644 --- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h @@ -39,7 +39,7 @@ class InstanceNormPlugin : public PluginTensorRT { cudnnTensorDescriptor_t x_desc_, y_desc_, b_desc_; public: - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return getBaseSerializationSize() + SerializedSize(eps_) + SerializedSize(scale_) + SerializedSize(bias_); } @@ -47,7 +47,7 @@ class InstanceNormPlugin : public PluginTensorRT { // TRT will call this func when we need to serialize the configuration of // tensorrt. // It should not be called by users. - void serialize(void *buffer) const override { + void serialize(void *buffer) const TRT_NOEXCEPT override { serializeBase(buffer); SerializeValue(&buffer, eps_); SerializeValue(&buffer, scale_); @@ -89,37 +89,41 @@ class InstanceNormPlugin : public PluginTensorRT { platform::dynload::cudnnDestroyTensorDescriptor(b_desc_); } - int initialize() override; + int initialize() TRT_NOEXCEPT override; - InstanceNormPlugin *clone() const override { + InstanceNormPlugin *clone() const TRT_NOEXCEPT override { return new InstanceNormPlugin(eps_, scale_, bias_); } - const char *getPluginType() const override { return "instance_norm_plugin"; } - int getNbOutputs() const override { return 1; } + const char *getPluginType() const TRT_NOEXCEPT override { + return "instance_norm_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims *inputs, - int nbInputDims) override; + int nbInputDims) TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batchSize, const void *const *inputs, void **outputs, #else int enqueue(int batchSize, const void *const *inputs, void *const *outputs, #endif - void *workspace, cudaStream_t stream) override; + void *workspace, cudaStream_t stream) TRT_NOEXCEPT override; - bool supportsFormat(nvinfer1::DataType type, - nvinfer1::PluginFormat format) const override; + bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) + const TRT_NOEXCEPT override; }; class InstanceNormPluginCreator : public TensorRTPluginCreator { public: - const char *getPluginName() const override { return "instance_norm_plugin"; } + const char *getPluginName() const TRT_NOEXCEPT override { + return "instance_norm_plugin"; + } - const char *getPluginVersion() const override { return "1"; } + const char *getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2 *deserializePlugin(const char *name, - const void *serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2 *deserializePlugin( + const char *name, const void *serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new InstanceNormPlugin(serial_data, serial_length); } }; diff --git a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu index 2688380726f78e..325aed89f29f01 100644 --- a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu @@ -24,10 +24,10 @@ namespace inference { namespace tensorrt { 
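TRT_NOEXCEPT, added throughout this patch, is a version-compatibility shim: TensorRT 8 marked the IPluginV2 interface methods noexcept, so overrides must carry the matching exception specification on TRT 8+, while older releases still expect the unqualified signatures. A minimal sketch of such a macro, assuming only the version macros from NvInferVersion.h (the patch's real definition lives in its plugin base header and may differ):

    // Illustrative sketch only.
    #include <NvInferVersion.h>  // NV_TENSORRT_MAJOR / MINOR / PATCH

    // Collapse the three-part TensorRT version into one comparable number.
    #define SKETCH_TRT_VERSION \
      (NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + NV_TENSORRT_PATCH)

    #if SKETCH_TRT_VERSION >= 8000
    #define TRT_NOEXCEPT noexcept  // TRT 8+: interface methods are noexcept
    #else
    #define TRT_NOEXCEPT           // older TRT: keep signatures unchanged
    #endif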
 namespace plugin {
-int LayerNormPlugin::initialize() { return 0; }
+int LayerNormPlugin::initialize() TRT_NOEXCEPT { return 0; }
 nvinfer1::Dims LayerNormPlugin::getOutputDimensions(
-    int index, const nvinfer1::Dims *inputDims, int nbInputs) {
+    int index, const nvinfer1::Dims *inputDims, int nbInputs) TRT_NOEXCEPT {
   assert(nbInputs == 1);
   assert(index < this->getNbOutputs());
   nvinfer1::Dims const &input_dims = inputDims[0];
@@ -41,10 +41,10 @@ int LayerNormPlugin::enqueue(int batch_size, const void *const *inputs,
 #else
                              void *const *outputs, void *workspace,
 #endif
-                             cudaStream_t stream) {
+                             cudaStream_t stream) TRT_NOEXCEPT {
   const auto &input_dims = this->getInputDims(0);
   const float *input = reinterpret_cast<const float *>(inputs[0]);
-  float *output = reinterpret_cast<float **>(outputs)[0];
+  float *output = reinterpret_cast<float *const *>(outputs)[0];
   int begin_norm_axis = begin_norm_axis_;
   float eps = eps_;
@@ -91,13 +91,13 @@ int LayerNormPlugin::enqueue(int batch_size, const void *const *inputs,
 nvinfer1::DimsExprs LayerNormPluginDynamic::getOutputDimensions(
     int output_index, const nvinfer1::DimsExprs *inputDims, int nb_inputs,
-    nvinfer1::IExprBuilder &expr_builder) {
+    nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT {
   return inputDims[0];
 }
 bool LayerNormPluginDynamic::supportsFormatCombination(
     int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs,
-    int nb_outputs) {
+    int nb_outputs) TRT_NOEXCEPT {
   PADDLE_ENFORCE_NOT_NULL(
       in_out, platform::errors::InvalidArgument(
                   "The input of layernorm plugin shoule not be nullptr."));
@@ -118,7 +118,8 @@ bool LayerNormPluginDynamic::supportsFormatCombination(
 }
 nvinfer1::DataType LayerNormPluginDynamic::getOutputDataType(
-    int index, const nvinfer1::DataType *input_types, int nb_inputs) const {
+    int index, const nvinfer1::DataType *input_types,
+    int nb_inputs) const TRT_NOEXCEPT {
   PADDLE_ENFORCE_EQ(index, 0,
                     platform::errors::InvalidArgument(
                         "The LayerNormPlugin only has one input, so the "
@@ -130,7 +131,7 @@ nvinfer1::DataType LayerNormPluginDynamic::getOutputDataType(
 int LayerNormPluginDynamic::enqueue(
     const nvinfer1::PluginTensorDesc *input_desc,
     const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs,
-    void *const *outputs, void *workspace, cudaStream_t stream) {
+    void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT {
   const auto &input_dims = input_desc[0].dims;
   int begin_norm_axis = begin_norm_axis_;
   float eps = eps_;
diff --git a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h
index caa3c21db63fab..9e8ce302833731 100644
--- a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h
@@ -40,7 +40,7 @@ class LayerNormPlugin : public PluginTensorRT {
   std::vector<int64_t> variance_shape_;
  public:
-  size_t getSerializationSize() const override {
+  size_t getSerializationSize() const TRT_NOEXCEPT override {
     return getBaseSerializationSize() + SerializedSize(bias_) +
            SerializedSize(scale_) + SerializedSize(begin_norm_axis_) +
            SerializedSize(eps_) + SerializedSize(mean_shape_) +
@@ -50,7 +50,7 @@ class LayerNormPlugin : public PluginTensorRT {
   // TRT will call this func when we need to serialize the configuration of
   // tensorrt.
   // It should not be called by users.
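The three serialization hooks touched in the next hunk form a single contract: getSerializationSize() reports the byte count, serialize() writes exactly that many bytes, and the deserializing constructor reads the same fields back in the same order. A stripped-down sketch of that contract, with plain std::memcpy standing in for the SerializedSize/SerializeValue/DeserializeValue helpers used here; the type and its fields are invented for illustration:

    #include <cstddef>
    #include <cstring>

    struct ToyPluginState {  // hypothetical stand-in for a plugin's fields
      int begin_norm_axis = 1;
      float eps = 1e-5f;

      size_t getSerializationSize() const noexcept {
        return sizeof(begin_norm_axis) + sizeof(eps);
      }
      void serialize(void* buffer) const noexcept {
        char* p = static_cast<char*>(buffer);
        std::memcpy(p, &begin_norm_axis, sizeof(begin_norm_axis));
        p += sizeof(begin_norm_axis);
        std::memcpy(p, &eps, sizeof(eps));  // same order as the reader below
      }
      ToyPluginState(const void* data, size_t length) {  // "deserialize" ctor
        const char* p = static_cast<const char*>(data);
        std::memcpy(&begin_norm_axis, p, sizeof(begin_norm_axis));
        p += sizeof(begin_norm_axis);
        std::memcpy(&eps, p, sizeof(eps));
        (void)length;  // a real plugin should validate the length
      }
      ToyPluginState() = default;
    };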
- void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { serializeBase(buffer); SerializeValue(&buffer, bias_); SerializeValue(&buffer, scale_); @@ -86,35 +86,39 @@ class LayerNormPlugin : public PluginTensorRT { DeserializeValue(&serialData, &serialLength, &variance_shape_); } ~LayerNormPlugin() {} - int initialize() override; + int initialize() TRT_NOEXCEPT override; - LayerNormPlugin* clone() const override { + LayerNormPlugin* clone() const TRT_NOEXCEPT override { return new LayerNormPlugin(bias_.data(), bias_.size(), scale_.data(), scale_.size(), begin_norm_axis_, eps_, mean_shape_, variance_shape_); } - const char* getPluginType() const override { return "layernorm_plugin"; } - int getNbOutputs() const override { return 1; } + const char* getPluginType() const TRT_NOEXCEPT override { + return "layernorm_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) override; + int nbInputDims) TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batchSize, const void* const* inputs, void** outputs, #else int enqueue(int batchSize, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; }; class LayerNormPluginCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "layernorm_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "layernorm_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new LayerNormPlugin(serial_data, serial_length); } }; @@ -145,25 +149,25 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT { DeserializeValue(&serialData, &serialLength, &mean_shape_); DeserializeValue(&serialData, &serialLength, &variance_shape_); } - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { return new LayerNormPluginDynamic(bias_.data(), bias_.size(), scale_.data(), scale_.size(), begin_norm_axis_, eps_, mean_shape_, variance_shape_); } - const char* getPluginType() const override { + const char* getPluginType() const TRT_NOEXCEPT override { return "layernorm_plugin_dynamic"; } - int getNbOutputs() const override { return 1; } - int initialize() override { return 0; } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override { return 0; } - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return SerializedSize(bias_) + SerializedSize(scale_) + SerializedSize(begin_norm_axis_) + SerializedSize(eps_) + SerializedSize(mean_shape_) + SerializedSize(variance_shape_); } - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { SerializeValue(&buffer, bias_); SerializeValue(&buffer, scale_); SerializeValue(&buffer, begin_norm_axis_); @@ -174,33 +178,34 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT { nvinfer1::DimsExprs 
getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override {} + int nbOutputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override { + int nbOutputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } private: std::vector bias_; @@ -217,15 +222,15 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT { class LayerNormPluginDynamicCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { + const char* getPluginName() const TRT_NOEXCEPT override { return "layernorm_plugin_dynamic"; } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new LayerNormPluginDynamic(serial_data, serial_length); } }; diff --git a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu index 7e1d18227e2325..21c8812f3789e3 100644 --- a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu @@ -22,7 +22,7 @@ namespace plugin { nvinfer1::Dims PoolPlugin::getOutputDimensions(int index, const nvinfer1::Dims *inputDims, - int nbInputs) { + int nbInputs) TRT_NOEXCEPT { assert(nbInputs == 1); assert(index == 0); assert(inputDims[0].nbDims == 3); @@ -37,15 +37,16 @@ nvinfer1::Dims PoolPlugin::getOutputDimensions(int index, int PoolPlugin::enqueue(int batchSize, const void *const *inputs, #if IS_TRT_VERSION_LT(8000) - void **outputs, void *workspace, cudaStream_t stream) { + void **outputs, void *workspace, + cudaStream_t stream) TRT_NOEXCEPT { #else void *const *outputs, void *workspace, - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { #endif auto const &input_dims = this->getInputDims(0); int input_size = 0; float const *idata = reinterpret_cast(inputs[0]); - float **odatas = reinterpret_cast(outputs); + float *const *odatas = reinterpret_cast(outputs); std::vector input_shape = input_shape_; std::vector output_shape = output_shape_; @@ -87,14 +88,14 @@ PoolPluginDynamic::PoolPluginDynamic(void const *serialData, 
DeserializeValue(&serialData, &serialLength, &is_global_); } -size_t PoolPluginDynamic::getSerializationSize() const { +size_t PoolPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { return SerializedSize(ceil_mode_) + SerializedSize(pool_type_.c_str()) + SerializedSize(adaptive_) + SerializedSize(ksize_) + SerializedSize(strides_) + SerializedSize(paddings_) + SerializedSize(is_global_); } -void PoolPluginDynamic::serialize(void *buffer) const { +void PoolPluginDynamic::serialize(void *buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, ceil_mode_); SerializeValue(&buffer, pool_type_.c_str()); SerializeValue(&buffer, adaptive_); @@ -106,7 +107,7 @@ void PoolPluginDynamic::serialize(void *buffer) const { nvinfer1::DimsExprs PoolPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) { + nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(nb_inputs, 1, platform::errors::InvalidArgument( "The Split plugin should be only one input.")); @@ -181,7 +182,7 @@ nvinfer1::DimsExprs PoolPluginDynamic::getOutputDimensions( bool PoolPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of swish plugin shoule not be nullptr.")); @@ -198,7 +199,8 @@ bool PoolPluginDynamic::supportsFormatCombination( } nvinfer1::DataType PoolPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType *input_types, int nb_inputs) const { + int index, const nvinfer1::DataType *input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "The Pool Plugin only has one input, so the " "index value should be 0, but get %d.", @@ -212,7 +214,8 @@ nvinfer1::DataType PoolPluginDynamic::getOutputDataType( int PoolPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, void *const *outputs, - void *workspace, cudaStream_t stream) { + void *workspace, + cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; int n = input_dims.d[0]; int c = input_dims.d[1]; diff --git a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h index 7c12796805c5d1..6ced066a35952f 100644 --- a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h @@ -57,7 +57,7 @@ static std::vector CalcOutputSize(const std::vector& input_shape, class PoolPlugin : public PluginTensorRT { public: - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return getBaseSerializationSize() + SerializedSize(ceil_mode_) + SerializedSize(pool_type_) + SerializedSize(adaptive_) + SerializedSize(ksize_) + SerializedSize(strides_) + @@ -67,7 +67,7 @@ class PoolPlugin : public PluginTensorRT { // TRT will call this func when we need to serialize the configuration of // tensorrt. 
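The pool plugins above derive each output extent from kernel size, padding, stride and ceil_mode (see CalcOutputSize and PoolPluginDynamic::getOutputDimensions). A small self-contained illustration of that floor/ceil rule; the helper name and the sample numbers are made up:

    #include <cstdio>

    // Hypothetical helper mirroring the usual pooling output-size formula.
    static int PoolOutputSize(int input, int ksize, int pad, int stride,
                              bool ceil_mode) {
      if (ceil_mode) {
        return (input - ksize + 2 * pad + stride - 1) / stride + 1;
      }
      return (input - ksize + 2 * pad) / stride + 1;
    }

    int main() {
      // An 8-wide dimension, 3-wide window, no padding, stride 2:
      std::printf("floor mode: %d, ceil mode: %d\n",
                  PoolOutputSize(8, 3, 0, 2, false),  // -> 3
                  PoolOutputSize(8, 3, 0, 2, true));  // -> 4
      return 0;
    }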
- void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { serializeBase(buffer); SerializeValue(&buffer, ceil_mode_); SerializeValue(&buffer, pool_type_); @@ -116,22 +116,24 @@ class PoolPlugin : public PluginTensorRT { DeserializeValue(&serialData, &serialLength, &output_shape_); } - PoolPlugin* clone() const override { + PoolPlugin* clone() const TRT_NOEXCEPT override { return new PoolPlugin(ceil_mode_, pool_type_, adaptive_, ksize_, strides_, paddings_, input_shape_); } - const char* getPluginType() const override { return "pool_plugin"; } - int getNbOutputs() const override { return 1; } + const char* getPluginType() const TRT_NOEXCEPT override { + return "pool_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) override; - int initialize() override { return 0; } + int nbInputDims) TRT_NOEXCEPT override; + int initialize() TRT_NOEXCEPT override { return 0; } #if IS_TRT_VERSION_LT(8000) int enqueue(int batchSize, const void* const* inputs, void** outputs, #else int enqueue(int batchSize, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; private: bool ceil_mode_; @@ -146,13 +148,15 @@ class PoolPlugin : public PluginTensorRT { class PoolPluginCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "pool_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "pool_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new PoolPlugin(serial_data, serial_length); } }; @@ -176,47 +180,50 @@ class PoolPluginDynamic : public DynamicPluginTensorRT { PoolPluginDynamic(void const* serialData, size_t serialLength); ~PoolPluginDynamic() {} - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { return new PoolPluginDynamic(ceil_mode_, pool_type_, adaptive_, ksize_, strides_, paddings_, is_global_); } - const char* getPluginType() const override { return "pool_plugin_dynamic"; } - int getNbOutputs() const override { return 1; } - int initialize() override { return 0; } + const char* getPluginType() const TRT_NOEXCEPT override { + return "pool_plugin_dynamic"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override { return 0; } - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const 
nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override {} + int nbOutputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override { + int nbOutputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } private: bool ceil_mode_; @@ -230,13 +237,15 @@ class PoolPluginDynamic : public DynamicPluginTensorRT { class PoolPluginDynamicCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "pool_plugin_dynamic"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "pool_plugin_dynamic"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new PoolPluginDynamic(serial_data, serial_length); } }; diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu index 1882084a8f5169..5533fb0af3fc4f 100644 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu @@ -26,14 +26,14 @@ namespace inference { namespace tensorrt { namespace plugin { -int PReluPlugin::initialize() { +int PReluPlugin::initialize() TRT_NOEXCEPT { cudaMalloc(&p_gpu_weight_, sizeof(float) * weight_.size()); cudaMemcpy(p_gpu_weight_, weight_.data(), weight_.size() * sizeof(float), cudaMemcpyHostToDevice); return 0; } -void PReluPlugin::terminate() { +void PReluPlugin::terminate() TRT_NOEXCEPT { if (p_gpu_weight_) { cudaFree(p_gpu_weight_); p_gpu_weight_ = nullptr; @@ -42,7 +42,7 @@ void PReluPlugin::terminate() { nvinfer1::Dims PReluPlugin::getOutputDimensions(int index, const nvinfer1::Dims *inputDims, - int nbInputs) { + int nbInputs) TRT_NOEXCEPT { assert(nbInputs == 1); assert(index < this->getNbOutputs()); nvinfer1::Dims const &input_dims = inputDims[0]; @@ -55,14 +55,14 @@ int PReluPlugin::enqueue(int batch_size, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) { #else void *const *outputs, void *workspace, - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { #endif // input dims is CHW. 
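The PRelu hunks above copy the host-side weight_ vector to the GPU in initialize() and free it in terminate(), so enqueue() only dereferences a cached device pointer. A minimal sketch of that lifecycle using the standard CUDA runtime API; the class and its names are invented for illustration, and error handling is reduced to a return code:

    #include <cuda_runtime.h>

    #include <utility>
    #include <vector>

    // Hypothetical holder showing the initialize()/terminate() split.
    class ToyDeviceWeights {
     public:
      explicit ToyDeviceWeights(std::vector<float> w) : host_(std::move(w)) {}

      int initialize() noexcept {  // copy host weights to the GPU once
        if (cudaMalloc(&device_, host_.size() * sizeof(float)) != cudaSuccess) {
          return -1;
        }
        cudaMemcpy(device_, host_.data(), host_.size() * sizeof(float),
                   cudaMemcpyHostToDevice);
        return 0;
      }

      void terminate() noexcept {  // release the device copy, guard double free
        if (device_) {
          cudaFree(device_);
          device_ = nullptr;
        }
      }

      const float* device_ptr() const noexcept { return device_; }

     private:
      std::vector<float> host_;
      float* device_ = nullptr;
    };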
const auto &input_dims = this->getInputDims(0); const float *input = reinterpret_cast(inputs[0]); // const float *alpha = reinterpret_cast(alpha_.get().values); const float *alpha = p_gpu_weight_; - float *output = reinterpret_cast(outputs)[0]; + float *const output = reinterpret_cast(outputs)[0]; int numel = 1; for (int i = 0; i < input_dims.nbDims; i++) { numel *= input_dims.d[i]; @@ -86,13 +86,13 @@ int PReluPlugin::enqueue(int batch_size, const void *const *inputs, #if IS_TRT_VERSION_GE(6000) -void PReluPluginDynamic::terminate() { +void PReluPluginDynamic::terminate() TRT_NOEXCEPT { if (p_gpu_weight_) { cudaFree(p_gpu_weight_); } } -int PReluPluginDynamic::initialize() { +int PReluPluginDynamic::initialize() TRT_NOEXCEPT { cudaMalloc(&p_gpu_weight_, sizeof(float) * weight_.size()); cudaMemcpy(p_gpu_weight_, weight_.data(), weight_.size() * sizeof(float), cudaMemcpyHostToDevice); @@ -107,24 +107,24 @@ PReluPluginDynamic::PReluPluginDynamic(void const *serialData, mode_ = std::string(prelu_mode); } -size_t PReluPluginDynamic::getSerializationSize() const { +size_t PReluPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { return SerializedSize(mode_.c_str()) + SerializedSize(weight_); } -void PReluPluginDynamic::serialize(void *buffer) const { +void PReluPluginDynamic::serialize(void *buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, weight_); SerializeValue(&buffer, mode_.c_str()); } nvinfer1::DimsExprs PReluPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) { + nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { return inputs[0]; } bool PReluPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of swish plugin shoule not be nullptr.")); @@ -141,7 +141,8 @@ bool PReluPluginDynamic::supportsFormatCombination( } nvinfer1::DataType PReluPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType *input_types, int nb_inputs) const { + int index, const nvinfer1::DataType *input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "The PRelu Plugin only has one input, so the " "index value should be 0, but get %d.", @@ -155,7 +156,8 @@ nvinfer1::DataType PReluPluginDynamic::getOutputDataType( int PReluPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, void *const *outputs, - void *workspace, cudaStream_t stream) { + void *workspace, + cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; const float *alpha = p_gpu_weight_; const float *input = static_cast(inputs[0]); diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h index e3f05bdbe85a1b..c61b07e22d6eea 100644 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h @@ -34,7 +34,7 @@ class PReluPlugin : public PluginTensorRT { std::string mode_; public: - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return getBaseSerializationSize() + SerializedSize(mode_.c_str()) + SerializedSize(weight_); } @@ -42,7 +42,7 @@ class PReluPlugin : public PluginTensorRT { // TRT will call this 
func when we need to serialize the configuration of // tensorrt. // It should not be called by users. - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { serializeBase(buffer); SerializeValue(&buffer, weight_); SerializeValue(&buffer, mode_.c_str()); @@ -65,36 +65,40 @@ class PReluPlugin : public PluginTensorRT { mode_ = std::string(prelu_mode); } ~PReluPlugin() {} - int initialize() override; - void terminate() override; + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; - PReluPlugin* clone() const override { + PReluPlugin* clone() const TRT_NOEXCEPT override { auto* ptr = new PReluPlugin(weight_.data(), weight_.size(), mode_); ptr->p_gpu_weight_ = p_gpu_weight_; return ptr; } - const char* getPluginType() const override { return "prelu_plugin"; } - int getNbOutputs() const override { return 1; } + const char* getPluginType() const TRT_NOEXCEPT override { + return "prelu_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) override; + int nbInputDims) TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batchSize, const void* const* inputs, void** outputs, #else int enqueue(int batchSize, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; }; class PReluPluginCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "prelu_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "prelu_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new PReluPlugin(serial_data, serial_length); } }; @@ -112,49 +116,52 @@ class PReluPluginDynamic : public DynamicPluginTensorRT { PReluPluginDynamic(void const* serialData, size_t serialLength); ~PReluPluginDynamic() {} - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { auto ptr = new PReluPluginDynamic(weight_.data(), weight_.size(), mode_); ptr->p_gpu_weight_ = p_gpu_weight_; return ptr; } - const char* getPluginType() const override { return "prelu_plugin_dynamic"; } - int getNbOutputs() const override { return 1; } - int initialize() override; - void terminate() override; + const char* getPluginType() const TRT_NOEXCEPT override { + return "prelu_plugin_dynamic"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* 
inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override {} + int nbOutputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override { + int nbOutputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } private: std::vector weight_; @@ -165,13 +172,15 @@ class PReluPluginDynamic : public DynamicPluginTensorRT { class PReluPluginDynamicCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "prelu_plugin_dynamic"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "prelu_plugin_dynamic"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new PReluPluginDynamic(serial_data, serial_length); } }; diff --git a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu index 0d9e5417263f3b..0d978939c4bf35 100644 --- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu @@ -147,11 +147,11 @@ inline void TransposeQKV(const int batch, const int seq_len, } } -int QkvToContextPluginDynamic::initialize() { return 0; } +int QkvToContextPluginDynamic::initialize() TRT_NOEXCEPT { return 0; } nvinfer1::DimsExprs QkvToContextPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) { + nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { // input[0], (B, S, 3 * N * H, 1, 1) // input[1], (B, head_num, seq_len, seq_len) // output, (B, seq_len, hidden) @@ -177,7 +177,7 @@ nvinfer1::DimsExprs QkvToContextPluginDynamic::getOutputDimensions( bool QkvToContextPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of swish plugin shoule not be nullptr.")); @@ -215,7 +215,8 @@ bool QkvToContextPluginDynamic::supportsFormatCombination( } nvinfer1::DataType QkvToContextPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType *input_types, int nb_inputs) const { + int index, const nvinfer1::DataType *input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ( 
index, 0, platform::errors::InvalidArgument( "The EmbEltwiseLayernorm Plugin only has one input, so the " @@ -235,7 +236,7 @@ __global__ void apply_scale(T *data, T scale, int n) { int QkvToContextPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, - void *const *outputs, void *workspace, cudaStream_t stream) { + void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; int input_num = ProductDim(input_dims); // input[0], (B, S, 3 * N * H, 1, 1) diff --git a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h index 7147d9855755be..501c17b2858d6f 100644 --- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h @@ -59,21 +59,23 @@ class QkvToContextPluginDynamic : public DynamicPluginTensorRT { DeserializeValue(&serial_data, &serial_length, &scale_); DeserializeValue(&serial_data, &serial_length, &with_fp16_); } - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { return new QkvToContextPluginDynamic(hidden_, head_number_, head_size_, scale_, with_fp16_); } - const char* getPluginType() const override { return "qkv_to_context_plugin"; } - int getNbOutputs() const override { return 1; } - int initialize() override; + const char* getPluginType() const TRT_NOEXCEPT override { + return "qkv_to_context_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override; - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return SerializedSize(hidden_) + SerializedSize(head_number_) + SerializedSize(head_size_) + SerializedSize(scale_) + SerializedSize(with_fp16_); } - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { SerializeValue(&buffer, hidden_); SerializeValue(&buffer, head_number_); SerializeValue(&buffer, head_size_); @@ -83,33 +85,34 @@ class QkvToContextPluginDynamic : public DynamicPluginTensorRT { nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* in_out, - int nb_inputs, int nb_outputs) override; + int nb_inputs, + int nb_outputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nb_inputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nb_outputs) override {} + int nb_outputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nb_inputs, const nvinfer1::PluginTensorDesc* outputs, - int nb_outputs) const override { + int nb_outputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* input_types, - int nb_inputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* 
input_types, + int nb_inputs) const TRT_NOEXCEPT override; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } private: int hidden_; @@ -121,31 +124,34 @@ class QkvToContextPluginDynamic : public DynamicPluginTensorRT { class QkvToContextPluginDynamicCreator : public nvinfer1::IPluginCreator { public: QkvToContextPluginDynamicCreator() {} - const char* getPluginName() const override { return "qkv_to_context_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "qkv_to_context_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - const nvinfer1::PluginFieldCollection* getFieldNames() override { + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override { return &field_collection_; } - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override { + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override { return nullptr; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { auto plugin = new QkvToContextPluginDynamic(serial_data, serial_length); return plugin; } - void setPluginNamespace(const char* lib_namespace) override { + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override { plugin_namespace_ = lib_namespace; } - const char* getPluginNamespace() const override { + const char* getPluginNamespace() const TRT_NOEXCEPT override { return plugin_namespace_.c_str(); } diff --git a/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.cu index 5ec6e5af86daf1..06540b36260828 100644 --- a/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.cu @@ -200,7 +200,8 @@ RoiAlignPluginDynamic::RoiAlignPluginDynamic(void const* data, size_t length) { smem_per_block_ = smem_per_block; } -nvinfer1::IPluginV2DynamicExt* RoiAlignPluginDynamic::clone() const { +nvinfer1::IPluginV2DynamicExt* RoiAlignPluginDynamic::clone() const + TRT_NOEXCEPT { auto* plugin = new RoiAlignPluginDynamic(data_type_, pooled_height_, pooled_width_, spatial_scale_, sampling_ratio_); @@ -210,7 +211,7 @@ nvinfer1::IPluginV2DynamicExt* RoiAlignPluginDynamic::clone() const { nvinfer1::DimsExprs RoiAlignPluginDynamic::getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) { + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT { nvinfer1::DimsExprs ret{}; ret.nbDims = 4; ret.d[0] = inputs[1].d[0]; // roi @@ -222,7 +223,7 @@ nvinfer1::DimsExprs RoiAlignPluginDynamic::getOutputDimensions( bool RoiAlignPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, - int nbOutputs) { + int nbOutputs) TRT_NOEXCEPT { if (inOut[pos].format != nvinfer1::TensorFormat::kLINEAR) { return false; } @@ -234,11 +235,12 @@ bool RoiAlignPluginDynamic::supportsFormatCombination( void RoiAlignPluginDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, - const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {} + const 
nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) TRT_NOEXCEPT {} size_t RoiAlignPluginDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc* inputs, int nbInputs, - const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const { + const nvinfer1::PluginTensorDesc* outputs, + int nbOutputs) const TRT_NOEXCEPT { return 0; } @@ -287,7 +289,7 @@ int RoiAlignPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(outputDesc[0].type, data_type_, platform::errors::InvalidArgument( "TRT RoiAlignPluginDynamic expects outputDesc[0].type " @@ -302,21 +304,22 @@ int RoiAlignPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* inputDesc, } nvinfer1::DataType RoiAlignPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType* inputTypes, int nbInputs) const { + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT { return inputTypes[0]; } -const char* RoiAlignPluginDynamic::getPluginType() const { +const char* RoiAlignPluginDynamic::getPluginType() const TRT_NOEXCEPT { return "roi_align_plugin_dynamic"; } -int RoiAlignPluginDynamic::getNbOutputs() const { return 1; } +int RoiAlignPluginDynamic::getNbOutputs() const TRT_NOEXCEPT { return 1; } -int RoiAlignPluginDynamic::initialize() { return 0; } +int RoiAlignPluginDynamic::initialize() TRT_NOEXCEPT { return 0; } -void RoiAlignPluginDynamic::terminate() {} +void RoiAlignPluginDynamic::terminate() TRT_NOEXCEPT {} -size_t RoiAlignPluginDynamic::getSerializationSize() const { +size_t RoiAlignPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { size_t serialize_size = 0; serialize_size += SerializedSize(data_type_); serialize_size += SerializedSize(pooled_height_); @@ -326,7 +329,7 @@ size_t RoiAlignPluginDynamic::getSerializationSize() const { return serialize_size; } -void RoiAlignPluginDynamic::serialize(void* buffer) const { +void RoiAlignPluginDynamic::serialize(void* buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, data_type_); SerializeValue(&buffer, pooled_height_); SerializeValue(&buffer, pooled_width_); @@ -334,40 +337,43 @@ void RoiAlignPluginDynamic::serialize(void* buffer) const { SerializeValue(&buffer, sampling_ratio_); } -void RoiAlignPluginDynamic::destroy() {} +void RoiAlignPluginDynamic::destroy() TRT_NOEXCEPT {} RoiAlignPluginDynamicCreator::RoiAlignPluginDynamicCreator() {} -void RoiAlignPluginDynamicCreator::setPluginNamespace( - const char* lib_namespace) { +void RoiAlignPluginDynamicCreator::setPluginNamespace(const char* lib_namespace) + TRT_NOEXCEPT { namespace_ = std::string(lib_namespace); } -const char* RoiAlignPluginDynamicCreator::getPluginNamespace() const { +const char* RoiAlignPluginDynamicCreator::getPluginNamespace() const + TRT_NOEXCEPT { return namespace_.c_str(); } -const char* RoiAlignPluginDynamicCreator::getPluginName() const { +const char* RoiAlignPluginDynamicCreator::getPluginName() const TRT_NOEXCEPT { return "roi_align_plugin_dynamic"; } -const char* RoiAlignPluginDynamicCreator::getPluginVersion() const { +const char* RoiAlignPluginDynamicCreator::getPluginVersion() const + TRT_NOEXCEPT { return "1"; } const nvinfer1::PluginFieldCollection* -RoiAlignPluginDynamicCreator::getFieldNames() { +RoiAlignPluginDynamicCreator::getFieldNames() TRT_NOEXCEPT { return &field_collection_; } nvinfer1::IPluginV2Ext* 
RoiAlignPluginDynamicCreator::createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) { + const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT { const nvinfer1::PluginField* fields = fc->fields; return nullptr; } nvinfer1::IPluginV2Ext* RoiAlignPluginDynamicCreator::deserializePlugin( - const char* name, const void* serial_data, size_t serial_length) { + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT { auto plugin = new RoiAlignPluginDynamic(serial_data, serial_length); plugin->setPluginNamespace(namespace_.c_str()); return plugin; diff --git a/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h index bba7d0d5a99664..44d2b630698357 100644 --- a/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h @@ -34,37 +34,38 @@ class RoiAlignPluginDynamic : public DynamicPluginTensorRT { int sampling_ratio); RoiAlignPluginDynamic(void const* data, size_t length); ~RoiAlignPluginDynamic() = default; - nvinfer1::IPluginV2DynamicExt* clone() const override; + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) override; + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override; + int nbOutputs) TRT_NOEXCEPT override; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override; + int nbOutputs) const TRT_NOEXCEPT override; int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; + cudaStream_t stream) TRT_NOEXCEPT override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - const char* getPluginType() const override; - int getNbOutputs() const override; - int initialize() override; - void terminate() override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - void destroy() override; + const char* getPluginType() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; + void destroy() TRT_NOEXCEPT override; private: template @@ -87,17 +88,18 @@ class RoiAlignPluginDynamicCreator : public nvinfer1::IPluginCreator { RoiAlignPluginDynamicCreator(); ~RoiAlignPluginDynamicCreator() override = default; - void setPluginNamespace(const char* lib_namespace) override; - const char* getPluginNamespace() const override; - const char* getPluginName() const override; - const char* getPluginVersion() const 
override; - const nvinfer1::PluginFieldCollection* getFieldNames() override; + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; + const char* getPluginName() const TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; nvinfer1::IPluginV2Ext* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override; - nvinfer1::IPluginV2Ext* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override; + const char* name, + const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT override; + nvinfer1::IPluginV2Ext* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override; private: std::string namespace_; diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu index 346b4c680830e9..fb14749f3d1dba 100644 --- a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu @@ -29,7 +29,7 @@ namespace plugin { // Dynamic Plugin below. #if IS_TRT_VERSION_GE(6000) -int SkipLayerNormPluginDynamic::initialize() { +int SkipLayerNormPluginDynamic::initialize() TRT_NOEXCEPT { cudaMalloc(&bias_gpu_, sizeof(float) * bias_size_); cudaMemcpy(bias_gpu_, bias_.data(), bias_size_ * sizeof(float), cudaMemcpyHostToDevice); @@ -39,7 +39,7 @@ int SkipLayerNormPluginDynamic::initialize() { return 0; } -void SkipLayerNormPluginDynamic::terminate() { +void SkipLayerNormPluginDynamic::terminate() TRT_NOEXCEPT { if (bias_gpu_) { cudaFree(bias_gpu_); bias_gpu_ = nullptr; @@ -52,13 +52,13 @@ void SkipLayerNormPluginDynamic::terminate() { nvinfer1::DimsExprs SkipLayerNormPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) { + nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { return inputs[0]; } bool SkipLayerNormPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of swish plugin shoule not be nullptr.")); @@ -96,7 +96,8 @@ bool SkipLayerNormPluginDynamic::supportsFormatCombination( } nvinfer1::DataType SkipLayerNormPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType *input_types, int nb_inputs) const { + int index, const nvinfer1::DataType *input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "The SkipLayerNorm Plugin only has one input, so the " @@ -112,7 +113,7 @@ nvinfer1::DataType SkipLayerNormPluginDynamic::getOutputDataType( int SkipLayerNormPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, - void *const *outputs, void *workspace, cudaStream_t stream) { + void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; size_t num = ProductDim(input_dims); int hidden = input_dims.d[2]; diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h index 
ac621784550f2f..c66b285a9fbc56 100644 --- a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h @@ -39,6 +39,7 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT { std::copy(bias, bias + bias_size, bias_.data()); std::copy(scale, scale + scale_size, scale_.data()); } + SkipLayerNormPluginDynamic(void const* serial_data, size_t serial_length) { DeserializeValue(&serial_data, &serial_length, &bias_); DeserializeValue(&serial_data, &serial_length, &scale_); @@ -48,7 +49,7 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT { DeserializeValue(&serial_data, &serial_length, &with_fp16_); } - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { auto ptr = new SkipLayerNormPluginDynamic( bias_.data(), scale_.data(), bias_size_, scale_size_, eps_, with_fp16_); ptr->bias_gpu_ = bias_gpu_; @@ -56,17 +57,19 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT { return ptr; } - const char* getPluginType() const override { return "skip_layernorm_plugin"; } - int getNbOutputs() const override { return 1; } - int initialize() override; + const char* getPluginType() const TRT_NOEXCEPT override { + return "skip_layernorm_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override; - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { size_t ser_size = SerializedSize(bias_) + SerializedSize(scale_) + SerializedSize(bias_size_) + SerializedSize(scale_size_) + SerializedSize(eps_) + SerializedSize(with_fp16_); return ser_size; } - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { SerializeValue(&buffer, bias_); SerializeValue(&buffer, scale_); SerializeValue(&buffer, bias_size_); @@ -77,34 +80,35 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT { nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* in_out, - int nb_inputs, int nb_outputs) override; + int nb_inputs, + int nb_outputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nb_inputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nb_outputs) override {} + int nb_outputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nb_inputs, const nvinfer1::PluginTensorDesc* outputs, - int nb_outputs) const override { + int nb_outputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* input_desc, const nvinfer1::PluginTensorDesc* output_desc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* input_types, - int nb_inputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT override; - void destroy() override { delete this; } - void terminate() override; + void destroy() TRT_NOEXCEPT override { delete this; } + void terminate() TRT_NOEXCEPT override; 
private: std::vector bias_; @@ -122,31 +126,34 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT { class SkipLayerNormPluginDynamicCreator : public nvinfer1::IPluginCreator { public: SkipLayerNormPluginDynamicCreator() {} - const char* getPluginName() const override { return "skip_layernorm_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "skip_layernorm_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - const nvinfer1::PluginFieldCollection* getFieldNames() override { + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override { return &field_collection_; } - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override { + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override { return nullptr; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { auto plugin = new SkipLayerNormPluginDynamic(serial_data, serial_length); return plugin; } - void setPluginNamespace(const char* lib_namespace) override { + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override { plugin_namespace_ = lib_namespace; } - const char* getPluginNamespace() const override { + const char* getPluginNamespace() const TRT_NOEXCEPT override { return plugin_namespace_.c_str(); } diff --git a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu index 70ff0e7cb069d7..6d367712eabc5a 100644 --- a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu @@ -75,12 +75,12 @@ SlicePlugin::~SlicePlugin() { cudaFree(offset_temp_data_); } -SlicePlugin *SlicePlugin::clone() const { +SlicePlugin *SlicePlugin::clone() const TRT_NOEXCEPT { return new SlicePlugin(starts_, ends_, axes_, with_fp16_); } -bool SlicePlugin::supportsFormat(nvinfer1::DataType type, - nvinfer1::PluginFormat format) const { +bool SlicePlugin::supportsFormat( + nvinfer1::DataType type, nvinfer1::PluginFormat format) const TRT_NOEXCEPT { if (with_fp16_) { return ((type == nvinfer1::DataType::kFLOAT || type == nvinfer1::DataType::kHALF) && @@ -91,9 +91,8 @@ bool SlicePlugin::supportsFormat(nvinfer1::DataType type, } } -nvinfer1::Dims SlicePlugin::getOutputDimensions(int index, - const nvinfer1::Dims *inputs, - int nb_input_dims) { +nvinfer1::Dims SlicePlugin::getOutputDimensions( + int index, const nvinfer1::Dims *inputs, int nb_input_dims) TRT_NOEXCEPT { auto in_dims = inputs[0]; nvinfer1::Dims out_dims = in_dims; for (size_t i = 0; i < axes_.size(); i++) { @@ -109,7 +108,7 @@ int SlicePlugin::enqueue(int batch_size, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) { #else void *const *outputs, void *workspace, - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { #endif auto input_dims = getInputDims(0); @@ -187,13 +186,13 @@ int SlicePlugin::enqueue(int batch_size, const void *const *inputs, return cudaGetLastError() != cudaSuccess; } -size_t SlicePlugin::getSerializationSize() const { +size_t SlicePlugin::getSerializationSize() const TRT_NOEXCEPT { return getBaseSerializationSize() + 
SerializedSize(getPluginType()) + SerializedSize(starts_) + SerializedSize(ends_) + SerializedSize(axes_); } -void SlicePlugin::serialize(void *buffer) const { +void SlicePlugin::serialize(void *buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, getPluginType()); serializeBase(buffer); SerializeValue(&buffer, starts_); @@ -222,23 +221,23 @@ SlicePluginDynamic::SlicePluginDynamic(void const *serialData, cudaStreamCreate(©_stream_); } -void SlicePluginDynamic::destroy() { +void SlicePluginDynamic::destroy() TRT_NOEXCEPT { cudaStreamDestroy(copy_stream_); cudaEventDestroy(copy_event_); cudaFree(offset_temp_data_); delete this; } -int SlicePluginDynamic::initialize() { return 0; } +int SlicePluginDynamic::initialize() TRT_NOEXCEPT { return 0; } -size_t SlicePluginDynamic::getSerializationSize() const { +size_t SlicePluginDynamic::getSerializationSize() const TRT_NOEXCEPT { size_t size = SerializedSize(starts_) + SerializedSize(ends_) + SerializedSize(axes_) + SerializedSize(with_fp16_); return size; } -void SlicePluginDynamic::serialize(void *buffer) const { +void SlicePluginDynamic::serialize(void *buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, starts_); SerializeValue(&buffer, ends_); SerializeValue(&buffer, axes_); @@ -247,7 +246,7 @@ void SlicePluginDynamic::serialize(void *buffer) const { nvinfer1::DimsExprs SlicePluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) { + nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { auto in_dims = inputs[0]; nvinfer1::DimsExprs ret = in_dims; // start, ends should greater 0 @@ -261,7 +260,7 @@ nvinfer1::DimsExprs SlicePluginDynamic::getOutputDimensions( bool SlicePluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of swish plugin shoule not be nullptr.")); @@ -289,7 +288,8 @@ bool SlicePluginDynamic::supportsFormatCombination( } nvinfer1::DataType SlicePluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType *input_types, int nb_inputs) const { + int index, const nvinfer1::DataType *input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "The Slice Plugin only has one input, so the " "index value should be 0, but get %d.", @@ -304,7 +304,8 @@ nvinfer1::DataType SlicePluginDynamic::getOutputDataType( int SlicePluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, void *const *outputs, - void *workspace, cudaStream_t stream) { + void *workspace, + cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; auto out_dims = output_desc[0].dims; auto num_dims = input_dims.nbDims; diff --git a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.h index b656918f8fbab4..29f8f7c0999c47 100644 --- a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.h @@ -35,27 +35,29 @@ class SlicePlugin : public PluginTensorRT { // It should not be called by users. 
SlicePlugin(void const* serial_data, size_t serial_length); ~SlicePlugin(); - SlicePlugin* clone() const override; + SlicePlugin* clone() const TRT_NOEXCEPT override; - const char* getPluginType() const override { return "slice_plugin"; } - int getNbOutputs() const override { return 1; } - int initialize() override { return 0; } - bool supportsFormat(nvinfer1::DataType type, - nvinfer1::PluginFormat format) const override; + const char* getPluginType() const TRT_NOEXCEPT override { + return "slice_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override { return 0; } + bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) + const TRT_NOEXCEPT override; nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nb_input_dims) override; + int nb_input_dims) TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batch_size, const void* const* inputs, void** outputs, #else int enqueue(int batch_size, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; - size_t getSerializationSize() const override; + size_t getSerializationSize() const TRT_NOEXCEPT override; // TRT will call this func to serialize the configuration of TRT // It should not be called by users. - void serialize(void* buffer) const override; + void serialize(void* buffer) const TRT_NOEXCEPT override; private: std::vector starts_; @@ -68,13 +70,15 @@ class SlicePlugin : public PluginTensorRT { class SlicePluginCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "slice_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "slice_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new SlicePlugin(serial_data, serial_length); } }; @@ -86,48 +90,51 @@ class SlicePluginDynamic : public DynamicPluginTensorRT { explicit SlicePluginDynamic(std::vector starts, std::vector ends, std::vector axes, bool with_fp16); - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { return new SlicePluginDynamic(starts_, ends_, axes_, with_fp16_); } SlicePluginDynamic(void const* serialData, size_t serialLength); - const char* getPluginType() const override { return "slice_plugin_dynamic"; } - int getNbOutputs() const override { return 1; } - int initialize() override; + const char* getPluginType() const TRT_NOEXCEPT override { + return "slice_plugin_dynamic"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool 
supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override {} + int nbOutputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override { + int nbOutputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - void destroy() override; + void destroy() TRT_NOEXCEPT override; private: std::vector starts_; @@ -140,13 +147,15 @@ class SlicePluginDynamic : public DynamicPluginTensorRT { class SlicePluginDynamicCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "slice_plugin_dynamic"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "slice_plugin_dynamic"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serialData, - size_t serialLength) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serialData, + size_t serialLength) TRT_NOEXCEPT override { return new SlicePluginDynamic(serialData, serialLength); } }; diff --git a/paddle/fluid/inference/tensorrt/plugin/special_slice_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/special_slice_plugin.cu index 3bef9672e5058a..49c03b761ceb3e 100644 --- a/paddle/fluid/inference/tensorrt/plugin/special_slice_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/special_slice_plugin.cu @@ -30,28 +30,29 @@ SpecialSlicePluginDynamic::SpecialSlicePluginDynamic(void const* serial_data, SpecialSlicePluginDynamic::~SpecialSlicePluginDynamic() {} -nvinfer1::IPluginV2DynamicExt* SpecialSlicePluginDynamic::clone() const { +nvinfer1::IPluginV2DynamicExt* SpecialSlicePluginDynamic::clone() const + TRT_NOEXCEPT { return new SpecialSlicePluginDynamic(); } -const char* SpecialSlicePluginDynamic::getPluginType() const { +const char* SpecialSlicePluginDynamic::getPluginType() const TRT_NOEXCEPT { return "special_slice_plugin"; } -int SpecialSlicePluginDynamic::getNbOutputs() const { return 1; } +int SpecialSlicePluginDynamic::getNbOutputs() const TRT_NOEXCEPT { return 1; } -int SpecialSlicePluginDynamic::initialize() { return 0; } +int SpecialSlicePluginDynamic::initialize() TRT_NOEXCEPT { return 0; } -size_t SpecialSlicePluginDynamic::getSerializationSize() const { +size_t SpecialSlicePluginDynamic::getSerializationSize() const TRT_NOEXCEPT { size_t serialize_size = 0; return serialize_size; } -void SpecialSlicePluginDynamic::serialize(void* buffer) const {} +void SpecialSlicePluginDynamic::serialize(void* buffer) const TRT_NOEXCEPT {} nvinfer1::DimsExprs SpecialSlicePluginDynamic::getOutputDimensions( int output_index, const 
nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) { + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT { nvinfer1::DimsExprs output(inputs[0]); output.nbDims++; for (int i = output.nbDims - 1; i > 1; i--) { @@ -69,21 +70,22 @@ nvinfer1::DimsExprs SpecialSlicePluginDynamic::getOutputDimensions( void SpecialSlicePluginDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, - const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {} + const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) TRT_NOEXCEPT {} size_t SpecialSlicePluginDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc* inputs, int nbInputs, - const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const { + const nvinfer1::PluginTensorDesc* outputs, + int nbOutputs) const TRT_NOEXCEPT { return 0; } -void SpecialSlicePluginDynamic::destroy() { delete this; } +void SpecialSlicePluginDynamic::destroy() TRT_NOEXCEPT { delete this; } -void SpecialSlicePluginDynamic::terminate() {} +void SpecialSlicePluginDynamic::terminate() TRT_NOEXCEPT {} bool SpecialSlicePluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc* desc, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { if (pos == 0) // slice tensor return (desc[pos].type == nvinfer1::DataType::kHALF && desc[pos].format == @@ -101,7 +103,8 @@ bool SpecialSlicePluginDynamic::supportsFormatCombination( } nvinfer1::DataType SpecialSlicePluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType* input_types, int nb_inputs) const { + int index, const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "The index should be equal to 0")); return input_types[0]; @@ -120,7 +123,7 @@ __global__ void SpecialSliceKernel(const T* slice_input, int SpecialSlicePluginDynamic::enqueue( const nvinfer1::PluginTensorDesc* input_desc, const nvinfer1::PluginTensorDesc* output_desc, const void* const* inputs, - void* const* outputs, void* workspace, cudaStream_t stream) { + void* const* outputs, void* workspace, cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; // (sum(S), 768, 1, 1) auto out_dims = output_desc[0].dims; // (batch, 768, 1, 1) @@ -142,36 +145,40 @@ int SpecialSlicePluginDynamic::enqueue( SpecialSlicePluginDynamicCreator::SpecialSlicePluginDynamicCreator() {} -const char* SpecialSlicePluginDynamicCreator::getPluginName() const { +const char* SpecialSlicePluginDynamicCreator::getPluginName() const + TRT_NOEXCEPT { return "special_slice_plugin"; } -const char* SpecialSlicePluginDynamicCreator::getPluginVersion() const { +const char* SpecialSlicePluginDynamicCreator::getPluginVersion() const + TRT_NOEXCEPT { return "1"; } const nvinfer1::PluginFieldCollection* -SpecialSlicePluginDynamicCreator::getFieldNames() { +SpecialSlicePluginDynamicCreator::getFieldNames() TRT_NOEXCEPT { return &field_collection_; } nvinfer1::IPluginV2* SpecialSlicePluginDynamicCreator::createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) { + const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT { return new SpecialSlicePluginDynamic(); } nvinfer1::IPluginV2* SpecialSlicePluginDynamicCreator::deserializePlugin( - const char* name, const void* serial_data, size_t serial_length) { + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT { auto plugin = new SpecialSlicePluginDynamic(serial_data, serial_length); 
return plugin; } void SpecialSlicePluginDynamicCreator::setPluginNamespace( - const char* lib_namespace) { + const char* lib_namespace) TRT_NOEXCEPT { plugin_namespace_ = lib_namespace; } -const char* SpecialSlicePluginDynamicCreator::getPluginNamespace() const { +const char* SpecialSlicePluginDynamicCreator::getPluginNamespace() const + TRT_NOEXCEPT { return plugin_namespace_.c_str(); } diff --git a/paddle/fluid/inference/tensorrt/plugin/special_slice_plugin.h b/paddle/fluid/inference/tensorrt/plugin/special_slice_plugin.h index 438d9e9465c52a..c3521e4ed63713 100644 --- a/paddle/fluid/inference/tensorrt/plugin/special_slice_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/special_slice_plugin.h @@ -31,37 +31,38 @@ class SpecialSlicePluginDynamic : public DynamicPluginTensorRT { SpecialSlicePluginDynamic(); SpecialSlicePluginDynamic(void const* serial_data, size_t serial_length); ~SpecialSlicePluginDynamic(); - nvinfer1::IPluginV2DynamicExt* clone() const override; + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) override; + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override; + int nbOutputs) TRT_NOEXCEPT override; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override; + int nbOutputs) const TRT_NOEXCEPT override; int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; + cudaStream_t stream) TRT_NOEXCEPT override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - const char* getPluginType() const override; - int getNbOutputs() const override; - int initialize() override; - void terminate() override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - void destroy() override; + const char* getPluginType() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; + void destroy() TRT_NOEXCEPT override; private: int axis_; @@ -71,16 +72,17 @@ class SpecialSlicePluginDynamic : public DynamicPluginTensorRT { class SpecialSlicePluginDynamicCreator : public nvinfer1::IPluginCreator { public: SpecialSlicePluginDynamicCreator(); - const char* getPluginName() const override; - const char* getPluginVersion() const override; - const nvinfer1::PluginFieldCollection* getFieldNames() override; - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override; - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - 
size_t serial_length) override; - void setPluginNamespace(const char* lib_namespace) override; - const char* getPluginNamespace() const override; + const char* getPluginName() const TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override; + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override; + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; private: std::string plugin_namespace_; diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu index 37afff9105d80a..091680ff672d0e 100644 --- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu @@ -38,7 +38,7 @@ __device__ int upper_bound(T const* vals, int n, T const& key) { } nvinfer1::Dims SplitPlugin::getOutputDimensions( - int index, const nvinfer1::Dims* input_dims, int num_inputs) { + int index, const nvinfer1::Dims* input_dims, int num_inputs) TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(num_inputs, 1, platform::errors::InvalidArgument( "Invalid number of inputs of split TRT plugin. " @@ -66,7 +66,7 @@ void SplitPlugin::shareData(const SplitPlugin* another) { d_output_ptrs_.resize(another->d_output_ptrs_.size(), nullptr); } -int SplitPlugin::initialize() { +int SplitPlugin::initialize() TRT_NOEXCEPT { PADDLE_ENFORCE_LE(axis_, nvinfer1::Dims::MAX_DIMS, platform::errors::InvalidArgument( "Axis dimension exceeds max dimension in TensorRT. " @@ -98,7 +98,7 @@ int SplitPlugin::initialize() { } // nothing to release according to initialize -void SplitPlugin::terminate() {} +void SplitPlugin::terminate() TRT_NOEXCEPT {} // The following part of the code refers to onnx-tensorrt // https://github.com/onnx/onnx-tensorrt/blob/master/Split.cu @@ -129,7 +129,7 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) { #else void* const* outputs, void* workspace, - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { #endif const int* d_segment_offsets_ptr = thrust::raw_pointer_cast(&d_segment_offsets_[0]); @@ -155,14 +155,14 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs, // Dynamic Plugin below. 
#if IS_TRT_VERSION_GE(6000) -int SplitPluginDynamic::initialize() { return 0; } +int SplitPluginDynamic::initialize() TRT_NOEXCEPT { return 0; } -size_t SplitPluginDynamic::getSerializationSize() const { +size_t SplitPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { return SerializedSize(axis_) + SerializedSize(output_length_) + SerializedSize(with_fp16_); } -void SplitPluginDynamic::serialize(void* buffer) const { +void SplitPluginDynamic::serialize(void* buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, axis_); SerializeValue(&buffer, output_length_); SerializeValue(&buffer, with_fp16_); @@ -170,7 +170,7 @@ void SplitPluginDynamic::serialize(void* buffer) const { nvinfer1::DimsExprs SplitPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) { + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(nb_inputs, 1, platform::errors::InvalidArgument( "The Split plugin should be only one input.")); @@ -188,7 +188,7 @@ nvinfer1::DimsExprs SplitPluginDynamic::getOutputDimensions( bool SplitPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc* in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of split plugin should not be nullptr.")); @@ -217,14 +217,16 @@ bool SplitPluginDynamic::supportsFormatCombination( } nvinfer1::DataType SplitPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType* input_types, int nb_inputs) const { + int index, const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT { return input_types[0]; } int SplitPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc, const nvinfer1::PluginTensorDesc* output_desc, const void* const* inputs, void* const* outputs, - void* workspace, cudaStream_t stream) { + void* workspace, + cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; int outer_rows = 1; int inner_cols = 1; diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h index a791395f4a3d38..7a41fe1d1eef23 100644 --- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h @@ -39,43 +39,47 @@ class SplitPlugin : public PluginTensorRTV2Ext { DeserializeValue(&serial_data, &serial_length, &output_length_); } - nvinfer1::IPluginV2Ext* clone() const override { + nvinfer1::IPluginV2Ext* clone() const TRT_NOEXCEPT override { SplitPlugin* ptr = new SplitPlugin(axis_, output_length_, with_fp16_); ptr->setPluginNamespace(this->getPluginNamespace()); ptr->shareData(this); return ptr; } - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* input_types, - int nb_inputs) const override { + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT override { return input_types[0]; } - const char* getPluginType() const override { return "split_plugin_v2ext"; } - int getNbOutputs() const override { return output_length_.size(); } + const char* getPluginType() const TRT_NOEXCEPT override { + return "split_plugin_v2ext"; + } + int getNbOutputs() const TRT_NOEXCEPT override { + return output_length_.size(); + } nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* input_dims, - int num_inputs) override; + int num_inputs) TRT_NOEXCEPT override; 
- int initialize() override; - void terminate() override; + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batch_size, const void* const* inputs, void** outputs, #else int enqueue(int batch_size, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } protected: - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return SerializedSize(axis_) + SerializedSize(output_length_) + getBaseSerializationSize(); } - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { serializeBase(buffer); SerializeValue(&buffer, axis_); SerializeValue(&buffer, output_length_); @@ -98,32 +102,35 @@ class SplitPlugin : public PluginTensorRTV2Ext { class SplitPluginCreator : public nvinfer1::IPluginCreator { public: SplitPluginCreator() {} - const char* getPluginName() const override { return "split_plugin_v2ext"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "split_plugin_v2ext"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - const nvinfer1::PluginFieldCollection* getFieldNames() override { + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override { return &field_collection_; } - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override { + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override { // not implemented return nullptr; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { auto plugin = new SplitPlugin(serial_data, serial_length); return plugin; } - void setPluginNamespace(const char* lib_namespace) override { + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override { plugin_namespace_ = lib_namespace; } - const char* getPluginNamespace() const override { + const char* getPluginNamespace() const TRT_NOEXCEPT override { return plugin_namespace_.c_str(); } @@ -151,46 +158,51 @@ class SplitPluginDynamic : public DynamicPluginTensorRT { DeserializeValue(&serial_data, &serial_length, &with_fp16_); } - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { return new SplitPluginDynamic(axis_, output_length_, with_fp16_); } - const char* getPluginType() const override { return "split_plugin"; } - int getNbOutputs() const override { return output_length_.size(); } - int initialize() override; + const char* getPluginType() const TRT_NOEXCEPT override { + return "split_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { + return output_length_.size(); + } + int initialize() TRT_NOEXCEPT override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int 
outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) override; + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override {} + int nbOutputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override { + int nbOutputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } private: int axis_; @@ -200,31 +212,34 @@ class SplitPluginDynamic : public DynamicPluginTensorRT { class SplitPluginDynamicCreator : public nvinfer1::IPluginCreator { public: SplitPluginDynamicCreator() {} - const char* getPluginName() const override { return "split_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "split_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - const nvinfer1::PluginFieldCollection* getFieldNames() override { + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override { return &field_collection_; } - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override { + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override { return nullptr; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { auto plugin = new SplitPluginDynamic(serial_data, serial_length); return plugin; } - void setPluginNamespace(const char* lib_namespace) override { + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override { plugin_namespace_ = lib_namespace; } - const char* getPluginNamespace() const override { + const char* getPluginNamespace() const TRT_NOEXCEPT override { return plugin_namespace_.c_str(); } diff --git a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu index 21e80339b50062..c3b4a6ff4af1cb 100644 --- a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu @@ -37,17 +37,19 @@ StackPluginDynamic::StackPluginDynamic(void const* serial_data, StackPluginDynamic::~StackPluginDynamic() {} -nvinfer1::IPluginV2DynamicExt* StackPluginDynamic::clone() const { +nvinfer1::IPluginV2DynamicExt* StackPluginDynamic::clone() const 
TRT_NOEXCEPT { return new StackPluginDynamic(axis_, num_stack_, with_fp16_); } -const char* StackPluginDynamic::getPluginType() const { return "stack_plugin"; } +const char* StackPluginDynamic::getPluginType() const TRT_NOEXCEPT { + return "stack_plugin"; +} -int StackPluginDynamic::getNbOutputs() const { return 1; } +int StackPluginDynamic::getNbOutputs() const TRT_NOEXCEPT { return 1; } -int StackPluginDynamic::initialize() { return 0; } +int StackPluginDynamic::initialize() TRT_NOEXCEPT { return 0; } -size_t StackPluginDynamic::getSerializationSize() const { +size_t StackPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { size_t serialize_size = 0; serialize_size += SerializedSize(axis_); serialize_size += SerializedSize(num_stack_); @@ -55,7 +57,7 @@ size_t StackPluginDynamic::getSerializationSize() const { return serialize_size; } -void StackPluginDynamic::serialize(void* buffer) const { +void StackPluginDynamic::serialize(void* buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, axis_); SerializeValue(&buffer, num_stack_); SerializeValue(&buffer, with_fp16_); @@ -63,7 +65,7 @@ void StackPluginDynamic::serialize(void* buffer) const { nvinfer1::DimsExprs StackPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) { + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT { nvinfer1::DimsExprs output(inputs[0]); output.nbDims = inputs[0].nbDims + 1; @@ -76,21 +78,22 @@ nvinfer1::DimsExprs StackPluginDynamic::getOutputDimensions( void StackPluginDynamic::configurePlugin( const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, - const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {} + const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) TRT_NOEXCEPT {} size_t StackPluginDynamic::getWorkspaceSize( const nvinfer1::PluginTensorDesc* inputs, int nbInputs, - const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const { + const nvinfer1::PluginTensorDesc* outputs, + int nbOutputs) const TRT_NOEXCEPT { return num_stack_ * sizeof(uintptr_t); } -void StackPluginDynamic::destroy() { delete this; } +void StackPluginDynamic::destroy() TRT_NOEXCEPT { delete this; } -void StackPluginDynamic::terminate() {} +void StackPluginDynamic::terminate() TRT_NOEXCEPT {} bool StackPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc* in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of stack plugin should not be nullptr.")); @@ -118,7 +121,8 @@ bool StackPluginDynamic::supportsFormatCombination( } nvinfer1::DataType StackPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType* input_types, int nb_inputs) const { + int index, const nvinfer1::DataType* input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "The index should be equal to 0")); return input_types[0]; @@ -139,7 +143,8 @@ __global__ void StackKernel(const T* const* input, T* output, int num_stack, int StackPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc, const nvinfer1::PluginTensorDesc* output_desc, const void* const* inputs, void* const* outputs, - void* workspace, cudaStream_t stream) { + void* workspace, + cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; // (batch, seq, seq) auto out_dims = output_desc[0].dims; // (batch, num_head, seq, seq) auto out_num_dims = out_dims.nbDims; @@ 
-195,19 +200,21 @@ int StackPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc, StackPluginDynamicCreator::StackPluginDynamicCreator() {} -const char* StackPluginDynamicCreator::getPluginName() const { +const char* StackPluginDynamicCreator::getPluginName() const TRT_NOEXCEPT { return "stack_plugin"; } -const char* StackPluginDynamicCreator::getPluginVersion() const { return "1"; } +const char* StackPluginDynamicCreator::getPluginVersion() const TRT_NOEXCEPT { + return "1"; +} const nvinfer1::PluginFieldCollection* -StackPluginDynamicCreator::getFieldNames() { +StackPluginDynamicCreator::getFieldNames() TRT_NOEXCEPT { return &field_collection_; } nvinfer1::IPluginV2* StackPluginDynamicCreator::createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) { + const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT { int axis = -1; int num_stack = -1; bool with_fp16 = false; @@ -230,16 +237,18 @@ nvinfer1::IPluginV2* StackPluginDynamicCreator::createPlugin( } nvinfer1::IPluginV2* StackPluginDynamicCreator::deserializePlugin( - const char* name, const void* serial_data, size_t serial_length) { + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT { auto plugin = new StackPluginDynamic(serial_data, serial_length); return plugin; } -void StackPluginDynamicCreator::setPluginNamespace(const char* lib_namespace) { +void StackPluginDynamicCreator::setPluginNamespace(const char* lib_namespace) + TRT_NOEXCEPT { plugin_namespace_ = lib_namespace; } -const char* StackPluginDynamicCreator::getPluginNamespace() const { +const char* StackPluginDynamicCreator::getPluginNamespace() const TRT_NOEXCEPT { return plugin_namespace_.c_str(); } diff --git a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h index cd8adaf7549572..965c53e2698778 100644 --- a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h @@ -31,37 +31,36 @@ class StackPluginDynamic : public DynamicPluginTensorRT { explicit StackPluginDynamic(int axis, int num_stack, bool with_fp16); StackPluginDynamic(void const* serial_data, size_t serial_length); ~StackPluginDynamic(); - nvinfer1::IPluginV2DynamicExt* clone() const override; + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, - nvinfer1::IExprBuilder& exprBuilder) override; + nvinfer1::IExprBuilder& exprBuilder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override; + int nbOutputs) TRT_NOEXCEPT override; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override; + int nbOutputs) const TRT_NOEXCEPT override; int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; - - const char* getPluginType() const override; 
- int getNbOutputs() const override; - int initialize() override; - void terminate() override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - void destroy() override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; + const char* getPluginType() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; + void destroy() TRT_NOEXCEPT override; private: int axis_; @@ -71,16 +70,17 @@ class StackPluginDynamic : public DynamicPluginTensorRT { class StackPluginDynamicCreator : public nvinfer1::IPluginCreator { public: StackPluginDynamicCreator(); - const char* getPluginName() const override; - const char* getPluginVersion() const override; - const nvinfer1::PluginFieldCollection* getFieldNames() override; - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override; - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override; - void setPluginNamespace(const char* lib_namespace) override; - const char* getPluginNamespace() const override; + const char* getPluginName() const TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override; + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override; + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; private: std::string plugin_namespace_; diff --git a/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.cu index da9d21acd5d63f..9720719fd0bca0 100644 --- a/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.cu @@ -23,11 +23,11 @@ namespace inference { namespace tensorrt { namespace plugin { -int SwishPlugin::initialize() { return 0; } +int SwishPlugin::initialize() TRT_NOEXCEPT { return 0; } nvinfer1::Dims SwishPlugin::getOutputDimensions(int index, const nvinfer1::Dims *inputDims, - int nbInputs) { + int nbInputs) TRT_NOEXCEPT { assert(nbInputs == 1); assert(index < this->getNbOutputs()); nvinfer1::Dims const &input_dims = inputDims[0]; @@ -83,12 +83,12 @@ int SwishPlugin::enqueue(int batch_size, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream) { #else void *const *outputs, void *workspace, - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { #endif // input dims is CHW. const auto &input_dims = this->getInputDims(0); const float *input = reinterpret_cast(inputs[0]); - float *output = reinterpret_cast(outputs)[0]; + float *output = reinterpret_cast(outputs)[0]; int num = batch_size; for (int i = 0; i < input_dims.nbDims; i++) { num *= input_dims.d[i]; @@ -103,29 +103,29 @@ int SwishPlugin::enqueue(int batch_size, const void *const *inputs, // Dynamic Plugin below. 
#if IS_TRT_VERSION_GE(6000) -int SwishPluginDynamic::initialize() { +int SwishPluginDynamic::initialize() TRT_NOEXCEPT { getPluginNamespace(); return 0; } -size_t SwishPluginDynamic::getSerializationSize() const { +size_t SwishPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { return SerializedSize(beta_) + SerializedSize(with_fp16_); } -void SwishPluginDynamic::serialize(void *buffer) const { +void SwishPluginDynamic::serialize(void *buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, beta_); SerializeValue(&buffer, with_fp16_); } nvinfer1::DimsExprs SwishPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) { + nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { return inputs[0]; } bool SwishPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, - int nb_outputs) { + int nb_outputs) TRT_NOEXCEPT { PADDLE_ENFORCE_NOT_NULL( in_out, platform::errors::InvalidArgument( "The input of swish plugin shoule not be nullptr.")); @@ -154,7 +154,8 @@ bool SwishPluginDynamic::supportsFormatCombination( } nvinfer1::DataType SwishPluginDynamic::getOutputDataType( - int index, const nvinfer1::DataType *input_types, int nb_inputs) const { + int index, const nvinfer1::DataType *input_types, + int nb_inputs) const TRT_NOEXCEPT { PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( "The Swish Plugin only has one input, so the " "index value should be 0, but get %d.", @@ -165,7 +166,8 @@ nvinfer1::DataType SwishPluginDynamic::getOutputDataType( int SwishPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, void *const *outputs, - void *workspace, cudaStream_t stream) { + void *workspace, + cudaStream_t stream) TRT_NOEXCEPT { auto input_dims = input_desc[0].dims; size_t num = ProductDim(input_dims); int threads = 1024; diff --git a/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.h index 8940fdce3b0b56..c4bdc5f921509c 100644 --- a/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.h @@ -31,11 +31,11 @@ class SwishPlugin : public PluginTensorRT { float beta_; public: - size_t getSerializationSize() const override { + size_t getSerializationSize() const TRT_NOEXCEPT override { return getBaseSerializationSize() + SerializedSize(beta_); } - void serialize(void* buffer) const override { + void serialize(void* buffer) const TRT_NOEXCEPT override { serializeBase(buffer); SerializeValue(&buffer, beta_); } @@ -53,33 +53,37 @@ class SwishPlugin : public PluginTensorRT { ~SwishPlugin() {} - int initialize() override; + int initialize() TRT_NOEXCEPT override; - SwishPlugin* clone() const override { + SwishPlugin* clone() const TRT_NOEXCEPT override { return new SwishPlugin(beta_, with_fp16_); } - const char* getPluginType() const override { return "swish_plugin"; } - int getNbOutputs() const override { return 1; } + const char* getPluginType() const TRT_NOEXCEPT override { + return "swish_plugin"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) override; + int nbInputDims) TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batchSize, const void* const* inputs, void** outputs, #else int enqueue(int batchSize, const 
void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; }; class SwishPluginCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "swish_plugin"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return "swish_plugin"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new SwishPlugin(serial_data, serial_length); } }; @@ -96,46 +100,49 @@ class SwishPluginDynamic : public DynamicPluginTensorRT { DeserializeValue(&serialData, &serialLength, &beta_); DeserializeValue(&serialData, &serialLength, &with_fp16_); } - nvinfer1::IPluginV2DynamicExt* clone() const override { + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { return new SwishPluginDynamic(beta_, with_fp16_); } - const char* getPluginType() const override { return "swish_plugin_dynamic"; } - int getNbOutputs() const override { return 1; } - int initialize() override; + const char* getPluginType() const TRT_NOEXCEPT override { + return "swish_plugin_dynamic"; + } + int getNbOutputs() const TRT_NOEXCEPT override { return 1; } + int initialize() TRT_NOEXCEPT override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) override; + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, int nbOutputs) override; + int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) override {} + int nbOutputs) TRT_NOEXCEPT override {} size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nbInputs, const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const override { + int nbOutputs) const TRT_NOEXCEPT override { return 0; } int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, void* const* outputs, void* workspace, - cudaStream_t stream) override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const override; + cudaStream_t stream) TRT_NOEXCEPT override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, + int nbInputs) const TRT_NOEXCEPT override; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } private: float beta_; @@ -143,13 +150,15 @@ class SwishPluginDynamic : public DynamicPluginTensorRT { class SwishPluginDynamicCreator : public TensorRTPluginCreator { public: - const char* getPluginName() const override { return "swish_plugin_dynamic"; } + const char* getPluginName() const TRT_NOEXCEPT override { + return 
"swish_plugin_dynamic"; + } - const char* getPluginVersion() const override { return "1"; } + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override { + nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override { return new SwishPluginDynamic(serial_data, serial_length); } }; diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc index 5be0ed4a13b230..da5aa54ee4eb59 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.cc @@ -60,8 +60,8 @@ size_t PluginTensorRT::getBaseSerializationSize() const { return SeriaSize(input_dims_, data_type_, data_format_, with_fp16_); } -bool PluginTensorRT::supportsFormat(nvinfer1::DataType type, - nvinfer1::PluginFormat format) const { +bool PluginTensorRT::supportsFormat( + nvinfer1::DataType type, nvinfer1::PluginFormat format) const TRT_NOEXCEPT { return ((type == nvinfer1::DataType::kFLOAT) && (format == nvinfer1::PluginFormat::kLINEAR)); } @@ -69,7 +69,7 @@ bool PluginTensorRT::supportsFormat(nvinfer1::DataType type, void PluginTensorRT::configureWithFormat( const nvinfer1::Dims* input_dims, int num_inputs, const nvinfer1::Dims* output_dims, int num_outputs, nvinfer1::DataType type, - nvinfer1::PluginFormat format, int max_batch_size) { + nvinfer1::PluginFormat format, int max_batch_size) TRT_NOEXCEPT { data_type_ = type; data_format_ = format; input_dims_.assign(input_dims, input_dims + num_inputs); @@ -95,26 +95,28 @@ void PluginTensorRTV2Ext::configurePlugin( const nvinfer1::DataType* input_types, const nvinfer1::DataType* output_types, const bool* input_is_broadcast, const bool* output_is_broadcast, nvinfer1::PluginFormat float_format, - int32_t max_batch_size) { + int32_t max_batch_size) TRT_NOEXCEPT { input_dims_.assign(input_dims, input_dims + nb_inputs); data_format_ = float_format; data_type_ = input_types[0]; } -const nvinfer1::PluginFieldCollection* TensorRTPluginCreator::getFieldNames() { +const nvinfer1::PluginFieldCollection* TensorRTPluginCreator::getFieldNames() + TRT_NOEXCEPT { return &field_collection_; } nvinfer1::IPluginV2* TensorRTPluginCreator::createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) { + const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT { return nullptr; } -void TensorRTPluginCreator::setPluginNamespace(const char* lib_namespace) { +void TensorRTPluginCreator::setPluginNamespace(const char* lib_namespace) + TRT_NOEXCEPT { plugin_namespace_ = lib_namespace; } -const char* TensorRTPluginCreator::getPluginNamespace() const { +const char* TensorRTPluginCreator::getPluginNamespace() const TRT_NOEXCEPT { return plugin_namespace_.c_str(); } diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h index 599294392799dc..6b2925a068bbd2 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h @@ -64,35 +64,35 @@ class PluginTensorRT : public nvinfer1::IPluginV2 { nvinfer1::PluginFormat getDataFormat() const { return data_format_; } // IPluginV2 - virtual const char* getPluginType() const = 0; + virtual const char* getPluginType() const TRT_NOEXCEPT = 0; - virtual const char* getPluginVersion() const { return "1"; } + 
virtual const char* getPluginVersion() const TRT_NOEXCEPT { return "1"; } - int getNbOutputs() const { return 1; } + int getNbOutputs() const TRT_NOEXCEPT { return 1; } virtual nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* input_dims, - int num_inputs) = 0; + int num_inputs) TRT_NOEXCEPT = 0; // Check format support. The default is FLOAT32 and kLINEAR. - bool supportsFormat(nvinfer1::DataType type, - nvinfer1::PluginFormat format) const override; + bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) + const TRT_NOEXCEPT override; // Configure the layer void configureWithFormat(const nvinfer1::Dims* input_dims, int num_inputs, const nvinfer1::Dims* output_dims, int num_outputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, - int max_batch_size) override; + int max_batch_size) TRT_NOEXCEPT override; // Initialize the layer for execution. - int initialize() override { return 0; } + int initialize() TRT_NOEXCEPT override { return 0; } // Shutdown the layer. This is called when the engine is destroyed - void terminate() override {} + void terminate() TRT_NOEXCEPT override {} // Find the workspace size required by the layer - size_t getWorkspaceSize(int) const override { return 0; } + size_t getWorkspaceSize(int) const TRT_NOEXCEPT override { return 0; } // Execute the layer #if IS_TRT_VERSION_LT(8000) @@ -101,25 +101,27 @@ class PluginTensorRT : public nvinfer1::IPluginV2 { virtual int enqueue(int batch_size, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) = 0; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT = 0; // Find the size of the serialization buffer required - virtual size_t getSerializationSize() const = 0; + virtual size_t getSerializationSize() const TRT_NOEXCEPT = 0; // Serialize the layer config to buffer. // TensorRT will call this func to serialize the configuration of TensorRT // engine. It should not be called by users. 
- virtual void serialize(void* buffer) const = 0; + virtual void serialize(void* buffer) const TRT_NOEXCEPT = 0; - void destroy() override { delete this; } + void destroy() TRT_NOEXCEPT override { delete this; } - virtual nvinfer1::IPluginV2* clone() const = 0; + virtual nvinfer1::IPluginV2* clone() const TRT_NOEXCEPT = 0; - void setPluginNamespace(const char* plugin_namespace) override { + void setPluginNamespace(const char* plugin_namespace) TRT_NOEXCEPT override { namespace_ = plugin_namespace; } - const char* getPluginNamespace() const override { return namespace_.c_str(); } + const char* getPluginNamespace() const TRT_NOEXCEPT override { + return namespace_.c_str(); + } protected: // Deserialize input_dims, max_batch_size, data_type, data_format @@ -155,15 +157,16 @@ class PluginTensorRTV2Ext : public nvinfer1::IPluginV2Ext { // The Func in IPluginV2Ext virtual nvinfer1::DataType getOutputDataType( int index, const nvinfer1::DataType* input_types, - int nb_inputs) const = 0; + int nb_inputs) const TRT_NOEXCEPT = 0; - virtual bool isOutputBroadcastAcrossBatch(int32_t output_index, - const bool* input_is_broadcasted, - int32_t nb_inputs) const { + virtual bool isOutputBroadcastAcrossBatch( + int32_t output_index, const bool* input_is_broadcasted, + int32_t nb_inputs) const TRT_NOEXCEPT { return false; } - virtual bool canBroadcastInputAcrossBatch(int32_t input_index) const { + virtual bool canBroadcastInputAcrossBatch(int32_t input_index) const + TRT_NOEXCEPT { return false; } @@ -174,37 +177,37 @@ class PluginTensorRTV2Ext : public nvinfer1::IPluginV2Ext { const bool* input_is_broadcast, const bool* output_is_broadcast, nvinfer1::PluginFormat float_format, - int32_t max_batch_size) override; + int32_t max_batch_size) TRT_NOEXCEPT override; - virtual IPluginV2Ext* clone() const = 0; + virtual IPluginV2Ext* clone() const TRT_NOEXCEPT = 0; void attachToContext(cudnnContext*, cublasContext*, - nvinfer1::IGpuAllocator*) override {} + nvinfer1::IGpuAllocator*) TRT_NOEXCEPT override {} - void detachFromContext() override {} + void detachFromContext() TRT_NOEXCEPT override {} // The Func in IPluginV2 - virtual const char* getPluginType() const = 0; - const char* getPluginVersion() const override { return "1"; } - virtual int32_t getNbOutputs() const { return 1; } + virtual const char* getPluginType() const TRT_NOEXCEPT = 0; + const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } + virtual int32_t getNbOutputs() const TRT_NOEXCEPT { return 1; } virtual nvinfer1::Dims getOutputDimensions(int32_t index, const nvinfer1::Dims* inputs, - int32_t nb_input) = 0; + int32_t nb_input) TRT_NOEXCEPT = 0; // Check format support. The default is FLOAT32 and NCHW. - bool supportsFormat(nvinfer1::DataType type, - nvinfer1::PluginFormat format) const override { + bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) + const TRT_NOEXCEPT override { return ((type == nvinfer1::DataType::kFLOAT) && (format == nvinfer1::PluginFormat::kLINEAR)); } // Initialize the layer for execution. // This is called when the engine is created. - int initialize() override { return 0; } + int initialize() TRT_NOEXCEPT override { return 0; } // Shutdown the layer. 
This is called when the engine is destroyed - void terminate() override {} + void terminate() TRT_NOEXCEPT override {} // Find the workspace size required by the layer - size_t getWorkspaceSize(int) const override { return 0; } + size_t getWorkspaceSize(int) const TRT_NOEXCEPT override { return 0; } // Execute the layer #if IS_TRT_VERSION_LT(8000) @@ -213,23 +216,23 @@ class PluginTensorRTV2Ext : public nvinfer1::IPluginV2Ext { virtual int enqueue(int batch_size, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) = 0; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT = 0; // Find the size of the serialization buffer required - virtual size_t getSerializationSize() const = 0; + virtual size_t getSerializationSize() const TRT_NOEXCEPT = 0; // Serialize the layer config to buffer. // TensorRT will call this func to serialize the configuration of TensorRT // engine. It should not be called by users. - virtual void serialize(void* buffer) const = 0; + virtual void serialize(void* buffer) const TRT_NOEXCEPT = 0; - virtual void destroy() = 0; + virtual void destroy() TRT_NOEXCEPT = 0; - void setPluginNamespace(const char* plugin_namespace) override { + void setPluginNamespace(const char* plugin_namespace) TRT_NOEXCEPT override { name_space_ = plugin_namespace; } - const char* getPluginNamespace() const override { + const char* getPluginNamespace() const TRT_NOEXCEPT override { return name_space_.c_str(); } @@ -256,52 +259,52 @@ class DynamicPluginTensorRT : public nvinfer1::IPluginV2DynamicExt { DynamicPluginTensorRT(const void* serialized_data, size_t length) {} // The Func in IPluginExt or IpluginExtV2 - virtual const char* getPluginVersion() const { return "1"; } - virtual const char* getPluginType() const = 0; - int getNbOutputs() const { return 1; } - int initialize() override { return 0; } - void terminate() override{}; + virtual const char* getPluginVersion() const TRT_NOEXCEPT { return "1"; } + virtual const char* getPluginType() const TRT_NOEXCEPT = 0; + int getNbOutputs() const TRT_NOEXCEPT { return 1; } + int initialize() TRT_NOEXCEPT override { return 0; } + void terminate() TRT_NOEXCEPT override{}; - virtual size_t getSerializationSize() const = 0; - virtual void serialize(void* buffer) const = 0; + virtual size_t getSerializationSize() const TRT_NOEXCEPT = 0; + virtual void serialize(void* buffer) const TRT_NOEXCEPT = 0; // The Func in IPluginV2 - nvinfer1::IPluginV2DynamicExt* clone() const = 0; + nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT = 0; virtual nvinfer1::DimsExprs getOutputDimensions( int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) = 0; // NOLINT + nvinfer1::IExprBuilder& expr_builder) TRT_NOEXCEPT = 0; // NOLINT virtual bool supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc* in_out, int nb_inputs, - int nb_outputs) = 0; + int nb_outputs) TRT_NOEXCEPT = 0; virtual void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, int nb_inputs, const nvinfer1::DynamicPluginTensorDesc* out, - int nb_outputs) = 0; + int nb_outputs) TRT_NOEXCEPT = 0; size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, int nb_inputs, const nvinfer1::PluginTensorDesc* outputs, - int nb_outputs) const override { + int nb_outputs) const TRT_NOEXCEPT override { return 0; } virtual int enqueue(const nvinfer1::PluginTensorDesc* input_desc, const nvinfer1::PluginTensorDesc* output_desc, const void* const* inputs, void* const* outputs, - void* 
workspace, cudaStream_t stream) = 0; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT = 0; virtual nvinfer1::DataType getOutputDataType( int index, const nvinfer1::DataType* input_types, - int nb_inputs) const = 0; - void setPluginNamespace(const char* plugin_namespace) override { + int nb_inputs) const TRT_NOEXCEPT = 0; + void setPluginNamespace(const char* plugin_namespace) TRT_NOEXCEPT override { name_space_ = plugin_namespace; } - const char* getPluginNamespace() const override { + const char* getPluginNamespace() const TRT_NOEXCEPT override { return name_space_.c_str(); } - virtual void destroy() = 0; + virtual void destroy() TRT_NOEXCEPT = 0; protected: void deserializeBase(void const*& serial_data, // NOLINT @@ -320,22 +323,23 @@ class TensorRTPluginCreator : public nvinfer1::IPluginCreator { public: TensorRTPluginCreator() = default; - virtual const char* getPluginName() const = 0; + virtual const char* getPluginName() const TRT_NOEXCEPT = 0; - virtual const char* getPluginVersion() const = 0; + virtual const char* getPluginVersion() const TRT_NOEXCEPT = 0; - const nvinfer1::PluginFieldCollection* getFieldNames() override; + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; - nvinfer1::IPluginV2* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override; + nvinfer1::IPluginV2* createPlugin(const char* name, + const nvinfer1::PluginFieldCollection* fc) + TRT_NOEXCEPT override; - virtual nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) = 0; + virtual nvinfer1::IPluginV2* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT = 0; - void setPluginNamespace(const char* lib_namespace) override; + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override; - const char* getPluginNamespace() const override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; private: std::string plugin_namespace_; diff --git a/paddle/fluid/inference/tensorrt/plugin/yolo_box_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/yolo_box_op_plugin.cu index fe292dba4673f6..ee1709f57e2598 100644 --- a/paddle/fluid/inference/tensorrt/plugin/yolo_box_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/yolo_box_op_plugin.cu @@ -70,15 +70,16 @@ YoloBoxPlugin::~YoloBoxPlugin() { } } -const char* YoloBoxPlugin::getPluginType() const { return "yolo_box_plugin"; } +const char* YoloBoxPlugin::getPluginType() const TRT_NOEXCEPT { + return "yolo_box_plugin"; +} -const char* YoloBoxPlugin::getPluginVersion() const { return "1"; } +const char* YoloBoxPlugin::getPluginVersion() const TRT_NOEXCEPT { return "1"; } -int YoloBoxPlugin::getNbOutputs() const { return 2; } +int YoloBoxPlugin::getNbOutputs() const TRT_NOEXCEPT { return 2; } -nvinfer1::Dims YoloBoxPlugin::getOutputDimensions(int index, - const nvinfer1::Dims* inputs, - int nb_input_dims) { +nvinfer1::Dims YoloBoxPlugin::getOutputDimensions( + int index, const nvinfer1::Dims* inputs, int nb_input_dims) TRT_NOEXCEPT { const int anchor_num = anchors_.size() / 2; const int box_num = inputs[0].d[1] * inputs[0].d[2] * anchor_num; @@ -90,13 +91,15 @@ nvinfer1::Dims YoloBoxPlugin::getOutputDimensions(int index, return nvinfer1::Dims2(box_num, class_num_); } -bool YoloBoxPlugin::supportsFormat(nvinfer1::DataType type, - nvinfer1::TensorFormat format) const { +bool YoloBoxPlugin::supportsFormat( + nvinfer1::DataType type, nvinfer1::TensorFormat format) const TRT_NOEXCEPT { return 
((type == data_type_ || type == nvinfer1::DataType::kINT32) && format == nvinfer1::TensorFormat::kLINEAR); } -size_t YoloBoxPlugin::getWorkspaceSize(int max_batch_size) const { return 0; } +size_t YoloBoxPlugin::getWorkspaceSize(int max_batch_size) const TRT_NOEXCEPT { + return 0; +} template __device__ inline T sigmoid(T x) { @@ -219,7 +222,7 @@ __global__ void KeYoloBoxFw(const T* const input, const int* const imgsize, template int YoloBoxPlugin::enqueue_impl(int batch_size, const void* const* inputs, - void** outputs, void* workspace, + void* const* outputs, void* workspace, cudaStream_t stream) { const int n = batch_size; const int h = input_h_; @@ -247,7 +250,7 @@ int YoloBoxPlugin::enqueue(int batch_size, const void* const* inputs, #else void* const* outputs, void* workspace, #endif - cudaStream_t stream) { + cudaStream_t stream) TRT_NOEXCEPT { if (data_type_ == nvinfer1::DataType::kFLOAT) { return enqueue_impl(batch_size, inputs, outputs, workspace, stream); } else if (data_type_ == nvinfer1::DataType::kHALF) { @@ -256,11 +259,11 @@ int YoloBoxPlugin::enqueue(int batch_size, const void* const* inputs, assert("unsupported type."); } -int YoloBoxPlugin::initialize() { return 0; } +int YoloBoxPlugin::initialize() TRT_NOEXCEPT { return 0; } -void YoloBoxPlugin::terminate() {} +void YoloBoxPlugin::terminate() TRT_NOEXCEPT {} -size_t YoloBoxPlugin::getSerializationSize() const { +size_t YoloBoxPlugin::getSerializationSize() const TRT_NOEXCEPT { size_t serialize_size = 0; serialize_size += SerializedSize(data_type_); serialize_size += SerializedSize(anchors_); @@ -274,7 +277,7 @@ size_t YoloBoxPlugin::getSerializationSize() const { return serialize_size; } -void YoloBoxPlugin::serialize(void* buffer) const { +void YoloBoxPlugin::serialize(void* buffer) const TRT_NOEXCEPT { SerializeValue(&buffer, data_type_); SerializeValue(&buffer, anchors_); SerializeValue(&buffer, class_num_); @@ -286,28 +289,30 @@ void YoloBoxPlugin::serialize(void* buffer) const { SerializeValue(&buffer, input_w_); } -void YoloBoxPlugin::destroy() {} +void YoloBoxPlugin::destroy() TRT_NOEXCEPT {} -void YoloBoxPlugin::setPluginNamespace(const char* lib_namespace) { +void YoloBoxPlugin::setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT { namespace_ = std::string(lib_namespace); } -const char* YoloBoxPlugin::getPluginNamespace() const { +const char* YoloBoxPlugin::getPluginNamespace() const TRT_NOEXCEPT { return namespace_.c_str(); } nvinfer1::DataType YoloBoxPlugin::getOutputDataType( - int index, const nvinfer1::DataType* input_type, int nb_inputs) const { + int index, const nvinfer1::DataType* input_type, + int nb_inputs) const TRT_NOEXCEPT { return input_type[0]; } -bool YoloBoxPlugin::isOutputBroadcastAcrossBatch(int output_index, - const bool* input_is_broadcast, - int nb_inputs) const { +bool YoloBoxPlugin::isOutputBroadcastAcrossBatch( + int output_index, const bool* input_is_broadcast, + int nb_inputs) const TRT_NOEXCEPT { return false; } -bool YoloBoxPlugin::canBroadcastInputAcrossBatch(int input_index) const { +bool YoloBoxPlugin::canBroadcastInputAcrossBatch(int input_index) const + TRT_NOEXCEPT { return false; } @@ -317,9 +322,9 @@ void YoloBoxPlugin::configurePlugin( const nvinfer1::DataType* input_types, const nvinfer1::DataType* output_types, const bool* input_is_broadcast, const bool* output_is_broadcast, nvinfer1::PluginFormat float_format, - int max_batct_size) {} + int max_batct_size) TRT_NOEXCEPT {} -nvinfer1::IPluginV2Ext* YoloBoxPlugin::clone() const { +nvinfer1::IPluginV2Ext* 
YoloBoxPlugin::clone() const TRT_NOEXCEPT { return new YoloBoxPlugin(data_type_, anchors_, class_num_, conf_thresh_, downsample_ratio_, clip_bbox_, scale_x_y_, input_h_, input_w_); @@ -327,26 +332,30 @@ nvinfer1::IPluginV2Ext* YoloBoxPlugin::clone() const { YoloBoxPluginCreator::YoloBoxPluginCreator() {} -void YoloBoxPluginCreator::setPluginNamespace(const char* lib_namespace) { +void YoloBoxPluginCreator::setPluginNamespace(const char* lib_namespace) + TRT_NOEXCEPT { namespace_ = std::string(lib_namespace); } -const char* YoloBoxPluginCreator::getPluginNamespace() const { +const char* YoloBoxPluginCreator::getPluginNamespace() const TRT_NOEXCEPT { return namespace_.c_str(); } -const char* YoloBoxPluginCreator::getPluginName() const { +const char* YoloBoxPluginCreator::getPluginName() const TRT_NOEXCEPT { return "yolo_box_plugin"; } -const char* YoloBoxPluginCreator::getPluginVersion() const { return "1"; } +const char* YoloBoxPluginCreator::getPluginVersion() const TRT_NOEXCEPT { + return "1"; +} -const nvinfer1::PluginFieldCollection* YoloBoxPluginCreator::getFieldNames() { +const nvinfer1::PluginFieldCollection* YoloBoxPluginCreator::getFieldNames() + TRT_NOEXCEPT { return &field_collection_; } nvinfer1::IPluginV2Ext* YoloBoxPluginCreator::createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) { + const char* name, const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT { const nvinfer1::PluginField* fields = fc->fields; int type_id = -1; @@ -392,7 +401,8 @@ nvinfer1::IPluginV2Ext* YoloBoxPluginCreator::createPlugin( } nvinfer1::IPluginV2Ext* YoloBoxPluginCreator::deserializePlugin( - const char* name, const void* serial_data, size_t serial_length) { + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT { auto plugin = new YoloBoxPlugin(serial_data, serial_length); plugin->setPluginNamespace(namespace_.c_str()); return plugin; diff --git a/paddle/fluid/inference/tensorrt/plugin/yolo_box_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/yolo_box_op_plugin.h index 4cd6a383336e23..c9e9f9a0567aee 100644 --- a/paddle/fluid/inference/tensorrt/plugin/yolo_box_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/yolo_box_op_plugin.h @@ -35,38 +35,39 @@ class YoloBoxPlugin : public nvinfer1::IPluginV2Ext { YoloBoxPlugin(const void* data, size_t length); ~YoloBoxPlugin() override; - const char* getPluginType() const override; - const char* getPluginVersion() const override; - int getNbOutputs() const override; + const char* getPluginType() const TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nb_input_dims) override; - bool supportsFormat(nvinfer1::DataType type, - nvinfer1::TensorFormat format) const override; - size_t getWorkspaceSize(int max_batch_size) const override; + int nb_input_dims) TRT_NOEXCEPT override; + bool supportsFormat(nvinfer1::DataType type, nvinfer1::TensorFormat format) + const TRT_NOEXCEPT override; + size_t getWorkspaceSize(int max_batch_size) const TRT_NOEXCEPT override; #if IS_TRT_VERSION_LT(8000) int enqueue(int batch_size, const void* const* inputs, void** outputs, #else int enqueue(int batch_size, const void* const* inputs, void* const* outputs, #endif - void* workspace, cudaStream_t stream) override; + void* workspace, cudaStream_t stream) TRT_NOEXCEPT override; template - int enqueue_impl(int batch_size, const void* const* inputs, void** 
outputs, - void* workspace, cudaStream_t stream); - int initialize() override; - void terminate() override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - void destroy() override; - void setPluginNamespace(const char* lib_namespace) override; - const char* getPluginNamespace() const override; + int enqueue_impl(int batch_size, const void* const* inputs, + void* const* outputs, void* workspace, cudaStream_t stream); + int initialize() TRT_NOEXCEPT override; + void terminate() TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void* buffer) const TRT_NOEXCEPT override; + void destroy() TRT_NOEXCEPT override; + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* input_type, - int nb_inputs) const override; + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType* input_type, + int nb_inputs) const TRT_NOEXCEPT override; bool isOutputBroadcastAcrossBatch(int output_index, const bool* input_is_broadcast, - int nb_inputs) const override; - bool canBroadcastInputAcrossBatch(int input_index) const override; + int nb_inputs) const TRT_NOEXCEPT override; + bool canBroadcastInputAcrossBatch(int input_index) const + TRT_NOEXCEPT override; void configurePlugin(const nvinfer1::Dims* input_dims, int nb_inputs, const nvinfer1::Dims* output_dims, int nb_outputs, const nvinfer1::DataType* input_types, @@ -74,8 +75,8 @@ class YoloBoxPlugin : public nvinfer1::IPluginV2Ext { const bool* input_is_broadcast, const bool* output_is_broadcast, nvinfer1::PluginFormat float_format, - int max_batct_size) override; - nvinfer1::IPluginV2Ext* clone() const override; + int max_batct_size) TRT_NOEXCEPT override; + nvinfer1::IPluginV2Ext* clone() const TRT_NOEXCEPT override; private: nvinfer1::DataType data_type_; @@ -96,17 +97,18 @@ class YoloBoxPluginCreator : public nvinfer1::IPluginCreator { YoloBoxPluginCreator(); ~YoloBoxPluginCreator() override = default; - void setPluginNamespace(const char* lib_namespace) override; - const char* getPluginNamespace() const override; - const char* getPluginName() const override; - const char* getPluginVersion() const override; - const nvinfer1::PluginFieldCollection* getFieldNames() override; + void setPluginNamespace(const char* lib_namespace) TRT_NOEXCEPT override; + const char* getPluginNamespace() const TRT_NOEXCEPT override; + const char* getPluginName() const TRT_NOEXCEPT override; + const char* getPluginVersion() const TRT_NOEXCEPT override; + const nvinfer1::PluginFieldCollection* getFieldNames() TRT_NOEXCEPT override; nvinfer1::IPluginV2Ext* createPlugin( - const char* name, const nvinfer1::PluginFieldCollection* fc) override; - nvinfer1::IPluginV2Ext* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) override; + const char* name, + const nvinfer1::PluginFieldCollection* fc) TRT_NOEXCEPT override; + nvinfer1::IPluginV2Ext* deserializePlugin( + const char* name, const void* serial_data, + size_t serial_length) TRT_NOEXCEPT override; private: std::string namespace_; diff --git a/paddle/fluid/inference/tensorrt/test_tensorrt.cc b/paddle/fluid/inference/tensorrt/test_tensorrt.cc index 36a25e27d78f5b..2f5b75c1020041 100644 --- a/paddle/fluid/inference/tensorrt/test_tensorrt.cc +++ b/paddle/fluid/inference/tensorrt/test_tensorrt.cc @@ -16,13 +16,15 @@ limitations 
under the License. */ #include #include #include "NvInfer.h" +#include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/platform/dynload/tensorrt.h" namespace dy = paddle::platform::dynload; class Logger : public nvinfer1::ILogger { public: - void log(nvinfer1::ILogger::Severity severity, const char* msg) override { + void log(nvinfer1::ILogger::Severity severity, + const char* msg) TRT_NOEXCEPT override { switch (severity) { case Severity::kINFO: LOG(INFO) << msg; @@ -74,10 +76,11 @@ nvinfer1::IHostMemory* CreateNetwork() { Logger logger; // Create the engine. nvinfer1::IBuilder* builder = createInferBuilder(&logger); + auto config = builder->createBuilderConfig(); ScopedWeights weights(2.); ScopedWeights bias(3.); - nvinfer1::INetworkDefinition* network = builder->createNetwork(); + nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); // Add the input auto input = network->addInput(kInputTensor, nvinfer1::DataType::kFLOAT, nvinfer1::Dims3{1, 1, 1}); @@ -91,8 +94,8 @@ nvinfer1::IHostMemory* CreateNetwork() { network->markOutput(*output); // Build the engine. builder->setMaxBatchSize(1); - builder->setMaxWorkspaceSize(1 << 10); - auto engine = builder->buildCudaEngine(*network); + config->setMaxWorkspaceSize(1 << 10); + auto engine = builder->buildEngineWithConfig(*network, *config); EXPECT_NE(engine, nullptr); // Serialize the engine to create a model, then close. nvinfer1::IHostMemory* model = engine->serialize(); diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc index 48343fca01efad..86666950bc36e6 100644 --- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc +++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc @@ -22,7 +22,7 @@ namespace inference { namespace tensorrt { // set the batch size before constructing the thread to execute engine -int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } +int TRTInt8Calibrator::getBatchSize() const TRT_NOEXCEPT { return batch_size_; } TRTInt8Calibrator::TRTInt8Calibrator( const std::unordered_map& buffers, int batch_size, @@ -95,7 +95,7 @@ bool TRTInt8Calibrator::setBatch( } bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, - int num_bindings) { + int num_bindings) TRT_NOEXCEPT { VLOG(4) << "get batch: " << engine_name_; std::unique_lock lk(mut_); // The consumer has just finished processing a data. 
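The change that recurs throughout these TensorRT files is the TRT_NOEXCEPT suffix on every IPluginV2 / IPluginCreator / IInt8EntropyCalibrator2 override: TensorRT 8 declares these interface virtuals noexcept, so the overrides must carry the same exception specifier, while pre-8 headers must not see it. The macro comes from the newly included tensorrt/helper.h, which is not part of this excerpt; a minimal sketch of the assumed definition, reusing the version guard already present in these files, is:

// Sketch only: assumed definition of TRT_NOEXCEPT (the actual one lives in
// paddle/fluid/inference/tensorrt/helper.h, not shown in this diff).
// TensorRT 8 marks the plugin/calibrator virtuals noexcept, so overrides must
// match; earlier TensorRT versions must not see the specifier.
#if IS_TRT_VERSION_GE(8000)
#define TRT_NOEXCEPT noexcept
#else
#define TRT_NOEXCEPT
#endif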
@@ -131,14 +131,15 @@ void TRTInt8Calibrator::setDone() { cond_.notify_all(); } -const void* TRTInt8Calibrator::readCalibrationCache(size_t& length) { +const void* TRTInt8Calibrator::readCalibrationCache(size_t& length) + TRT_NOEXCEPT { if (calibration_table_.empty()) return nullptr; length = calibration_table_.size(); return calibration_table_.data(); } void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, - std::size_t length) { + std::size_t length) TRT_NOEXCEPT { calibration_table_ = std::string((const char*)ptr, length); VLOG(4) << "Got calibration data for " << engine_name_ << " " << ptr << " length=" << length; diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h index 15ae67fa10f697..c84cb45b7ecbad 100644 --- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h +++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h @@ -43,17 +43,18 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 { explicit TRTInt8Calibrator(const std::string& calibration_data); ~TRTInt8Calibrator(); - int getBatchSize() const override; + int getBatchSize() const TRT_NOEXCEPT override; bool getBatch(void* bindings[], const char* names[], - int num_bindings) override; + int num_bindings) TRT_NOEXCEPT override; bool setBatch(const std::unordered_map& data); void setDone(); void waitAndSetDone(); - const void* readCalibrationCache(std::size_t& length) override; - void writeCalibrationCache(const void* ptr, std::size_t length) override; + const void* readCalibrationCache(std::size_t& length) TRT_NOEXCEPT override; + void writeCalibrationCache(const void* ptr, + std::size_t length) TRT_NOEXCEPT override; const std::string& getCalibrationTableAsString() { return calibration_table_; } diff --git a/paddle/fluid/operators/activation_op_npu.cc b/paddle/fluid/operators/activation_op_npu.cc index 241081bc0d4afd..bb520c270fa2cb 100644 --- a/paddle/fluid/operators/activation_op_npu.cc +++ b/paddle/fluid/operators/activation_op_npu.cc @@ -347,6 +347,56 @@ class SigmoidGradNPUKernel : public framework::OpKernel { } }; +template +class HardSigmoidNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); + float slope = ctx.Attr("slope"); + float offset = ctx.Attr("offset"); + + out->mutable_data(ctx.GetPlace()); + + framework::NPUAttributeMap attr_input = {{"alpha", slope}, + {"beta", offset}}; + + auto stream = + ctx.template device_context() + .stream(); + + const auto& runner = NpuOpRunner("HardSigmoid", {*x}, {*out}, attr_input); + runner.Run(stream); + } +}; + +template +class HardSigmoidGradNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* out = ctx.Input("Out"); + + auto* dx = ctx.Output(framework::GradVarName("X")); + + float slope = ctx.Attr("slope"); + float offset = ctx.Attr("offset"); + + dx->mutable_data(ctx.GetPlace()); + + framework::NPUAttributeMap attr_input = {{"alpha", slope}, + {"beta", offset}}; + + auto stream = + ctx.template device_context() + .stream(); + + const auto& runner_dx = + NpuOpRunner("HardSigmoidGrad", {*dout, *out}, {*dx}, attr_input); + runner_dx.Run(stream); + } +}; + } // namespace operators } // namespace paddle @@ -421,3 +471,15 @@ REGISTER_OP_NPU_KERNEL( ops::SigmoidGradNPUKernel, ops::SigmoidGradNPUKernel); + 
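For reference, the HardSigmoid kernels added above do no arithmetic themselves: they hand the whole computation to the CANN "HardSigmoid" / "HardSigmoidGrad" operators, mapping the Paddle attributes slope and offset onto the NPU attributes alpha and beta. A minimal sketch of the element-wise semantics being delegated, assuming Paddle's usual hard_sigmoid definition (not part of the patch):

// Sketch: hard_sigmoid clamps a linear transform of x into [0, 1].
// alpha = slope, beta = offset in the NPUAttributeMap above.
inline float HardSigmoidRef(float x, float slope, float offset) {
  float y = slope * x + offset;
  return y < 0.f ? 0.f : (y > 1.f ? 1.f : y);
}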
+REGISTER_OP_NPU_KERNEL( + hard_sigmoid, + ops::HardSigmoidNPUKernel, + ops::HardSigmoidNPUKernel); + +REGISTER_OP_NPU_KERNEL( + hard_sigmoid_grad, + ops::HardSigmoidGradNPUKernel, + ops::HardSigmoidGradNPUKernel); diff --git a/paddle/fluid/operators/cast_op.cu b/paddle/fluid/operators/cast_op.cu index 1ac110b3cafd6b..0beb2291060169 100644 --- a/paddle/fluid/operators/cast_op.cu +++ b/paddle/fluid/operators/cast_op.cu @@ -40,7 +40,8 @@ __global__ void VecCastCUDAKernel(const InT* in, const int64_t N, OutT* out) { int64_t idx = blockDim.x * blockIdx.x + threadIdx.x; using LoadT = AlignedVector; using StoreT = AlignedVector; - for (int i = idx * VecSize; i < N; i += blockDim.x * gridDim.x * VecSize) { + for (int64_t i = idx * VecSize; i < N; + i += blockDim.x * gridDim.x * VecSize) { InT in_vec[VecSize]; LoadT* in_value = reinterpret_cast(&in_vec); *in_value = *reinterpret_cast(&in[i]); diff --git a/paddle/fluid/operators/controlflow/logical_op.cc b/paddle/fluid/operators/controlflow/logical_op.cc index fb8cde70f5324f..285b17d4995dbc 100644 --- a/paddle/fluid/operators/controlflow/logical_op.cc +++ b/paddle/fluid/operators/controlflow/logical_op.cc @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -26,15 +23,16 @@ class BinaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker { void Make() override { OpComment comment; AddInput("X", string::Sprintf("Left hand operand of %s operator. Must be " - "a Variable of type bool.", + "a Variable of type being one of bool, int8, " + "int16, int32, int64, float32, float64.", comment.type)); AddInput("Y", string::Sprintf("Right hand operand of %s operator. Must be " - "a Variable of type bool.", + "a Variable of type being one of bool, int8, " + "int16, int32, int64, float32, float64.", comment.type)); AddOutput("Out", string::Sprintf("n-dim bool Variable")); AddComment(string::Sprintf(R"DOC(%s Operator - -It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim boolean LoDTensor or Tensor. +It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim LoDTensor or Tensor. Each element of Out is calculated by %s )DOC", comment.type, comment.equation)); @@ -46,13 +44,14 @@ class UnaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { OpComment comment; - AddInput("X", string::Sprintf("Operand of %s operator. Must be " - "a LoDTensor or Tensor of type bool.", - comment.type)); + AddInput("X", + string::Sprintf("Operand of %s operator. Must be " + "a LoDTensor or Tensor of type being one of bool, " + "int8, int16, int32, int64, float32, float64.", + comment.type)); AddOutput("Out", string::Sprintf("n-dim bool LoDTensor or Tensor.")); AddComment(string::Sprintf(R"DOC(%s Operator - -It operates element-wise on X, and returns the Out. X and Out are N-dim boolean LoDTensor or Tensor. +It operates element-wise on X, and returns the Out. X and Out are N-dim LoDTensor or Tensor. 
Each element of Out is calculated by %s )DOC", comment.type, comment.equation)); diff --git a/paddle/fluid/operators/controlflow/logical_op.cu b/paddle/fluid/operators/controlflow/logical_op.cu index 6cbcd516e08264..301b4c4149fad3 100644 --- a/paddle/fluid/operators/controlflow/logical_op.cu +++ b/paddle/fluid/operators/controlflow/logical_op.cu @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -21,13 +18,13 @@ namespace plat = paddle::platform; namespace paddle { namespace operators { -#define LOGICAL_BINARY_FUNCTOR(func_name, op) \ - template \ - struct func_name { \ - using ELEMENT_TYPE = T; \ - HOSTDEVICE bool operator()(const T* args) const { \ - return args[0] op args[1]; \ - } \ +#define LOGICAL_BINARY_FUNCTOR(func_name, op) \ + template \ + struct func_name { \ + using ELEMENT_TYPE = T; \ + HOSTDEVICE bool operator()(const T* args) const { \ + return static_cast(args[0]) op static_cast(args[1]); \ + } \ }; LOGICAL_BINARY_FUNCTOR(CudaOrFunctor, ||) @@ -68,10 +65,16 @@ class BinaryLogicalOpKernel } // namespace operators } // namespace paddle -#define REGISTER_LOGICAL_CUDA_KERNEL(op_name, func) \ - REGISTER_OP_CUDA_KERNEL( \ - op_name, \ - ops::BinaryLogicalOpKernel>); +#define REGISTER_LOGICAL_CUDA_KERNEL(op_name, func) \ + REGISTER_OP_CUDA_KERNEL( \ + op_name, \ + ops::BinaryLogicalOpKernel>, \ + ops::BinaryLogicalOpKernel>, \ + ops::BinaryLogicalOpKernel>, \ + ops::BinaryLogicalOpKernel>, \ + ops::BinaryLogicalOpKernel>, \ + ops::BinaryLogicalOpKernel>, \ + ops::BinaryLogicalOpKernel>); REGISTER_LOGICAL_CUDA_KERNEL(logical_or, CudaOrFunctor) REGISTER_LOGICAL_CUDA_KERNEL(logical_and, CudaAndFunctor) diff --git a/paddle/fluid/operators/controlflow/logical_op.h b/paddle/fluid/operators/controlflow/logical_op.h index 2c39201a426a25..92fe0a10cb907c 100644 --- a/paddle/fluid/operators/controlflow/logical_op.h +++ b/paddle/fluid/operators/controlflow/logical_op.h @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
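Note that the template arguments of the casts inside LOGICAL_BINARY_FUNCTOR have been lost in this excerpt; since Out is always a bool tensor, they are presumably static_cast<bool>. A small self-contained sketch of the resulting behaviour for non-bool element types (illustration only, not patch text):

// With the casts restored as static_cast<bool>, logical_and on arithmetic
// element types behaves like (x != 0) && (y != 0), and the result is bool.
template <typename T>
struct AndFunctorSketch {
  bool operator()(const T* args) const {
    return static_cast<bool>(args[0]) && static_cast<bool>(args[1]);
  }
};
// Example: float in[2] = {0.0f, 2.5f};  AndFunctorSketch<float>()(in) == false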
@@ -82,12 +79,36 @@ class UnaryLogicalOpKernel } // namespace operators } // namespace paddle -#define REGISTER_BINARY_LOGICAL_KERNEL(op_type, dev, functor) \ - REGISTER_OP_##dev##_KERNEL( \ - op_type, ::paddle::operators::BinaryLogicalOpKernel< \ - ::paddle::platform::dev##DeviceContext, functor>); +#define REGISTER_BINARY_LOGICAL_KERNEL(op_type, dev, functor) \ + REGISTER_OP_##dev##_KERNEL( \ + op_type, ::paddle::operators::BinaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::BinaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::BinaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::BinaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::BinaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::BinaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::BinaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>); -#define REGISTER_UNARY_LOGICAL_KERNEL(op_type, dev, functor) \ - REGISTER_OP_##dev##_KERNEL( \ - op_type, ::paddle::operators::UnaryLogicalOpKernel< \ - ::paddle::platform::dev##DeviceContext, functor>); +#define REGISTER_UNARY_LOGICAL_KERNEL(op_type, dev, functor) \ + REGISTER_OP_##dev##_KERNEL( \ + op_type, ::paddle::operators::UnaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::UnaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::UnaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::UnaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::UnaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::UnaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>, \ + ::paddle::operators::UnaryLogicalOpKernel< \ + ::paddle::platform::dev##DeviceContext, functor>); diff --git a/paddle/fluid/operators/controlflow/logical_op_npu.cc b/paddle/fluid/operators/controlflow/logical_op_npu.cc index b452bee747232d..babdb2257ee3ca 100644 --- a/paddle/fluid/operators/controlflow/logical_op_npu.cc +++ b/paddle/fluid/operators/controlflow/logical_op_npu.cc @@ -1,11 +1,8 @@ /* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
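The angle-bracketed template arguments in the registration macros above are likewise missing from this excerpt. Judging by the seven repeated kernel entries and the dtype list spelled out in the op comments (bool, int8, int16, int32, int64, float32, float64), each entry presumably instantiates the functor with one of those element types; a hedged reconstruction of the binary macro (names and exact argument lists are assumptions) would look roughly like:

#define REGISTER_BINARY_LOGICAL_KERNEL(op_type, dev, functor)       \
  REGISTER_OP_##dev##_KERNEL(                                        \
      op_type,                                                       \
      ::paddle::operators::BinaryLogicalOpKernel<                    \
          ::paddle::platform::dev##DeviceContext, functor<bool>>,    \
      ::paddle::operators::BinaryLogicalOpKernel<                    \
          ::paddle::platform::dev##DeviceContext, functor<int8_t>>,  \
      ::paddle::operators::BinaryLogicalOpKernel<                    \
          ::paddle::platform::dev##DeviceContext, functor<int16_t>>, \
      ::paddle::operators::BinaryLogicalOpKernel<                    \
          ::paddle::platform::dev##DeviceContext, functor<int>>,     \
      ::paddle::operators::BinaryLogicalOpKernel<                    \
          ::paddle::platform::dev##DeviceContext, functor<int64_t>>, \
      ::paddle::operators::BinaryLogicalOpKernel<                    \
          ::paddle::platform::dev##DeviceContext, functor<float>>,   \
      ::paddle::operators::BinaryLogicalOpKernel<                    \
          ::paddle::platform::dev##DeviceContext, functor<double>>);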
@@ -82,11 +79,29 @@ class LogicalAndPUKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_NPU_KERNEL(logical_not, - ops::LogicalNotNPUKernel); +REGISTER_OP_NPU_KERNEL( + logical_not, ops::LogicalNotNPUKernel, + ops::LogicalNotNPUKernel, + ops::LogicalNotNPUKernel, + ops::LogicalNotNPUKernel, + ops::LogicalNotNPUKernel, + ops::LogicalNotNPUKernel, + ops::LogicalNotNPUKernel); REGISTER_OP_NPU_KERNEL(logical_or, - ops::LogicalOrNPUKernel); + ops::LogicalOrNPUKernel, + ops::LogicalOrNPUKernel, + ops::LogicalOrNPUKernel, + ops::LogicalOrNPUKernel, + ops::LogicalOrNPUKernel, + ops::LogicalOrNPUKernel, + ops::LogicalOrNPUKernel); REGISTER_OP_NPU_KERNEL(logical_and, - ops::LogicalAndPUKernel); + ops::LogicalAndPUKernel, + ops::LogicalAndPUKernel, + ops::LogicalAndPUKernel, + ops::LogicalAndPUKernel, + ops::LogicalAndPUKernel, + ops::LogicalAndPUKernel, + ops::LogicalAndPUKernel); diff --git a/paddle/fluid/operators/controlflow/logical_op_xpu.h b/paddle/fluid/operators/controlflow/logical_op_xpu.h index 9d46ad8c0447ff..aef6ae27a31945 100644 --- a/paddle/fluid/operators/controlflow/logical_op_xpu.h +++ b/paddle/fluid/operators/controlflow/logical_op_xpu.h @@ -45,7 +45,7 @@ class BinaryLogicalOpXPUKernel : public framework::OpKernel { auto* x = context.Input("X"); auto* y = context.Input("Y"); auto* out = context.Output("Out"); - T* out_ptr = out->mutable_data(context.GetPlace()); + bool* out_ptr = out->mutable_data(context.GetPlace()); const T* x_ptr = x->data(); const T* y_ptr = y->data(); auto& dev_ctx = @@ -153,7 +153,7 @@ class UnaryLogicalOpXPUKernel : public framework::OpKernel { if (x->numel() == 0) { return; } - out->mutable_data(context.GetPlace()); + out->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); int ret = xpu::logical_not(dev_ctx.x_context(), x->data(), diff --git a/paddle/fluid/operators/controlflow/logicaland_op_xpu.cc b/paddle/fluid/operators/controlflow/logicaland_op_xpu.cc index 08927e66f25064..6248b6e0b06378 100644 --- a/paddle/fluid/operators/controlflow/logicaland_op_xpu.cc +++ b/paddle/fluid/operators/controlflow/logicaland_op_xpu.cc @@ -17,5 +17,11 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_XPU_KERNEL( logical_and, - ops::BinaryLogicalOpXPUKernel); + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel); #endif diff --git a/paddle/fluid/operators/controlflow/logicalnot_op_xpu.cc b/paddle/fluid/operators/controlflow/logicalnot_op_xpu.cc old mode 100755 new mode 100644 index a8cef52ace2c60..be857db8aa9669 --- a/paddle/fluid/operators/controlflow/logicalnot_op_xpu.cc +++ b/paddle/fluid/operators/controlflow/logicalnot_op_xpu.cc @@ -15,5 +15,11 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/controlflow/logical_op_xpu.h" namespace ops = paddle::operators; -REGISTER_OP_XPU_KERNEL(logicalnot, ops::UnaryLogicalOpXPUKernel); +REGISTER_OP_XPU_KERNEL(logicalnot, ops::UnaryLogicalOpXPUKernel, + ops::UnaryLogicalOpXPUKernel, + ops::UnaryLogicalOpXPUKernel, + ops::UnaryLogicalOpXPUKernel, + ops::UnaryLogicalOpXPUKernel, + ops::UnaryLogicalOpXPUKernel, + ops::UnaryLogicalOpXPUKernel); #endif diff --git a/paddle/fluid/operators/controlflow/logicalor_op_xpu.cc b/paddle/fluid/operators/controlflow/logicalor_op_xpu.cc index e99c2f1a181040..126596841a29f8 100644 --- a/paddle/fluid/operators/controlflow/logicalor_op_xpu.cc +++ b/paddle/fluid/operators/controlflow/logicalor_op_xpu.cc @@ -18,5 +18,11 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_XPU_KERNEL( logical_or, - ops::BinaryLogicalOpXPUKernel); + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel, + ops::BinaryLogicalOpXPUKernel); #endif diff --git a/paddle/fluid/operators/cumsum_op_npu.cc b/paddle/fluid/operators/cumsum_op_npu.cc new file mode 100644 index 00000000000000..e8cf1a46db3cca --- /dev/null +++ b/paddle/fluid/operators/cumsum_op_npu.cc @@ -0,0 +1,73 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the Licnse. 
*/ + +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/cum_op.h" +#include "paddle/fluid/operators/npu_op_runner.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class CumSumNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); + int axis = ctx.Attr("axis"); + bool exclusive = ctx.Attr("exclusive"); + bool reverse = ctx.Attr("reverse"); + + out->mutable_data(ctx.GetPlace()); + + framework::NPUAttributeMap attr_input = { + {"axis", axis}, {"exclusive", exclusive}, {"reverse", reverse}}; + + auto stream = + ctx.template device_context() + .stream(); + + bool flatten = ctx.Attr("flatten"); + if (flatten) { + PADDLE_ENFORCE_EQ( + axis, -1, + platform::errors::InvalidArgument( + "when flatten is true, attr axis must be default %d, but got %d", + -1, axis)); + + Tensor new_x(x->type()); + new_x.ShareDataWith(*x); + + new_x.Resize(framework::make_ddim({x->numel()})); + + const auto& runner = NpuOpRunner("CumsumD", {new_x}, {*out}, attr_input); + runner.Run(stream); + } else { + const auto& runner = NpuOpRunner("CumsumD", {*x}, {*out}, attr_input); + runner.Run(stream); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; +REGISTER_OP_NPU_KERNEL( + cumsum, ops::CumSumNPUKernel, + ops::CumSumNPUKernel, + ops::CumSumNPUKernel); diff --git a/paddle/fluid/operators/dequantize_abs_max_op.cc b/paddle/fluid/operators/dequantize_abs_max_op.cc index c8bca25b6b0f0e..aee468e05e1826 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.cc +++ b/paddle/fluid/operators/dequantize_abs_max_op.cc @@ -50,6 +50,7 @@ struct DequantizeFunctor { }; template struct DequantizeFunctor; +template struct DequantizeFunctor; class DequantizeMaxAbsOp : public framework::OperatorWithKernel { public: @@ -79,7 +80,7 @@ class DequantizeMaxAbsOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(int8 Tensor) The input with int8 type is the " + "(Int Tensor) The input with int8/16 type is the " "low precision tensor."); AddInput("Scale", "(float) The scale in quantization stage."); AddOutput("Out", @@ -108,4 +109,5 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL(dequantize_abs_max, - ops::DequantizeMaxAbsKernel); + ops::DequantizeMaxAbsKernel, + ops::DequantizeMaxAbsKernel); diff --git a/paddle/fluid/operators/dequantize_abs_max_op.cu b/paddle/fluid/operators/dequantize_abs_max_op.cu index 6554d4545ad312..e96835a1ea51cd 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.cu +++ b/paddle/fluid/operators/dequantize_abs_max_op.cu @@ -45,6 +45,7 @@ struct DequantizeFunctor { }; template struct DequantizeFunctor; +template struct DequantizeFunctor; } // namespace operators } // namespace paddle @@ -52,4 +53,5 @@ template struct DequantizeFunctor; namespace ops = paddle::operators; using CUDA = paddle::platform::CUDADeviceContext; REGISTER_OP_CUDA_KERNEL(dequantize_abs_max, - ops::DequantizeMaxAbsKernel); + ops::DequantizeMaxAbsKernel, + ops::DequantizeMaxAbsKernel); diff --git a/paddle/fluid/operators/gather_tree_op.cu b/paddle/fluid/operators/gather_tree_op.cu index c53f1e81cef54e..829682764a674d 100644 --- a/paddle/fluid/operators/gather_tree_op.cu +++ b/paddle/fluid/operators/gather_tree_op.cu @@ -50,6 +50,14 @@ class 
GatherTreeOpCUDAKernel : public framework::OpKernel { const auto *parents_data = parents->data(); auto *out_data = out->mutable_data(ctx.GetPlace()); + PADDLE_ENFORCE_NOT_NULL( + ids_data, platform::errors::InvalidArgument( + "Input(Ids) of gather_tree should not be null.")); + + PADDLE_ENFORCE_NOT_NULL( + parents_data, platform::errors::InvalidArgument( + "Input(Parents) of gather_tree should not be null.")); + auto &ids_dims = ids->dims(); int64_t max_length = ids_dims[0]; int64_t batch_size = ids_dims[1]; diff --git a/paddle/fluid/operators/gather_tree_op.h b/paddle/fluid/operators/gather_tree_op.h index 742a7ffcaae4c8..e035a30e7954fe 100644 --- a/paddle/fluid/operators/gather_tree_op.h +++ b/paddle/fluid/operators/gather_tree_op.h @@ -38,6 +38,14 @@ class GatherTreeOpKernel : public framework::OpKernel { auto batch_size = ids_dims[1]; auto beam_size = ids_dims[2]; + PADDLE_ENFORCE_NOT_NULL( + ids_data, platform::errors::InvalidArgument( + "Input(Ids) of gather_tree should not be null.")); + + PADDLE_ENFORCE_NOT_NULL( + parents_data, platform::errors::InvalidArgument( + "Input(Parents) of gather_tree should not be null.")); + for (int batch = 0; batch < batch_size; batch++) { for (int beam = 0; beam < beam_size; beam++) { auto idx = (max_length - 1) * batch_size * beam_size + diff --git a/paddle/fluid/operators/index_sample_op_npu.cc b/paddle/fluid/operators/index_sample_op_npu.cc new file mode 100644 index 00000000000000..f5a4100c635856 --- /dev/null +++ b/paddle/fluid/operators/index_sample_op_npu.cc @@ -0,0 +1,130 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/index_sample_op.h" + +#include "paddle/fluid/operators/npu_op_runner.h" + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; + +template +class IndexSampleNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& dev_ctx = + ctx.template device_context(); + auto* input = ctx.Input("X"); + auto* index = ctx.Input("Index"); + auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); + + Tensor transformed_index; + const auto& index_type = index->type(); + bool index_type_match = index_type == framework::proto::VarType::INT32 || + index_type == framework::proto::VarType::INT64; + PADDLE_ENFORCE_EQ(index_type_match, true, + platform::errors::InvalidArgument( + "Input(Index) holds the wrong type, it holds %s, but " + "desires to be %s or %s", + paddle::framework::DataTypeToString(index_type), + paddle::framework::DataTypeToString( + framework::proto::VarType::INT32), + paddle::framework::DataTypeToString( + framework::proto::VarType::INT64))); + if (index_type == framework::proto::VarType::INT32) { + transformed_index.mutable_data(index->dims(), + dev_ctx.GetPlace()); + const auto& cast_runner = NpuOpRunner( + "Cast", {*index}, {transformed_index}, {{"dst_type", ACL_INT64}}); + cast_runner.Run(dev_ctx.stream()); + } else { + transformed_index.ShareDataWith(*index); + } + + const auto& runner = NpuOpRunner( + "GatherElements", {*input, transformed_index}, {*out}, {{"dim", 1}}); + runner.Run(dev_ctx.stream()); + } +}; + +template +void IndexSampleGradScatter(const paddle::platform::NPUDeviceContext& dev_ctx, + const Tensor* index, const Tensor* out_grad, + Tensor* x_grad) { + auto index_dims = index->dims(); + auto input_dims = x_grad->dims(); + auto batch_size = input_dims[0]; + auto index_length = index_dims[1]; + + std::vector scatter_index_vec; + std::vector index_vec; + framework::TensorToVector(*index, dev_ctx, &index_vec); + for (auto i = 0; i < batch_size; ++i) { + for (auto j = 0; j < index_length; j++) { + scatter_index_vec.push_back(i); + scatter_index_vec.push_back(index_vec[i * index_length + j]); + } + } + Tensor scatter_index; + framework::TensorFromVector(scatter_index_vec, dev_ctx, &scatter_index); + scatter_index.Resize({batch_size, index_length, 2}); + + NpuOpRunner runner; + runner.SetType("ScatterNd") + .AddInput(scatter_index) + .AddInput(*out_grad) + .AddInput(framework::vectorize(x_grad->dims())) + .AddOutput(*x_grad); + runner.Run(dev_ctx.stream()); +} + +template +class IndexSampleGradNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& dev_ctx = + ctx.template device_context(); + auto* index = ctx.Input("Index"); + auto* out_grad = + ctx.Input(framework::GradVarName("Out")); + auto* x_grad = + ctx.Output(framework::GradVarName("X")); + x_grad->mutable_data(ctx.GetPlace()); + + const auto& index_type = index->type(); + if (index_type == framework::proto::VarType::INT32) { + IndexSampleGradScatter(dev_ctx, index, out_grad, x_grad); + } else { + IndexSampleGradScatter(dev_ctx, index, out_grad, x_grad); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_NPU_KERNEL(index_sample, ops::IndexSampleNPUKernel, + ops::IndexSampleNPUKernel, + ops::IndexSampleNPUKernel, + ops::IndexSampleNPUKernel); +REGISTER_OP_NPU_KERNEL(index_sample_grad, + 
ops::IndexSampleGradNPUKernel, + ops::IndexSampleGradNPUKernel, + ops::IndexSampleGradNPUKernel, + ops::IndexSampleGradNPUKernel); diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index 70714b7f3a0644..04775107033adc 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -15,6 +15,10 @@ #pragma once #include #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/jit/macro.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/cpu_info.h" namespace paddle { namespace operators { @@ -38,7 +42,6 @@ void IndexSelectInner(const framework::ExecutionContext& context, auto input_width = slice_size * input_dim[dim]; auto output_width = slice_size * output_dim[dim]; - auto outer_nums = 1; for (auto i = 0; i < dim; i++) { outer_nums *= input_dim[i]; @@ -77,7 +80,6 @@ void IndexSelectInner(const framework::ExecutionContext& context, for (auto i = 0; i < outer_nums; i++) { auto input_start_offset = i * input_width; auto output_start_offset = i * output_width; - for (auto j = 0; j < index_size; j++) { IndexT index_value = index_vec[j]; for (auto k = 0; k < slice_size; k++) { @@ -98,7 +100,6 @@ class IndexSelectKernel : public framework::OpKernel { auto* inputs_var = context.InputVar("X"); auto* index_var = context.InputVar("Index"); auto* output_var = context.OutputVar("Out"); - auto& inputs = inputs_var->Get(); auto& index = index_var->Get(); auto* output = output_var->GetMutable(); @@ -107,8 +108,8 @@ class IndexSelectKernel : public framework::OpKernel { if (dim < 0) { dim += inputs.dims().size(); } - const auto& index_type = index.type(); + bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -129,19 +130,41 @@ class IndexSelectKernel : public framework::OpKernel { } }; -template +template +struct IndexSelectAdd { + void operator()(const framework::ExecutionContext& ctx, int slice_size, + const T* src_pointer, const T* p_pointer, T* dist_pointer) { + for (int i = 0; i < slice_size; i++) { + dist_pointer[i] = src_pointer[i] + p_pointer[i]; + } + } +}; +template +struct IndexSelectAdd< + DeviceContext, T, + typename std::enable_if::value>::type> { + void operator()(const framework::ExecutionContext& ctx, int slice_size, + const T* src_pointer, const T* p_pointer, T* dist_pointer) { + auto blas = math::GetBlas(ctx); + blas.VADD(slice_size, src_pointer, p_pointer, dist_pointer); + } +}; + +template void IndexSelectGradInner(const framework::ExecutionContext& context, - const LoDTensor& out_grad, const LoDTensor& index, + const LoDTensor* out_grad, const LoDTensor* index, LoDTensor* x_grad, int dim) { - std::vector input_vec; - std::vector index_vec; - TensorToVector(out_grad, context.device_context(), &input_vec); - TensorToVector(index, context.device_context(), &index_vec); - - auto input_dim = out_grad.dims(); + const T* input_data = out_grad->data(); + const IndexT* index_data = index->data(); + const T* p_output = x_grad->mutable_data(context.GetPlace()); + T* out_data = x_grad->mutable_data(context.GetPlace()); + auto input_dim = out_grad->dims(); auto input_dim_size = input_dim.size(); auto output_dim = x_grad->dims(); - std::vector out_vec(x_grad->numel(), 0); + + auto& dev_ctx = context.template device_context(); + math::SetConstant set_constant; + set_constant(dev_ctx, x_grad, 
static_cast(0.0)); auto slice_size = 1; for (auto i = dim + 1; i < input_dim_size; i++) { @@ -156,7 +179,7 @@ void IndexSelectGradInner(const framework::ExecutionContext& context, outer_nums *= input_dim[i]; } - auto index_size = index.dims()[0]; + auto index_size = index->dims()[0]; VLOG(3) << "Index_Select_Grad_Debug; outer_nums: " << outer_nums << "; slice_size: " << slice_size << "; input_width: " << input_width << "; output_width: " << output_width @@ -167,15 +190,14 @@ void IndexSelectGradInner(const framework::ExecutionContext& context, auto output_start_offset = i * output_width; for (auto j = 0; j < index_size; j++) { - IndexT index_value = index_vec[j]; - for (auto k = 0; k < slice_size; k++) { - out_vec[output_start_offset + index_value * slice_size + k] += - input_vec[input_start_offset + j * slice_size + k]; - } + IndexT index_value = index_data[j]; + auto src = input_data + input_start_offset + j * slice_size; + auto p_out = p_output + output_start_offset + index_value * slice_size; + auto dst = out_data + output_start_offset + index_value * slice_size; + IndexSelectAdd index_select_add; + index_select_add(context, slice_size, src, p_out, dst); } } - x_grad->mutable_data(context.GetPlace()); - framework::TensorFromVector(out_vec, context.device_context(), x_grad); x_grad->Resize(output_dim); } @@ -183,19 +205,18 @@ template class IndexSelectGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* index_var = context.InputVar("Index"); - auto* x_grad_var = context.OutputVar(framework::GradVarName("X")); - auto* out_grad_var = context.InputVar(framework::GradVarName("Out")); + auto* x_grad = + context.Output(framework::GradVarName("X")); + auto* index = context.Input("Index"); + auto* out_grad = + context.Input(framework::GradVarName("Out")); - auto& index = index_var->Get(); - auto& out_grad = out_grad_var->Get(); - auto* x_grad = x_grad_var->GetMutable(); int dim = context.Attr("dim"); if (dim < 0) { - dim += out_grad.dims().size(); + dim += out_grad->dims().size(); } + const auto& index_type = index->type(); - const auto& index_type = index.type(); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -209,9 +230,11 @@ class IndexSelectGradKernel : public framework::OpKernel { framework::proto::VarType::INT64))); if (index_type == framework::proto::VarType::INT32) { - IndexSelectGradInner(context, out_grad, index, x_grad, dim); + IndexSelectGradInner(context, out_grad, index, + x_grad, dim); } else if (index_type == framework::proto::VarType::INT64) { - IndexSelectGradInner(context, out_grad, index, x_grad, dim); + IndexSelectGradInner(context, out_grad, index, + x_grad, dim); } } }; diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index 9a0ce3900acf1c..2f3217e628dd0e 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -229,6 +229,7 @@ REGISTER_OPERATOR(lookup_table_grad, ops::LookupTableOpGrad, REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel, ops::LookupTableKernel, ops::LookupTableKernel, + ops::LookupTableKernel, ops::LookupTableKernel); REGISTER_OP_CPU_KERNEL(lookup_table_grad, ops::LookupTableGradKernel, ops::LookupTableGradKernel, diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu index 6985b916757173..3edea025b2a044 100644 
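The rewritten IndexSelectGradInner above replaces the temporary-vector accumulation with in-place per-slice adds, and IndexSelectAdd simply dispatches that add to blas.VADD for float/double. The computation itself is a scatter-add over the selected indices; a NumPy sketch (illustrative helper names, not the kernel):

import numpy as np

def index_select_ref(x, index, dim):
    return np.take(x, index, axis=dim)

def index_select_grad_ref(x_shape, index, out_grad, dim):
    # slices of out_grad picked by `index` accumulate back into x_grad;
    # duplicate indices must add up, which is what the per-slice adds do
    x_grad = np.zeros(x_shape, dtype=out_grad.dtype)
    for j, target in enumerate(index):
        src = [slice(None)] * out_grad.ndim
        src[dim] = j
        dst = [slice(None)] * x_grad.ndim
        dst[dim] = target
        x_grad[tuple(dst)] += out_grad[tuple(src)]
    return x_grad

x = np.random.rand(4, 5).astype(np.float32)
index = np.array([0, 2, 2])
y = index_select_ref(x, index, dim=0)                         # shape (3, 5)
gx = index_select_grad_ref(x.shape, index, np.ones_like(y), dim=0)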
--- a/paddle/fluid/operators/lookup_table_op.cu +++ b/paddle/fluid/operators/lookup_table_op.cu @@ -227,7 +227,8 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(lookup_table, ops::LookupTableCUDAKernel, ops::LookupTableCUDAKernel, ops::LookupTableCUDAKernel, - ops::LookupTableCUDAKernel); + ops::LookupTableCUDAKernel, + ops::LookupTableCUDAKernel); REGISTER_OP_CUDA_KERNEL(lookup_table_grad, ops::LookupTableGradCUDAKernel, ops::LookupTableGradCUDAKernel, diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index e385d72d1f43fd..74e26626bd5285 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -103,6 +103,7 @@ class LookupTableKernel : public framework::OpKernel { if (id_index != -1) { if (input_data_type == framework::proto::VarType::INT8 || + input_data_type == framework::proto::VarType::INT16 || input_data_type == framework::proto::VarType::BF16) { memcpy(output + i * row_width, table + id_index * row_width, row_width * sizeof(T)); @@ -130,6 +131,7 @@ class LookupTableKernel : public framework::OpKernel { id_index)); if (input_data_type == framework::proto::VarType::INT8 || + input_data_type == framework::proto::VarType::INT16 || input_data_type == framework::proto::VarType::BF16) { memcpy(output + i * row_width, table + id_index * row_width, row_width * sizeof(T)); diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index eab513e24bc809..55151c5483a38b 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -54,6 +54,15 @@ struct CBlas { } }; +template <> +struct CBlas { + template + static void VCOPY(ARGS... args) { + PADDLE_THROW(platform::errors::Unimplemented( + "Blas VCOPY do not supported on CPU, please check your code")); + } +}; + template <> struct CBlas { template diff --git a/paddle/fluid/operators/math/concat_and_split.cc b/paddle/fluid/operators/math/concat_and_split.cc index 7df78b321de996..6c1ee863737011 100644 --- a/paddle/fluid/operators/math/concat_and_split.cc +++ b/paddle/fluid/operators/math/concat_and_split.cc @@ -40,18 +40,18 @@ class ConcatFunctor { const std::vector& input, int axis, framework::Tensor* output) { // TODO(zcd): Add input data validity checking - int num = input.size(); + size_t num = input.size(); - int rows = 1; + int64_t rows = 1; auto dim_0 = input[0].dims(); for (int i = 0; i < axis; ++i) { rows *= dim_0[i]; } - int out_rows = rows, out_cols = 0; + int64_t out_rows = rows, out_cols = 0; std::vector input_cols(input.size()); - for (int i = 0; i < num; ++i) { - int t_cols = input[i].numel() / rows; + for (size_t i = 0; i < num; ++i) { + int64_t t_cols = input[i].numel() / rows; out_cols += t_cols; input_cols[i] = t_cols; } @@ -59,11 +59,11 @@ class ConcatFunctor { // computation auto output_data = output->data(); - int col_idx = 0; - for (int j = 0; j < num; ++j) { - int col_len = input_cols[j]; + int64_t col_idx = 0; + for (size_t j = 0; j < num; ++j) { + int64_t col_len = input_cols[j]; auto input_data = input[j].data(); - for (int k = 0; k < out_rows; ++k) { + for (int64_t k = 0; k < out_rows; ++k) { memory::Copy(cpu_place, output_data + k * out_cols + col_idx, cpu_place, input_data + k * col_len, sizeof(T) * col_len); } diff --git a/paddle/fluid/operators/math/concat_and_split.cu b/paddle/fluid/operators/math/concat_and_split.cu index 58f936788a363e..f9cce061383939 100644 --- a/paddle/fluid/operators/math/concat_and_split.cu +++ 
b/paddle/fluid/operators/math/concat_and_split.cu @@ -26,9 +26,9 @@ namespace operators { namespace math { template -__global__ void ConcatKernel(const T** inputs, const int* input_cols, - int col_size, const int output_rows, - const int output_cols, T* output) { +__global__ void ConcatKernel(const T** inputs, const int64_t* input_cols, + int col_size, const int64_t output_rows, + const int64_t output_cols, T* output) { int tid_x = blockIdx.x * blockDim.x + threadIdx.x; int curr_segment = 0; int curr_offset = input_cols[0]; @@ -70,8 +70,8 @@ __device__ void ConcatKernelDetail(const T** inputs_data, template __global__ void ConcatKernel(const T* input_addr0, const T* input_addr1, - const int fixed_in_col, const int out_rows, - const int out_cols, T* output_data) { + const int64_t fixed_in_col, const int64_t out_rows, + const int64_t out_cols, T* output_data) { const T* inputs_data[2]; inputs_data[0] = input_addr0; inputs_data[1] = input_addr1; @@ -81,8 +81,8 @@ __global__ void ConcatKernel(const T* input_addr0, const T* input_addr1, template __global__ void ConcatKernel(const T* input_addr0, const T* input_addr1, - const T* input_addr2, const int fixed_in_col, - const int out_rows, const int out_cols, + const T* input_addr2, const int64_t fixed_in_col, + const int64_t out_rows, const int64_t out_cols, T* output_data) { const T* inputs_data[3]; inputs_data[0] = input_addr0; @@ -95,8 +95,8 @@ __global__ void ConcatKernel(const T* input_addr0, const T* input_addr1, template __global__ void ConcatKernel(const T* input_addr0, const T* input_addr1, const T* input_addr2, const T* input_addr3, - const int fixed_in_col, const int out_rows, - const int out_cols, T* output_data) { + const int64_t fixed_in_col, const int64_t out_rows, + const int64_t out_cols, T* output_data) { const T* inputs_data[4]; inputs_data[0] = input_addr0; inputs_data[1] = input_addr1; @@ -108,8 +108,8 @@ __global__ void ConcatKernel(const T* input_addr0, const T* input_addr1, template __global__ void ConcatKernel(const T** inputs_data, const int in_num, - const int fixed_in_col, const int out_rows, - const int out_cols, T* output_data) { + const int64_t fixed_in_col, const int64_t out_rows, + const int64_t out_cols, T* output_data) { ConcatKernelDetail(inputs_data, fixed_in_col, out_rows, out_cols, output_data); } @@ -235,19 +235,19 @@ class ConcatFunctor { framework::Tensor* output) { // TODO(zcd): Add input data validity checking int in_num = input.size(); - int in_row = 1; + int64_t in_row = 1; auto dim_0 = input[0].dims(); for (int i = 0; i < axis; ++i) { in_row *= dim_0[i]; } - int in_col = input[0].numel() / in_row; - int out_row = in_row, out_col = 0; + int64_t in_col = input[0].numel() / in_row; + int64_t out_row = in_row, out_col = 0; int inputs_col_num = in_num + 1; std::vector inputs_data_vec(in_num); - std::vector inputs_col_vec(inputs_col_num); + std::vector inputs_col_vec(inputs_col_num); const T** inputs_data = inputs_data_vec.data(); - int* inputs_col = inputs_col_vec.data(); + int64_t* inputs_col = inputs_col_vec.data(); // There are some differences between hip runtime and NV runtime. 
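The int to int64_t/size_t widening in the concat functors is about offsets, not loop style: the row and column counters are products of tensor dimensions and overflow 32-bit arithmetic for large inputs. Illustrative numbers (not taken from the PR):

rows, cols = 4, 600_000_000            # e.g. one [4, 6e8] input to concat
elements = rows * cols                 # 2_400_000_000
int32_max = 2**31 - 1                  # 2_147_483_647
assert elements > int32_max            # offsets such as k * out_cols + col_idx would wrap in int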
// In NV, when the pageable memory data less than 64K is transferred from @@ -263,13 +263,13 @@ class ConcatFunctor { inputs_data = reinterpret_cast(data_alloc->ptr()); col_alloc = memory::Alloc(platform::CUDAPinnedPlace(), inputs_col_num * sizeof(int)); - inputs_col = reinterpret_cast(col_alloc->ptr()); + inputs_col = reinterpret_cast(col_alloc->ptr()); #endif inputs_col[0] = 0; bool has_same_shape = true; for (int i = 0; i < in_num; ++i) { - int t_cols = input[i].numel() / in_row; + int64_t t_cols = input[i].numel() / in_row; if (has_same_shape) { if (t_cols != in_col) has_same_shape = false; } @@ -312,17 +312,19 @@ class ConcatFunctor { } } else { auto tmp_dev_ins_col_data = - memory::Alloc(context, inputs_col_num * sizeof(int)); + memory::Alloc(context, inputs_col_num * sizeof(int64_t)); memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()), tmp_dev_ins_col_data->ptr(), platform::CPUPlace(), - static_cast(inputs_col), inputs_col_num * sizeof(int), - context.stream()); - int* dev_ins_col_data = static_cast(tmp_dev_ins_col_data->ptr()); + static_cast(inputs_col), + inputs_col_num * sizeof(int64_t), context.stream()); + int64_t* dev_ins_col_data = + static_cast(tmp_dev_ins_col_data->ptr()); ConcatKernel<<>>( dev_ins_data, dev_ins_col_data, static_cast(inputs_col_num), out_row, out_col, output->data()); } + #ifdef PADDLE_WITH_HIP // Prevent the pinned memory value from being covered and release the memory // after the launch kernel of the stream is executed (reapply pinned memory diff --git a/paddle/fluid/operators/matmul_op_npu.cc b/paddle/fluid/operators/matmul_op_npu.cc new file mode 100644 index 00000000000000..d5606177a55926 --- /dev/null +++ b/paddle/fluid/operators/matmul_op_npu.cc @@ -0,0 +1,185 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/op_version_registry.h" +#include "paddle/fluid/operators/npu_op_runner.h" + +namespace paddle { +namespace operators { + +template +class MatMulNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); + bool transpose_x = ctx.Attr("transpose_X"); + bool transpose_y = ctx.Attr("transpose_Y"); + + if (x->dims().size() == 2) { + out->mutable_data(ctx.GetPlace()); + + const auto& runner = NpuOpRunner( + "MatMul", {*x, *y}, {*out}, + {{"transpose_x1", transpose_x}, {"transpose_x2", transpose_y}}); + + auto stream = + ctx.template device_context() + .stream(); + runner.Run(stream); + + } else if (x->dims().size() > 2) { + out->mutable_data(ctx.GetPlace()); + + const auto& runner = + NpuOpRunner("BatchMatMul", {*x, *y}, {*out}, + {{"adj_x1", transpose_x}, {"adj_x2", transpose_y}}); + + auto stream = + ctx.template device_context() + .stream(); + runner.Run(stream); + } + } +}; + +template +class MatMulGradNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); + bool transpose_y = ctx.Attr("transpose_Y"); + auto stream = + ctx.template device_context() + .stream(); + + if (x->dims().size() == 2) { + if (transpose_y) { + if (dx) { + dx->mutable_data(ctx.GetPlace()); + const auto& runner_dx = + NpuOpRunner("MatMul", {*dout, *y}, {*dx}, + {{"transpose_x1", false}, {"transpose_x2", false}}); + + runner_dx.Run(stream); + } + if (dy) { + dy->mutable_data(ctx.GetPlace()); + const auto& runner_dy = + NpuOpRunner("MatMul", {*dout, *x}, {*dy}, + {{"transpose_x1", true}, {"transpose_x2", false}}); + + runner_dy.Run(stream); + } + + } else { + if (dx) { + dx->mutable_data(ctx.GetPlace()); + const auto& runner_dx = + NpuOpRunner("MatMul", {*dout, *y}, {*dx}, + {{"transpose_x1", false}, {"transpose_x2", true}}); + + runner_dx.Run(stream); + } + if (dy) { + dy->mutable_data(ctx.GetPlace()); + const auto& runner_dy = + NpuOpRunner("MatMul", {*x, *dout}, {*dy}, + {{"transpose_x1", true}, {"transpose_x2", false}}); + + runner_dy.Run(stream); + } + } + } else if (x->dims().size() > 2) { + if (transpose_y) { + if (dx) { + dx->mutable_data(ctx.GetPlace()); + const auto& runner_dx = + NpuOpRunner("BatchMatMul", {*dout, *y}, {*dx}, + {{"adj_x1", false}, {"adj_x2", false}}); + + runner_dx.Run(stream); + } + if (dy) { + dy->mutable_data(ctx.GetPlace()); + const auto& runner_dy = + NpuOpRunner("BatchMatMul", {*dout, *x}, {*dy}, + {{"adj_x1", true}, {"adj_x2", false}}); + + runner_dy.Run(stream); + } + } else { + if (dx) { + dx->mutable_data(ctx.GetPlace()); + const auto& runner_dx = + NpuOpRunner("BatchMatMul", {*dout, *y}, {*dx}, + {{"adj_x1", false}, {"adj_x2", true}}); + + runner_dx.Run(stream); + } + if (dy) { + dy->mutable_data(ctx.GetPlace()); + if ((x->dims().size() == 3) && (dout->dims().size() == 3) && + (dy->dims().size() == 2)) { + framework::Tensor dout_tmp; + dout_tmp.ShareDataWith(*dout); + std::vector vec_dim = + framework::vectorize(dout_tmp.dims()); + std::vector vec_dim_v{vec_dim[0] * vec_dim[1], vec_dim[2]}; + dout_tmp.Resize(framework::make_ddim(vec_dim_v)); + + 
framework::Tensor x_tmp; + x_tmp.ShareDataWith(*x); + std::vector vec_dim_x = + framework::vectorize(x_tmp.dims()); + std::vector vec_dim_x_v{vec_dim_x[0] * vec_dim_x[1], + vec_dim_x[2]}; + x_tmp.Resize(framework::make_ddim(vec_dim_x_v)); + const auto& runner_dy = + NpuOpRunner("MatMul", {x_tmp, dout_tmp}, {*dy}, + {{"transpose_x1", true}, {"transpose_x2", false}}); + runner_dy.Run(stream); + } else { + const auto& runner_dy = + NpuOpRunner("BatchMatMul", {*x, *dout}, {*dy}, + {{"adj_x1", true}, {"adj_x2", false}}); + runner_dy.Run(stream); + } + } + } + } + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_NPU_KERNEL( + matmul, ops::MatMulNPUKernel, + ops::MatMulNPUKernel); +REGISTER_OP_NPU_KERNEL( + matmul_grad, + ops::MatMulGradNPUKernel, + ops::MatMulGradNPUKernel); diff --git a/paddle/fluid/operators/matmul_v2_op_npu.cc b/paddle/fluid/operators/matmul_v2_op_npu.cc index 3d77c177500e38..b23b408e9c59a7 100644 --- a/paddle/fluid/operators/matmul_v2_op_npu.cc +++ b/paddle/fluid/operators/matmul_v2_op_npu.cc @@ -140,20 +140,22 @@ class MatMulV2GradNPUKernel : public framework::OpKernel { dy->mutable_data(ctx.GetPlace()); if ((x->dims().size() == 3) && (dout->dims().size() == 3) && (dy->dims().size() == 2)) { - framework::Tensor dout_; - dout_.ShareDataWith(*dout); - std::vector vec_dim = framework::vectorize(dout_.dims()); + framework::Tensor dout_tmp; + dout_tmp.ShareDataWith(*dout); + std::vector vec_dim = + framework::vectorize(dout_tmp.dims()); std::vector vec_dim_v{vec_dim[0] * vec_dim[1], vec_dim[2]}; - dout_.Resize(framework::make_ddim(vec_dim_v)); + dout_tmp.Resize(framework::make_ddim(vec_dim_v)); - framework::Tensor x_; - x_.ShareDataWith(*x); - std::vector vec_dim_x = framework::vectorize(x_.dims()); + framework::Tensor x_tmp; + x_tmp.ShareDataWith(*x); + std::vector vec_dim_x = + framework::vectorize(x_tmp.dims()); std::vector vec_dim_x_v{vec_dim_x[0] * vec_dim_x[1], vec_dim_x[2]}; - x_.Resize(framework::make_ddim(vec_dim_x_v)); + x_tmp.Resize(framework::make_ddim(vec_dim_x_v)); const auto& runner_dy = - NpuOpRunner("MatMul", {x_, dout_}, {*dy}, + NpuOpRunner("MatMul", {x_tmp, dout_tmp}, {*dy}, {{"transpose_x1", true}, {"transpose_x2", false}}); runner_dy.Run(stream); } else { diff --git a/paddle/fluid/operators/memcpy_op.h b/paddle/fluid/operators/memcpy_op.h index 63a41cc7237310..ecd266858024e0 100644 --- a/paddle/fluid/operators/memcpy_op.h +++ b/paddle/fluid/operators/memcpy_op.h @@ -51,17 +51,14 @@ class MemcpyFunctor { } else if (dst_place_type_ == 1) { framework::TensorCopy(lod_tensor, dev_ctx_.GetPlace(), dev_ctx_, &out_tensor); - } + } else if (dst_place_type_ == 0) { + framework::TensorCopySync(lod_tensor, platform::CPUPlace(), &out_tensor); #ifdef PADDLE_WITH_ASCEND_CL - else if (dst_place_type_ == 0) { // NOLINT - framework::TensorCopy(lod_tensor, platform::CPUPlace(), dev_ctx_, - &out_tensor); } else if (dst_place_type_ == 4) { framework::TensorCopy(lod_tensor, dev_ctx_.GetPlace(), dev_ctx_, &out_tensor); - } #endif - else { // NOLINT + } else { PADDLE_THROW(platform::errors::Unimplemented( "memcpy dst_place_type: %d is not supported yet.", dst_place_type_)); } diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 177e539c4b6c29..3b92d2e2d88913 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -251,7 +251,9 @@ namespace ops = paddle::operators; 
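Both matmul_op_npu.cc above and the matching matmul_v2_op_npu.cc hunk handle the case where x and dout are 3-D but dy is 2-D by flattening the batch dimension and issuing one transposed MatMul instead of a BatchMatMul plus a reduction. A quick NumPy check of why the two are equivalent (shapes are made up for illustration):

import numpy as np

B, M, K, N = 4, 3, 5, 2
x = np.random.rand(B, M, K)
dout = np.random.rand(B, M, N)

# gradient of a shared 2-D y in out[b] = x[b] @ y is the sum over the batch
dy_batched = sum(x[b].T @ dout[b] for b in range(B))

# the kernel instead reshapes to 2-D and runs a single transposed MatMul
dy_flat = x.reshape(B * M, K).T @ dout.reshape(B * M, N)

assert np.allclose(dy_batched, dy_flat)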
ops::MKLDNNActivationKernel>); \ REGISTER_OP_KERNEL( \ act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace, \ - ops::MKLDNNActivationGradKernel>); + ops::MKLDNNActivationGradKernel>, \ + ops::MKLDNNActivationGradKernel< \ + ops::grad_functor>); #define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro) \ __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \ @@ -259,7 +261,6 @@ namespace ops = paddle::operators; __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \ __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor); \ __macro(hardswish, HardSwishMKLDNNFunctor, HardSwishMKLDNNGradFunctor); \ - __macro(sigmoid, SigmoidMKLDNNFunctor, SigmoidMKLDNNGradFunctor); \ __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor); \ __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor); \ __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor); @@ -267,3 +268,5 @@ namespace ops = paddle::operators; FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL); REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(gelu, GeluMKLDNNFunctor, GeluMKLDNNGradFunctor); +REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(sigmoid, SigmoidMKLDNNFunctor, + SigmoidMKLDNNGradFunctor); diff --git a/paddle/fluid/operators/optimizers/adam_op.cc b/paddle/fluid/operators/optimizers/adam_op.cc index edc75bda4abdf7..130e10a1f8de30 100644 --- a/paddle/fluid/operators/optimizers/adam_op.cc +++ b/paddle/fluid/operators/optimizers/adam_op.cc @@ -122,7 +122,8 @@ framework::OpKernelType AdamOp::GetExpectedKernelType( framework::OpKernelType AdamOp::GetKernelTypeForVar( const std::string &var_name, const framework::Tensor &tensor, const framework::OpKernelType &expected_kernel_type) const { - if (var_name == "Beta1Pow" || var_name == "Beta2Pow") { + if (var_name == "Beta1Pow" || var_name == "Beta2Pow" || + var_name == "SkipUpdate") { return expected_kernel_type; } else { return framework::OpKernelType(expected_kernel_type.data_type_, diff --git a/paddle/fluid/operators/optimizers/adam_op_npu.cc b/paddle/fluid/operators/optimizers/adam_op_npu.cc index 8b33dc64c4e4f0..d0de480c1a0ccc 100644 --- a/paddle/fluid/operators/optimizers/adam_op_npu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_npu.cc @@ -141,7 +141,7 @@ class AdamNPUKernel : public framework::OpKernel { if (ctx.HasInput("Beta2Tensor")) { beta2_tensor = ctx.Input("Beta2Tensor"); - PADDLE_ENFORCE_EQ(beta1_tensor->numel(), 1, + PADDLE_ENFORCE_EQ(beta2_tensor->numel(), 1, platform::errors::InvalidArgument( "Input(Beta2Tensor) size must be 1, but get %d", beta2_tensor->numel())); diff --git a/paddle/fluid/operators/sampling_id_op_npu.cc b/paddle/fluid/operators/sampling_id_op_npu.cc new file mode 100644 index 00000000000000..162403595b6a67 --- /dev/null +++ b/paddle/fluid/operators/sampling_id_op_npu.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/sampling_id_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_NPU_KERNEL(sampling_id, paddle::operators::SamplingIdKernel, + paddle::operators::SamplingIdKernel); diff --git a/paddle/fluid/operators/slice_op.h b/paddle/fluid/operators/slice_op.h index 96b8ea11d6845e..658939a91f39a7 100644 --- a/paddle/fluid/operators/slice_op.h +++ b/paddle/fluid/operators/slice_op.h @@ -391,17 +391,7 @@ class SliceGradKernel : public framework::OpKernel { } } - if (need_pad_num == 0) { - // do not need padding, pass if data address same, else copy - if (d_input->mutable_data(context.GetPlace()) == d_out->data()) { - // inplace, do not any operator, pass - } else { - framework::TensorCopy( - *d_out, context.GetPlace(), - context.template device_context(), - d_input); - } - } else if (need_pad_num == 1) { + if (need_pad_num == 1) { // only need padding one dimension, we can reduce dimension. // only the padding dimension is available for us. // How to reduce dimension(5 to 3 for example): diff --git a/paddle/fluid/platform/gpu_launch_config.h b/paddle/fluid/platform/gpu_launch_config.h index 4da91b4e764a52..a82262419066fa 100644 --- a/paddle/fluid/platform/gpu_launch_config.h +++ b/paddle/fluid/platform/gpu_launch_config.h @@ -41,7 +41,7 @@ struct GpuLaunchConfig { }; inline GpuLaunchConfig GetGpuLaunchConfig1D( - const platform::CUDADeviceContext& context, int element_count, + const platform::CUDADeviceContext& context, int64_t element_count, #ifdef PADDLE_WITH_HIP // HIP will throw GPU memory access fault if threads > 256 int max_threads = 256) { diff --git a/paddle/fluid/platform/stream/CMakeLists.txt b/paddle/fluid/platform/stream/CMakeLists.txt index e1e3e49ce9cbc0..cf219caa9f5c9c 100644 --- a/paddle/fluid/platform/stream/CMakeLists.txt +++ b/paddle/fluid/platform/stream/CMakeLists.txt @@ -1,5 +1,11 @@ +IF(WITH_MKLDNN) + set(MKLDNN_CTX_DEPS mkldnn) +ELSE() + set(MKLDNN_CTX_DEPS) +ENDIF() + IF(WITH_GPU OR WITH_ROCM) -cc_library(cuda_stream SRCS cuda_stream.cc DEPS enforce boost) +cc_library(cuda_stream SRCS cuda_stream.cc DEPS enforce boost ${MKLDNN_CTX_DEPS}) ENDIF() IF(WITH_ASCEND_CL) diff --git a/paddle/fluid/pybind/cuda_streams_py.cc b/paddle/fluid/pybind/cuda_streams_py.cc index df63239cad6d69..21c6e0a4f28caa 100644 --- a/paddle/fluid/pybind/cuda_streams_py.cc +++ b/paddle/fluid/pybind/cuda_streams_py.cc @@ -164,8 +164,7 @@ void BindCudaStream(py::module *m_ptr) { [](paddle::platform::stream::CUDAStream &self, paddle::platform::CudaEvent *event) { if (event == nullptr) { - auto event_tmp = paddle::platform::CudaEvent(); - event = &event_tmp; + event = new paddle::platform::CudaEvent(); } event->Record(self); return event; diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 619301e3b45d31..7b99c7df188f35 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -432,19 +432,24 @@ static void ParseIndexingSlice(framework::LoDTensor *tensor, PyObject *_index, const auto &shape = tensor->dims(); const int rank = shape.size(); const int size = PyTuple_GET_SIZE(index); + + // specified_dims is the number of dimensions which indexed by Interger, + // Slices. 
+ int specified_dims = 0; + for (int dim = 0; dim < size; ++dim) { + PyObject *slice_item = PyTuple_GetItem(index, dim); + if (PyCheckInteger(slice_item) || PySlice_Check(slice_item)) { + specified_dims++; + } + } + PADDLE_ENFORCE_EQ( size <= rank, true, platform::errors::InvalidArgument( "too many indices (%d) for tensor of dimension %d", size, rank)); - for (int dim = 0; dim < size; ++dim) { - PyObject *slice_item = PyTuple_GetItem(index, dim); - PADDLE_ENFORCE_EQ(PyCheckInteger(slice_item) || PySlice_Check(slice_item), - true, - platform::errors::InvalidArgument( - "Currently, VarBase.__getitem__() only allows " - "indexing by Integers, Slices, and tuples of " - "these types, but received %s in %dth slice item", - std::string(Py_TYPE(slice_item)->tp_name), dim + 1)); + for (int i = 0, dim = 0; i < size; ++i) { + PyObject *slice_item = PyTuple_GetItem(index, i); + infer_flags->push_back(1); int dim_len = shape[dim]; if (PyCheckInteger(slice_item)) { @@ -467,7 +472,8 @@ static void ParseIndexingSlice(framework::LoDTensor *tensor, PyObject *_index, slice_ends->push_back(start + 1); slice_strides->push_back(1); decrease_axis->push_back(dim); - } else { + dim++; + } else if (PySlice_Check(slice_item)) { // slice item Py_ssize_t start, end, step; PySliceObject *p = reinterpret_cast(slice_item); @@ -475,12 +481,22 @@ static void ParseIndexingSlice(framework::LoDTensor *tensor, PyObject *_index, // :: or : or 0:dim_len:1 if (start == 0 && end == dim_len && step == 1) { + dim++; continue; } slice_axes->push_back(dim); slice_starts->push_back(start); slice_ends->push_back(end); slice_strides->push_back(step); + dim++; + } else if (slice_item == Py_Ellipsis) { + dim += rank - specified_dims; + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Currently, VarBase.__getitem__() only allows " + "indexing by Integers, Slices, Ellipsis, and tuples of " + "these types, but received %s in %dth slice item", + std::string(Py_TYPE(slice_item)->tp_name), i + 1)); } } if (!PyTuple_Check(_index)) Py_DecRef(index); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 4286b9092c2395..d646e06d8a47a9 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -78,7 +78,6 @@ limitations under the License. 
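The reworked ParseIndexingSlice above first counts specified_dims, the number of dimensions explicitly indexed by an integer or a slice, so that an Ellipsis can stand in for the remaining rank - specified_dims dimensions. The same bookkeeping in plain Python (a hypothetical helper, not the pybind code):

def expand_ellipsis(index, rank):
    # ints and slices consume one dimension each; Ellipsis skips the rest
    specified_dims = sum(1 for item in index if isinstance(item, (int, slice)))
    mapping, dim = [], 0
    for item in index:
        if item is Ellipsis:
            dim += rank - specified_dims
        else:
            mapping.append((dim, item))
            dim += 1
    return mapping

# (0, ..., slice(1, 3)) on a rank-4 tensor: the integer hits dim 0,
# the ellipsis covers dims 1 and 2, the slice lands on dim 3
print(expand_ellipsis((0, Ellipsis, slice(1, 3)), rank=4))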
*/ #include "paddle/fluid/pybind/box_helper_py.h" #include "paddle/fluid/pybind/compatible.h" #include "paddle/fluid/pybind/const_value.h" -#include "paddle/fluid/pybind/cuda_streams_py.h" #include "paddle/fluid/pybind/data_set_py.h" #include "paddle/fluid/pybind/exception.h" #include "paddle/fluid/pybind/fleet_wrapper_py.h" @@ -240,6 +239,7 @@ OpSupportedInfos(const std::string &place, {"GPU", &platform::is_gpu_place}, {"CPU", &platform::is_cpu_place}, {"XPU", &platform::is_xpu_place}, + {"NPU", &platform::is_npu_place}, }; PADDLE_ENFORCE_NE( is_target_place.count(query_place), 0, diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index bebcfe64406d9e..62d30a50d6be4d 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -324,14 +324,17 @@ if %day_now% NEQ %day_before% ( echo %day_now% > %cache_dir%\day.txt type %cache_dir%\day.txt if %day_now% EQU 21 ( + del D:\sccache\sccache_log.txt rmdir %cache_dir%\third_party_GPU /s/q rmdir %cache_dir%\third_party /s/q ) if %day_now% EQU 11 ( + del D:\sccache\sccache_log.txt rmdir %cache_dir%\third_party_GPU /s/q rmdir %cache_dir%\third_party /s/q ) if %day_now% EQU 01 ( + del D:\sccache\sccache_log.txt rmdir %cache_dir%\third_party_GPU /s/q rmdir %cache_dir%\third_party /s/q ) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 6b19e154c721e7..fb6496e8d6c656 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -225,7 +225,11 @@ function cmake_base() { -DLITE_GIT_TAG=release/v2.8 -DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF} -DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} + -DWITH_ARM=${WITH_ARM:-OFF} + -DWITH_ASCEND=${WITH_ASCEND:-OFF} + -DWITH_ASCEND_CL=${WITH_ASCEND_CL:-OFF} -DWITH_STRIP=${WITH_STRIP:-ON} + -DON_INFER=${ON_INFER:-OFF} ======================================== EOF # Disable UNITTEST_USE_VIRTUALENV in docker because @@ -262,7 +266,11 @@ EOF -DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} \ -DWITH_LITE=${WITH_LITE:-OFF} \ -DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF} \ + -DWITH_ARM=${WITH_ARM:-OFF} \ + -DWITH_ASCEND=${WITH_ASCEND:-OFF} \ + -DWITH_ASCEND_CL=${WITH_ASCEND_CL:-OFF} \ -DWITH_STRIP=${WITH_STRIP:-ON} \ + -DON_INFER=${ON_INFER:-OFF} \ -DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF};build_error=$? if [ "$build_error" != 0 ];then exit 7; @@ -343,7 +351,11 @@ function build_base() { # reset ccache zero stats for collect PR's actual hit rate ccache -z - make install -j ${parallel_number};build_error=$? + if [ "$WITH_ARM" == "ON" ];then + make TARGET=ARMV8 -j ${parallel_number};build_error=$? + else + make install -j ${parallel_number};build_error=$? + fi # ci will collect ccache hit rate collect_ccache_hits @@ -816,20 +828,25 @@ function check_approvals_of_unittest() { curBuildSize=$(du -m --max-depth=0 ${PADDLE_ROOT}/build/paddle_inference_install_dir/paddle/lib/libpaddle_inference.so |awk '{print $1}') apt-get install -y bc diffSize=$(printf "%.2f" `echo "$curBuildSize - $oriBuildSize" | bc`) + AllDiffSize=$(printf "%.2f" `echo "$diffSize * 4" | bc`) cat < 0, "num_iters must be greater than 0!" 
epochs = (num_iters // steps) + 1 steps = min(num_iters, steps) @@ -1744,8 +1743,8 @@ def fit(self, eval_logs = self._run_one_epoch(eval_loader, cbks, 'eval') cbks.on_end('eval', eval_logs) - if self.stop_training: - break + if self.stop_training: + break cbks.on_end('train', logs) self._test_dataloader = None @@ -1832,7 +1831,8 @@ def evaluate(self, eval_steps = self._len_data_loader(eval_loader) self.num_iters = num_iters - if num_iters is not None and isinstance(num_iters, int): + if num_iters is not None and isinstance(num_iters, int) and isinstance( + eval_steps, int): assert num_iters > 0, "num_iters must be greater than 0!" eval_steps = min(num_iters, eval_steps) self.num_iters = eval_steps @@ -2094,7 +2094,9 @@ def _run_one_epoch( callbacks.on_batch_end(mode, step, logs) if hasattr(self, 'num_iters') and self.num_iters is not None: self.num_iters -= 1 - if self.num_iters == 0: + if self.num_iters <= 0: + self.stop_training = True + del self.num_iters break self._reset_metrics() diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index 5fe17e8c193e3e..8f094877e74b67 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -15,6 +15,11 @@ # TODO: import all neural network related api under this directory, # including layers, linear, conv, rnn etc. +from ..fluid.dygraph.layers import Layer # noqa: F401 +from ..fluid.dygraph.container import LayerList # noqa: F401 +from ..fluid.dygraph.container import ParameterList # noqa: F401 +from ..fluid.dygraph.container import Sequential # noqa: F401 + from .clip import ClipGradByGlobalNorm # noqa: F401 from .clip import ClipGradByNorm # noqa: F401 from .clip import ClipGradByValue # noqa: F401 @@ -130,10 +135,6 @@ # TODO: remove loss, keep it for too many used in unitests from .layer import loss # noqa: F401 -from ..fluid.dygraph.layers import Layer # noqa: F401 -from ..fluid.dygraph.container import LayerList # noqa: F401 -from ..fluid.dygraph.container import ParameterList # noqa: F401 -from ..fluid.dygraph.container import Sequential # noqa: F401 from . import utils # noqa: F401 from . 
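The num_iters handling added to Model.fit above treats the argument as a global step budget rather than a per-epoch one: enough epochs are scheduled to cover the budget, and a counter decremented per batch sets stop_training once it runs out. Roughly (an illustrative sketch, not the hapi code):

steps, num_iters = 100, 250              # steps per epoch, requested budget

epochs = num_iters // steps + 1          # 3 epochs are enough to cover 250 steps
steps = min(num_iters, steps)            # a single epoch never exceeds the budget

remaining = num_iters
for epoch in range(epochs):
    for step in range(steps):
        remaining -= 1                   # mirrors `self.num_iters -= 1` per batch
        if remaining <= 0:               # here the patch sets stop_training and breaks
            break
    if remaining <= 0:
        break
# batches actually run: 100 + 100 + 50 = 250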
import functional # noqa: F401 diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 057797ff962b42..4bc137222d2efa 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -16,18 +16,21 @@ import paddle from ...fluid.framework import in_dygraph_mode, default_main_program from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.layers.tensor import Variable, fill_constant, zeros, concat +from paddle.fluid.layers.tensor import fill_constant +from ...tensor import concat +from ...tensor.creation import zeros +from paddle.static import Variable from ...fluid.layers import core from ...fluid import dygraph_utils # TODO: define the common functions to build a neural network from ...fluid.layers import unfold # noqa: F401 -from ...fluid.layers import squeeze -from ...fluid.layers import unsqueeze +from ...tensor.manipulation import squeeze +from ...tensor.manipulation import unsqueeze from ...tensor import clip from ...tensor import sum from ...tensor import sqrt from ...fluid.data_feeder import check_variable_and_dtype, check_dtype -from ...fluid.framework import Variable, in_dygraph_mode, _varbase_creator +from ...fluid.framework import in_dygraph_mode, _varbase_creator from ...fluid.framework import in_dygraph_mode from ...fluid import core, dygraph_utils @@ -927,9 +930,9 @@ def get_attrs(prog, dropout_prob, is_test, seed): keep_prob = 1 - p if training: if p == 1.: - return layers.scale(x, scale=0.) + return paddle.scale(x, scale=0.) - scale_input = layers.scale( + scale_input = paddle.scale( x, scale=1 / keep_prob) if mode == 'upscale_in_train' else x #get mask shape @@ -947,17 +950,17 @@ def get_attrs(prog, dropout_prob, is_test, seed): mask_shape[i] = input_shape[i] #get mask - random_tensor = layers.uniform_random( + random_tensor = paddle.uniform( mask_shape, dtype='float32', min=0., max=1.0) p = layers.fill_constant(shape=[1], dtype='float32', value=p) - keep_mask = layers.greater_equal(random_tensor, p) + keep_mask = paddle.greater_equal(random_tensor, p) - scale_input = layers.cast(scale_input, dtype) - keep_mask = layers.cast(keep_mask, dtype) + scale_input = paddle.cast(scale_input, dtype) + keep_mask = paddle.cast(keep_mask, dtype) ret = paddle.multiply(scale_input, keep_mask, name=name) return ret else: # test - ret = layers.scale( + ret = paddle.scale( x, scale=keep_prob) if mode == 'downscale_in_infer' else x return ret @@ -1113,7 +1116,7 @@ def alpha_dropout(x, p=0.5, training=True, name=None): if training: if p == 1: - return layers.scale(x, scale=0.) + return paddle.scale(x, scale=0.) 
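The dropout body above now builds its mask from public paddle ops (paddle.uniform, paddle.greater_equal, paddle.cast, paddle.multiply); the math is unchanged. For reference, the two modes reduce to the following NumPy sketch (illustrative only; p == 1 is special-cased to a zero output in the real code):

import numpy as np

def dropout_ref(x, p, training=True, mode="upscale_in_train", seed=0):
    keep_prob = 1.0 - p
    if training:
        rng = np.random.default_rng(seed)
        # keep positions where a uniform sample is >= p, as paddle.greater_equal does
        keep_mask = (rng.uniform(size=x.shape) >= p).astype(x.dtype)
        scaled = x / keep_prob if mode == "upscale_in_train" else x
        return scaled * keep_mask
    # at inference only downscale_in_infer rescales the input
    return x * keep_prob if mode == "downscale_in_infer" else x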
#get transformation params alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 @@ -1125,23 +1128,22 @@ def alpha_dropout(x, p=0.5, training=True, name=None): input_shape = x.shape #get mask - random_tensor = layers.uniform_random( + random_tensor = paddle.uniform( input_shape, dtype='float32', min=0., max=1.0) p = layers.fill_constant(shape=[1], dtype='float32', value=p) - keep_mask = layers.greater_equal(random_tensor, p) - keep_mask = layers.cast(keep_mask, dtype) - drop_mask = layers.elementwise_sub( + keep_mask = paddle.greater_equal(random_tensor, p) + keep_mask = paddle.cast(keep_mask, dtype) + drop_mask = paddle.subtract( layers.fill_constant( shape=input_shape, dtype=dtype, value=1.), keep_mask) #apply mask b = layers.fill_constant(shape=[1], dtype=dtype, value=b) - y = layers.elementwise_add( - paddle.multiply(x, keep_mask), - layers.scale( - drop_mask, scale=alpha_p)) - res = layers.elementwise_add(layers.scale(y, scale=a), b, name=name) + y = paddle.add(paddle.multiply(x, keep_mask), + paddle.scale( + drop_mask, scale=alpha_p)) + res = paddle.add(paddle.scale(y, scale=a), b, name=name) return res else: # test return x @@ -1277,42 +1279,42 @@ def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None): if x_dim == 3: pad = concat([zeros((4, ), dtype="int32"), pad], axis=0) unsqueezed_dim = [3, 4] - x = unsqueeze(x, axes=unsqueezed_dim) + x = unsqueeze(x, axis=unsqueezed_dim) elif x_dim == 4: pad = concat([pad, zeros((2, ), dtype="int32")], axis=0) unsqueezed_dim = [2] - x = unsqueeze(x, axes=unsqueezed_dim) + x = unsqueeze(x, axis=unsqueezed_dim) elif data_format in ["NLC", "NHWC", "NDHWC"]: data_format = "NDHWC" if x_dim == 3: pad = concat([zeros((4, ), dtype="int32"), pad], axis=0) unsqueezed_dim = [2, 3] - x = unsqueeze(x, axes=unsqueezed_dim) + x = unsqueeze(x, axis=unsqueezed_dim) elif x_dim == 4: pad = concat([pad, zeros((2, ), dtype="int32")], axis=0) unsqueezed_dim = [1] - x = unsqueeze(x, axes=unsqueezed_dim) + x = unsqueeze(x, axis=unsqueezed_dim) else: if data_format in ["NCL", "NCHW", "NCDHW"]: data_format = "NCDHW" if x_dim == 3: pad = [0, 0, 0, 0] + pad unsqueezed_dim = [3, 4] - x = unsqueeze(x, axes=unsqueezed_dim) + x = unsqueeze(x, axis=unsqueezed_dim) elif x_dim == 4: pad = pad + [0, 0] unsqueezed_dim = [2] - x = unsqueeze(x, axes=unsqueezed_dim) + x = unsqueeze(x, axis=unsqueezed_dim) elif data_format in ["NLC", "NHWC", "NDHWC"]: data_format = "NDHWC" if x_dim == 3: pad = [0, 0, 0, 0] + pad unsqueezed_dim = [2, 3] - x = unsqueeze(x, axes=unsqueezed_dim) + x = unsqueeze(x, axis=unsqueezed_dim) elif x_dim == 4: pad = pad + [0, 0] unsqueezed_dim = [1] - x = unsqueeze(x, axes=unsqueezed_dim) + x = unsqueeze(x, axis=unsqueezed_dim) if in_dygraph_mode(): if isinstance(pad, Variable): @@ -1336,7 +1338,7 @@ def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None): type='pad3d', inputs=inputs, outputs={"Out": out}, attrs=attrs) if len(unsqueezed_dim) != 0: - out = squeeze(out, axes=unsqueezed_dim) + out = squeeze(out, axis=unsqueezed_dim) return out diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index bdbfa5877a789a..319248dfda2fab 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -16,13 +16,17 @@ import numpy as np from ...device import get_cudnn_version -from ...fluid.framework import Variable, in_dygraph_mode +from ...fluid.framework import in_dygraph_mode +from ...static import Variable from ...fluid import core, 
dygraph_utils, get_flags -from ...fluid.layers import nn, utils +from ...fluid.layers.utils import convert_to_list, _is_symmetric_padding from ...fluid.data_feeder import check_variable_and_dtype -from ...fluid.param_attr import ParamAttr +from ...framework import ParamAttr from ...fluid.layer_helper import LayerHelper from paddle import _C_ops +from ...tensor.manipulation import unsqueeze, squeeze +from ...tensor.math import add +from ...fluid.layers import nn __all__ = [] @@ -69,24 +73,24 @@ def _update_padding_nd(padding, channel_last, num_dims): padding_algorithm = "EXPLICIT" padding = _exclude_padding_in_batch_and_channel(padding, channel_last) - if utils._is_symmetric_padding(padding, num_dims): + if _is_symmetric_padding(padding, num_dims): padding = padding[0::2] # for padding like [pad_before, pad_after, pad_before, pad_after, ...] elif len(padding) == 2 * num_dims and isinstance(padding[0], int): padding_algorithm = "EXPLICIT" - padding = utils.convert_to_list(padding, 2 * num_dims, 'padding') - if utils._is_symmetric_padding(padding, num_dims): + padding = convert_to_list(padding, 2 * num_dims, 'padding') + if _is_symmetric_padding(padding, num_dims): padding = padding[0::2] # for padding like [pad_d1, pad_d2, ...] elif len(padding) == num_dims and isinstance(padding[0], int): padding_algorithm = "EXPLICIT" - padding = utils.convert_to_list(padding, num_dims, 'padding') + padding = convert_to_list(padding, num_dims, 'padding') else: raise ValueError("In valid padding: {}".format(padding)) # for integer padding else: padding_algorithm = "EXPLICIT" - padding = utils.convert_to_list(padding, num_dims, 'padding') + padding = convert_to_list(padding, num_dims, 'padding') if not all([p >= 0 for p in padding]): raise ValueError( "Invalid padding, all value should be larger than or equal to 0, but received: {}". @@ -323,8 +327,8 @@ def conv1d(x, "The size of padding's dimension should be 1 or 2. But got padding={}". format(padding)) - stride = utils.convert_to_list(stride, 1, 'stride') + [1] - dilation = utils.convert_to_list(dilation, 1, 'dilation') + [1] + stride = convert_to_list(stride, 1, 'stride') + [1] + dilation = convert_to_list(dilation, 1, 'dilation') + [1] l_type = "conv2d" if (num_channels == groups and num_channels != 1 and @@ -333,8 +337,8 @@ def conv1d(x, use_cudnn = False squeeze_aixs = -2 if channel_last else -1 - x = nn.unsqueeze(input=x, axes=[squeeze_aixs]) - weight = nn.unsqueeze(input=weight, axes=[-1]) + x = unsqueeze(x, axis=[squeeze_aixs]) + weight = unsqueeze(weight, axis=[-1]) if in_dygraph_mode(): attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation, 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False, @@ -366,7 +370,7 @@ def conv1d(x, type=l_type, inputs=inputs, outputs=outputs, attrs=attrs) if bias is not None: out = nn.elementwise_add(out, bias, axis=channel_dim) - out = nn.squeeze(input=out, axes=[squeeze_aixs]) + out = squeeze(out, axis=[squeeze_aixs]) return out @@ -530,8 +534,8 @@ def conv2d(x, # update attrs padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2) - stride = utils.convert_to_list(stride, 2, 'stride') - dilation = utils.convert_to_list(dilation, 2, 'dilation') + stride = convert_to_list(stride, 2, 'stride') + dilation = convert_to_list(dilation, 2, 'dilation') l_type = "conv2d" if (num_channels == groups and num_channels != 1 and @@ -730,8 +734,8 @@ def conv1d_transpose(x, "The size of padding's dimension should 1 or 2. But got padding={}". 
format(padding)) - stride = utils.convert_to_list(stride, 1, 'stride') + [1] - dilation = utils.convert_to_list(dilation, 1, 'dilation') + [1] + stride = convert_to_list(stride, 1, 'stride') + [1] + dilation = convert_to_list(dilation, 1, 'dilation') + [1] if output_size is None: output_size = [] @@ -740,8 +744,7 @@ def conv1d_transpose(x, raise ValueError('output_padding option is mutually exclusive with ' 'output_size') if isinstance(output_size, (list, tuple, int)): - output_size = utils.convert_to_list(output_size, 1, - 'output_size') + [1] + output_size = convert_to_list(output_size, 1, 'output_size') + [1] else: raise ValueError( "output_size should be int, or list, tuple of ints") @@ -749,8 +752,8 @@ def conv1d_transpose(x, if output_padding == 0: output_padding = [] else: - output_padding = utils.convert_to_list(output_padding, 1, - 'output_padding') + [0] + output_padding = convert_to_list(output_padding, 1, + 'output_padding') + [0] if len(output_padding) > 0 and output_padding[0] > stride[0]: raise ValueError( @@ -768,8 +771,8 @@ def conv1d_transpose(x, squeeze_axis = -2 if channel_last else -1 conv2d_data_format = "NHWC" if channel_last else "NCHW" - x = nn.unsqueeze(input=x, axes=[squeeze_axis]) - weight = nn.unsqueeze(input=weight, axes=[-1]) + x = unsqueeze(x, axis=[squeeze_axis]) + weight = unsqueeze(weight, axis=[-1]) if in_dygraph_mode(): attrs = ('output_padding', output_padding, 'output_size', output_size, @@ -803,7 +806,7 @@ def conv1d_transpose(x, if bias is not None: out = nn.elementwise_add(out, bias, axis=channel_dim) - out = nn.squeeze(input=out, axes=[squeeze_axis]) + out = squeeze(out, axis=[squeeze_axis]) return out @@ -979,8 +982,8 @@ def conv2d_transpose(x, # update attrs padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2) - stride = utils.convert_to_list(stride, 2, 'stride') - dilation = utils.convert_to_list(dilation, 2, 'dilation') + stride = convert_to_list(stride, 2, 'stride') + dilation = convert_to_list(dilation, 2, 'dilation') if output_size is None: output_size = [] @@ -989,7 +992,7 @@ def conv2d_transpose(x, raise ValueError('output_padding option is mutually exclusive with ' 'output_size') if isinstance(output_size, (list, tuple, int)): - output_size = utils.convert_to_list(output_size, 2, 'output_size') + output_size = convert_to_list(output_size, 2, 'output_size') else: raise ValueError( "output_size should be int, or list, tuple of ints") @@ -997,8 +1000,7 @@ def conv2d_transpose(x, if output_padding == 0: output_padding = [] else: - output_padding = utils.convert_to_list(output_padding, 2, - 'output_padding') + output_padding = convert_to_list(output_padding, 2, 'output_padding') op_type = 'conv2d_transpose' num_filters = weight.shape[1] @@ -1187,8 +1189,8 @@ def conv3d(x, cudnn_version is not None) else False padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3) - stride = utils.convert_to_list(stride, 3, 'stride') - dilation = utils.convert_to_list(dilation, 3, 'dilation') + stride = convert_to_list(stride, 3, 'stride') + dilation = convert_to_list(dilation, 3, 'dilation') op_type = "conv3d" return _conv_nd(x, weight, bias, stride, padding, padding_algorithm, @@ -1369,8 +1371,8 @@ def conv3d_transpose(x, groups)) padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3) - stride = utils.convert_to_list(stride, 3, 'stride') - dilation = utils.convert_to_list(dilation, 3, 'dilation') + stride = convert_to_list(stride, 3, 'stride') + dilation = convert_to_list(dilation, 3, 'dilation') if 
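The conv helpers above now call convert_to_list and _is_symmetric_padding directly instead of going through utils; the shortcut they implement is that a [before_0, after_0, before_1, after_1, ...] padding list collapses to one value per dimension when every pair matches. A small sketch with made-up values (not the _update_padding_nd code):

def reduce_symmetric_padding(padding, num_dims):
    # keep padding[0::2] only when before == after for every dimension
    symmetric = all(padding[2 * i] == padding[2 * i + 1] for i in range(num_dims))
    return padding[0::2] if symmetric else padding

print(reduce_symmetric_padding([1, 1, 2, 2], num_dims=2))   # -> [1, 2]
print(reduce_symmetric_padding([1, 0, 2, 2], num_dims=2))   # -> unchanged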
output_size is None: output_size = [] else: @@ -1378,7 +1380,7 @@ def conv3d_transpose(x, raise ValueError('output_padding option is mutually exclusive with ' 'output_size') if isinstance(output_size, (list, tuple, int)): - output_size = utils.convert_to_list(output_size, 3, 'output_size') + output_size = convert_to_list(output_size, 3, 'output_size') else: raise ValueError( "output_size should be int, or list, tuple of ints") @@ -1386,8 +1388,7 @@ def conv3d_transpose(x, if output_padding == 0: output_padding = [] else: - output_padding = utils.convert_to_list(output_padding, 3, - 'output_padding') + output_padding = convert_to_list(output_padding, 3, 'output_padding') cudnn_version = get_cudnn_version() diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py index 8a9597119ab8df..bccb7bc7334fb0 100644 --- a/python/paddle/nn/functional/extension.py +++ b/python/paddle/nn/functional/extension.py @@ -17,8 +17,9 @@ import numpy as np from ...fluid.data_feeder import check_dtype from ...fluid.layer_helper import LayerHelper -from ...fluid.framework import Variable, in_dygraph_mode -from ...fluid.layers.tensor import assign +from ...fluid.framework import in_dygraph_mode +from ...static import Variable +from ...tensor.creation import assign from ...fluid import core, dygraph_utils from ...fluid.layers.layer_function_generator import templatedoc from ...fluid.layers.sequence_lod import sequence_mask diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index 6fbb292e674861..d88ee530715b0c 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -14,7 +14,8 @@ from __future__ import print_function import warnings -from ...fluid.framework import Variable, in_dygraph_mode +from ...fluid.framework import in_dygraph_mode +from ...static import Variable from ...fluid.layer_helper import LayerHelper from ...fluid.layers import core from ...fluid.data_feeder import check_variable_and_dtype, check_dtype diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 56eabd2ec40567..cb7a50ade7ac8f 100755 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -27,7 +27,7 @@ from ...fluid.layers import dice_loss # noqa: F401 from ...fluid.layers import log_loss # noqa: F401 from ...fluid.layers import npair_loss # noqa: F401 -from ...fluid.layers import reshape +from ...tensor.manipulation import reshape from ...fluid.layers import softmax_with_cross_entropy as fluid_softmax_with_cross_entropy from ...fluid.layers import square_error_cost # noqa: F401 @@ -36,7 +36,7 @@ from ...fluid.layer_helper import LayerHelper from ...fluid.framework import in_dygraph_mode from ...fluid.framework import _varbase_creator -from ...fluid.framework import Variable +from ...static import Variable from paddle.utils import deprecated from paddle import _C_ops @@ -291,9 +291,7 @@ def binary_cross_entropy_with_logits(logit, pos_weight, 'pos_weight', ['float32', 'float64'], 'binary_cross_entropy_with_logits') log_weight = paddle.add( - paddle.multiply( - label, paddle.fluid.layers.elementwise_sub(pos_weight, one)), - one) + paddle.multiply(label, paddle.subtract(pos_weight, one)), one) pos_weight_name = name if reduction == 'none' and weight is None else None out = paddle.multiply(out, log_weight, name=pos_weight_name) @@ -515,9 +513,9 @@ def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None): if reduction == 'none': return out elif 
reduction == 'mean': - return fluid.layers.reduce_mean(out) + return paddle.mean(out) elif reduction == 'sum': - return fluid.layers.reduce_sum(out) + return paddle.sum(out) def margin_ranking_loss(input, @@ -592,7 +590,7 @@ def margin_ranking_loss(input, fluid.data_feeder.check_variable_and_dtype( label, 'label', ['float32', 'float64'], 'margin_rank_loss') - out = paddle.fluid.layers.elementwise_sub(other, input) + out = paddle.subtract(other, input) out = paddle.multiply(out, label) if margin != 0.0: @@ -898,11 +896,11 @@ def kl_div(input, label, reduction='mean', name=None): if fluid.data_feeder.convert_dtype( input.dtype) == 'float32' and fluid.data_feeder.convert_dtype( label.dtype) == 'float64': - input = fluid.layers.cast(input, 'float64') + input = paddle.cast(input, 'float64') elif fluid.data_feeder.convert_dtype( input.dtype) == 'float64' and fluid.data_feeder.convert_dtype( label.dtype) == 'float32': - label = fluid.layers.cast(label, 'float64') + label = paddle.cast(label, 'float64') if paddle.in_dynamic_mode(): out = _C_ops.kldiv_loss(input, label, 'reduction', reduction) @@ -988,16 +986,12 @@ def mse_loss(input, label, reduction='mean', name=None): label, 'label', ['float32', 'float64'], 'mse_loss') if reduction == 'none': - return paddle.fluid.layers.square( - paddle.fluid.layers.elementwise_sub(input, label), name=name) + return paddle.square(paddle.subtract(input, label), name=name) elif reduction == 'mean': return paddle.mean( - paddle.fluid.layers.square( - paddle.fluid.layers.elementwise_sub(input, label)), - name=name) + paddle.square(paddle.subtract(input, label)), name=name) else: - return paddle.sum(paddle.fluid.layers.square( - paddle.fluid.layers.elementwise_sub(input, label)), + return paddle.sum(paddle.square(paddle.subtract(input, label)), name=name) diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index 2b4df534ac7474..863787c00e649d 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -19,8 +19,8 @@ from ...fluid.layer_helper import LayerHelper from ...fluid.framework import in_dygraph_mode, core from ...framework import create_parameter -from ...fluid.initializer import Constant -from ...fluid.param_attr import ParamAttr +from ..initializer import Constant +from ...framework import ParamAttr from ...fluid import core, dygraph_utils import numbers from paddle import _C_ops @@ -104,8 +104,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None): type='p_norm', inputs={'X': x}, outputs={'Out': out}, attrs=attrs) eps = out.block.create_var(dtype=out.dtype) paddle.fluid.layers.fill_constant([1], out.dtype, epsilon, out=eps) - return paddle.fluid.layers.elementwise_div( - x, paddle.maximum(out, eps), name=name) + return paddle.divide(x, paddle.maximum(out, eps), name=name) def batch_norm(x, diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 5575a864cfb63a..d3ae44bf7cef33 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -15,7 +15,8 @@ # TODO: define pooling functions from ...fluid import core from ...fluid.framework import in_dygraph_mode -from ...fluid.layers import utils, LayerHelper, unsqueeze, squeeze +from ...fluid.layers import utils, LayerHelper +from ...tensor.manipulation import unsqueeze, squeeze from ...fluid.data_feeder import check_type, check_variable_and_dtype from paddle import _C_ops from paddle import _C_ops diff --git a/python/paddle/nn/functional/vision.py 
b/python/paddle/nn/functional/vision.py index 91e497a10ed5a3..bd3e27a25e12c4 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -13,7 +13,8 @@ # limitations under the License. from ...device import get_cudnn_version -from ...fluid.framework import core, in_dygraph_mode, Variable +from ...fluid.framework import core, in_dygraph_mode +from ...static import Variable from ...fluid.layer_helper import LayerHelper from ...fluid.data_feeder import check_variable_and_dtype from ...fluid import dygraph_utils diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index d5b37144cfffed..695e387bda84f0 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -14,18 +14,18 @@ # TODO: define activation functions of neural network -from ...fluid.dygraph import layers from ...fluid import core from ...fluid.framework import in_dygraph_mode -from ...fluid.param_attr import ParamAttr -from ...fluid.initializer import Constant +from ...framework import ParamAttr +from ..initializer import Constant from paddle.framework import get_default_dtype from .. import functional as F +from paddle.nn import Layer __all__ = [] -class ELU(layers.Layer): +class ELU(Layer): r""" ELU Activation. @@ -67,7 +67,7 @@ def extra_repr(self): return 'alpha={}{}'.format(self._alpha, name_str) -class GELU(layers.Layer): +class GELU(Layer): r""" GELU Activation. @@ -120,7 +120,7 @@ def extra_repr(self): return 'approximate={}{}'.format(self._approximate, name_str) -class Hardshrink(layers.Layer): +class Hardshrink(Layer): r""" Hardshrink Activation @@ -168,7 +168,7 @@ def extra_repr(self): return 'threshold={}{}'.format(self._threshold, name_str) -class Hardswish(layers.Layer): +class Hardswish(Layer): r""" Hardswish activation @@ -218,7 +218,7 @@ def extra_repr(self): return name_str -class Tanh(layers.Layer): +class Tanh(Layer): r""" Tanh Activation. @@ -259,7 +259,7 @@ def extra_repr(self): return name_str -class Hardtanh(layers.Layer): +class Hardtanh(Layer): r""" Hardtanh Activation @@ -305,7 +305,7 @@ def extra_repr(self): return 'min={}, max={}{}'.format(self._min, self._max, name_str) -class PReLU(layers.Layer): +class PReLU(Layer): """ PReLU Activation. @@ -377,7 +377,7 @@ def extra_repr(self): self._num_parameters, self._init, self._dtype, name_str) -class ReLU(layers.Layer): +class ReLU(Layer): """ ReLU Activation. @@ -415,7 +415,7 @@ def extra_repr(self): return name_str -class ReLU6(layers.Layer): +class ReLU6(Layer): """ ReLU6 Activation @@ -454,7 +454,7 @@ def extra_repr(self): return name_str -class SELU(layers.Layer): +class SELU(Layer): r""" SELU Activation @@ -505,7 +505,7 @@ def extra_repr(self): name_str) -class LeakyReLU(layers.Layer): +class LeakyReLU(Layer): r""" Leaky ReLU Activation. @@ -553,7 +553,7 @@ def extra_repr(self): return 'negative_slope={}{}'.format(self._negative_slope, name_str) -class Sigmoid(layers.Layer): +class Sigmoid(Layer): """ this interface is used to construct a callable object of the ``Sigmoid`` class. This layer calcluate the `sigmoid` of input x. @@ -593,7 +593,7 @@ def extra_repr(self): return name_str -class Hardsigmoid(layers.Layer): +class Hardsigmoid(Layer): r""" This interface is used to construct a callable object of the ``Hardsigmoid`` class. This layer calcluate the `hardsigmoid` of input x. 
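The functional hunks above swap private `fluid.layers` helpers (`elementwise_sub`, `reduce_mean`, `reduce_sum`, `square`, `cast`, `elementwise_div`) for their public Paddle 2.x counterparts. A minimal sketch of those replacement ops, exercised on made-up tensors rather than anything taken from the patch:

```python
# Illustrative only: the public 2.x ops that these hunks switch to.
import paddle

x = paddle.to_tensor([1.0, 2.0, 3.0])
y = paddle.to_tensor([0.5, 2.5, 2.0])

diff = paddle.subtract(x, y)         # was fluid.layers.elementwise_sub
sq = paddle.square(diff)             # was fluid.layers.square
loss_mean = paddle.mean(sq)          # was fluid.layers.reduce_mean
loss_sum = paddle.sum(sq)            # was fluid.layers.reduce_sum
y64 = paddle.cast(y, 'float64')      # was fluid.layers.cast
# was fluid.layers.elementwise_div; paddle.maximum guards against a zero denominator
safe_div = paddle.divide(x, paddle.maximum(y, paddle.to_tensor(1e-12)))
```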
@@ -644,7 +644,7 @@ def extra_repr(self): return name_str -class Softplus(layers.Layer): +class Softplus(Layer): r""" Softplus Activation @@ -689,7 +689,7 @@ def extra_repr(self): name_str) -class Softshrink(layers.Layer): +class Softshrink(Layer): r""" Softshrink Activation @@ -734,7 +734,7 @@ def extra_repr(self): return 'threshold={}{}'.format(self._threshold, name_str) -class Softsign(layers.Layer): +class Softsign(Layer): r""" Softsign Activation @@ -773,7 +773,7 @@ def extra_repr(self): return name_str -class Swish(layers.Layer): +class Swish(Layer): r""" Swish Activation. @@ -812,7 +812,7 @@ def extra_repr(self): return name_str -class Tanhshrink(layers.Layer): +class Tanhshrink(Layer): """ Tanhshrink Activation @@ -851,7 +851,7 @@ def extra_repr(self): return name_str -class ThresholdedReLU(layers.Layer): +class ThresholdedReLU(Layer): r""" Thresholded ReLU Activation @@ -895,7 +895,7 @@ def extra_repr(self): return 'threshold={}{}'.format(self._threshold, name_str) -class Silu(layers.Layer): +class Silu(Layer): """ Silu Activation. .. math:: @@ -933,7 +933,7 @@ def extra_repr(self): return name_str -class LogSigmoid(layers.Layer): +class LogSigmoid(Layer): r""" LogSigmoid Activation. @@ -972,7 +972,7 @@ def extra_repr(self): return name_str -class Softmax(layers.Layer): +class Softmax(Layer): r""" Softmax Activation. @@ -1099,7 +1099,7 @@ def extra_repr(self): return 'axis={}{}'.format(self._axis, name_str) -class LogSoftmax(layers.Layer): +class LogSoftmax(Layer): r""" This operator implements the log_softmax layer. The calculation process is as follows: @@ -1157,7 +1157,7 @@ def extra_repr(self): return 'axis={}{}'.format(self._axis, name_str) -class Maxout(layers.Layer): +class Maxout(Layer): r""" Maxout Activation. diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 1d7f7c6589986b..9aa8097befc98b 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -15,10 +15,10 @@ # TODO: define the common classes to build a neural network import paddle from ...fluid.dygraph import Flatten # noqa: F401 -from ...fluid.dygraph import layers from ...fluid.framework import in_dygraph_mode from .. import functional as F from ...fluid.framework import _dygraph_tracer +from paddle.nn import Layer __all__ = [] @@ -30,7 +30,7 @@ def _npairs(x, n): return x -class Linear(layers.Layer): +class Linear(Layer): r""" Fully-connected linear transformation layer. For each input :math:`X` , @@ -135,7 +135,7 @@ def extra_repr(self): self.weight.shape[0], self.weight.shape[1], self._dtype, name_str) -class Upsample(layers.Layer): +class Upsample(Layer): """ This op resizes a batch of images. @@ -385,7 +385,7 @@ def extra_repr(self): self.data_format, name_str) -class UpsamplingNearest2D(layers.Layer): +class UpsamplingNearest2D(Layer): """ This op upsamples a batch of images, using nearest neighbours' pixel values. The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w), @@ -470,7 +470,7 @@ def extra_repr(self): name_str) -class UpsamplingBilinear2D(layers.Layer): +class UpsamplingBilinear2D(Layer): """ This op upsamples a batch of images, using bilinear' pixel values. The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w), @@ -556,7 +556,7 @@ def extra_repr(self): name_str) -class Bilinear(layers.Layer): +class Bilinear(Layer): r""" This layer performs bilinear on two inputs. 
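The activation.py hunks change only the base class: every activation module now derives from `paddle.nn.Layer` instead of `paddle.fluid.dygraph.layers.Layer`. A hypothetical module written against the new base class, following the same `forward`/`extra_repr` pattern (the class name and math are illustrative, not part of the patch):

```python
import paddle
from paddle.nn import Layer  # new public base class used by these hunks

class ScaledReLU(Layer):
    def __init__(self, scale=1.0, name=None):
        super(ScaledReLU, self).__init__()
        self._scale = scale
        self._name = name

    def forward(self, x):
        return self._scale * paddle.nn.functional.relu(x)

    def extra_repr(self):
        name_str = ', name={}'.format(self._name) if self._name else ''
        return 'scale={}{}'.format(self._scale, name_str)

layer = ScaledReLU(scale=2.0)
out = layer(paddle.to_tensor([-1.0, 0.5]))  # -> [0.0, 1.0]
```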
@@ -651,7 +651,7 @@ def extra_repr(self): self._dtype, name_str) -class Dropout(layers.Layer): +class Dropout(Layer): """ Dropout is a regularization technique for reducing overfitting by preventing neuron co-adaption during training as described in the paper: @@ -725,7 +725,7 @@ def extra_repr(self): name_str) -class Dropout2D(layers.Layer): +class Dropout2D(Layer): """ Randomly zero out entire channels (in the batched input 4d tensor with the shape `NCHW` , a channel is a 2D feature map with the shape `HW`). Each channel will be zeroed out independently @@ -786,7 +786,7 @@ def extra_repr(self): name_str) -class Dropout3D(layers.Layer): +class Dropout3D(Layer): """ Randomly zero out entire channels (in the batched input 5d tensor with the shape `NCDHW` , a channel is a 3D feature map with the shape `DHW` ). Each channel will be zeroed out independently @@ -847,7 +847,7 @@ def extra_repr(self): name_str) -class AlphaDropout(layers.Layer): +class AlphaDropout(Layer): """ Alpha Dropout is a type of Dropout that maintains the self-normalizing property. For an input with zero mean and unit standard deviation, the output of Alpha Dropout maintains the original mean and @@ -900,7 +900,7 @@ def extra_repr(self): return 'p={}{}'.format(self.p, name_str) -class Pad1D(layers.Layer): +class Pad1D(Layer): """ This interface is used to construct a callable object of the ``Pad1D`` class. Pad tensor according to 'pad', 'mode' and 'value'. @@ -981,7 +981,7 @@ def extra_repr(self): self._pad, self._mode, self._value, self._data_format, name_str) -class Pad2D(layers.Layer): +class Pad2D(Layer): """ This interface is used to construct a callable object of the ``Pad2D`` class. Pad tensor according to 'pad', 'mode' and 'value'. @@ -1065,7 +1065,7 @@ def extra_repr(self): self._pad, self._mode, self._value, self._data_format, name_str) -class Pad3D(layers.Layer): +class Pad3D(Layer): """ This interface is used to construct a callable object of the ``Pad3D`` class. Pad tensor according to 'pad', 'mode' and 'value'. @@ -1149,7 +1149,7 @@ def extra_repr(self): self._pad, self._mode, self._value, self._data_format, name_str) -class CosineSimilarity(layers.Layer): +class CosineSimilarity(Layer): """ This interface is used to compute cosine similarity between x1 and x2 along axis. @@ -1206,7 +1206,7 @@ def extra_repr(self): return 'axis={_axis}, eps={_eps}'.format(**self.__dict__) -class Embedding(layers.Layer): +class Embedding(Layer): r""" **Embedding Layer** @@ -1367,7 +1367,7 @@ def extra_repr(self): return main_str.format(**self.__dict__) -class Unfold(layers.Layer): +class Unfold(Layer): """ This op returns a col buffer of sliding local blocks of input x, also known as im2col for batched 2D image tensors. For each block under the convolution filter, diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py index 48697aa8f50909..aadaf1efce50fa 100644 --- a/python/paddle/nn/layer/container.py +++ b/python/paddle/nn/layer/container.py @@ -13,7 +13,7 @@ # limitations under the License. from collections import OrderedDict -from ...fluid.dygraph.layers import Layer +from .. 
import Layer from collections.abc import Iterable, Mapping __all__ = [] diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 76011aeff5b4fb..26fd544ecce112 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -19,8 +19,8 @@ from ...fluid import get_flags from ...fluid import core from ...device import get_cudnn_version -from ...fluid.dygraph import layers -from ...fluid.initializer import Normal +from .. import Layer +from ..initializer import Normal from .. import functional as F from ...fluid.layers import utils from ..functional.conv import _update_padding_nd @@ -31,7 +31,7 @@ def _get_default_param_initializer(num_channels, filter_size): filter_elem_num = num_channels * np.prod(filter_size) std = (2.0 / filter_elem_num)**0.5 - return Normal(0.0, std, 0) + return Normal(0.0, std) def _reverse_repeat_list(t, n): @@ -42,7 +42,7 @@ def _reverse_repeat_list(t, n): return list(x for x in reversed(t) for _ in range(n)) -class _ConvNd(layers.Layer): +class _ConvNd(Layer): def __init__(self, in_channels, out_channels, @@ -127,7 +127,7 @@ def _get_default_param_initializer(): return None filter_elem_num = np.prod(self._kernel_size) * self._in_channels std = (2.0 / filter_elem_num)**0.5 - return Normal(0.0, std, 0) + return Normal(0.0, std) self.weight = self.create_parameter( shape=filter_shape, diff --git a/python/paddle/nn/layer/distance.py b/python/paddle/nn/layer/distance.py index 27e904980d143d..0547bf75a4bf6c 100644 --- a/python/paddle/nn/layer/distance.py +++ b/python/paddle/nn/layer/distance.py @@ -15,7 +15,7 @@ import numpy as np import paddle -from ...fluid.dygraph import layers +from .. import Layer from ...fluid.framework import core, in_dygraph_mode from ...fluid.data_feeder import check_variable_and_dtype, check_type from ...fluid.layer_helper import LayerHelper @@ -24,7 +24,7 @@ __all__ = [] -class PairwiseDistance(layers.Layer): +class PairwiseDistance(Layer): r""" This operator computes the pairwise distance between two vectors. The distance is calculated by p-oreder norm: @@ -87,7 +87,7 @@ def forward(self, x, y): 'PairwiseDistance') check_variable_and_dtype(y, 'y', ['float32', 'float64'], 'PairwiseDistance') - sub = paddle.fluid.layers.elementwise_sub(x, y) + sub = paddle.subtract(x, y) helper = LayerHelper("PairwiseDistance", name=self.name) attrs = { diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py index 8f43eb8866b4bb..31b552bed162c2 100644 --- a/python/paddle/nn/layer/loss.py +++ b/python/paddle/nn/layer/loss.py @@ -20,11 +20,12 @@ import paddle from .. import functional as F from paddle.fluid.framework import core, in_dygraph_mode, _varbase_creator +from .. import Layer __all__ = [] -class BCEWithLogitsLoss(fluid.dygraph.Layer): +class BCEWithLogitsLoss(Layer): r""" This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer. Also, we can see it as the combine of ``sigmoid_cross_entropy_with_logits`` @@ -128,7 +129,7 @@ def forward(self, logit, label): return out -class CrossEntropyLoss(fluid.dygraph.Layer): +class CrossEntropyLoss(Layer): r""" By default, this operator implements the cross entropy loss function with softmax. This function combines the calculation of the softmax operation and the cross entropy loss function @@ -407,7 +408,7 @@ def forward(self, input, label): return ret -class HSigmoidLoss(fluid.dygraph.Layer): +class HSigmoidLoss(Layer): """ Hierarchical Sigmoid Layer. 
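Besides the base-class switch, conv.py drops the third positional argument of the default weight initializer: `paddle.nn.initializer.Normal` takes only mean and std, whereas the old `fluid.initializer.Normal` also accepted a seed. A rough usage sketch under that assumption, with an illustrative convolution shape that is not taken from the patch:

```python
import numpy as np
import paddle
from paddle.nn.initializer import Normal  # import path used by the updated conv.py

in_channels, kernel_size = 3, (3, 3)
filter_elem_num = in_channels * np.prod(kernel_size)
std = (2.0 / filter_elem_num) ** 0.5

conv = paddle.nn.Conv2D(
    in_channels=in_channels,
    out_channels=8,
    kernel_size=kernel_size,
    weight_attr=paddle.ParamAttr(initializer=Normal(0.0, std)))  # no seed argument
```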
@@ -529,7 +530,7 @@ def forward(self, input, label, path_table=None, path_code=None): return out -class MSELoss(fluid.dygraph.layers.Layer): +class MSELoss(Layer): r""" **Mean Square Error Loss** Computes the mean square error (squared L2 norm) of given input and label. @@ -596,8 +597,7 @@ def forward(self, input, label): fluid.data_feeder.check_variable_and_dtype( label, 'label', ['float32', 'float64'], 'MSELoss') - square_out = fluid.layers.square( - fluid.layers.elementwise_sub(input, label)) + square_out = paddle.square(paddle.subtract(input, label)) if self.reduction == 'none': return square_out @@ -608,7 +608,7 @@ def forward(self, input, label): return getattr(fluid.layers, reduce_op)(square_out) -class L1Loss(fluid.dygraph.Layer): +class L1Loss(Layer): r""" This interface is used to construct a callable object of the ``L1Loss`` class. The L1Loss layer calculates the L1 Loss of ``input`` and ``label`` as follows. @@ -687,7 +687,7 @@ def forward(self, input, label): input, label, self.reduction, name=self.name) -class BCELoss(fluid.dygraph.Layer): +class BCELoss(Layer): """ This interface is used to construct a callable object of the ``BCELoss`` class. The BCELoss layer measures the binary_cross_entropy loss between input predictions ``input`` @@ -777,7 +777,7 @@ def forward(self, input, label): return out -class NLLLoss(fluid.dygraph.Layer): +class NLLLoss(Layer): r""" :alias_main: paddle.nn.NLLLoss :alias: paddle.nn.NLLLoss,paddle.nn.layer.NLLLoss,paddle.nn.layer.loss.NLLLoss @@ -886,7 +886,7 @@ def forward(self, input, label): name=self._name) -class KLDivLoss(fluid.dygraph.Layer): +class KLDivLoss(Layer): r""" This interface calculates the Kullback-Leibler divergence loss between Input(X) and Input(Target). Notes that Input(X) is the @@ -959,7 +959,7 @@ def forward(self, input, label): return out -class MarginRankingLoss(fluid.dygraph.Layer): +class MarginRankingLoss(Layer): r""" This interface is used to construct a callable object of the ``MarginRankingLoss`` class. @@ -1031,7 +1031,7 @@ def forward(self, input, other, label): return out -class CTCLoss(fluid.dygraph.Layer): +class CTCLoss(Layer): """ An operator integrating the open source Warp-CTC library (https://github.com/baidu-research/warp-ctc) @@ -1127,7 +1127,7 @@ def forward(self, norm_by_times=norm_by_times) -class SmoothL1Loss(fluid.dygraph.Layer): +class SmoothL1Loss(Layer): r""" This operator calculates smooth_l1_loss. Creates a criterion that uses a squared term if the absolute element-wise error falls below 1 and an L1 term otherwise. diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index b93412a7b22ccd..9abbc494258948 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -30,15 +30,13 @@ import six from ...fluid.dygraph import BatchNorm # noqa: F401 - from ...fluid.dygraph import SpectralNorm # noqa: F401 -from ...fluid.dygraph import layers from ...framework import get_default_dtype, set_default_dtype from ...fluid.framework import in_dygraph_mode -from ...fluid.initializer import Constant -from ...fluid.param_attr import ParamAttr +from ..initializer import Constant +from ...framework import ParamAttr from ...fluid.data_feeder import check_variable_and_dtype, check_type from ...fluid import core, dygraph_utils @@ -47,14 +45,15 @@ import numpy as np import numbers import warnings -from ...fluid.dygraph.base import no_grad +from ...framework import no_grad from .. import functional as F from paddle import _C_ops +from .. 
import Layer __all__ = [] -class _InstanceNormBase(layers.Layer): +class _InstanceNormBase(Layer): """ This class is based class for InstanceNorm1D, 2d, 3d. @@ -317,7 +316,7 @@ def _check_input_dim(self, input): len(input.shape))) -class GroupNorm(layers.Layer): +class GroupNorm(Layer): """ This interface is used to construct a callable object of the ``GroupNorm`` class. For more details, refer to code examples. @@ -436,7 +435,7 @@ def extra_repr(self): self._num_groups, self._num_channels, self._epsilon) -class LayerNorm(layers.Layer): +class LayerNorm(Layer): r""" :alias_main: paddle.nn.LayerNorm :alias: paddle.nn.LayerNorm,paddle.nn.layer.LayerNorm,paddle.nn.layer.norm.LayerNorm @@ -544,7 +543,7 @@ def extra_repr(self): self._epsilon) -class _BatchNormBase(layers.Layer): +class _BatchNormBase(Layer): """ BatchNorm base . """ @@ -1181,7 +1180,7 @@ def convert_sync_batchnorm(cls, layer): return layer_output -class LocalResponseNorm(layers.Layer): +class LocalResponseNorm(Layer): """ Local Response Normalization performs a type of "lateral inhibition" by normalizing over local input regions. For more information, please refer to `ImageNet Classification with Deep Convolutional Neural Networks `_ diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index 528572ee21b7cc..881f92568414dc 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.dygraph import layers from ...fluid.layer_helper import LayerHelper from .. import functional as F +from .. import Layer __all__ = [] -class AvgPool1D(layers.Layer): +class AvgPool1D(Layer): r""" This operation applies a 1D average pooling over an input signal composed of several input planes, based on the input, output_size, return_mask parameters. @@ -109,7 +109,7 @@ def extra_repr(self): **self.__dict__) -class AvgPool2D(layers.Layer): +class AvgPool2D(Layer): r""" This operation applies 2D average pooling over input features based on the input, and kernel_size, stride, padding parameters. Input(X) and Output(Out) are @@ -220,7 +220,7 @@ def extra_repr(self): **self.__dict__) -class AvgPool3D(layers.Layer): +class AvgPool3D(Layer): """ This operation applies 3D max pooling over input features based on the input, and kernel_size, stride, padding parameters. Input(X) and Output(Out) are @@ -318,7 +318,7 @@ def extra_repr(self): **self.__dict__) -class MaxPool1D(layers.Layer): +class MaxPool1D(Layer): """ This operation applies 1D max pooling over input signal composed of several input planes based on the input, @@ -412,7 +412,7 @@ def extra_repr(self): **self.__dict__) -class MaxPool2D(layers.Layer): +class MaxPool2D(Layer): r""" This operation applies 2D max pooling over input feature based on the input, and kernel_size, stride, padding parameters. Input(X) and Output(Out) are @@ -522,7 +522,7 @@ def extra_repr(self): **self.__dict__) -class MaxPool3D(layers.Layer): +class MaxPool3D(Layer): """ This operation applies 3D max pooling over input features based on the input, and kernel_size, stride, padding parameters. 
Input(X) and Output(Out) are @@ -620,7 +620,7 @@ def extra_repr(self): **self.__dict__) -class AdaptiveAvgPool1D(layers.Layer): +class AdaptiveAvgPool1D(Layer): r""" This operation applies a 1D adaptive average pooling over an input signal composed @@ -693,7 +693,7 @@ def extra_repr(self): return 'output_size={}'.format(self.output_size) -class AdaptiveAvgPool2D(layers.Layer): +class AdaptiveAvgPool2D(Layer): r""" This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions @@ -779,7 +779,7 @@ def extra_repr(self): return 'output_size={}'.format(self._output_size) -class AdaptiveAvgPool3D(layers.Layer): +class AdaptiveAvgPool3D(Layer): r""" This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions @@ -872,7 +872,7 @@ def extra_repr(self): return 'output_size={}'.format(self._output_size) -class AdaptiveMaxPool1D(layers.Layer): +class AdaptiveMaxPool1D(Layer): """ This operation applies a 1D adaptive max pooling over an input signal composed @@ -956,7 +956,7 @@ def extra_repr(self): self.return_mask) -class AdaptiveMaxPool2D(layers.Layer): +class AdaptiveMaxPool2D(Layer): """ This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and @@ -1037,7 +1037,7 @@ def extra_repr(self): self._return_mask) -class AdaptiveMaxPool3D(layers.Layer): +class AdaptiveMaxPool3D(Layer): """ This operation applies 3D adaptive max pooling on input tensor. The h and w dimensions of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and pooling is adaptive one focus diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 693ec0200b0d05..77168566d88c60 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -28,7 +28,7 @@ from paddle.device import get_device, get_cudnn_version from paddle.nn import functional as F from paddle.nn import initializer as I -from paddle.fluid.dygraph import Layer, LayerList +from paddle.nn import Layer, LayerList from paddle.fluid.layers import utils from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as from paddle.fluid.data_feeder import convert_dtype @@ -962,7 +962,7 @@ def flatten_parameters(self): # for static-graph, append coalesce_tensor into startup program with fluid.program_guard(fluid.default_startup_program(), fluid.default_startup_program()): - with framework.no_grad(): + with paddle.no_grad(): self._helper.append_op( type="coalesce_tensor", inputs={"Input": self._all_weights}, @@ -1040,11 +1040,11 @@ def forward(self, inputs, initial_states=None, sequence_length=None): ]) else: initial_states = [initial_states] if isinstance( - initial_states, - paddle.fluid.framework.Variable) else initial_states + initial_states, paddle.static.Variable) else initial_states - if self.could_use_cudnn and (not fluid.core.is_compiled_with_rocm() or - sequence_length is None): + if self.could_use_cudnn and ( + not paddle.device.is_compiled_with_rocm() or + sequence_length is None): # Add CPU kernel and dispatch in backend later return self._cudnn_impl(inputs, initial_states, sequence_length) diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 5aba8ae85ad1b3..eacf5aac9daa9f 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -24,8 +24,8 @@ from .. import functional as F from ... 
import tensor from ...fluid import layers -from ...fluid.dygraph import Layer, LayerList -from ...fluid.param_attr import ParamAttr +from .. import Layer, LayerList +from ...framework import ParamAttr from ...fluid.data_feeder import convert_dtype __all__ = [] diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py index e6d3af9a37b329..7f8b51ca10818e 100644 --- a/python/paddle/nn/layer/vision.py +++ b/python/paddle/nn/layer/vision.py @@ -14,13 +14,13 @@ # TODO: define specitial functions used in computer vision task -from ...fluid.dygraph import layers +from .. import Layer from .. import functional __all__ = [] -class PixelShuffle(layers.Layer): +class PixelShuffle(Layer): """ PixelShuffle Layer diff --git a/python/paddle/tests/dist_hapi_mnist_static.py b/python/paddle/tests/dist_hapi_mnist_static.py index eab34a6dafbc35..6120ae90e994d2 100644 --- a/python/paddle/tests/dist_hapi_mnist_static.py +++ b/python/paddle/tests/dist_hapi_mnist_static.py @@ -59,6 +59,7 @@ def compute_accuracy(pred, gt): 'CPU testing is not supported') class TestDistTraning(unittest.TestCase): def test_static_multiple_gpus(self): + paddle.enable_static() device = set_device('gpu') im_shape = (-1, 1, 28, 28) diff --git a/python/paddle/tests/test_dataset_cifar.py b/python/paddle/tests/test_dataset_cifar.py index 2e9efddf9712e3..abf79fb1e3974c 100644 --- a/python/paddle/tests/test_dataset_cifar.py +++ b/python/paddle/tests/test_dataset_cifar.py @@ -32,8 +32,6 @@ def test_main(self): self.assertTrue(data.shape[2] == 3) self.assertTrue(data.shape[1] == 32) self.assertTrue(data.shape[0] == 32) - self.assertTrue(len(label.shape) == 1) - self.assertTrue(label.shape[0] == 1) self.assertTrue(0 <= int(label) <= 9) @@ -51,8 +49,6 @@ def test_main(self): self.assertTrue(data.shape[2] == 3) self.assertTrue(data.shape[1] == 32) self.assertTrue(data.shape[0] == 32) - self.assertTrue(len(label.shape) == 1) - self.assertTrue(label.shape[0] == 1) self.assertTrue(0 <= int(label) <= 9) # test cv2 backend @@ -67,8 +63,6 @@ def test_main(self): self.assertTrue(data.shape[2] == 3) self.assertTrue(data.shape[1] == 32) self.assertTrue(data.shape[0] == 32) - self.assertTrue(len(label.shape) == 1) - self.assertTrue(label.shape[0] == 1) self.assertTrue(0 <= int(label) <= 99) with self.assertRaises(ValueError): @@ -89,8 +83,6 @@ def test_main(self): self.assertTrue(data.shape[2] == 3) self.assertTrue(data.shape[1] == 32) self.assertTrue(data.shape[0] == 32) - self.assertTrue(len(label.shape) == 1) - self.assertTrue(label.shape[0] == 1) self.assertTrue(0 <= int(label) <= 99) @@ -108,8 +100,6 @@ def test_main(self): self.assertTrue(data.shape[2] == 3) self.assertTrue(data.shape[1] == 32) self.assertTrue(data.shape[0] == 32) - self.assertTrue(len(label.shape) == 1) - self.assertTrue(label.shape[0] == 1) self.assertTrue(0 <= int(label) <= 99) # test cv2 backend @@ -124,8 +114,6 @@ def test_main(self): self.assertTrue(data.shape[2] == 3) self.assertTrue(data.shape[1] == 32) self.assertTrue(data.shape[0] == 32) - self.assertTrue(len(label.shape) == 1) - self.assertTrue(label.shape[0] == 1) self.assertTrue(0 <= int(label) <= 99) with self.assertRaises(ValueError): diff --git a/python/paddle/vision/datasets/cifar.py b/python/paddle/vision/datasets/cifar.py index 97ffb239fe7adf..74ae8ef11e3de0 100644 --- a/python/paddle/vision/datasets/cifar.py +++ b/python/paddle/vision/datasets/cifar.py @@ -148,8 +148,7 @@ def _load_data(self): six.b('labels'), batch.get(six.b('fine_labels'), None)) assert labels is not None for sample, label 
in six.moves.zip(data, labels): - self.data.append((sample, - np.array([label]).astype('int64'))) + self.data.append((sample, label)) def __getitem__(self, idx): image, label = self.data[idx] @@ -162,9 +161,9 @@ def __getitem__(self, idx): image = self.transform(image) if self.backend == 'pil': - return image, label.astype('int64') + return image, np.array(label).astype('int64') - return image.astype(self.dtype), label.astype('int64') + return image.astype(self.dtype), np.array(label).astype('int64') def __len__(self): return len(self.data) diff --git a/tools/dockerfile/Dockerfile.npu_aarch64 b/tools/dockerfile/Dockerfile.npu_aarch64 index e3cd162edc1547..e95ff951878c94 100644 --- a/tools/dockerfile/Dockerfile.npu_aarch64 +++ b/tools/dockerfile/Dockerfile.npu_aarch64 @@ -1,17 +1,20 @@ # A image for building paddle binaries -# Use cann 5.0.2.alpha003 and aarch64 for A300t-9000 -# When you modify it, please be aware of cann version +# Use cann 5.0.2.alpha005 and aarch64 for A300t-9000 +# Update CANN_VERSION if using other versions # -# Build: CANN 5.0.2.alpha003 +# Build: CANN 5.0.2.alpha005 +# Download pkgs from https://www.hiascend.com/software/cann/community +# and copy them to current dir first, then run build commands # cd Paddle/tools/dockerfile # docker build -f Dockerfile.npu_aarch64 \ -# -t paddlepaddle/paddle:latest-cann5.0.2-gcc82-aarch64-dev . +# --build-arg CANN_VERSION=5.0.2.alpha005 \ +# -t paddlepaddle/paddle:latest-dev-5.0.2.alpha005-gcc82-aarch64 . # # docker run -it --pids-limit 409600 \ # -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ # -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ # -v /usr/local/dcmi:/usr/local/dcmi \ -# paddlepaddle/paddle:latest-cann5.0.2-gcc82-aarch64-dev /bin/bash +# paddlepaddle/paddle:latest-dev-5.0.2.alpha005-gcc82-aarch64 /bin/bash FROM ubuntu:18.04 MAINTAINER PaddlePaddle Authors @@ -126,17 +129,19 @@ COPY ascend_install.info /etc/ascend_install.info RUN mkdir -p /usr/local/Ascend/driver COPY version.info /usr/local/Ascend/driver/version.info -# Packages from https://www.hiascend.com/software/cann/community +# Download packages from https://www.hiascend.com/software/cann/community and copy them to current dir first WORKDIR /usr/local/Ascend +ARG CANN_VERSION=5.0.2.alpha005 # update envs for driver ENV LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64:$LD_LIBRARY_PATH ENV LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common:$LD_LIBRARY_PATH ENV LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/driver:$LD_LIBRARY_PATH # Install Ascend toolkit -COPY Ascend-cann-toolkit_5.0.2.alpha003_linux-aarch64.run /usr/local/Ascend/ -RUN ./Ascend-cann-toolkit_5.0.2.alpha003_linux-aarch64.run --install --quiet -RUN rm -rf Ascend-cann-toolkit_5.0.2.alpha003_linux-aarch64.run +COPY Ascend-cann-toolkit_${CANN_VERSION}_linux-aarch64.run /usr/local/Ascend/ +RUN chmod +x Ascend-cann-toolkit_${CANN_VERSION}_linux-aarch64.run && \ + ./Ascend-cann-toolkit_${CANN_VERSION}_linux-aarch64.run --install --quiet && \ + rm -rf Ascend-cann-toolkit_${CANN_VERSION}_linux-aarch64.run # udpate envs for model transformation and operator develop ENV PATH=/usr/local/Ascend/ascend-toolkit/latest/atc/bin:$PATH ENV LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/atc/lib64:$LD_LIBRARY_PATH @@ -146,9 +151,10 @@ ENV PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/toolkit/python/site-packa ENV TOOLCHAIN_HOME=/usr/local/Ascend/ascend-toolkit/latest/toolkit # Install Ascend NNAE -COPY Ascend-cann-nnae_5.0.2.alpha003_linux-aarch64.run /usr/local/Ascend/ -RUN 
./Ascend-cann-nnae_5.0.2.alpha003_linux-aarch64.run --install --quiet -RUN rm -rf Ascend-cann-nnae_5.0.2.alpha003_linux-aarch64.run +COPY Ascend-cann-nnae_${CANN_VERSION}_linux-aarch64.run /usr/local/Ascend/ +RUN chmod +x Ascend-cann-nnae_${CANN_VERSION}_linux-aarch64.run && \ + ./Ascend-cann-nnae_${CANN_VERSION}_linux-aarch64.run --install --quiet && \ + rm -rf Ascend-cann-nnae_${CANN_VERSION}_linux-aarch64.run # update envs for third party AI framework develop ENV PATH=/usr/local/Ascend/nnae/latest/fwkacllib/bin:$PATH diff --git a/tools/dockerfile/Dockerfile.npu_x86_64 b/tools/dockerfile/Dockerfile.npu_x86_64 new file mode 100644 index 00000000000000..6689deedf4b3bf --- /dev/null +++ b/tools/dockerfile/Dockerfile.npu_x86_64 @@ -0,0 +1,175 @@ +# A image for building paddle binaries +# Use cann 5.0.2.alpha005 and x86_64 for A300t-9000 +# Update CANN_VERSION if using other versions +# +# Build: CANN 5.0.2.alpha005 +# Download pkgs from https://www.hiascend.com/software/cann/community +# and copy them to current dir first, then run build commands +# cd Paddle/tools/dockerfile +# docker build -f Dockerfile.npu_x86_64 \ +# --build-arg CANN_VERSION=5.0.2.alpha005 \ +# -t paddlepaddle/paddle:latest-dev-5.0.2.alpha005-gcc82-x86_64 . +# +# docker run -it --pids-limit 409600 \ +# -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ +# -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ +# -v /usr/local/dcmi:/usr/local/dcmi \ +# paddlepaddle/paddle:latest-dev-5.0.2.alpha005-gcc82-x86_64 /bin/bash + +FROM ubuntu:18.04 +MAINTAINER PaddlePaddle Authors + +RUN apt-get update && apt-get install -y apt-utils +RUN ln -snf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata +RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository ppa:deadsnakes/ppa && add-apt-repository ppa:ubuntu-toolchain-r/test +RUN apt-get update && apt-get install -y curl wget vim git unzip unrar tar xz-utils libssl-dev bzip2 gzip make libgcc-s1 sudo openssh-server \ + coreutils ntp language-pack-zh-hans python-qt4 libsm6 libxext6 libxrender-dev libgl1-mesa-glx libsqlite3-dev libopenblas-dev \ + bison graphviz libjpeg-dev zlib1g zlib1g-dev automake locales swig net-tools libtool module-init-tools numactl libnuma-dev \ + openssl libffi-dev pciutils libblas-dev gfortran libblas3 liblapack-dev liblapack3 default-jre screen tmux gdb lldb gcc g++ + +# GCC 8.2 +WORKDIR /opt +RUN wget -q https://paddle-ci.gz.bcebos.com/gcc-8.2.0.tar.xz && \ + tar -xvf gcc-8.2.0.tar.xz && cd gcc-8.2.0 && \ + unset LIBRARY_PATH CPATH C_INCLUDE_PATH PKG_CONFIG_PATH CPLUS_INCLUDE_PATH INCLUDE && \ + ./contrib/download_prerequisites && \ + cd .. && mkdir temp_gcc82 && cd temp_gcc82 && \ + ../gcc-8.2.0/configure --prefix=/opt/compiler/gcc-8.2 --enable-threads=posix --disable-checking --disable-multilib && \ + make -j8 && make install && \ + cd .. 
&& rm -rf temp_gcc82 && rm -rf gcc-8.2.0* && \ + cd /usr/lib/x86_64-linux-gnu && \ + mv libstdc++.so.6 libstdc++.so.6.bak && mv libstdc++.so.6.0.25 libstdc++.so.6.0.25.bak && \ + ln -s /opt/compiler/gcc-8.2/lib64/libgfortran.so.5 /usr/lib/x86_64-linux-gnu/libstdc++.so.5 && \ + ln -s /opt/compiler/gcc-8.2/lib64/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 && \ + cp /opt/compiler/gcc-8.2/lib64/libstdc++.so.6.0.25 /usr/lib/x86_64-linux-gnu && \ + cd /usr/bin && mv gcc gcc.bak && mv g++ g++.bak && \ + ln -s /opt/compiler/gcc-8.2/bin/gcc /usr/bin/gcc && \ + ln -s /opt/compiler/gcc-8.2/bin/g++ /usr/bin/g++ +ENV PATH=/opt/compiler/gcc-8.2/bin:$PATH +ENV LD_LIBRARY_PATH=/opt/compiler/gcc-8.2/lib:/opt/compiler/gcc-8.2/lib64:$LD_LIBRARY_PATH + +# cmake 3.16 +WORKDIR /opt +RUN wget -q https://cmake.org/files/v3.16/cmake-3.16.0-Linux-x86_64.tar.gz && \ + tar -zxvf cmake-3.16.0-Linux-x86_64.tar.gz && rm cmake-3.16.0-Linux-x86_64.tar.gz && \ + mv cmake-3.16.0-Linux-x86_64 cmake-3.16 +ENV PATH=/opt/cmake-3.16/bin:${PATH} + +# conda 4.9.2 +WORKDIR /opt +ARG CONDA_FILE=Miniconda3-py37_4.9.2-Linux-x86_64.sh +RUN cd /opt && wget -q https://repo.anaconda.com/miniconda/${CONDA_FILE} && chmod +x ${CONDA_FILE} +RUN mkdir /opt/conda && ./${CONDA_FILE} -b -f -p "/opt/conda" && rm -rf ${CONDA_FILE} +ENV PATH=/opt/conda/bin:${PATH} +RUN conda init bash && conda install -n base jupyter jupyterlab + +# install pylint and pre-commit +RUN /opt/conda/bin/pip install pre-commit pylint pytest astroid isort coverage qtconsole +# install CANN 5.0.2 requirement +RUN /opt/conda/bin/pip install 'numpy<1.20,>=1.13.3' 'decorator>=4.4.0' 'sympy>=1.4' 'cffi>=1.12.3' 'protobuf>=3.11.3' +RUN /opt/conda/bin/pip install attrs pyyaml pathlib2 scipy requests psutil + +# install Paddle requirement +RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt +RUN /opt/conda/bin/pip install -r /root/requirements.txt && rm -rf /root/requirements.txt +RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/unittest_py/requirements.txt -O /root/requirements.txt +RUN /opt/conda/bin/pip install -r /root/requirements.txt && rm -rf /root/requirements.txt + +# Install Go and glide +RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.8.1.linux-amd64.tar.gz | \ + tar -xz -C /usr/local && \ + mkdir /root/gopath && \ + mkdir /root/gopath/bin && \ + mkdir /root/gopath/src +ENV GOROOT=/usr/local/go GOPATH=/root/gopath +# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. +ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin +# install glide +RUN curl -s -q https://glide.sh/get | sh + +# git credential to skip password typing +RUN git config --global credential.helper store + +# Fix locales to en_US.UTF-8 +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + +RUN apt-get install libprotobuf-dev -y + +# Older versions of patchelf limited the size of the files being processed, which was fixed in the PR below. +# https://github.com/NixOS/patchelf/commit/ba2695a8110abbc8cc6baf0eea819922ee5007fa +# So install a newer version here. +RUN wget -q https://paddle-ci.cdn.bcebos.com/patchelf_0.10-2_amd64.deb && \ + dpkg -i patchelf_0.10-2_amd64.deb && rm -rf patchelf_0.10-2_amd64.deb + +# Configure OpenSSH server. c.f.
https://docs.docker.com/engine/examples/running_ssh_service +RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config +CMD source ~/.bashrc + +# ccache 3.7.9 +RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \ + tar xf ccache-3.7.9.tar.gz && mkdir /usr/local/ccache-3.7.9 && cd ccache-3.7.9 && \ + ./configure -prefix=/usr/local/ccache-3.7.9 && \ + make -j8 && make install && cd .. && rm -rf ccache-3.7.9* && \ + ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache + +# clang-format 3.8.0 +RUN wget https://paddle-ci.cdn.bcebos.com/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz && \ + tar xf clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz && cd clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04 && \ + cp -r * /usr/local && cd .. && rm -rf clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04 && \ + rm -rf clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-16.04.tar.xz + +# HwHiAiUser +RUN groupadd HwHiAiUser && \ + useradd -g HwHiAiUser -m -d /home/HwHiAiUser HwHiAiUser + +# copy /etc/ascend_install.info to current dir first +COPY ascend_install.info /etc/ascend_install.info + +# copy /usr/local/Ascend/driver/version.info to current dir first +RUN mkdir -p /usr/local/Ascend/driver +COPY version.info /usr/local/Ascend/driver/version.info + +# Download packages from https://www.hiascend.com/software/cann/community and copy them to current dir first +WORKDIR /usr/local/Ascend +ARG CANN_VERSION=5.0.2.alpha005 +# update envs for driver +ENV LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/common:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64/driver:$LD_LIBRARY_PATH + +# Install Ascend toolkit +COPY Ascend-cann-toolkit_${CANN_VERSION}_linux-x86_64.run /usr/local/Ascend/ +RUN chmod +x Ascend-cann-toolkit_${CANN_VERSION}_linux-x86_64.run && \ + ./Ascend-cann-toolkit_${CANN_VERSION}_linux-x86_64.run --install --quiet && \ + rm -rf Ascend-cann-toolkit_${CANN_VERSION}_linux-x86_64.run +# update envs for model transformation and operator develop +ENV PATH=/usr/local/Ascend/ascend-toolkit/latest/atc/bin:$PATH +ENV LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/atc/lib64:$LD_LIBRARY_PATH +ENV PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/pyACL/python/site-packages/acl:$PYTHONPATH +ENV PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/atc/python/site-packages:$PYTHONPATH +ENV PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/toolkit/python/site-packages:$PYTHONPATH +ENV TOOLCHAIN_HOME=/usr/local/Ascend/ascend-toolkit/latest/toolkit + +# Install Ascend NNAE +COPY Ascend-cann-nnae_${CANN_VERSION}_linux-x86_64.run /usr/local/Ascend/ +RUN chmod +x Ascend-cann-nnae_${CANN_VERSION}_linux-x86_64.run && \ + ./Ascend-cann-nnae_${CANN_VERSION}_linux-x86_64.run --install --quiet && \ + rm -rf Ascend-cann-nnae_${CANN_VERSION}_linux-x86_64.run +# update envs for third party AI framework develop +ENV PATH=/usr/local/Ascend/nnae/latest/fwkacllib/bin:$PATH +ENV PATH=/usr/local/Ascend/nnae/latest/fwkacllib/ccec_compiler/bin:$PATH +ENV LD_LIBRARY_PATH=/usr/local/Ascend/nnae/latest/fwkacllib/lib64:$LD_LIBRARY_PATH +ENV PYTHONPATH=/usr/local/Ascend/nnae/latest/fwkacllib/python/site-packages:$PYTHONPATH +ENV ASCEND_AICPU_PATH=/usr/local/Ascend/nnae/latest +ENV ASCEND_OPP_PATH=/usr/local/Ascend/nnae/latest/opp + +# DEV image should enable error level log +# 0 debug; 1 info; 2
warning; 3 error; 4 null +ENV ASCEND_GLOBAL_LOG_LEVEL=3 +RUN rm -rf /usr/local/Ascend/driver + +# Clean +RUN apt-get clean -y + +EXPOSE 22 diff --git a/tools/dockerfile/build_scripts/install_trt.sh b/tools/dockerfile/build_scripts/install_trt.sh index 69552871211fd6..a461e2a4f24b33 100644 --- a/tools/dockerfile/build_scripts/install_trt.sh +++ b/tools/dockerfile/build_scripts/install_trt.sh @@ -16,6 +16,16 @@ VERSION=$(nvcc --version | grep release | grep -oEi "release ([0-9]+)\.([0-9])"| sed "s/release //") +CUDNN_MAJOR=$(cat /usr/include/cudnn.h | grep -v CUDNN_VERSION | grep CUDNN_MAJOR | cut -d' ' -f3) +CUDNN_MINOR=$(cat /usr/include/cudnn.h | grep -v CUDNN_VERSION | grep CUDNN_MINOR | cut -d' ' -f3) +CUDNN_PATCHLEVEL=$(cat /usr/include/cudnn.h | grep -v CUDNN_VERSION | grep CUDNN_PATCHLEVEL | cut -d' ' -f3) +if [[ -z "${CUDNN_MAJOR}" ]]; then + CUDNN_MAJOR=$(cat /usr/include/cudnn_version.h | grep -v CUDNN_VERSION | grep CUDNN_MAJOR | cut -d' ' -f3) + CUDNN_MINOR=$(cat /usr/include/cudnn_version.h | grep -v CUDNN_VERSION | grep CUDNN_MINOR | cut -d' ' -f3) + CUDNN_PATCHLEVEL=$(cat /usr/include/cudnn_version.h | grep -v CUDNN_VERSION | grep CUDNN_PATCHLEVEL | cut -d' ' -f3) +fi +CUDNN_VERSION="${CUDNN_MAJOR}.${CUDNN_MINOR}.${CUDNN_PATCHLEVEL}" + if [[ "$VERSION" == "10.1" ]];then wget -q https://paddle-ci.gz.bcebos.com/TRT/TensorRT6-cuda10.1-cudnn7.tar.gz --no-check-certificate tar -zxf TensorRT6-cuda10.1-cudnn7.tar.gz -C /usr/local @@ -36,7 +46,12 @@ elif [[ "$VERSION" == "11.0" ]];then tar -zxf TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz -C /usr/local cp -rf /usr/local/TensorRT-7.1.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.1.3.4/lib/* /usr/lib/ rm TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz -elif [[ "$VERSION" == "10.2" ]];then +elif [[ "$VERSION" == "10.2" && "$CUDNN_VERSION" == "7.6.5" ]];then + wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT-6.0.1.8.CentOS-7.6.x86_64-gnu.cuda-10.2.cudnn7.6.tar.gz --no-check-certificate + tar -zxf TensorRT-6.0.1.8.CentOS-7.6.x86_64-gnu.cuda-10.2.cudnn7.6.tar.gz -C /usr/local + cp -rf /usr/local/TensorRT-6.0.1.8/include/* /usr/include/ && cp -rf /usr/local/TensorRT-6.0.1.8/lib/* /usr/lib/ + rm -f TensorRT-6.0.1.8.CentOS-7.6.x86_64-gnu.cuda-10.2.cudnn7.6.tar.gz +elif [[ "$VERSION" == "10.2" && "$CUDNN_VERSION" == "8.1.1" ]];then wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT-7.2.3.4.CentOS-7.9.x86_64-gnu.cuda-10.2.cudnn8.1.tar.gz --no-check-certificate tar -zxf TensorRT-7.2.3.4.CentOS-7.9.x86_64-gnu.cuda-10.2.cudnn8.1.tar.gz -C /usr/local cp -rf /usr/local/TensorRT-7.2.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.2.3.4/lib/* /usr/lib/ diff --git a/tools/dockerfile/centos7_manylinux.sh b/tools/dockerfile/centos7_manylinux.sh index 9710ec02320951..d268341db9ae28 100755 --- a/tools/dockerfile/centos7_manylinux.sh +++ b/tools/dockerfile/centos7_manylinux.sh @@ -42,6 +42,11 @@ function make_cuda102cudnn7() { sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp } +function make_cuda102cudnn7gcc54() { + sed 's//10.2-cudnn7-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp + sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc54 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV 
PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp +} + function make_cuda102cudnn8() { sed 's//10.2-cudnn8-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp @@ -94,6 +99,9 @@ function main() { cuda102cudnn7) make_cuda102cudnn7 ;; + cuda102cudnn7gcc54) + make_cuda102cudnn7gcc54 + ;; cuda102cudnn8) make_cuda102cudnn8 ;; diff --git a/tools/get_quick_disable_lt.py b/tools/get_quick_disable_lt.py index 4805c909c1ba41..f56e8c24e8f752 100644 --- a/tools/get_quick_disable_lt.py +++ b/tools/get_quick_disable_lt.py @@ -15,6 +15,7 @@ import sys import ssl import requests +import paddle def download_file(): @@ -25,13 +26,13 @@ def download_file(): url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_win') else: url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut') - try: - import paddle.fluid.core as core - if core.is_compiled_with_rocm(): - url = "https://sys-p0.bj.bcebos.com/prec/{}".format( - 'disable_ut_rocm_ci') - except: - pass + + if paddle.is_compiled_with_rocm(): + url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_rocm') + + if paddle.is_compiled_with_npu(): + url = "https://sys-p0.bj.bcebos.com/prec/{}".format('disable_ut_npu') + f = requests.get(url) data = f.text status_code = f.status_code diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 5108d34f7bf779..fe0be21bfdf44e 100644 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -685,7 +685,6 @@ 'test_nn_functional_hot_op', 'test_op_name_conflict', 'test_imperative_gan', - 'test_simnet', 'test_amp_check_finite_and_scale_op', 'test_random_seed', 'test_histogram_op', @@ -819,7 +818,6 @@ 'test_prelu_op', 'test_fill_zeros_like_op', 'test_pool2d_op', - 'test_for_enumerate', 'test_gather_op', 'test_partial_concat_op', 'test_gaussian_random_op', @@ -883,7 +881,6 @@ 'test_empty_like_op', 'test_rank_loss_op', 'test_elementwise_mod_op', - 'test_reinforcement_learning', 'test_elementwise_max_op', 'test_retain_graph', 'test_edit_distance_op', @@ -1001,7 +998,6 @@ 'test_static_save_load', 'test_coalesce_tensor_op', 'test_fuse_bn_act_pass', - 'test_simnet_v2', 'test_shard_index_op', 'test_cuda_random_seed', 'test_dequantize_log_op', @@ -1023,7 +1019,6 @@ 'test_py_reader_pin_memory', 'test_train_recognize_digits', 'test_parallel_executor_feed_persistable_var', - 'test_mnist', 'test_update_loss_scaling_op', 'test_rnn_cell_api', 'test_imperative_load_static_param',
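In tools/get_quick_disable_lt.py, the old try/except around `paddle.fluid.core` is replaced by the public `paddle.is_compiled_with_rocm()` and `paddle.is_compiled_with_npu()` checks, and ROCm and NPU CI now fetch their own disabled-unit-test lists. A condensed sketch of that selection logic; the win32 check is paraphrased, only the bucket file names come from the patch:

```python
import sys
import paddle

def disable_ut_url():
    # default list, with a separate list for Windows CI (paraphrased condition)
    name = 'disable_ut_win' if sys.platform == 'win32' else 'disable_ut'
    if paddle.is_compiled_with_rocm():   # public check replaces the old
        name = 'disable_ut_rocm'         # try/except around fluid.core
    if paddle.is_compiled_with_npu():
        name = 'disable_ut_npu'
    return "https://sys-p0.bj.bcebos.com/prec/{}".format(name)
```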