From 74e2a21140781fa945de2208c01986221507c017 Mon Sep 17 00:00:00 2001 From: shentanyue Date: Wed, 10 May 2023 18:43:10 +0800 Subject: [PATCH 01/14] support model optimized --- paddle/fluid/framework/ir/CMakeLists.txt | 1 + .../framework/ir/save_optimized_model_pass.cc | 123 ++++++++++++++++++ .../framework/ir/save_optimized_model_pass.h | 36 +++++ .../ir/xpu/delete_isolated_node_pass.cc | 2 + paddle/fluid/inference/analysis/argument.h | 3 +- .../inference/analysis/ir_pass_manager.cc | 26 +++- paddle/fluid/inference/api/analysis_config.cc | 8 +- .../fluid/inference/api/analysis_predictor.cc | 3 +- .../inference/api/paddle_analysis_config.h | 4 +- .../inference/api/paddle_pass_builder.cc | 1 + paddle/fluid/pybind/inference_api.cc | 3 +- 11 files changed, 204 insertions(+), 6 deletions(-) create mode 100644 paddle/fluid/framework/ir/save_optimized_model_pass.cc create mode 100644 paddle/fluid/framework/ir/save_optimized_model_pass.h diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 5eb94dbbce435..4c0ba6183ebe0 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -127,6 +127,7 @@ pass_library(dense_multihead_matmul_to_sparse_pass inference) pass_library(delete_cast_op_pass inference) pass_library(delete_elementwise_mul_op_pass inference) pass_library(delete_repeated_ops_pass inference) +pass_library(save_optimized_model_pass inference) pass_library(generate_pass DEPS pass_desc_proto) target_link_libraries(generate_pass pass_desc_proto) diff --git a/paddle/fluid/framework/ir/save_optimized_model_pass.cc b/paddle/fluid/framework/ir/save_optimized_model_pass.cc new file mode 100644 index 0000000000000..8ea5efe06a4ef --- /dev/null +++ b/paddle/fluid/framework/ir/save_optimized_model_pass.cc @@ -0,0 +1,123 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#include "paddle/fluid/framework/ir/save_optimized_model_pass.h"
+#include "paddle/fluid/framework/block_desc.h"
+#include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/phi/common/backend.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/errors.h"
+
+namespace paddle {
+namespace framework {
+class ProgramDesc;
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void SaveOptimizedModelPass::ApplyImpl(ir::Graph* graph) const {
+  if (!Has("save_optimized_model") || !Get<bool>("save_optimized_model"))
+    return;
+
+  std::string model_opt_cache_dir = Get<std::string>("model_opt_cache_dir");
+  auto& scope = graph->Get<framework::Scope>(kParamScopeAttr);
+  framework::ProgramDesc optimized_program_desc;
+  framework::ir::GraphToProgram(*graph, &optimized_program_desc);
+
+  auto IsPersistable = [](const framework::VarDesc* var) {
+    if (var->Persistable() &&
+        var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
+        var->GetType() != framework::proto::VarType::FETCH_LIST &&
+        var->GetType() != framework::proto::VarType::RAW) {
+      return true;
+    }
+    return false;
+  };
+
+  auto SerializeParams = [&](const std::string& path) {
+    framework::ProgramDesc save_program;
+    auto* save_block = save_program.MutableBlock(0);
+    std::set<std::string> save_var_set;
+    for (size_t i = 0; i < optimized_program_desc.Size(); ++i) {
+      const auto& global_block = optimized_program_desc.Block(i);
+      for (framework::VarDesc* var : global_block.AllVars()) {
+        if (IsPersistable(var)) {
+          framework::VarDesc* new_var = save_block->Var(var->Name());
+          new_var->SetShape(var->GetShape());
+          new_var->SetDataType(var->GetDataType());
+          new_var->SetType(var->GetType());
+          new_var->SetLoDLevel(var->GetLoDLevel());
+          new_var->SetPersistable(true);
+          save_var_set.insert(new_var->Name());
+        }
+      }
+    }
+
+    std::string save_params_path = path + "/" + "optimized.pdiparams";
+    std::vector<std::string> save_var_list(save_var_set.begin(),
+                                           save_var_set.end());
+    std::sort(save_var_list.begin(), save_var_list.end());
+    auto* op = save_block->AppendOp();
+    op->SetType("save_combine");
+    op->SetInput("X", save_var_list);
+    op->SetAttr("file_path", save_params_path);
+    op->CheckAttrs();
+
+    framework::Executor exe(platform::CPUPlace{});
+    exe.Run(save_program, &scope, 0, true, true);
+  };
+  // TODO(shentanyue01): Setting hardware and version identification for
+  // optimized models.
+
+  auto SerializeProg = [&](const std::string& path) {
+    // All persistable vars need to be moved to the global block.
+    auto* global_block = optimized_program_desc.MutableBlock(0);
+    for (size_t i = 1; i < optimized_program_desc.Size(); ++i) {
+      const auto& sub_block = optimized_program_desc.Block(i);
+      for (framework::VarDesc* var : sub_block.AllVars()) {
+        if (IsPersistable(var) && !global_block->HasVar(var->Name())) {
+          framework::VarDesc* new_var = global_block->Var(var->Name());
+          new_var->SetShape(var->GetShape());
+          new_var->SetDataType(var->GetDataType());
+          new_var->SetType(var->GetType());
+          new_var->SetLoDLevel(var->GetLoDLevel());
+          new_var->SetPersistable(true);
+        }
+      }
+    }
+    std::string save_model_path = path + "/" + "optimized.pdmodel";
+    auto str = optimized_program_desc.Proto()->SerializeAsString();
+    std::ofstream file(save_model_path.c_str(), std::ios::binary);
+    file.write(str.c_str(), str.size());
+    file.close();
+  };
+
+  SerializeProg(model_opt_cache_dir);
+  SerializeParams(model_opt_cache_dir);
+  LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(save_optimized_model_pass,
+              paddle::framework::ir::SaveOptimizedModelPass);
diff --git a/paddle/fluid/framework/ir/save_optimized_model_pass.h b/paddle/fluid/framework/ir/save_optimized_model_pass.h
new file mode 100644
index 0000000000000..87ea3e8535924
--- /dev/null
+++ b/paddle/fluid/framework/ir/save_optimized_model_pass.h
@@ -0,0 +1,36 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <string>
+#include <unordered_set>
+
+#include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+class Graph;
+
+class SaveOptimizedModelPass : public Pass {
+ protected:
+  void ApplyImpl(ir::Graph* graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc b/paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc
index c543045b5bc95..9f12ad5fc5191 100644
--- a/paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc
+++ b/paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc
@@ -99,6 +99,7 @@ void DeleteIsolatedNodePass::CollectReservedPersistableNodeNames(
     Graph* graph,
     std::unordered_set<std::string>* reserved_persistable_node_names) const {
   for (auto* node : graph->Nodes()) {
+    if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
    if (!node->IsVar() || !node->Var()->Persistable()) continue;
     for (auto* out_node : node->outputs) {
       auto op_type = out_node->Op()->Type();
@@ -131,6 +132,7 @@ int DeleteIsolatedNodePass::RemoveIsolatedNodes(
   std::unordered_set<const Node*> delete_nodes;
   const std::unordered_set<ir::Node*> nodes = graph->Nodes();
   for (auto* node : nodes) {
+    if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
     if (!node->IsVar() || !node->Var()->Persistable()) continue;
     auto name = node->Var()->Name();
     if (reserved_persistable_node_names.count(name) > 0) continue;
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 8a43229af7971..d6ed028293715 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -146,6 +146,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
   DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
   DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
+  DECL_ARGUMENT_FIELD(save_optimized_model, SaveOptimizedModel, bool);
   DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
   DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);
@@ -294,7 +295,7 @@ struct Argument {
                       XpuQuantPostDynamicWeightBits,
                       int);
   DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_op_types,
-                      XpuQuantPostDynamicOpTypss,
+                      XpuQuantPostDynamicOpTypes,
                       std::vector<std::string>);
   DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 4051511906b1b..a6d50622a24c3 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -310,7 +310,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       }
       bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
       pass->Set("use_fc_padding", new bool(use_fc_padding));
-    } else if (pass_name == "fused_multi_transformer_xpu_quant_pass") {
+    } else if (pass_name == "fused_multi_transformer_xpu_pass") {
       auto op_types = argument->xpu_quant_post_dynamic_op_types();
       if (std::count(op_types.begin(),
                      op_types.end(),
@@ -318,6 +318,30 @@ void IRPassManager::CreatePasses(Argument *argument,
         pass->Set("quant_weight_bits",
                   new int(argument->xpu_quant_post_dynamic_weight_bits()));
       }
+    } else if (pass_name == "save_optimized_model_pass") {
+      pass->Set("save_optimized_model",
+                new bool(argument->save_optimized_model()));
+      std::string optim_cache_dir = argument->optim_cache_dir();
+      if (!optim_cache_dir.empty()) {
+        if (!PathExists(optim_cache_dir)) {
+          PADDLE_ENFORCE_NE(
+              MKDIR(optim_cache_dir.c_str()),
+              -1,
+              platform::errors::PreconditionNotMet(
+                  "Can not create optimize cache directory: %s, Make sure you "
+                  "have permission to write",
+                  optim_cache_dir));
+        }
+        pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
+      } else {
+        std::string model_opt_cache_dir =
+            argument->Has("model_dir")
+                ? argument->model_dir()
+                : GetDirRoot(argument->model_program_path());
+        pass->Set("model_opt_cache_dir",
+                  new std::string(GetOrCreateModelOptCacheDir(
+                      std::move(model_opt_cache_dir))));
+      }
     }
     pre_pass = pass_name;
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 4f73fb23c6a5a..1920db7aaaee9 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -199,9 +199,11 @@ void AnalysisConfig::SetXpuDeviceId(int device_id) {
 
 void AnalysisConfig::SetXpuConfig(
     int quant_post_dynamic_weight_bits,
-    const std::vector<std::string> &quant_post_dynamic_op_types) {
+    const std::vector<std::string> &quant_post_dynamic_op_types,
+    bool save_optimized_model) {
   xpu_quant_post_dynamic_weight_bits_ = quant_post_dynamic_weight_bits;
   xpu_quant_post_dynamic_op_types_ = quant_post_dynamic_op_types;
+  save_optimized_model_ = save_optimized_model;
   Update();
 }
@@ -501,6 +503,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(xpu_enable_multi_stream_);
   CP_MEMBER(xpu_quant_post_dynamic_weight_bits_);
   CP_MEMBER(xpu_quant_post_dynamic_op_types_);
+  CP_MEMBER(save_optimized_model_);
 
   // Lite OpenCL Related
   CP_MEMBER(use_opencl_);
@@ -1110,6 +1113,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << xpu_adaptive_seqlen_;
   ss << xpu_enable_multi_stream_;
   ss << xpu_quant_post_dynamic_weight_bits_;
+  ss << save_optimized_model_;
   for (auto op_type : xpu_quant_post_dynamic_op_types_) {
     ss << op_type;
   }
@@ -1373,6 +1377,8 @@ std::string AnalysisConfig::Summary() {
   os.InsertRow({"use_cinn_compiler", use_cinn_compiler_ ? "true" : "false"});
 
   // ir info
+  os.InsertRow(
+      {"save_optimized_model", save_optimized_model_ ? "true" : "false"});
   os.InsertRow({"ir_optim", enable_ir_optim_ ? "true" : "false"});
   os.InsertRow({"ir_debug", ir_debug_ ? "true" : "false"});
   os.InsertRow({"memory_optim", enable_memory_optim_ ? "true" : "false"});
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 5495f929e8895..7e93678a46d98 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1316,6 +1316,7 @@ void AnalysisPredictor::PrepareArgument() {
   // Analyze inference_program
   argument_->SetPredictorID(predictor_id_);
   argument_->SetRootPredictorID(root_predictor_id_);
+  argument_->SetSaveOptimizedModel(config_.save_optimized_model_);
   argument_->SetOptimCacheDir(config_.opt_cache_dir_);
   if (!config_.model_dir().empty()) {
     argument_->SetModelDir(config_.model_dir());
@@ -1485,7 +1486,7 @@ void AnalysisPredictor::PrepareArgument() {
     argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
     argument_->SetXpuQuantPostDynamicWeightBits(
         config_.xpu_quant_post_dynamic_weight_bits_);
-    argument_->SetXpuQuantPostDynamicOpTypss(
+    argument_->SetXpuQuantPostDynamicOpTypes(
         config_.xpu_quant_post_dynamic_op_types_);
 #endif
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index d810442810af7..e460587caa392 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -298,7 +298,8 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void SetXpuConfig(
       int quant_post_dynamic_weight_bits = -1,
-      const std::vector<std::string>& quant_post_dynamic_op_types = {});
+      const std::vector<std::string>& quant_post_dynamic_op_types = {},
+      bool save_optimized_model = false);
 
   ///
   /// \brief configs of IPU
@@ -1200,6 +1201,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool xpu_enable_multi_stream_;
   int xpu_quant_post_dynamic_weight_bits_{-1};
   std::vector<std::string> xpu_quant_post_dynamic_op_types_;
+  bool save_optimized_model_;
 
   // LITE OPENCL SETTINGS
   bool use_opencl_{false};
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index bea0b82ecd494..f447c8c415b36 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -527,6 +527,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) {
       "link_xpu_op_max_pass",
       "inplace_op_var_pass",
       "delete_isolated_node_pass",
+      "save_optimized_model_pass",
   });
   use_xpu_ = true;
 }
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index e861c5b5bbe90..150a0c45c6798 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -770,7 +770,8 @@ void BindAnalysisConfig(py::module *m) {
            "set_xpu_config",
           &AnalysisConfig::SetXpuConfig,
           py::arg("quant_post_dynamic_weight_bits") = -1,
-           py::arg("quant_post_dynamic_op_types") = std::vector<std::string>({}))
+           py::arg("quant_post_dynamic_op_types") = std::vector<std::string>({}),
+           py::arg("save_optimized_model") = false)
      .def("enable_custom_device",
           &AnalysisConfig::EnableCustomDevice,
           py::arg("device_type"),

From d0b9a50f0b248a8f61bc4e756a254f40dd5f8251 Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Wed, 10 May 2023 19:51:39 +0800
Subject: [PATCH 02/14] update api

---
 paddle/fluid/inference/api/analysis_config.cc       |  9 +++------
 paddle/fluid/inference/api/paddle_analysis_config.h | 13 ++++++++++---
 paddle/fluid/pybind/inference_api.cc                |  2 ++
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 1920db7aaaee9..1a4a08af8b52a 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -199,11 +199,9 @@ void AnalysisConfig::SetXpuDeviceId(int device_id) {
 
 void AnalysisConfig::SetXpuConfig(
     int quant_post_dynamic_weight_bits,
-    const std::vector<std::string> &quant_post_dynamic_op_types,
-    bool save_optimized_model) {
+    const std::vector<std::string> &quant_post_dynamic_op_types) {
   xpu_quant_post_dynamic_weight_bits_ = quant_post_dynamic_weight_bits;
   xpu_quant_post_dynamic_op_types_ = quant_post_dynamic_op_types;
-  save_optimized_model_ = save_optimized_model;
   Update();
 }
@@ -413,7 +411,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(model_dir_);
   CP_MEMBER(model_from_memory_);  // the memory model reuses prog_file_ and
                                   // params_file_ fields.
-
+  CP_MEMBER(save_optimized_model_);
   CP_MEMBER(opt_cache_dir_);
   CP_MEMBER(prog_file_);
   CP_MEMBER(params_file_);
@@ -503,7 +501,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(xpu_enable_multi_stream_);
   CP_MEMBER(xpu_quant_post_dynamic_weight_bits_);
   CP_MEMBER(xpu_quant_post_dynamic_op_types_);
-  CP_MEMBER(save_optimized_model_);
 
   // Lite OpenCL Related
   CP_MEMBER(use_opencl_);
@@ -1051,6 +1048,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << model_dir_;
   ss << prog_file_;
   ss << params_file_;
+  ss << save_optimized_model_;
 
   ss << use_gpu_;
   ss << enable_gpu_mixed_;
@@ -1113,7 +1111,6 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << xpu_adaptive_seqlen_;
   ss << xpu_enable_multi_stream_;
   ss << xpu_quant_post_dynamic_weight_bits_;
-  ss << save_optimized_model_;
   for (auto op_type : xpu_quant_post_dynamic_op_types_) {
     ss << op_type;
   }
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index e460587caa392..a19a60c061845 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -199,6 +199,14 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void SetParamsFile(const std::string& x) { params_file_ = x; }
 
+  ///
+  /// \brief Save optimized model.
+  ///
+  /// \param save_optimized_model Whether to enable save optimized model.
+  ///
+  void EnableSaveOptimizedModel(bool save_optimized_model) {
+    save_optimized_model_ = save_optimized_model;
+  }
   ///
   /// \brief Set the path of optimization cache directory.
   ///
@@ -298,8 +306,7 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void SetXpuConfig(
       int quant_post_dynamic_weight_bits = -1,
-      const std::vector<std::string>& quant_post_dynamic_op_types = {},
-      bool save_optimized_model = false);
+      const std::vector<std::string>& quant_post_dynamic_op_types = {});
 
   ///
   /// \brief configs of IPU
@@ -1201,7 +1208,6 @@ struct PD_INFER_DECL AnalysisConfig {
   bool xpu_enable_multi_stream_;
   int xpu_quant_post_dynamic_weight_bits_{-1};
   std::vector<std::string> xpu_quant_post_dynamic_op_types_;
-  bool save_optimized_model_;
 
   // LITE OPENCL SETTINGS
   bool use_opencl_{false};
@@ -1257,6 +1263,7 @@ struct PD_INFER_DECL AnalysisConfig {
   // Variables held by config can take up a lot of memory in some cases.
   // So we release the memory when the predictor is set up.
   mutable bool is_valid_{true};
+  bool save_optimized_model_{false};
   std::string opt_cache_dir_;
   friend class paddle_infer::experimental::InternalUtils;
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 150a0c45c6798..35764672ea81b 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -824,6 +824,8 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_profile", &AnalysisConfig::EnableProfile)
       .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
       .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
+      .def("enable_save_optimized_model",
+           &AnalysisConfig::EnableSaveOptimizedModel)
       .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
       .def("switch_use_feed_fetch_ops",
            &AnalysisConfig::SwitchUseFeedFetchOps,
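
As of PATCH 02/14, saving is driven by the new EnableSaveOptimizedModel()
switch rather than SetXpuConfig(). A minimal user-side sketch of the flow at
this point in the series (not part of the patches; "./optim_cache" is an
illustrative placeholder, the API names come from the diffs above):

// Minimal sketch, assuming a directory-style model. The pass writes
// optimized.pdmodel / optimized.pdiparams into the optim cache directory.
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void BuildPredictorAndSaveOptimizedModel(const std::string& model_dir) {
  paddle::AnalysisConfig config;
  config.SetModel(model_dir);
  config.SwitchIrOptim(true);             // there must be an optimized graph to save
  config.EnableSaveOptimizedModel(true);  // name as of PATCH 02/14
  config.SetOptimCacheDir("./optim_cache");
  auto predictor = paddle::CreatePaddlePredictor(config);
  (void)predictor;  // the files are written during analysis, before Run()
}
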

From 8b7783bcb2b291e7ab864ed46d6d77377e10a3ae Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Wed, 10 May 2023 19:53:35 +0800
Subject: [PATCH 03/14] update api

---
 paddle/fluid/inference/analysis/ir_pass_manager.cc | 6 +++---
 paddle/fluid/pybind/inference_api.cc               | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index a6d50622a24c3..8673dbe9c8bf1 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -338,9 +338,9 @@ void IRPassManager::CreatePasses(Argument *argument,
             argument->Has("model_dir")
                 ? argument->model_dir()
                 : GetDirRoot(argument->model_program_path());
-        pass->Set("model_opt_cache_dir",
-                  new std::string(GetOrCreateModelOptCacheDir(
-                      std::move(model_opt_cache_dir))));
+        pass->Set(
+            "model_opt_cache_dir",
+            new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
       }
     }
     pre_pass = pass_name;
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 35764672ea81b..e2847d431ee16 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -770,8 +770,7 @@ void BindAnalysisConfig(py::module *m) {
            "set_xpu_config",
           &AnalysisConfig::SetXpuConfig,
           py::arg("quant_post_dynamic_weight_bits") = -1,
-           py::arg("quant_post_dynamic_op_types") = std::vector<std::string>({}),
-           py::arg("save_optimized_model") = false)
+           py::arg("quant_post_dynamic_op_types") = std::vector<std::string>({}))
      .def("enable_custom_device",
           &AnalysisConfig::EnableCustomDevice,
           py::arg("device_type"),

From 869e08fe2f164cf32d9ea25815093162bf2b0a74 Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Wed, 10 May 2023 20:49:54 +0800
Subject: [PATCH 04/14] add ut

---
 paddle/fluid/framework/ir/CMakeLists.txt               |  4 +
 .../framework/ir/save_optimized_model_pass.cc          |  2 +-
 .../framework/ir/save_optimized_model_pass.h           |  2 +-
 .../ir/save_optimized_model_pass_tester.cc             | 73 +++++++++++++++++++
 .../inference/api/paddle_analysis_config.h             |  2 +-
 5 files changed, 80 insertions(+), 3 deletions(-)
 create mode 100644 paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc

diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index 4c0ba6183ebe0..11528c43c7205 100755
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -322,6 +322,10 @@ cc_test(
   test_graph_pattern_detector
   SRCS graph_pattern_detector_tester.cc
   DEPS graph_pattern_detector)
+cc_test(
+  test_save_optimized_model_pass
+  SRCS save_optimized_model_pass_tester.cc
+  DEPS save_optimized_model_pass)
 cc_test(
   test_op_compat_sensible_pass
   SRCS op_compat_sensible_pass_tester.cc
   DEPS op_compat_sensible_pass)
diff --git a/paddle/fluid/framework/ir/save_optimized_model_pass.cc b/paddle/fluid/framework/ir/save_optimized_model_pass.cc
index 8ea5efe06a4ef..bbb4efda8706b 100644
--- a/paddle/fluid/framework/ir/save_optimized_model_pass.cc
+++ b/paddle/fluid/framework/ir/save_optimized_model_pass.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/paddle/fluid/framework/ir/save_optimized_model_pass.h b/paddle/fluid/framework/ir/save_optimized_model_pass.h
index 87ea3e8535924..247bae959badc 100644
--- a/paddle/fluid/framework/ir/save_optimized_model_pass.h
+++ b/paddle/fluid/framework/ir/save_optimized_model_pass.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc b/paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc
new file mode 100644
index 0000000000000..4102ffecb6c40
--- /dev/null
+++ b/paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc
@@ -0,0 +1,73 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/framework/ir/pass_tester_helper.h"
+#include "paddle/fluid/inference/analysis/helper.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void AddVarToScope(Scope* param_scope,
+                   const std::string& name,
+                   const DDim& dims) {
+  auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
+  tensor->Resize(dims);
+  auto* cpu_ctx = static_cast<phi::CPUContext*>(
+      platform::DeviceContextPool::Instance().Get(phi::CPUPlace()));
+  cpu_ctx->Alloc<float>(tensor);
+}
+
+VarDesc* Data(paddle::framework::BlockDesc* block,
+              std::string name,
+              std::vector<int64_t> shape = {},
+              bool is_persistable = false,
+              proto::VarType::Type data_type = proto::VarType::FP32) {
+  auto* var = block->Var(name);
+  var->SetType(proto::VarType::LOD_TENSOR);
+  var->SetDataType(data_type);
+  var->SetShape(shape);
+  var->SetPersistable(is_persistable);
+  return var;
+}
+
+TEST(SaveOptimizedModelPass, basic) {
+  paddle::framework::ProgramDesc program;
+  auto* block = program.MutableBlock(0);
+  auto* lookup_table_w = Data(block, "lookup_table_w", {1}, true);
+  auto* lookup_table_out = Data(block, "scatter_out", {1});
+  OpDesc* lookup_table = block->AppendOp();
+  lookup_table->SetType("lookup_table_v2");
+  lookup_table->SetInput("W", {lookup_table_w->Name()});
+  lookup_table->SetOutput("Out", {lookup_table_out->Name()});
+
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(program));
+  auto scope = new Scope();
+  AddVarToScope(scope, lookup_table_w->Name(), {1});
+  graph->Set("__param_scope__", scope);
+
+  auto save_optimized_model_pass =
+      PassRegistry::Instance().Get("save_optimized_model_pass");
+  save_optimized_model_pass->Set("save_optimized_model", new bool(true));
+  save_optimized_model_pass->Set("model_opt_cache_dir", new std::string(""));
+  save_optimized_model_pass->Apply(graph.get());
+}
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(save_optimized_model_pass);
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index a19a60c061845..bd52737bfa935 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -202,7 +202,7 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   /// \brief Save optimized model.
   ///
-  /// \param save_optimized_model Whether to enable save optimized model.
+  /// \param save_optimized_model whether to enable save optimized model.
   ///
   void EnableSaveOptimizedModel(bool save_optimized_model) {
     save_optimized_model_ = save_optimized_model;

From 1e4d499a8fe1a5654747f56a7dd35b2263a5ba32 Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Wed, 10 May 2023 21:07:54 +0800
Subject: [PATCH 05/14] fix

---
 paddle/fluid/pybind/inference_api.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index e2847d431ee16..cfdb7b597823e 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -824,7 +824,8 @@ void BindAnalysisConfig(py::module *m) {
       .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
       .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
       .def("enable_save_optimized_model",
-           &AnalysisConfig::EnableSaveOptimizedModel)
+           &AnalysisConfig::EnableSaveOptimizedModel,
+           py::arg("x") = false)
       .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
       .def("switch_use_feed_fetch_ops",
            &AnalysisConfig::SwitchUseFeedFetchOps,

From 9d43ee136fe2116d93ecfa64aa3de327a23ecbc1 Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Thu, 11 May 2023 10:32:51 +0800
Subject: [PATCH 06/14] fix ut

---
 paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc b/paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc
index 4102ffecb6c40..0a9b394bc9d5d 100644
--- a/paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc
+++ b/paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc
@@ -62,7 +62,7 @@ TEST(SaveOptimizedModelPass, basic) {
   auto save_optimized_model_pass =
       PassRegistry::Instance().Get("save_optimized_model_pass");
   save_optimized_model_pass->Set("save_optimized_model", new bool(true));
-  save_optimized_model_pass->Set("model_opt_cache_dir", new std::string(""));
+  save_optimized_model_pass->Set("model_opt_cache_dir", new std::string("./"));
   save_optimized_model_pass->Apply(graph.get());
 }

From 358baa03f2003d4be65ffdeebf637fe4beff379a Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Thu, 11 May 2023 18:18:25 +0800
Subject: [PATCH 07/14] change to analysis pass

---
 paddle/fluid/framework/ir/CMakeLists.txt           |  5 --
 paddle/fluid/inference/analysis/analyzer.cc        |  7 ++-
 .../inference/analysis/ir_pass_manager.cc          | 26 +--------
 .../inference/analysis/passes/CMakeLists.txt       |  5 ++
 .../fluid/inference/analysis/passes/passes.cc      |  3 ++
 .../passes}/save_optimized_model_pass.cc           | 53 +++++++++++--------
 .../passes}/save_optimized_model_pass.h            | 25 ++++-----
 .../save_optimized_model_pass_tester.cc            |  0
 .../inference/api/paddle_pass_builder.cc           |  1 -
 .../fluid/inference/api/paddle_pass_builder.h      |  1 +
 .../cpp/inference/analysis/analyzer_tester.cc      | 14 +++++
 11 files changed, 74 insertions(+), 64 deletions(-)
 rename paddle/fluid/{framework/ir => inference/analysis/passes}/save_optimized_model_pass.cc (77%)
 rename paddle/fluid/{framework/ir => inference/analysis/passes}/save_optimized_model_pass.h (67%)
 rename paddle/fluid/{framework/ir => inference/analysis/passes}/save_optimized_model_pass_tester.cc (100%)

diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index 11528c43c7205..5eb94dbbce435 100755
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -127,7 +127,6 @@ pass_library(dense_multihead_matmul_to_sparse_pass inference)
 pass_library(delete_cast_op_pass inference)
 pass_library(delete_elementwise_mul_op_pass inference)
 pass_library(delete_repeated_ops_pass inference)
-pass_library(save_optimized_model_pass inference)
 pass_library(generate_pass DEPS pass_desc_proto)
 target_link_libraries(generate_pass pass_desc_proto)
@@ -322,10 +321,6 @@ cc_test(
   test_graph_pattern_detector
   SRCS graph_pattern_detector_tester.cc
   DEPS graph_pattern_detector)
-cc_test(
-  test_save_optimized_model_pass
-  SRCS save_optimized_model_pass_tester.cc
-  DEPS save_optimized_model_pass)
 cc_test(
   test_op_compat_sensible_pass
   SRCS op_compat_sensible_pass_tester.cc
   DEPS op_compat_sensible_pass)
diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc
index 4aadb34d7b354..44cf98b10f628 100644
--- a/paddle/fluid/inference/analysis/analyzer.cc
+++ b/paddle/fluid/inference/analysis/analyzer.cc
@@ -39,7 +39,12 @@ void Analyzer::RunAnalysis(Argument *argument) {
       string::PrettyLogH1("--- Running analysis [%s]", pass);
     }
     if (!argument->enable_ir_optim() && pass == "ir_analysis_pass") continue;
-
+    if (argument->save_optimized_model() &&
+        pass == "ir_params_sync_among_devices_pass") {
+      LOG(WARNING) << "When save_optimized_model is true, we need to skip "
+                      "ir_params_sync_among_devices_pass for saving time.";
+      continue;
+    }
     auto *ptr = PassRegistry::Global().Retreive(pass);
     PADDLE_ENFORCE_NOT_NULL(ptr,
                             platform::errors::PreconditionNotMet(
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 8673dbe9c8bf1..1ab600854882a 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -318,30 +318,6 @@ void IRPassManager::CreatePasses(Argument *argument,
         pass->Set("quant_weight_bits",
                   new int(argument->xpu_quant_post_dynamic_weight_bits()));
       }
-    } else if (pass_name == "save_optimized_model_pass") {
-      pass->Set("save_optimized_model",
-                new bool(argument->save_optimized_model()));
-      std::string optim_cache_dir = argument->optim_cache_dir();
-      if (!optim_cache_dir.empty()) {
-        if (!PathExists(optim_cache_dir)) {
-          PADDLE_ENFORCE_NE(
-              MKDIR(optim_cache_dir.c_str()),
-              -1,
-              platform::errors::PreconditionNotMet(
-                  "Can not create optimize cache directory: %s, Make sure you "
-                  "have permission to write",
-                  optim_cache_dir));
-        }
-        pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
-      } else {
-        std::string model_opt_cache_dir =
-            argument->Has("model_dir")
-                ? argument->model_dir()
-                : GetDirRoot(argument->model_program_path());
-        pass->Set(
-            "model_opt_cache_dir",
-            new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
-      }
     }
     pre_pass = pass_name;
diff --git a/paddle/fluid/inference/analysis/passes/CMakeLists.txt b/paddle/fluid/inference/analysis/passes/CMakeLists.txt
index 35c03cfc7602a..bc41a34db5e34 100644
--- a/paddle/fluid/inference/analysis/passes/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/passes/CMakeLists.txt
@@ -31,12 +31,17 @@ cc_library(
   inference_op_replace_pass
   SRCS inference_op_replace_pass.cc
   DEPS analysis_pass graph_to_program_pass)
+cc_library(
+  save_optimized_model_pass
+  SRCS save_optimized_model_pass.cc
+  DEPS analysis_pass argument ir_pass_manager graph_to_program_pass)
 cc_library(
   analysis_passes
   SRCS passes.cc
   DEPS ir_graph_build_pass
        ir_analysis_pass
+       save_optimized_model_pass
        ir_params_sync_among_devices_pass
        adjust_cudnn_workspace_size_pass
        memory_optim_pass
diff --git a/paddle/fluid/inference/analysis/passes/passes.cc b/paddle/fluid/inference/analysis/passes/passes.cc
index cd65757d08f3f..26e0f34c0ec08 100644
--- a/paddle/fluid/inference/analysis/passes/passes.cc
+++ b/paddle/fluid/inference/analysis/passes/passes.cc
@@ -21,6 +21,7 @@
 #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
 #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
+#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
 
 namespace paddle {
 namespace inference {
@@ -33,6 +34,8 @@ PassRegistry::PassRegistry() {
                   std::unique_ptr<AnalysisPass>(new IrAnalysisPass));
   passes_.emplace("ir_graph_build_pass",
                   std::unique_ptr<AnalysisPass>(new IrGraphBuildPass));
+  passes_.emplace("save_optimized_model_pass",
+                  std::unique_ptr<AnalysisPass>(new SaveOptimizedModelPass));
   passes_.emplace("memory_optimize_pass",
                   std::unique_ptr<AnalysisPass>(new MemoryOptimizePass));
   passes_.emplace(
diff --git a/paddle/fluid/framework/ir/save_optimized_model_pass.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
similarity index 77%
rename from paddle/fluid/framework/ir/save_optimized_model_pass.cc
rename to paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
index bbb4efda8706b..6bc425e85e6a1 100644
--- a/paddle/fluid/framework/ir/save_optimized_model_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
@@ -12,34 +12,44 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/framework/ir/save_optimized_model_pass.h"
+#include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
+
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/phi/common/backend.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
-namespace framework {
-class ProgramDesc;
-}  // namespace framework
-}  // namespace paddle
-
-namespace paddle {
-namespace framework {
-namespace ir {
+namespace inference {
+namespace analysis {
 
-void SaveOptimizedModelPass::ApplyImpl(ir::Graph* graph) const {
-  if (!Has("save_optimized_model") || !Get<bool>("save_optimized_model"))
-    return;
+void SaveOptimizedModelPass::RunImpl(Argument* argument) {
+  if (!argument->save_optimized_model()) return;
 
-  std::string model_opt_cache_dir = Get<std::string>("model_opt_cache_dir");
-  auto& scope = graph->Get<framework::Scope>(kParamScopeAttr);
+  std::string model_opt_cache_dir = argument->optim_cache_dir();
+  if (!model_opt_cache_dir.empty()) {
+    if (!PathExists(model_opt_cache_dir)) {
+      PADDLE_ENFORCE_NE(
+          MKDIR(model_opt_cache_dir.c_str()),
+          -1,
+          platform::errors::PreconditionNotMet(
+              "Can not create optimize cache directory: %s, Make sure you "
+              "have permission to write",
+              model_opt_cache_dir));
+    }
+  } else {
+    model_opt_cache_dir = argument->Has("model_dir")
+                              ? argument->model_dir()
+                              : GetDirRoot(argument->model_program_path());
+  }
+
+  auto& scope = argument->scope();
+  auto* graph = argument->main_graph_ptr();
+
   framework::ProgramDesc optimized_program_desc;
   framework::ir::GraphToProgram(*graph, &optimized_program_desc);
@@ -115,9 +125,10 @@ void SaveOptimizedModelPass::ApplyImpl(ir::Graph* graph) const {
   LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
 }
 
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
+std::string SaveOptimizedModelPass::repr() const {
+  return "save_optimized_model_pass";
+}
 
-REGISTER_PASS(save_optimized_model_pass,
-              paddle::framework::ir::SaveOptimizedModelPass);
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/framework/ir/save_optimized_model_pass.h b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
similarity index 67%
rename from paddle/fluid/framework/ir/save_optimized_model_pass.h
rename to paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
index 247bae959badc..09c748de91d94 100644
--- a/paddle/fluid/framework/ir/save_optimized_model_pass.h
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
@@ -17,20 +17,21 @@ limitations under the License. */
 
 #pragma once
 
 #include <string>
 #include <unordered_set>
 
-#include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/framework/ir/pass.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 
 namespace paddle {
-namespace framework {
-namespace ir {
-
-class Graph;
-
-class SaveOptimizedModelPass : public Pass {
- protected:
-  void ApplyImpl(ir::Graph* graph) const override;
+namespace inference {
+namespace analysis {
+
+/*
+ * Save model optimized by ir pass
+ */
+class SaveOptimizedModelPass : public AnalysisPass {
+ public:
+  void RunImpl(Argument *argument) override;
+  std::string repr() const override;
 };
 
-}  // namespace ir
-}  // namespace framework
+}  // namespace analysis
+}  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass_tester.cc
similarity index 100%
rename from paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc
rename to paddle/fluid/inference/analysis/passes/save_optimized_model_pass_tester.cc
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index f447c8c415b36..bea0b82ecd494 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -527,7 +527,6 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) {
       "link_xpu_op_max_pass",
       "inplace_op_var_pass",
       "delete_isolated_node_pass",
-      "save_optimized_model_pass",
   });
   use_xpu_ = true;
 }
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index 021b758239eee..0d81a5d9e8371 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -116,6 +116,7 @@ class PD_INFER_DECL PaddlePassBuilder {
   std::vector<std::string> analysis_passes_{
       {"ir_graph_build_pass",
        "ir_analysis_pass",
+       "save_optimized_model_pass",
        "ir_params_sync_among_devices_pass",
        "adjust_cudnn_workspace_size_pass",
        "inference_op_replace_pass"}};
diff --git a/test/cpp/inference/analysis/analyzer_tester.cc b/test/cpp/inference/analysis/analyzer_tester.cc
index 3f5be92f5a3e6..b329fd6f9eb42 100644
--- a/test/cpp/inference/analysis/analyzer_tester.cc
+++ b/test/cpp/inference/analysis/analyzer_tester.cc
@@ -41,6 +41,20 @@ TEST(Analyzer, analysis_without_tensorrt) {
   analyser.Run(&argument);
 }
 
+TEST(Analyzer, analysis_save_optimized_model) {
+  Argument argument;
+  argument.SetDisableLogs(false);
+  argument.SetModelDir(FLAGS_inference_model_dir);
+  argument.SetEnableIrOptim(false);
+  argument.SetUseGPU(false);
+  argument.SetSaveOptimizedModel(true);
+  argument.SetAnalysisPasses(
+      {"ir_graph_build_pass", "ir_analysis_pass", "save_optimized_model_pass"});
+
+  Analyzer analyser;
+  analyser.Run(&argument);
+}
+
 TEST(Analyzer, analysis_with_tensorrt) {
   Argument argument;
   argument.SetDisableLogs(false);
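
From PATCH 07/14 on, the save step runs as an analysis pass, placed in
analysis_passes_ before ir_params_sync_among_devices_pass so it sees the
optimized graph while the parameters are still on the host. A minimal sketch
of consuming the artifacts the pass writes (not part of the patches;
"./optim_cache" is an illustrative placeholder, the file names come from
PATCH 01/14):

// Minimal sketch, assuming the pass has already written its two output
// files into ./optim_cache (directory name is an assumption).
#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  // Reload the serialized program and the combined parameters file.
  config.SetModel("./optim_cache/optimized.pdmodel",
                  "./optim_cache/optimized.pdiparams");
  // The saved graph is already optimized, so IR optimization can be skipped.
  config.SwitchIrOptim(false);
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}
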
From 0a3ed1b69473ea1d26eb81677fa421702ac196f9 Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Thu, 11 May 2023 18:45:44 +0800
Subject: [PATCH 08/14] change to analysis pass

---
 paddle/fluid/inference/analysis/analyzer.cc         |  2 +-
 .../passes/save_optimized_model_pass.cc             | 12 ++-
 .../passes/save_optimized_model_pass.h              |  3 +
 .../save_optimized_model_pass_tester.cc             | 73 -------------------
 4 files changed, 15 insertions(+), 75 deletions(-)
 delete mode 100644 paddle/fluid/inference/analysis/passes/save_optimized_model_pass_tester.cc

diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc
index 44cf98b10f628..edc22f26bb3ef 100644
--- a/paddle/fluid/inference/analysis/analyzer.cc
+++ b/paddle/fluid/inference/analysis/analyzer.cc
@@ -41,7 +41,7 @@ void Analyzer::RunAnalysis(Argument *argument) {
     if (!argument->enable_ir_optim() && pass == "ir_analysis_pass") continue;
     if (argument->save_optimized_model() &&
         pass == "ir_params_sync_among_devices_pass") {
-      LOG(WARNING) << "When save_optimized_model is true, we need to skip "
+      LOG(WARNING) << "When save_optimized_model is turned on, we need to skip "
                       "ir_params_sync_among_devices_pass for saving time.";
       continue;
     }
diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
index 6bc425e85e6a1..7c0f4b440cee4 100644
--- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
@@ -27,8 +27,12 @@ namespace paddle {
 namespace inference {
 namespace analysis {
 
-void SaveOptimizedModelPass::RunImpl(Argument* argument) {
+void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
   if (!argument->save_optimized_model()) return;
+  if (!argument->enable_ir_optim()) {
+    LOG(WARNING) << "ir_optim is turned off, skip save_optimized_model_pass";
+    return;
+  }
 
   std::string model_opt_cache_dir = argument->optim_cache_dir();
   if (!model_opt_cache_dir.empty()) {
@@ -125,6 +129,12 @@ void SaveOptimizedModelPass::RunImpl(Argument* argument) {
   LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
 }
 
+void SaveOptimizedModelPass::RunImpl(Argument* argument) {
+  if (argument->use_xpu_valid()) {
+    SaveOptimizedModel(argument);
+  }
+}
+
 std::string SaveOptimizedModelPass::repr() const {
   return "save_optimized_model_pass";
 }
diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
index 09c748de91d94..0b751c2ad47e7 100644
--- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
@@ -30,6 +30,9 @@ class SaveOptimizedModelPass : public AnalysisPass {
  public:
   void RunImpl(Argument *argument) override;
   std::string repr() const override;
+
+ private:
+  void SaveOptimizedModel(Argument *argument);
 };
diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass_tester.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass_tester.cc
deleted file mode 100644
index 0a9b394bc9d5d..0000000000000
--- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass_tester.cc
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <gtest/gtest.h>
-
-#include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/framework/ir/pass_tester_helper.h"
-#include "paddle/fluid/inference/analysis/helper.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-void AddVarToScope(Scope* param_scope,
-                   const std::string& name,
-                   const DDim& dims) {
-  auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
-  tensor->Resize(dims);
-  auto* cpu_ctx = static_cast<phi::CPUContext*>(
-      platform::DeviceContextPool::Instance().Get(phi::CPUPlace()));
-  cpu_ctx->Alloc<float>(tensor);
-}
-
-VarDesc* Data(paddle::framework::BlockDesc* block,
-              std::string name,
-              std::vector<int64_t> shape = {},
-              bool is_persistable = false,
-              proto::VarType::Type data_type = proto::VarType::FP32) {
-  auto* var = block->Var(name);
-  var->SetType(proto::VarType::LOD_TENSOR);
-  var->SetDataType(data_type);
-  var->SetShape(shape);
-  var->SetPersistable(is_persistable);
-  return var;
-}
-
-TEST(SaveOptimizedModelPass, basic) {
-  paddle::framework::ProgramDesc program;
-  auto* block = program.MutableBlock(0);
-  auto* lookup_table_w = Data(block, "lookup_table_w", {1}, true);
-  auto* lookup_table_out = Data(block, "scatter_out", {1});
-  OpDesc* lookup_table = block->AppendOp();
-  lookup_table->SetType("lookup_table_v2");
-  lookup_table->SetInput("W", {lookup_table_w->Name()});
-  lookup_table->SetOutput("Out", {lookup_table_out->Name()});
-
-  std::unique_ptr<ir::Graph> graph(new ir::Graph(program));
-  auto scope = new Scope();
-  AddVarToScope(scope, lookup_table_w->Name(), {1});
-  graph->Set("__param_scope__", scope);
-
-  auto save_optimized_model_pass =
-      PassRegistry::Instance().Get("save_optimized_model_pass");
-  save_optimized_model_pass->Set("save_optimized_model", new bool(true));
-  save_optimized_model_pass->Set("model_opt_cache_dir", new std::string("./"));
-  save_optimized_model_pass->Apply(graph.get());
-}
-
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
-
-USE_PASS(save_optimized_model_pass);

From 57a23d88f2c9fac31958f2bfac58fca611dc450d Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Thu, 11 May 2023 18:49:04 +0800
Subject: [PATCH 09/14] fix

---
 test/cpp/inference/analysis/analyzer_tester.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/cpp/inference/analysis/analyzer_tester.cc b/test/cpp/inference/analysis/analyzer_tester.cc
index b329fd6f9eb42..9aaf6de05e10c 100644
--- a/test/cpp/inference/analysis/analyzer_tester.cc
+++ b/test/cpp/inference/analysis/analyzer_tester.cc
@@ -45,7 +45,7 @@ TEST(Analyzer, analysis_save_optimized_model) {
   Argument argument;
   argument.SetDisableLogs(false);
   argument.SetModelDir(FLAGS_inference_model_dir);
-  argument.SetEnableIrOptim(false);
+  argument.SetEnableIrOptim(true);
   argument.SetUseGPU(false);
   argument.SetSaveOptimizedModel(true);
   argument.SetAnalysisPasses(

From 9de58eba5a7822b7a03be70f21da03914304fbcc Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Thu, 11 May 2023 20:00:40 +0800
Subject: [PATCH 10/14] fix ut

---
 test/cpp/inference/analysis/analyzer_tester.cc     | 14 --------------
 .../cpp/inference/api/analysis_predictor_tester.cc | 13 +++++++++++++
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/test/cpp/inference/analysis/analyzer_tester.cc b/test/cpp/inference/analysis/analyzer_tester.cc
index 9aaf6de05e10c..3f5be92f5a3e6 100644
--- a/test/cpp/inference/analysis/analyzer_tester.cc
+++ b/test/cpp/inference/analysis/analyzer_tester.cc
@@ -41,20 +41,6 @@ TEST(Analyzer, analysis_without_tensorrt) {
   analyser.Run(&argument);
 }
 
-TEST(Analyzer, analysis_save_optimized_model) {
-  Argument argument;
-  argument.SetDisableLogs(false);
-  argument.SetModelDir(FLAGS_inference_model_dir);
-  argument.SetEnableIrOptim(true);
-  argument.SetUseGPU(false);
-  argument.SetSaveOptimizedModel(true);
-  argument.SetAnalysisPasses(
-      {"ir_graph_build_pass", "ir_analysis_pass", "save_optimized_model_pass"});
-
-  Analyzer analyser;
-  analyser.Run(&argument);
-}
-
 TEST(Analyzer, analysis_with_tensorrt) {
   Argument argument;
   argument.SetDisableLogs(false);
diff --git a/test/cpp/inference/api/analysis_predictor_tester.cc b/test/cpp/inference/api/analysis_predictor_tester.cc
index e6b5630dccad6..8d2d005201a48 100644
--- a/test/cpp/inference/api/analysis_predictor_tester.cc
+++ b/test/cpp/inference/api/analysis_predictor_tester.cc
@@ -132,6 +132,19 @@ TEST(AnalysisPredictor, analysis_on) {
   inference::CompareTensor(outputs.front(), naive_outputs.front());
 }
 
+#ifdef PADDLE_WITH_XPU
+TEST(AnalysisPredictor, save_optimized_model_on) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.SwitchIrOptim(true);
+  config.EnableSaveOptimizedModel(true);
+  config.EnableXpu();
+  config.SetXpuDeviceId(0);
+  LOG(INFO) << config.Summary();
+  CreatePaddlePredictor(config);
+}
+#endif
+
 TEST(AnalysisPredictor, ZeroCopy) {
   AnalysisConfig config;
   config.SetModel(FLAGS_dirname);

From c4bca9cb41d5a369eebf8cfb0384113dc96b0ded Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Fri, 12 May 2023 10:51:00 +0800
Subject: [PATCH 11/14] fix ut error

---
 paddle/fluid/inference/analysis/analyzer.cc                | 7 +------
 .../inference/analysis/passes/save_optimized_model_pass.cc | 4 ++--
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc
index edc22f26bb3ef..4aadb34d7b354 100644
--- a/paddle/fluid/inference/analysis/analyzer.cc
+++ b/paddle/fluid/inference/analysis/analyzer.cc
@@ -39,12 +39,7 @@ void Analyzer::RunAnalysis(Argument *argument) {
       string::PrettyLogH1("--- Running analysis [%s]", pass);
     }
     if (!argument->enable_ir_optim() && pass == "ir_analysis_pass") continue;
-    if (argument->save_optimized_model() &&
-        pass == "ir_params_sync_among_devices_pass") {
-      LOG(WARNING) << "When save_optimized_model is turned on, we need to skip "
-                      "ir_params_sync_among_devices_pass for saving time.";
-      continue;
-    }
+
     auto *ptr = PassRegistry::Global().Retreive(pass);
     PADDLE_ENFORCE_NOT_NULL(ptr,
                             platform::errors::PreconditionNotMet(
diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
index 7c0f4b440cee4..3cc3cb80054d2 100644
--- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
@@ -86,7 +86,7 @@ void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
     }
   }
 
-  std::string save_params_path = path + "/" + "optimized.pdiparams";
+  std::string save_params_path = path + "/" + "_optimized.pdiparams";
   std::vector<std::string> save_var_list(save_var_set.begin(),
                                          save_var_set.end());
   std::sort(save_var_list.begin(), save_var_list.end());
@@ -117,7 +117,7 @@ void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
     }
   }
-  std::string save_model_path = path + "/" + "optimized.pdmodel";
+  std::string save_model_path = path + "/" + "_optimized.pdmodel";
   auto str = optimized_program_desc.Proto()->SerializeAsString();
   std::ofstream file(save_model_path.c_str(), std::ios::binary);
   file.write(str.c_str(), str.size());
   file.close();

From 97dc77ba4e5e5a798496502a22282a15464f344a Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Fri, 12 May 2023 14:13:18 +0800
Subject: [PATCH 12/14] fix api

---
 .../inference/analysis/passes/save_optimized_model_pass.cc | 6 +++++-
 paddle/fluid/inference/api/paddle_analysis_config.h        | 2 +-
 paddle/fluid/pybind/inference_api.cc                       | 6 +++---
 test/cpp/inference/api/analysis_predictor_tester.cc        | 2 +-
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
index 3cc3cb80054d2..9c5d8fd5fa1af 100644
--- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
@@ -28,7 +28,11 @@ namespace inference {
 namespace analysis {
 
 void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
-  if (!argument->save_optimized_model()) return;
+  if (!argument->save_optimized_model()) {
+    LOG(WARNING) << "save_optim_cache_model is turned off, skip "
+                    "save_optimized_model_pass";
+    return;
+  }
   if (!argument->enable_ir_optim()) {
     LOG(WARNING) << "ir_optim is turned off, skip save_optimized_model_pass";
     return;
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index bd52737bfa935..d1fca04050217 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -204,7 +204,7 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   /// \param save_optimized_model whether to enable save optimized model.
   ///
-  void EnableSaveOptimizedModel(bool save_optimized_model) {
+  void EnableSaveOptimCacheModel(bool save_optimized_model) {
     save_optimized_model_ = save_optimized_model;
   }
   ///
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index cfdb7b597823e..f196e3223213d 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -823,9 +823,9 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_profile", &AnalysisConfig::EnableProfile)
       .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
       .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
-      .def("enable_save_optimized_model",
-           &AnalysisConfig::EnableSaveOptimizedModel,
-           py::arg("x") = false)
+      .def("enable_save_optim_cache_model",
+           &AnalysisConfig::EnableSaveOptimCacheModel,
+           py::arg("save_optimized_model") = false)
       .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
       .def("switch_use_feed_fetch_ops",
            &AnalysisConfig::SwitchUseFeedFetchOps,
diff --git a/test/cpp/inference/api/analysis_predictor_tester.cc b/test/cpp/inference/api/analysis_predictor_tester.cc
index 8d2d005201a48..9fd773c185d10 100644
--- a/test/cpp/inference/api/analysis_predictor_tester.cc
+++ b/test/cpp/inference/api/analysis_predictor_tester.cc
@@ -137,7 +137,7 @@ TEST(AnalysisPredictor, save_optimized_model_on) {
   AnalysisConfig config;
   config.SetModel(FLAGS_dirname);
   config.SwitchIrOptim(true);
-  config.EnableSaveOptimizedModel(true);
+  config.EnableSaveOptimCacheModel(true);
   config.EnableXpu();
   config.SetXpuDeviceId(0);
   LOG(INFO) << config.Summary();

From 0590fee81b860db0e71954c77bb51ed8ac9a714f Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Wed, 17 May 2023 14:27:10 +0800
Subject: [PATCH 13/14] fix comment

---
 .../inference/analysis/passes/save_optimized_model_pass.cc | 1 +
 .../inference/analysis/passes/save_optimized_model_pass.h  | 1 -
 paddle/fluid/inference/api/paddle_analysis_config.h        | 2 +-
 paddle/fluid/pybind/inference_api.cc                       | 4 ++--
 test/cpp/inference/api/analysis_predictor_tester.cc        | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
index 9c5d8fd5fa1af..435b06945837a 100644
--- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
 
+#include <unordered_set>
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
index 0b751c2ad47e7..5fc520a6badc3 100644
--- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h
@@ -15,7 +15,6 @@ limitations under the License. */
 #pragma once
 
 #include <string>
-#include <unordered_set>
 
 #include "paddle/fluid/inference/analysis/analysis_pass.h"
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index d1fca04050217..895ac8bf0c055 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -204,7 +204,7 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   /// \param save_optimized_model whether to enable save optimized model.
   ///
-  void EnableSaveOptimCacheModel(bool save_optimized_model) {
+  void EnableSaveOptimModel(bool save_optimized_model) {
     save_optimized_model_ = save_optimized_model;
   }
   ///
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index f196e3223213d..bca05f2fc5733 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -820,8 +820,8 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_profile", &AnalysisConfig::EnableProfile)
       .def("disable_glog_info", &AnalysisConfig::DisableGlogInfo)
       .def("glog_info_disabled", &AnalysisConfig::glog_info_disabled)
-      .def("enable_save_optim_cache_model",
-           &AnalysisConfig::EnableSaveOptimCacheModel,
+      .def("enable_save_optim_model",
+           &AnalysisConfig::EnableSaveOptimModel,
           py::arg("save_optimized_model") = false)
       .def("set_optim_cache_dir", &AnalysisConfig::SetOptimCacheDir)
       .def("switch_use_feed_fetch_ops",
diff --git a/test/cpp/inference/api/analysis_predictor_tester.cc b/test/cpp/inference/api/analysis_predictor_tester.cc
index 9fd773c185d10..d187fb9d17325 100644
--- a/test/cpp/inference/api/analysis_predictor_tester.cc
+++ b/test/cpp/inference/api/analysis_predictor_tester.cc
@@ -137,7 +137,7 @@ TEST(AnalysisPredictor, save_optimized_model_on) {
   AnalysisConfig config;
   config.SetModel(FLAGS_dirname);
   config.SwitchIrOptim(true);
-  config.EnableSaveOptimCacheModel(true);
+  config.EnableSaveOptimModel(true);
   config.EnableXpu();
   config.SetXpuDeviceId(0);
   LOG(INFO) << config.Summary();

From b62d0212a35f568292c850e2b04717615c8ecec5 Mon Sep 17 00:00:00 2001
From: shentanyue
Date: Wed, 17 May 2023 14:41:50 +0800
Subject: [PATCH 14/14] fix comment

---
 .../inference/analysis/passes/save_optimized_model_pass.cc | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
index 435b06945837a..a2e968591943f 100644
--- a/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/save_optimized_model_pass.cc
@@ -15,14 +15,9 @@ limitations under the License. */
 #include "paddle/fluid/inference/analysis/passes/save_optimized_model_pass.h"
 
 #include <unordered_set>
-#include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/executor.h"
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/phi/common/backend.h"
 
 namespace paddle {
 namespace inference {
@@ -75,7 +70,7 @@ void SaveOptimizedModelPass::SaveOptimizedModel(Argument* argument) {
   auto SerializeParams = [&](const std::string& path) {
     framework::ProgramDesc save_program;
     auto* save_block = save_program.MutableBlock(0);
-    std::set<std::string> save_var_set;
+    std::unordered_set<std::string> save_var_set;
     for (size_t i = 0; i < optimized_program_desc.Size(); ++i) {
       const auto& global_block = optimized_program_desc.Block(i);
       for (framework::VarDesc* var : global_block.AllVars()) {
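
End state of the series: the pass only fires for XPU configs (RunImpl checks
argument->use_xpu_valid(), PATCH 08/14), the output files carry an underscore
prefix (_optimized.pdmodel / _optimized.pdiparams, PATCH 11/14), and the C++
and Python entry points are EnableSaveOptimModel / enable_save_optim_model
(PATCH 13/14). A minimal sketch of the final usage, modeled on the in-tree
analysis_predictor_tester (the model directory is a placeholder):

// Minimal sketch of the final API; without SetOptimCacheDir() the pass
// falls back to the model directory for its output location.
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void SaveXpuOptimizedModel() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_v1");  // placeholder model directory
  config.SwitchIrOptim(true);         // the pass skips itself when IR optim is off
  config.EnableSaveOptimModel(true);
  config.EnableXpu();
  config.SetXpuDeviceId(0);
  auto predictor = paddle::CreatePaddlePredictor(config);
  (void)predictor;  // _optimized.pdmodel / _optimized.pdiparams now exist
}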