
[Inference] Save optimized model by pass #53696

Merged
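This PR adds a `save_optimized_model_pass` that serializes the optimized graph to `optimized.pdmodel` and its persistable parameters to `optimized.pdiparams` in the optimization cache directory, plus an `EnableSaveOptimizedModel` switch on `AnalysisConfig`. A minimal, hypothetical usage sketch follows (not part of this diff): the paths are placeholders, and `SetOptimCacheDir` is assumed to be the existing cache-directory setter on the config.

```cpp
// Hypothetical usage sketch; paths are placeholders.
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config("/path/to/model_dir");
  config.EnableSaveOptimizedModel(true);          // switch added by this PR
  config.SetOptimCacheDir("/path/to/opt_cache");  // directory the pass writes into
  auto predictor = paddle_infer::CreatePredictor(config);
  // After the analysis passes run, save_optimized_model_pass writes
  // optimized.pdmodel and optimized.pdiparams into the cache directory.
  return 0;
}
```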
5 changes: 5 additions & 0 deletions paddle/fluid/framework/ir/CMakeLists.txt
@@ -127,6 +127,7 @@ pass_library(dense_multihead_matmul_to_sparse_pass inference)
pass_library(delete_cast_op_pass inference)
pass_library(delete_elementwise_mul_op_pass inference)
pass_library(delete_repeated_ops_pass inference)
pass_library(save_optimized_model_pass inference)
pass_library(generate_pass DEPS pass_desc_proto)
target_link_libraries(generate_pass pass_desc_proto)

@@ -321,6 +322,10 @@ cc_test(
test_graph_pattern_detector
SRCS graph_pattern_detector_tester.cc
DEPS graph_pattern_detector)
cc_test(
test_save_optimized_model_pass
SRCS save_optimized_model_pass_tester.cc
DEPS save_optimized_model_pass)
cc_test(
test_op_compat_sensible_pass
SRCS op_compat_sensible_pass_tester.cc
123 changes: 123 additions & 0 deletions paddle/fluid/framework/ir/save_optimized_model_pass.cc
@@ -0,0 +1,123 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/ir/save_optimized_model_pass.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/phi/common/backend.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"

namespace paddle {
namespace framework {
class ProgramDesc;
} // namespace framework
} // namespace paddle

namespace paddle {
namespace framework {
namespace ir {

void SaveOptimizedModelPass::ApplyImpl(ir::Graph* graph) const {
if (!Has("save_optimized_model") || !Get<bool>("save_optimized_model"))
return;

std::string model_opt_cache_dir = Get<std::string>("model_opt_cache_dir");
auto& scope = graph->Get<Scope>(kParamScopeAttr);
framework::ProgramDesc optimized_program_desc;
framework::ir::GraphToProgram(*graph, &optimized_program_desc);

auto IsPersistable = [](const framework::VarDesc* var) {
if (var->Persistable() &&
var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
var->GetType() != framework::proto::VarType::FETCH_LIST &&
var->GetType() != framework::proto::VarType::RAW) {
return true;
}
return false;
};

// Build a throwaway program whose single save_combine op dumps every
// persistable var to one combined .pdiparams file when executed below.
auto SerializeParams = [&](const std::string& path) {
framework::ProgramDesc save_program;
auto* save_block = save_program.MutableBlock(0);
std::set<std::string> save_var_set;
for (size_t i = 0; i < optimized_program_desc.Size(); ++i) {
const auto& global_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : global_block.AllVars()) {
if (IsPersistable(var)) {
framework::VarDesc* new_var = save_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
save_var_set.insert(new_var->Name());
}
}
}

std::string save_params_path = path + "/" + "optimized.pdiparams";
std::vector<std::string> save_var_list(save_var_set.begin(),
save_var_set.end());
std::sort(save_var_list.begin(), save_var_list.end());
auto* op = save_block->AppendOp();
op->SetType("save_combine");
op->SetInput("X", save_var_list);
op->SetAttr("file_path", save_params_path);
op->CheckAttrs();

framework::Executor exe(platform::CPUPlace{});
exe.Run(save_program, &scope, 0, true, true);
};
// TODO(shentanyue01): Setting hardware and version identification for
// optimized models.
auto SerializeProg = [&](const std::string& path) {
// All persistable vars need to be moved to the global block
auto* global_block = optimized_program_desc.MutableBlock(0);
for (size_t i = 1; i < optimized_program_desc.Size(); ++i) {
const auto& sub_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : sub_block.AllVars()) {
if (IsPersistable(var) && !global_block->HasVar(var->Name())) {
framework::VarDesc* new_var = global_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
}
}
}
std::string save_model_path = path + "/" + "optimized.pdmodel";
auto str = optimized_program_desc.Proto()->SerializeAsString();
std::ofstream file(save_model_path.c_str(), std::ios::binary);
file.write(str.c_str(), str.size());
file.close();
};

SerializeProg(model_opt_cache_dir);
SerializeParams(model_opt_cache_dir);
LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
}

} // namespace ir
} // namespace framework
} // namespace paddle

REGISTER_PASS(save_optimized_model_pass,
paddle::framework::ir::SaveOptimizedModelPass);
36 changes: 36 additions & 0 deletions paddle/fluid/framework/ir/save_optimized_model_pass.h
@@ -0,0 +1,36 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>
#include <unordered_set>

#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass.h"

namespace paddle {
namespace framework {
namespace ir {

class Graph;

class SaveOptimizedModelPass : public Pass {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};

} // namespace ir
} // namespace framework
} // namespace paddle
73 changes: 73 additions & 0 deletions paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc
@@ -0,0 +1,73 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/inference/analysis/helper.h"

namespace paddle {
namespace framework {
namespace ir {

void AddVarToScope(Scope* param_scope,
const std::string& name,
const DDim& dims) {
auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
tensor->Resize(dims);
auto* cpu_ctx = static_cast<phi::CPUContext*>(
platform::DeviceContextPool::Instance().Get(phi::CPUPlace()));
cpu_ctx->Alloc<float>(tensor);
}

VarDesc* Data(paddle::framework::BlockDesc* block,
std::string name,
std::vector<int64_t> shape = {},
bool is_persistable = false,
proto::VarType::Type data_type = proto::VarType::FP32) {
auto* var = block->Var(name);
var->SetType(proto::VarType::LOD_TENSOR);
var->SetDataType(data_type);
var->SetShape(shape);
var->SetPersistable(is_persistable);
return var;
}

TEST(SaveOptimizedModelPass, basic) {
paddle::framework::ProgramDesc program;
auto* block = program.MutableBlock(0);
auto* lookup_table_w = Data(block, "lookup_table_w", {1}, true);
auto* lookup_table_out = Data(block, "lookup_table_out", {1});
OpDesc* lookup_table = block->AppendOp();
lookup_table->SetType("lookup_table_v2");
lookup_table->SetInput("W", {lookup_table_w->Name()});
lookup_table->SetOutput("Out", {lookup_table_out->Name()});

std::unique_ptr<ir::Graph> graph(new ir::Graph(program));
auto scope = new Scope();
AddVarToScope(scope, lookup_table_w->Name(), {1});
graph->Set("__param_scope__", scope);

auto save_optimized_model_pass =
PassRegistry::Instance().Get("save_optimized_model_pass");
save_optimized_model_pass->Set("save_optimized_model", new bool(true));
save_optimized_model_pass->Set("model_opt_cache_dir", new std::string(""));
save_optimized_model_pass->Apply(graph.get());
}

} // namespace ir
} // namespace framework
} // namespace paddle

USE_PASS(save_optimized_model_pass);
2 changes: 2 additions & 0 deletions paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc
@@ -99,6 +99,7 @@ void DeleteIsolatedNodePass::CollectReservedPersistableNodeNames(
Graph* graph,
std::unordered_set<std::string>* reserved_persistable_node_names) const {
for (auto* node : graph->Nodes()) {
if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
if (!node->IsVar() || !node->Var()->Persistable()) continue;
for (auto* out_node : node->outputs) {
auto op_type = out_node->Op()->Type();
@@ -131,6 +132,7 @@ int DeleteIsolatedNodePass::RemoveIsolatedNodes(
std::unordered_set<const Node*> delete_nodes;
const std::unordered_set<ir::Node*> nodes = graph->Nodes();
for (auto* node : nodes) {
if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
if (!node->IsVar() || !node->Var()->Persistable()) continue;
auto name = node->Var()->Name();
if (reserved_persistable_node_names.count(name) > 0) continue;
3 changes: 2 additions & 1 deletion paddle/fluid/inference/analysis/argument.h
@@ -146,6 +146,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
DECL_ARGUMENT_FIELD(save_optimized_model, SaveOptimizedModel, bool);
DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);

@@ -294,7 +295,7 @@ struct Argument {
XpuQuantPostDynamicWeightBits,
int);
DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_op_types,
- XpuQuantPostDynamicOpTypss,
+ XpuQuantPostDynamicOpTypes,
std::vector<std::string>);

DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);
26 changes: 25 additions & 1 deletion paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -310,14 +310,38 @@ void IRPassManager::CreatePasses(Argument *argument,
}
bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
pass->Set("use_fc_padding", new bool(use_fc_padding));
} else if (pass_name == "fused_multi_transformer_xpu_quant_pass") {
} else if (pass_name == "fused_multi_transformer_xpu_pass") {
auto op_types = argument->xpu_quant_post_dynamic_op_types();
if (std::count(op_types.begin(),
op_types.end(),
"fused_multi_transformer") > 0) {
pass->Set("quant_weight_bits",
new int(argument->xpu_quant_post_dynamic_weight_bits()));
}
} else if (pass_name == "save_optimized_model_pass") {
pass->Set("save_optimized_model",
new bool(argument->save_optimized_model()));
std::string optim_cache_dir = argument->optim_cache_dir();
if (!optim_cache_dir.empty()) {
if (!PathExists(optim_cache_dir)) {
PADDLE_ENFORCE_NE(
MKDIR(optim_cache_dir.c_str()),
-1,
platform::errors::PreconditionNotMet(
"Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
optim_cache_dir));
}
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
} else {
std::string model_opt_cache_dir =
argument->Has("model_dir")
? argument->model_dir()
: GetDirRoot(argument->model_program_path());
pass->Set(
"model_opt_cache_dir",
new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
}
}
pre_pass = pass_name;

5 changes: 4 additions & 1 deletion paddle/fluid/inference/api/analysis_config.cc
@@ -411,7 +411,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(model_dir_);
CP_MEMBER(model_from_memory_); // the memory model reuses prog_file_ and
// params_file_ fields.

CP_MEMBER(save_optimized_model_);
CP_MEMBER(opt_cache_dir_);
CP_MEMBER(prog_file_);
CP_MEMBER(params_file_);
@@ -1048,6 +1048,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << model_dir_;
ss << prog_file_;
ss << params_file_;
ss << save_optimized_model_;

ss << use_gpu_;
ss << enable_gpu_mixed_;
@@ -1373,6 +1374,8 @@ std::string AnalysisConfig::Summary() {
os.InsertRow({"use_cinn_compiler", use_cinn_compiler_ ? "true" : "false"});

// ir info
os.InsertRow(
{"save_optimized_model", save_optimized_model_ ? "true" : "false"});
os.InsertRow({"ir_optim", enable_ir_optim_ ? "true" : "false"});
os.InsertRow({"ir_debug", ir_debug_ ? "true" : "false"});
os.InsertRow({"memory_optim", enable_memory_optim_ ? "true" : "false"});
3 changes: 2 additions & 1 deletion paddle/fluid/inference/api/analysis_predictor.cc
@@ -1316,6 +1316,7 @@ void AnalysisPredictor::PrepareArgument() {
// Analyze inference_program
argument_->SetPredictorID(predictor_id_);
argument_->SetRootPredictorID(root_predictor_id_);
argument_->SetSaveOptimizedModel(config_.save_optimized_model_);
argument_->SetOptimCacheDir(config_.opt_cache_dir_);
if (!config_.model_dir().empty()) {
argument_->SetModelDir(config_.model_dir());
@@ -1485,7 +1486,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
argument_->SetXpuQuantPostDynamicWeightBits(
config_.xpu_quant_post_dynamic_weight_bits_);
- argument_->SetXpuQuantPostDynamicOpTypss(
+ argument_->SetXpuQuantPostDynamicOpTypes(
config_.xpu_quant_post_dynamic_op_types_);
#endif

9 changes: 9 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -199,6 +199,14 @@ struct PD_INFER_DECL AnalysisConfig {
///
void SetParamsFile(const std::string& x) { params_file_ = x; }

///
/// \brief Enable saving the optimized model.
///
/// \param save_optimized_model whether to save the optimized model.
///
void EnableSaveOptimizedModel(bool save_optimized_model) {
save_optimized_model_ = save_optimized_model;
}
///
/// \brief Set the path of optimization cache directory.
///
@@ -1255,6 +1263,7 @@ struct PD_INFER_DECL AnalysisConfig {
// Variables held by config can take up a lot of memory in some cases.
// So we release the memory when the predictor is set up.
mutable bool is_valid_{true};
bool save_optimized_model_{false};
std::string opt_cache_dir_;
friend class paddle_infer::experimental::InternalUtils;

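For completeness, reloading the artifacts produced by the pass only requires pointing a config at the two generated files. A hedged sketch, not part of this diff: the directory is a placeholder, and `SwitchIrOptim(false)` is optional here and simply skips re-running IR optimization on an already-optimized program.

```cpp
// Hypothetical sketch for loading the files written by save_optimized_model_pass.
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("/path/to/opt_cache/optimized.pdmodel",
                  "/path/to/opt_cache/optimized.pdiparams");
  config.SwitchIrOptim(false);  // the saved program has already been optimized
  auto predictor = paddle_infer::CreatePredictor(config);
  return 0;
}
```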