
[Inference] Save optimized model by pass #53696

Merged
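This PR adds a `save_optimized_model_pass` that serializes the optimized graph to `optimized.pdmodel` and its persistable parameters to `optimized.pdiparams` in the optimization cache directory, plus an `EnableSaveOptimizedModel` switch on `AnalysisConfig`. A minimal, hypothetical usage sketch follows (not part of this diff): the paths are placeholders, and `SetOptimCacheDir` is assumed to be the existing cache-directory setter on the config.

```cpp
// Hypothetical usage sketch; paths are placeholders.
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config("/path/to/model_dir");
  config.EnableSaveOptimizedModel(true);          // switch added by this PR
  config.SetOptimCacheDir("/path/to/opt_cache");  // directory the pass writes into
  auto predictor = paddle_infer::CreatePredictor(config);
  // After the analysis passes run, save_optimized_model_pass writes
  // optimized.pdmodel and optimized.pdiparams into the cache directory.
  return 0;
}
```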
5 changes: 5 additions & 0 deletions paddle/fluid/framework/ir/CMakeLists.txt
@@ -127,6 +127,7 @@ pass_library(dense_multihead_matmul_to_sparse_pass inference)
pass_library(delete_cast_op_pass inference)
pass_library(delete_elementwise_mul_op_pass inference)
pass_library(delete_repeated_ops_pass inference)
pass_library(save_optimized_model_pass inference)
pass_library(generate_pass DEPS pass_desc_proto)
target_link_libraries(generate_pass pass_desc_proto)

@@ -321,6 +322,10 @@ cc_test(
test_graph_pattern_detector
SRCS graph_pattern_detector_tester.cc
DEPS graph_pattern_detector)
cc_test(
test_save_optimized_model_pass
SRCS save_optimized_model_pass_tester.cc
DEPS save_optimized_model_pass)
cc_test(
test_op_compat_sensible_pass
SRCS op_compat_sensible_pass_tester.cc
123 changes: 123 additions & 0 deletions paddle/fluid/framework/ir/save_optimized_model_pass.cc
@@ -0,0 +1,123 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/ir/save_optimized_model_pass.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/phi/common/backend.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"

namespace paddle {
namespace framework {
class ProgramDesc;
} // namespace framework
} // namespace paddle

namespace paddle {
namespace framework {
namespace ir {

void SaveOptimizedModelPass::ApplyImpl(ir::Graph* graph) const {
if (!Has("save_optimized_model") || !Get<bool>("save_optimized_model"))
return;

std::string model_opt_cache_dir = Get<std::string>("model_opt_cache_dir");
auto& scope = graph->Get<Scope>(kParamScopeAttr);
framework::ProgramDesc optimized_program_desc;
framework::ir::GraphToProgram(*graph, &optimized_program_desc);

auto IsPersistable = [](const framework::VarDesc* var) {
if (var->Persistable() &&
var->GetType() != framework::proto::VarType::FEED_MINIBATCH &&
var->GetType() != framework::proto::VarType::FETCH_LIST &&
var->GetType() != framework::proto::VarType::RAW) {
return true;
}
return false;
};

// Build a throwaway program whose single save_combine op dumps every
// persistable var to one combined .pdiparams file when executed below.
auto SerializeParams = [&](const std::string& path) {
framework::ProgramDesc save_program;
auto* save_block = save_program.MutableBlock(0);
std::set<std::string> save_var_set;
for (size_t i = 0; i < optimized_program_desc.Size(); ++i) {
const auto& global_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : global_block.AllVars()) {
if (IsPersistable(var)) {
framework::VarDesc* new_var = save_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
save_var_set.insert(new_var->Name());
}
}
}

std::string save_params_path = path + "/" + "optimized.pdiparams";
std::vector<std::string> save_var_list(save_var_set.begin(),
save_var_set.end());
std::sort(save_var_list.begin(), save_var_list.end());
auto* op = save_block->AppendOp();
op->SetType("save_combine");
op->SetInput("X", save_var_list);
op->SetAttr("file_path", save_params_path);
op->CheckAttrs();

framework::Executor exe(platform::CPUPlace{});
exe.Run(save_program, &scope, 0, true, true);
};
// TODO(shentanyue01): Setting hardware and version identification for
// optimized models.
auto SerializeProg = [&](const std::string& path) {
// All persistable vars need to be moved to the global block
auto* global_block = optimized_program_desc.MutableBlock(0);
for (size_t i = 1; i < optimized_program_desc.Size(); ++i) {
const auto& sub_block = optimized_program_desc.Block(i);
for (framework::VarDesc* var : sub_block.AllVars()) {
if (IsPersistable(var) && !global_block->HasVar(var->Name())) {
framework::VarDesc* new_var = global_block->Var(var->Name());
new_var->SetShape(var->GetShape());
new_var->SetDataType(var->GetDataType());
new_var->SetType(var->GetType());
new_var->SetLoDLevel(var->GetLoDLevel());
new_var->SetPersistable(true);
}
}
}
std::string save_model_path = path + "/" + "optimized.pdmodel";
auto str = optimized_program_desc.Proto()->SerializeAsString();
std::ofstream file(save_model_path.c_str(), std::ios::binary);
file.write(str.c_str(), str.size());
file.close();
};

SerializeProg(model_opt_cache_dir);
SerializeParams(model_opt_cache_dir);
LOG(INFO) << "Optimized model saved to " << model_opt_cache_dir;
}

} // namespace ir
} // namespace framework
} // namespace paddle

REGISTER_PASS(save_optimized_model_pass,
paddle::framework::ir::SaveOptimizedModelPass);
36 changes: 36 additions & 0 deletions paddle/fluid/framework/ir/save_optimized_model_pass.h
@@ -0,0 +1,36 @@
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>
#include <unordered_set>

#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass.h"

namespace paddle {
namespace framework {
namespace ir {

class Graph;

class SaveOptimizedModelPass : public Pass {
protected:
void ApplyImpl(ir::Graph* graph) const override;
};

} // namespace ir
} // namespace framework
} // namespace paddle
73 changes: 73 additions & 0 deletions paddle/fluid/framework/ir/save_optimized_model_pass_tester.cc
@@ -0,0 +1,73 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/inference/analysis/helper.h"

namespace paddle {
namespace framework {
namespace ir {

void AddVarToScope(Scope* param_scope,
const std::string& name,
const DDim& dims) {
auto* tensor = param_scope->Var(name)->GetMutable<phi::DenseTensor>();
tensor->Resize(dims);
auto* cpu_ctx = static_cast<phi::CPUContext*>(
platform::DeviceContextPool::Instance().Get(phi::CPUPlace()));
cpu_ctx->Alloc<float>(tensor);
}

VarDesc* Data(paddle::framework::BlockDesc* block,
std::string name,
std::vector<int64_t> shape = {},
bool is_persistable = false,
proto::VarType::Type data_type = proto::VarType::FP32) {
auto* var = block->Var(name);
var->SetType(proto::VarType::LOD_TENSOR);
var->SetDataType(data_type);
var->SetShape(shape);
var->SetPersistable(is_persistable);
return var;
}

TEST(SaveOptimizedModelPass, basic) {
paddle::framework::ProgramDesc program;
auto* block = program.MutableBlock(0);
auto* lookup_table_w = Data(block, "lookup_table_w", {1}, true);
auto* lookup_table_out = Data(block, "lookup_table_out", {1});
OpDesc* lookup_table = block->AppendOp();
lookup_table->SetType("lookup_table_v2");
lookup_table->SetInput("W", {lookup_table_w->Name()});
lookup_table->SetOutput("Out", {lookup_table_out->Name()});

std::unique_ptr<ir::Graph> graph(new ir::Graph(program));
auto scope = new Scope();
AddVarToScope(scope, lookup_table_w->Name(), {1});
graph->Set("__param_scope__", scope);

auto save_optimized_model_pass =
PassRegistry::Instance().Get("save_optimized_model_pass");
save_optimized_model_pass->Set("save_optimized_model", new bool(true));
save_optimized_model_pass->Set("model_opt_cache_dir", new std::string(""));
save_optimized_model_pass->Apply(graph.get());
}

} // namespace ir
} // namespace framework
} // namespace paddle

USE_PASS(save_optimized_model_pass);
2 changes: 2 additions & 0 deletions paddle/fluid/framework/ir/xpu/delete_isolated_node_pass.cc
@@ -99,6 +99,7 @@ void DeleteIsolatedNodePass::CollectReservedPersistableNodeNames(
Graph* graph,
std::unordered_set<std::string>* reserved_persistable_node_names) const {
for (auto* node : graph->Nodes()) {
if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
if (!node->IsVar() || !node->Var()->Persistable()) continue;
for (auto* out_node : node->outputs) {
auto op_type = out_node->Op()->Type();
@@ -131,6 +132,7 @@ int DeleteIsolatedNodePass::RemoveIsolatedNodes(
std::unordered_set<const Node*> delete_nodes;
const std::unordered_set<ir::Node*> nodes = graph->Nodes();
for (auto* node : nodes) {
if (!node || node->Name() == "fetch" || node->Name() == "feed") continue;
if (!node->IsVar() || !node->Var()->Persistable()) continue;
auto name = node->Var()->Name();
if (reserved_persistable_node_names.count(name) > 0) continue;
3 changes: 2 additions & 1 deletion paddle/fluid/inference/analysis/argument.h
@@ -146,6 +146,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
DECL_ARGUMENT_FIELD(save_optimized_model, SaveOptimizedModel, bool);
DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);
DECL_ARGUMENT_FIELD(enable_ir_optim, EnableIrOptim, bool);

@@ -294,7 +295,7 @@ struct Argument {
XpuQuantPostDynamicWeightBits,
int);
DECL_ARGUMENT_FIELD(xpu_quant_post_dynamic_op_types,
- XpuQuantPostDynamicOpTypss,
+ XpuQuantPostDynamicOpTypes,
std::vector<std::string>);

DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);
26 changes: 25 additions & 1 deletion paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -310,14 +310,38 @@ void IRPassManager::CreatePasses(Argument *argument,
}
bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
pass->Set("use_fc_padding", new bool(use_fc_padding));
} else if (pass_name == "fused_multi_transformer_xpu_quant_pass") {
} else if (pass_name == "fused_multi_transformer_xpu_pass") {
auto op_types = argument->xpu_quant_post_dynamic_op_types();
if (std::count(op_types.begin(),
op_types.end(),
"fused_multi_transformer") > 0) {
pass->Set("quant_weight_bits",
new int(argument->xpu_quant_post_dynamic_weight_bits()));
}
} else if (pass_name == "save_optimized_model_pass") {
pass->Set("save_optimized_model",
new bool(argument->save_optimized_model()));
std::string optim_cache_dir = argument->optim_cache_dir();
if (!optim_cache_dir.empty()) {
if (!PathExists(optim_cache_dir)) {
PADDLE_ENFORCE_NE(
MKDIR(optim_cache_dir.c_str()),
-1,
platform::errors::PreconditionNotMet(
"Can not create optimize cache directory: %s, Make sure you "
"have permission to write",
optim_cache_dir));
}
pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
} else {
std::string model_opt_cache_dir =
argument->Has("model_dir")
? argument->model_dir()
: GetDirRoot(argument->model_program_path());
pass->Set(
"model_opt_cache_dir",
new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
}
}
pre_pass = pass_name;

5 changes: 4 additions & 1 deletion paddle/fluid/inference/api/analysis_config.cc
@@ -411,7 +411,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(model_dir_);
CP_MEMBER(model_from_memory_); // the memory model reuses prog_file_ and
// params_file_ fields.

CP_MEMBER(save_optimized_model_);
CP_MEMBER(opt_cache_dir_);
CP_MEMBER(prog_file_);
CP_MEMBER(params_file_);
@@ -1048,6 +1048,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << model_dir_;
ss << prog_file_;
ss << params_file_;
ss << save_optimized_model_;

ss << use_gpu_;
ss << enable_gpu_mixed_;
@@ -1373,6 +1374,8 @@ std::string AnalysisConfig::Summary() {
os.InsertRow({"use_cinn_compiler", use_cinn_compiler_ ? "true" : "false"});

// ir info
os.InsertRow(
{"save_optimized_model", save_optimized_model_ ? "true" : "false"});
os.InsertRow({"ir_optim", enable_ir_optim_ ? "true" : "false"});
os.InsertRow({"ir_debug", ir_debug_ ? "true" : "false"});
os.InsertRow({"memory_optim", enable_memory_optim_ ? "true" : "false"});
3 changes: 2 additions & 1 deletion paddle/fluid/inference/api/analysis_predictor.cc
@@ -1316,6 +1316,7 @@ void AnalysisPredictor::PrepareArgument() {
// Analyze inference_program
argument_->SetPredictorID(predictor_id_);
argument_->SetRootPredictorID(root_predictor_id_);
argument_->SetSaveOptimizedModel(config_.save_optimized_model_);
argument_->SetOptimCacheDir(config_.opt_cache_dir_);
if (!config_.model_dir().empty()) {
argument_->SetModelDir(config_.model_dir());
@@ -1485,7 +1486,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_->SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
argument_->SetXpuQuantPostDynamicWeightBits(
config_.xpu_quant_post_dynamic_weight_bits_);
- argument_->SetXpuQuantPostDynamicOpTypss(
+ argument_->SetXpuQuantPostDynamicOpTypes(
config_.xpu_quant_post_dynamic_op_types_);
#endif

9 changes: 9 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -199,6 +199,14 @@ struct PD_INFER_DECL AnalysisConfig {
///
void SetParamsFile(const std::string& x) { params_file_ = x; }

///
/// \brief Enable saving the optimized model.
///
/// \param save_optimized_model whether to save the optimized model.
///
void EnableSaveOptimizedModel(bool save_optimized_model) {
save_optimized_model_ = save_optimized_model;
}
///
/// \brief Set the path of optimization cache directory.
///
@@ -1255,6 +1263,7 @@ struct PD_INFER_DECL AnalysisConfig {
// Variables held by config can take up a lot of memory in some cases.
// So we release the memory when the predictor is set up.
mutable bool is_valid_{true};
bool save_optimized_model_{false};
std::string opt_cache_dir_;
friend class paddle_infer::experimental::InternalUtils;

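For completeness, reloading the artifacts produced by the pass only requires pointing a config at the two generated files. A hedged sketch, not part of this diff: the directory is a placeholder, and `SwitchIrOptim(false)` is optional here and simply skips re-running IR optimization on an already-optimized program.

```cpp
// Hypothetical sketch for loading the files written by save_optimized_model_pass.
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("/path/to/opt_cache/optimized.pdmodel",
                  "/path/to/opt_cache/optimized.pdiparams");
  config.SwitchIrOptim(false);  // the saved program has already been optimized
  auto predictor = paddle_infer::CreatePredictor(config);
  return 0;
}
```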