Skip to content

Commit

Permalink
merge develop
Browse files Browse the repository at this point in the history
  • Loading branch information
YuanRisheng committed Jan 5, 2022
2 parents c68778b + 7a4a512 commit 3279a2b
Show file tree
Hide file tree
Showing 58 changed files with 1,910 additions and 678 deletions.
37 changes: 28 additions & 9 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -1641,6 +1641,32 @@ PDNode *patterns::Slice::operator()() {
return slice_out;
}

// Builds the detection pattern prev_op -> X -> nearest_interp(_v2) -> Out
// -> next_op and returns the output node of the interpolation op.
PDNode *patterns::NearestInterp::operator()() {
  // Producer feeding the interpolation input.
  auto producer = pattern->NewNode(prev_op_repr())->assert_is_op();

  // "X" input variable of nearest_interp / nearest_interp_v2.
  auto interp_in =
      pattern->NewNode(nearest_interp_in_repr())
          ->AsInput()
          ->assert_is_ops_input({"nearest_interp", "nearest_interp_v2"}, "X");

  // The interpolation operator itself (either version).
  auto interp_op =
      pattern->NewNode(nearest_interp_op_repr())
          ->assert_is_ops({"nearest_interp", "nearest_interp_v2"});

  // "Out" output variable of the interpolation op.
  auto interp_out =
      pattern->NewNode(nearest_interp_out_repr())
          ->AsOutput()
          ->assert_is_ops_output({"nearest_interp", "nearest_interp_v2"},
                                 "Out");

  // Consumer of the interpolation output.
  auto consumer = pattern->NewNode(next_op_repr())->assert_is_op();

  // Wire the chain together.
  producer->LinksTo({interp_in});
  interp_op->LinksFrom({interp_in}).LinksTo({interp_out});
  consumer->LinksFrom({interp_out});
  return interp_out;
}

PDNode *patterns::Matmul::operator()() {
auto matmul_op = pattern->NewNode(matmul_op_repr())->assert_is_op("matmul");

Expand Down Expand Up @@ -2376,15 +2402,8 @@ PDNode *patterns::MultipleQuantize::operator()() {

// Matches any single op whose type is in quantize_enabled_op_types so the
// placement pass can mark it for int8 quantization.
//
// The caller is responsible for supplying a non-empty, validated set of op
// types; the fallback to the hard-coded supported set (and its validation)
// lives in CPUQuantizePlacementPass::ApplyImpl. The scraped diff showed the
// removed fallback body alongside the new one, which redefined `op`; only
// the merged version is kept here.
PDNode *patterns::QuantizePlacement::operator()(
    const std::unordered_set<std::string> &quantize_enabled_op_types) {
  auto *op =
      pattern->NewNode(op_repr())->assert_is_ops(quantize_enabled_op_types);
  return op;
}

Expand Down
15 changes: 15 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.h
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,21 @@ struct Slice : public PatternBase {
PATTERN_DECL_NODE(next_op);
};

// Nearest Interp op
// Forward pass for nearest_interp.
// nearest_interp_out is a result of the operator.
struct NearestInterp : public PatternBase {
  NearestInterp(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "nearest_interp") {}

  // Builds the pattern and returns the interp output node.
  PDNode* operator()();
  PATTERN_DECL_NODE(prev_op);             // producer of the interp input
  PATTERN_DECL_NODE(nearest_interp_in);   // "X" input variable
  PATTERN_DECL_NODE(nearest_interp_op);   // nearest_interp / nearest_interp_v2 op
  PATTERN_DECL_NODE(nearest_interp_out);  // "Out" output variable
  PATTERN_DECL_NODE(next_op);             // consumer of the interp output
};

// Matmul op
// Forward pass for matmul.
struct Matmul : public PatternBase {
Expand Down
62 changes: 62 additions & 0 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,67 @@ void CPUQuantizePass::QuantizeFusionLSTM(Graph* graph) const {
PrettyLogDetail("--- quantized %d fusion_lstm ops", quantize_count);
}

// Inserts int8 quantize/dequantize ops around each nearest_interp /
// nearest_interp_v2 op that is marked for int8 and has at least one
// quantized neighbour and known scales for its input and output tensors.
void CPUQuantizePass::QuantizeNearestInterp(Graph* graph) const {
  GraphPatternDetector gpd;
  auto pattern = gpd.mutable_pattern();
  patterns::NearestInterp nearest_interp_pattern{pattern, name_scope_};
  nearest_interp_pattern();

  int quantize_nearest_interp_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "Quantize nearest_interp op";
    GET_IR_NODE_FROM_SUBGRAPH(nearest_interp_op, nearest_interp_op,
                              nearest_interp_pattern);

    // skip if should not be quantized
    if (!platform::HasOpINT8DataType(nearest_interp_op->Op())) {
      LogQuantizationDisabled(nearest_interp_op);
      return;
    }
    GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, nearest_interp_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, nearest_interp_pattern);

    // skip if prev op and next op is not quantized
    if (!(IsOpDequantized(prev_op)) && !(IsOpQuantized(next_op))) {
      LogCannotQuantizeOp(nearest_interp_op,
                          "There are no other quantized operators nearby, so "
                          "quantization is not recommended.");
      return;
    }

    GET_IR_NODE_FROM_SUBGRAPH(nearest_interp_in, nearest_interp_in,
                              nearest_interp_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(nearest_interp_out, nearest_interp_out,
                              nearest_interp_pattern);

    // Scales must be available for both tensors; otherwise the op is left
    // unquantized.
    if (!AreScalesPresentForNodes({nearest_interp_in, nearest_interp_out})) {
      LogCannotQuantizeOp(nearest_interp_op);
      return;
    }

    bool is_input_unsigned{false};
    auto input_scale =
        GetScaleValueForNode(nearest_interp_in, &is_input_unsigned);
    // Insert a quantize op in front of the "X" input.
    QuantizeInput(g, nearest_interp_op, nearest_interp_in, "X", input_scale,
                  is_input_unsigned);

    bool is_output_unsigned{false};
    auto output_scale =
        GetScaleValueForNode(nearest_interp_out, &is_output_unsigned);
    // Insert a dequantize op after the "Out" output.
    DequantizeOutput(g, nearest_interp_op, nearest_interp_out, "Out",
                     output_scale, is_output_unsigned);

    ++quantize_nearest_interp_count;
  };

  gpd(graph, handler);
  AddStatis(quantize_nearest_interp_count);

  PrettyLogDetail("--- quantized %d nearest_interp ops",
                  quantize_nearest_interp_count);
}

void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Quantizing the graph.";
PADDLE_ENFORCE_NOT_NULL(
Expand All @@ -1076,6 +1137,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizeMultiGru(graph);
QuantizeFusionLSTM(graph);
QuantizeSlice(graph);
QuantizeNearestInterp(graph);
}

} // namespace ir
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class CPUQuantizePass : public FusePassBase {
void QuantizeMultiGru(Graph* graph) const;
void QuantizeFusionLSTM(Graph* graph) const;
void QuantizeSlice(Graph* graph) const;
void QuantizeNearestInterp(Graph* graph) const;

void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
double scale_to_one, bool is_input_unsigned,
Expand Down
15 changes: 14 additions & 1 deletion paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
op->SetAttr("Scale_in", 1.0f);
op->SetAttr("Scale_out", 1.0f);
op->SetAttr("Scale_weights", std::vector<float>{1.0f});
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2") {
} else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
type == "nearest_interp" || type == "nearest_interp_v2") {
op->SetInput("X", {inputs[0]});
op->SetOutput("Out", {outputs[0]});
} else if (type == "slice") {
Expand Down Expand Up @@ -434,6 +435,18 @@ TEST(CpuQuantizePass, sliceBetweenNonQuantizedOp) {
TestImmutableOpBetweenNonQuantizedOp("slice");
}

// nearest_interp / nearest_interp_v2 are exercised through the shared
// "immutable op" helpers: quantized when a quantized neighbour exists,
// left untouched otherwise.
TEST(CpuQuantizePass, nearestInterp) { TestImmutableOp("nearest_interp"); }

// With no quantized neighbours, the pass must leave the graph unchanged.
TEST(CpuQuantizePass, nearestInterpBetweenNonQuantizedOp) {
  TestImmutableOpBetweenNonQuantizedOp("nearest_interp");
}

TEST(CpuQuantizePass, nearestInterpV2) { TestImmutableOp("nearest_interp_v2"); }

TEST(CpuQuantizePass, nearestInterpV2BetweenNonQuantizedOp) {
  TestImmutableOpBetweenNonQuantizedOp("nearest_interp_v2");
}

static const std::initializer_list<std::string> variable_names_matmul = {
"a", "b", "c", "d", "e", "f"};

Expand Down
33 changes: 22 additions & 11 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h"

#include <unordered_set>

namespace paddle {
Expand All @@ -23,15 +24,34 @@ class Graph;

void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Marks operators which are to be quantized.";
std::unordered_set<std::string> supported_op_types =
std::unordered_set<std::string>(
{"concat", "conv2d", "depthwise_conv2d", "elementwise_add", "fc",
"matmul", "nearest_interp", "nearest_interp_v2", "pool2d",
"prior_box", "reshape2", "transpose2", "fusion_gru", "fusion_lstm",
"multi_gru", "slice"});
const auto& excluded_ids_list =
Get<std::unordered_set<int>>("quantize_excluded_op_ids");
const auto& op_types_list =
Get<std::unordered_set<std::string>>("quantize_enabled_op_types");

if (!op_types_list.empty()) {
// Verify that all user-specified operators can be quantized.
for (const auto& op : op_types_list) {
PADDLE_ENFORCE_NE(
supported_op_types.count(op), 0,
platform::errors::InvalidArgument(
"Pass attribute quantize_enabled_op_types contains operator %s "
"that is not supported by OneDNN quantization.",
op));
}
supported_op_types = op_types_list;
}
Init(name_scope_, graph);
GraphPatternDetector gpd;
patterns::QuantizePlacement quantize_placement_pattern{gpd.mutable_pattern(),
"quantize_placement"};
quantize_placement_pattern(op_types_list);
quantize_placement_pattern(supported_op_types);

auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
Expand All @@ -46,16 +66,7 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
return;
}

if (op->Op()->HasAttr("mkldnn_data_type") ||
op->Op()->HasProtoAttr("mkldnn_data_type")) {
// use_quantizer is no longer used
// assign value for compatibility
if (op->Op()->GetAttrIfExists<bool>("use_quantizer")) {
op->Op()->SetAttr("mkldnn_data_type", std::string("int8"));
}
op->Op()->SetAttr("mkldnn_data_type", std::string("int8"));
op->Op()->SetAttr("use_quantizer", true);
}
op->Op()->SetAttr("mkldnn_data_type", std::string("int8"));
};
gpd(graph, handler);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,32 @@ TEST(QuantizerPlacementPass, default_attr_value) {
DefaultAttrTest(5);
}

void EnabledOpTypesTest(
std::initializer_list<std::string> quantize_enabled_op_types,
std::string missing_op) {
auto prog = BuildProgramDesc();
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

auto pass = PassRegistry::Instance().Get("cpu_quantize_placement_pass");
pass->Set("quantize_enabled_op_types",
new std::unordered_set<std::string>(quantize_enabled_op_types));

try {
graph.reset(pass->Apply(graph.release()));
} catch (paddle::platform::EnforceNotMet& err) {
std::string ex_msg = err.what();
std::string expected_msg =
"Pass attribute quantize_enabled_op_types contains operator " +
missing_op + " that is not supported by OneDNN quantization.";
EXPECT_TRUE(ex_msg.find(expected_msg) != std::string::npos);
}
}

// A quantize_enabled_op_types list containing an op outside the pass's
// supported set must be rejected with an InvalidArgument error.
TEST(QuantizerPlacementPass, unsupported_op_type) {
  // Dropout op is not supported by OneDNN quantization
  EnabledOpTypesTest({"conv2d", "dropout"}, "dropout");
}

} // namespace ir
} // namespace framework
} // namespace paddle
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/inference/api/mkldnn_quantizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs(
} else if (op->Type() == "relu") {
is_unsigned = true;
} else if (op->Type() == "transpose2" || op->Type() == "reshape2" ||
op->Type() == "pool2d") {
op->Type() == "pool2d" || op->Type() == "nearest_interp" ||
op->Type() == "nearest_interp_v2") {
auto input_var_name = op->Input("X")[0];
PADDLE_ENFORCE_NE(scales_.find(input_var_name), scales_.end(),
platform::errors::PreconditionNotMet(
Expand Down
12 changes: 12 additions & 0 deletions paddle/fluid/inference/api/mkldnn_quantizer_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,18 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
rules_["fusion_lstm"]["ReorderedC0"] = ScaleAlgo::NONE;
rules_["fusion_lstm"]["CheckedCell"] = ScaleAlgo::NONE;
rules_["fusion_lstm"]["Hidden"] = ScaleAlgo::KL;

rules_["nearest_interp"]["X"] = ScaleAlgo::KL;
rules_["nearest_interp"]["OutSize"] = ScaleAlgo::NONE;
rules_["nearest_interp"]["SizeTensor"] = ScaleAlgo::NONE;
rules_["nearest_interp"]["Scale"] = ScaleAlgo::NONE;
rules_["nearest_interp"]["Out"] = ScaleAlgo::NONE;

rules_["nearest_interp_v2"]["X"] = ScaleAlgo::KL;
rules_["nearest_interp_v2"]["OutSize"] = ScaleAlgo::NONE;
rules_["nearest_interp_v2"]["SizeTensor"] = ScaleAlgo::NONE;
rules_["nearest_interp_v2"]["Scale"] = ScaleAlgo::NONE;
rules_["nearest_interp_v2"]["Out"] = ScaleAlgo::NONE;
}

ScaleAlgo MkldnnQuantizerConfig::scale_algo(
Expand Down
6 changes: 2 additions & 4 deletions paddle/fluid/inference/capi_exp/pd_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -459,12 +459,10 @@ __pd_give PD_OneDimArrayCstr* PD_ConfigAllPasses(
std::vector<std::string> passes = config->pass_builder()->AllPasses();
return paddle_infer::CvtVecToOneDimArrayCstr(passes);
}
const char* PD_ConfigSummary(__pd_keep PD_Config* pd_config) {
__pd_give PD_Cstr* PD_ConfigSummary(__pd_keep PD_Config* pd_config) {
CHECK_AND_CONVERT_PD_CONFIG;
auto sum_str = config->Summary();
char* c = reinterpret_cast<char*>(malloc(sum_str.length() + 1));
snprintf(c, sum_str.length() + 1, "%s", sum_str.c_str());
return c;
return paddle_infer::CvtStrToCstr(sum_str);
}

} // extern "C"
2 changes: 1 addition & 1 deletion paddle/fluid/inference/capi_exp/pd_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ PADDLE_CAPI_EXPORT extern __pd_give PD_OneDimArrayCstr* PD_ConfigAllPasses(
///
/// \return Return config info.
///
// Declaration updated to the PD_Cstr-returning signature; the obsolete
// const char* declaration from the diff juxtaposition is removed so the
// header declares PD_ConfigSummary exactly once.
PADDLE_CAPI_EXPORT extern __pd_give PD_Cstr* PD_ConfigSummary(
    __pd_keep PD_Config* pd_config);

#ifdef __cplusplus
Expand Down
5 changes: 5 additions & 0 deletions paddle/fluid/inference/capi_exp/pd_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ typedef struct PD_OneDimArrayCstr {
char** data;
} PD_OneDimArrayCstr; // std::vector<std::string>

typedef struct PD_Cstr {
  size_t size;  // bytes in data including the trailing '\0'; 0 when empty
  char* data;   // owned, NUL-terminated buffer; NULL when size == 0
} PD_Cstr;  // std::string

typedef struct PD_TwoDimArraySize {
size_t size;
PD_OneDimArraySize** data;
Expand Down
23 changes: 23 additions & 0 deletions paddle/fluid/inference/capi_exp/pd_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,17 @@ void PD_OneDimArrayCstrDestroy(__pd_take PD_OneDimArrayCstr* array) {
delete array;
}
}

// Releases a PD_Cstr produced by the C API (see CvtStrToCstr). Safe to
// call with NULL; an empty PD_Cstr (size == 0) owns no character buffer.
void PD_CstrDestroy(__pd_take PD_Cstr* cstr) {
  if (cstr == NULL) {
    return;
  }
  if (cstr->size != 0) {
    cstr->size = 0;
    delete[] cstr->data;
    cstr->data = NULL;
  }
  delete cstr;
}
namespace paddle_infer {

__pd_give PD_OneDimArrayCstr* CvtVecToOneDimArrayCstr(
Expand All @@ -101,6 +112,18 @@ std::vector<std::string> CvtOneDimArrayToVecCstr(
return vec;
}

// Copies str into a heap-allocated PD_Cstr. An empty string maps to
// {size = 0, data = NULL}; otherwise size counts the trailing '\0' and
// data owns a NUL-terminated copy. Release with PD_CstrDestroy.
__pd_give PD_Cstr* CvtStrToCstr(const std::string& str) {
  auto* result = new PD_Cstr;
  if (str.empty()) {
    result->size = 0;
    result->data = NULL;
    return result;
  }
  const size_t buf_len = str.length() + 1;
  result->size = buf_len;
  result->data = new char[buf_len];
  memcpy(result->data, str.c_str(), buf_len);
  return result;
}
} // namespace paddle_infer

#define DESTROY_TWO_DIM_ARRAY(type) \
Expand Down
9 changes: 9 additions & 0 deletions paddle/fluid/inference/capi_exp/pd_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ PADDLE_CAPI_EXPORT extern void PD_OneDimArraySizeDestroy(
PADDLE_CAPI_EXPORT extern void PD_TwoDimArraySizeDestroy(
__pd_take PD_TwoDimArraySize* array);

///
/// \brief Destroy the PD_Cstr object pointed to by the pointer.
/// Frees the owned character buffer (when present) and the PD_Cstr itself.
/// NOTE: a PD_Cstr created from an empty string has size 0 and NULL data;
/// destroying such an object is still safe.
///
/// \param[in] cstr pointer to the PD_Cstr object.
///
PADDLE_CAPI_EXPORT extern void PD_CstrDestroy(__pd_take PD_Cstr* cstr);

#ifdef __cplusplus
} // extern "C"
#endif
Loading

1 comment on commit 3279a2b

@paddle-bot-old
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Congratulations! Your pull request passed all required CI checks. You can now ask the reviewer(s) to approve and merge. 🎉

Please sign in to comment.