Fused elementwise kernels and ops (#51427)
* Fused elementwise kernels and ops

* change fuse pass name

* adjust .pbtxt files

* adjust quantization attributes

* add missing arguments and fix others; apply review fixes

* simplify fused kernel registration

* fix elementwise unit tests

* reuse one fused elementwise op

* adjust proto

* Add supported datatypes

* Change 'Scale' to 'scale' in tests, change some tests to onednn

* Revert breaking changes

* Fix unit tests

* Delete obsolete test cases

* Delete commented out code

* Fix codestyle

* delete temporary condition

* fix conflicts and delete duplicate fusing

* Fix code after merge

* Move tests to new directory

* fix test volatility

* Rename test_elementwise_add_onednn_op.py to test_elementwise_add_mkldnn_op.py

* Update CMakeLists.txt: add mkldnn op test

---------

Co-authored-by: Silv3S <slawomir.siwek@intel.com>
HulekJakub and Silv3S authored May 18, 2023
1 parent 26da689 commit fb4a6ec
Showing 33 changed files with 367 additions and 385 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/CMakeLists.txt
@@ -180,7 +180,7 @@ if(WITH_MKLDNN)
pass_library(softplus_activation_onednn_fuse_pass inference DIR mkldnn)
pass_library(shuffle_channel_mkldnn_detect_pass inference DIR mkldnn)
pass_library(fc_act_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(elt_act_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(elementwise_act_onednn_fuse_pass inference DIR mkldnn)
pass_library(matmul_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(matmul_activation_mkldnn_fuse_pass inference DIR mkldnn)
pass_library(layer_norm_onednn_optimization_pass inference DIR mkldnn)
8 changes: 6 additions & 2 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -2260,7 +2260,8 @@ PDNode *patterns::OpRequant::operator()() {
auto any_op = pattern->NewNode(any_op_repr())
->assert_is_op()
->assert_more([&](Node *node) {
return node->Op()->HasAttr("Scale_out") ? true : false;
return (node->Op()->HasAttr("Scale_out") ||
node->Op()->HasAttr("scale_out"));
});
auto requant_in = pattern->NewNode(requant_in_repr())
->assert_is_op_input("requantize", "Input");
@@ -2288,7 +2289,10 @@ PDNode *patterns::RequantOp::operator()() {
->assert_more([&](Node *node) {
return (node->Op()->HasAttr("Scale_in") ||
node->Op()->HasAttr("Scale_x") ||
node->Op()->HasAttr("Scale_y"));
node->Op()->HasAttr("Scale_y") ||
node->Op()->HasAttr("scale_in") ||
node->Op()->HasAttr("scale_x") ||
node->Op()->HasAttr("scale_y"));
});

requant_op->LinksFrom({requant_in}).LinksTo({requant_out});
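The predicates above now accept both the legacy capitalized scale attributes ("Scale_out", "Scale_x", ...) and the lowercase names used by the fused_elementwise_* ops. A minimal standalone sketch of the same idea, using a toy OpDesc stand-in rather than Paddle's actual class:

#include <initializer_list>
#include <string>
#include <unordered_set>

// Toy stand-in for the framework's OpDesc; illustration only.
struct ToyOpDesc {
  std::unordered_set<std::string> attrs;
  bool HasAttr(const std::string& name) const { return attrs.count(name) > 0; }
};

// True if the op carries any of the listed attribute names, so one predicate
// covers both "Scale_out" (legacy ops) and "scale_out" (fused ops).
bool HasAnyScaleAttr(const ToyOpDesc& op,
                     std::initializer_list<const char*> names) {
  for (const char* name : names)
    if (op.HasAttr(name)) return true;
  return false;
}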
12 changes: 6 additions & 6 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -1045,14 +1045,14 @@ void CPUQuantizePass::QuantizeElementwise(
"X",
input_x_scale,
is_x_unsigned,
"Scale_x");
"scale_x");
QuantizeInput(g,
elementwise_op,
elementwise_y,
"Y",
input_y_scale,
is_y_unsigned,
"Scale_y");
"scale_y");

bool is_output_unsigned{false};
auto output_scale =
@@ -1064,7 +1064,7 @@
"Out",
output_scale,
is_output_unsigned,
"Scale_out");
"scale_out");

++quantize_elementwise_count;
};
@@ -1314,9 +1314,9 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizeImmutable(graph, "nearest_interp", "X");
QuantizeImmutable(graph, "nearest_interp_v2", "X");
QuantizeImmutable(graph, "split", "X");
QuantizeElementwise(graph, "elementwise_add");
QuantizeElementwise(graph, "elementwise_mul");
QuantizeElementwise(graph, "elementwise_sub");
QuantizeElementwise(graph, "fused_elementwise_add");
QuantizeElementwise(graph, "fused_elementwise_mul");
QuantizeElementwise(graph, "fused_elementwise_sub");
QuantizeFusionGru(graph);
QuantizeMultiGru(graph);
QuantizeFusionLSTM(graph);
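After this change the quantize pass writes lowercase scale attributes onto the fused elementwise ops it processes. A toy illustration of the resulting attribute set (standalone C++; the values are placeholders, not taken from this commit):

#include <iostream>
#include <map>
#include <string>

int main() {
  // Attribute names QuantizeElementwise now sets on a quantized
  // fused_elementwise_* op; the values here are placeholders.
  const std::map<std::string, float> attrs = {
      {"scale_x", 1.0f},    // quantization scale for input X
      {"scale_y", 1.0f},    // quantization scale for input Y
      {"scale_out", 1.0f},  // quantization scale for output Out
  };
  for (const auto& kv : attrs)
    std::cout << kv.first << " = " << kv.second << "\n";
  return 0;
}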
34 changes: 19 additions & 15 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
@@ -97,14 +97,15 @@ void SetOp(ProgramDesc* prog,
op->SetAttr("Scale_x", 1.0f);
op->SetAttr("Scale_y", 1.0f);
op->SetAttr("Scale_out", 1.0f);
} else if (type == "elementwise_add" || type == "elementwise_mul" ||
type == "elementwise_sub") {
} else if (type == "fused_elementwise_add" ||
type == "fused_elementwise_sub" ||
type == "fused_elementwise_mul") {
op->SetInput("X", {inputs[0]});
if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
op->SetOutput("Out", {outputs[0]});
op->SetAttr("Scale_x", 1.0f);
op->SetAttr("Scale_y", 1.0f);
op->SetAttr("Scale_out", 1.0f);
op->SetAttr("scale_x", 1.0f);
op->SetAttr("scale_y", 1.0f);
op->SetAttr("scale_out", 1.0f);
} else if (type == "fusion_gru") {
op->SetInput("X", {inputs[0]});
op->SetInput("Bias", {inputs[1]});
@@ -178,16 +179,19 @@ void CheckScales(const OpDesc* op, float scale, float shift) {
scale);
scale_names.push_back("Scale_in");
scale_names.push_back("Scale_out");
} else if (type == "fused_matmul" || type == "elementwise_add" ||
type == "elementwise_mul" || type == "elementwise_sub") {
} else if (type == "fused_matmul") {
scale_names.push_back("Scale_x");
scale_names.push_back("Scale_y");
scale_names.push_back("Scale_out");
if (type == "fused_matmul") {
auto const& names = op->InputNames();
if (std::find(names.begin(), names.end(), "ResidualData") != names.end())
scale_names.push_back("Scale_in_eltwise");
}
auto const& names = op->InputNames();
if (std::find(names.begin(), names.end(), "ResidualData") != names.end())
scale_names.push_back("Scale_in_eltwise");
} else if (type == "fused_elementwise_add" ||
type == "fused_elementwise_sub" ||
type == "fused_elementwise_mul") {
scale_names.push_back("scale_x");
scale_names.push_back("scale_y");
scale_names.push_back("scale_out");
} else if (type == "fusion_gru" || type == "fusion_lstm") {
EXPECT_EQ(op->GetAttrIfExists<float>("Shift_data"), shift);
EXPECT_EQ(op->GetAttrIfExists<std::vector<float>>("Scale_weights")[0],
@@ -710,9 +714,9 @@ void TestElementwiseUnsignedAndSignedInput(
}

const std::vector<std::vector<std::string>> elementwises = {
{"elementwise_add", "ElementwiseAdd"},
{"elementwise_mul", "ElementwiseMul"},
{"elementwise_sub", "ElementwiseSub"}};
{"fused_elementwise_add", "FusedElementwiseAdd"},
{"fused_elementwise_mul", "FusedElementwiseMul"},
{"fused_elementwise_sub", "FusedElementwiseSub"}};

class TestElementwises
: public testing::TestWithParam<std::vector<std::string>> {};
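The elementwises vector feeds the parameterized suite declared just above. A hedged sketch of how such a suite could be instantiated with googletest — the real file's instantiation call sits outside the visible hunk, so the test body and suite name here are illustrative:

#include <gtest/gtest.h>
#include <string>
#include <vector>

class TestElementwises
    : public testing::TestWithParam<std::vector<std::string>> {};

TEST_P(TestElementwises, UsesFusedOpName) {
  const std::vector<std::string>& param = GetParam();
  ASSERT_EQ(param.size(), 2u);
  // Every tested op type is now the fused variant.
  EXPECT_EQ(param[0].rfind("fused_elementwise_", 0), 0u);
}

INSTANTIATE_TEST_SUITE_P(
    CPUQuantizePass,
    TestElementwises,
    testing::Values(
        std::vector<std::string>{"fused_elementwise_add", "FusedElementwiseAdd"},
        std::vector<std::string>{"fused_elementwise_mul", "FusedElementwiseMul"},
        std::vector<std::string>{"fused_elementwise_sub", "FusedElementwiseSub"}));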
10 changes: 4 additions & 6 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
@@ -32,6 +32,9 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
"fused_conv2d",
"fused_conv3d",
"fused_matmul",
"fused_elementwise_add",
"fused_elementwise_mul",
"fused_elementwise_sub",
"elementwise_add",
"elementwise_mul",
"elementwise_sub",
@@ -87,12 +90,7 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
return;
}

// Remove this condition when all fused_elementwise ops are merged
if (!(op->Op()->Type() == "elementwise_add" ||
op->Op()->Type() == "elementwise_sub" ||
op->Op()->Type() == "elementwise_mul")) {
ConvertToFusedOp(op->Op());
}
ConvertToFusedOp(op->Op());
op->Op()->SetAttr("mkldnn_data_type", std::string("int8"));
};
gpd(graph, handler);
paddle/fluid/framework/ir/mkldnn/{elt_act_mkldnn_fuse_pass.cc → elementwise_act_onednn_fuse_pass.cc}
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.h"
#include "paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h"

#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h"
@@ -62,6 +62,7 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct(
GET_IR_NODE_FROM_SUBGRAPH(
activation_out, activation_out, elementwise_act_pattern);

ConvertToFusedOp(elementwise->Op());
SetActivationAttrs(elementwise->Op(), activation->Op(), act_type);
elementwise->Op()->SetOutput("Out", {activation_out->Name()});

@@ -84,9 +85,9 @@
} // namespace framework
} // namespace paddle

REGISTER_PASS(elt_act_mkldnn_fuse_pass,
REGISTER_PASS(elementwise_act_onednn_fuse_pass,
paddle::framework::ir::ElementwiseActivationOneDNNPass);
REGISTER_PASS_CAPABILITY(elt_act_mkldnn_fuse_pass)
REGISTER_PASS_CAPABILITY(elementwise_act_onednn_fuse_pass)
.AddCombination(
paddle::framework::compatible::OpVersionComparatorCombination()
.LE("elementwise_add", 1)
2 changes: 2 additions & 0 deletions paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h
@@ -148,6 +148,8 @@ inline void ConvertToFusedOp(OpDesc* op) {
const std::map<std::string, std::string> fused_ops = {
{"conv2d", "fused_conv2d"},
{"depthwise_conv2d", "fused_conv2d"},
{"elementwise_add", "fused_elementwise_add"},
{"elementwise_sub", "fused_elementwise_sub"},
{"elementwise_mul", "fused_elementwise_mul"},
{"elementwise_div", "fused_elementwise_div"},
{"matmul", "fused_matmul"},
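ConvertToFusedOp is essentially a lookup in this map followed by a type rename, with ops lacking a fused counterpart left untouched. A standalone model of that behavior (toy code, not the Paddle implementation):

#include <iostream>
#include <map>
#include <string>

// Mirrors the fused_ops table above: base op type -> fused op type.
std::string ToFusedType(const std::string& type) {
  static const std::map<std::string, std::string> fused_ops = {
      {"conv2d", "fused_conv2d"},
      {"depthwise_conv2d", "fused_conv2d"},
      {"elementwise_add", "fused_elementwise_add"},
      {"elementwise_sub", "fused_elementwise_sub"},
      {"elementwise_mul", "fused_elementwise_mul"},
      {"elementwise_div", "fused_elementwise_div"},
      {"matmul", "fused_matmul"},
  };
  auto it = fused_ops.find(type);
  return it == fused_ops.end() ? type : it->second;  // unknown types pass through
}

int main() {
  std::cout << ToFusedType("elementwise_add") << "\n";  // fused_elementwise_add
  std::cout << ToFusedType("relu") << "\n";             // relu (unchanged)
  return 0;
}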
paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc
@@ -31,6 +31,8 @@ void FuseOperatorScaleOneDNNPass::ApplyImpl(Graph *graph) const {
"fused_matmul",
"matmul",
"matmul_v2",
"fused_elementwise_add",
"fused_elementwise_sub",
"fused_elementwise_mul",
"fused_elementwise_div",
"elementwise_add",
@@ -119,6 +121,8 @@ REGISTER_PASS_CAPABILITY(operator_scale_onednn_fuse_pass)
.EQ("fused_matmul", 0)
.LE("matmul", 1)
.EQ("matmul_v2", 0)
.EQ("fused_elementwise_add", 0)
.EQ("fused_elementwise_sub", 0)
.EQ("fused_elementwise_mul", 0)
.EQ("fused_elementwise_div", 0)
.LE("elementwise_add", 1)
@@ -16,7 +16,6 @@

#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/phi/backends/onednn/onednn_reuse.h"
#include "paddle/utils/string/pretty_log.h"

namespace paddle {
2 changes: 1 addition & 1 deletion paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -368,7 +368,7 @@ void CpuPassStrategy::EnableMKLDNN() {
"batch_norm_act_fuse_pass", //
"softplus_activation_onednn_fuse_pass", //
"shuffle_channel_mkldnn_detect_pass", //
"elt_act_mkldnn_fuse_pass", //
"elementwise_act_onednn_fuse_pass", //
"layer_norm_onednn_optimization_pass", //
"operator_scale_onednn_fuse_pass", //
"operator_unsqueeze2_onednn_fuse_pass", //
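The renamed pass is scheduled by CpuPassStrategy::EnableMKLDNN(), so it runs whenever MKLDNN (oneDNN) is enabled on the inference config. A hedged usage sketch with the paddle_infer C++ API — the model path is a placeholder and the header path may vary by install layout:

#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config("./model_dir");  // placeholder model directory
  // Enables the CPU MKLDNN pass strategy, which now schedules
  // elementwise_act_onednn_fuse_pass in place of elt_act_mkldnn_fuse_pass.
  config.EnableMKLDNN();
  auto predictor = paddle_infer::CreatePredictor(config);
  (void)predictor;
  return 0;
}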
8 changes: 0 additions & 8 deletions paddle/fluid/operators/compat/elementwise_add.pbtxt
@@ -22,14 +22,6 @@ extra {
attrs {
name: "Out0_threshold"
type: FLOAT
}
attrs {
name: "x_data_format"
type: STRING
}
attrs {
name: "y_data_format"
type: STRING
}
attrs {
name: "Scale_x"
10 changes: 1 addition & 9 deletions paddle/fluid/operators/compat/elementwise_div.pbtxt
@@ -16,11 +16,7 @@ def {
}
extra {
attrs {
name: "x_data_format"
type: STRING
}
attrs {
name: "y_data_format"
name: "act"
type: STRING
}
attrs {
@@ -35,8 +31,4 @@ extra {
name: "Scale_out"
type: FLOAT
}
attrs {
name: "act"
type: STRING
}
}
10 changes: 0 additions & 10 deletions paddle/fluid/operators/compat/elementwise_mul.pbtxt
@@ -13,16 +13,6 @@ def {
name: "axis"
type: INT
}
}
extra {
attrs {
name: "x_data_format"
type: STRING
}
attrs {
name: "y_data_format"
type: STRING
}
attrs {
name: "Scale_x"
type: FLOAT
10 changes: 1 addition & 9 deletions paddle/fluid/operators/compat/elementwise_pow.pbtxt
@@ -16,11 +16,7 @@ def {
}
extra {
attrs {
name: "x_data_format"
type: STRING
}
attrs {
name: "y_data_format"
name: "act"
type: STRING
}
attrs {
@@ -35,8 +31,4 @@ extra {
name: "Scale_out"
type: FLOAT
}
attrs {
name: "act"
type: STRING
}
}
10 changes: 1 addition & 9 deletions paddle/fluid/operators/compat/elementwise_sub.pbtxt
@@ -16,11 +16,7 @@ def {
}
extra {
attrs {
name: "x_data_format"
type: STRING
}
attrs {
name: "y_data_format"
name: "act"
type: STRING
}
attrs {
@@ -35,8 +31,4 @@ extra {
name: "Scale_out"
type: FLOAT
}
attrs {
name: "act"
type: STRING
}
}
50 changes: 50 additions & 0 deletions paddle/fluid/operators/compat/fused_elementwise_add.pbtxt
@@ -0,0 +1,50 @@
type: "fused_elementwise_add"
def {
inputs {
name: "X"
}
inputs {
name: "Y"
}
outputs {
name: "Out"
}
attrs {
name: "axis"
type: INT
}
}
extra {
attrs {
name: "fuse_activation"
type: STRING
}
attrs {
name: "fuse_alpha"
type: FLOAT
}
attrs {
name: "fuse_beta"
type: FLOAT
}
attrs {
name: "fused_output_scale"
type: FLOAT
}
attrs {
name: "fused_unsqueeze2_axes"
type: INTS
}
attrs {
name: "scale_x"
type: FLOAT
}
attrs {
name: "scale_y"
type: FLOAT
}
attrs {
name: "scale_out"
type: FLOAT
}
}