From bd117a064e4875c64479c8d9992b98cf62b41b5f Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Thu, 9 Mar 2023 10:29:43 +0100 Subject: [PATCH 01/23] Fused elementwises kernels and ops --- paddle/fluid/framework/ir/CMakeLists.txt | 2 +- ...se_pass.cc => elt_act_onednn_fuse_pass.cc} | 7 +- ...fuse_pass.h => elt_act_onednn_fuse_pass.h} | 0 .../framework/ir/mkldnn/mkldnn_pass_util.h | 6 +- .../inference/api/paddle_pass_builder.cc | 2 +- .../compat/fused_elementwise_add.pbtxt | 50 ++++ .../compat/fused_elementwise_div.pbtxt | 50 ++++ .../compat/fused_elementwise_mul.pbtxt | 50 ++++ .../compat/fused_elementwise_sub.pbtxt | 51 +++++ .../fused/fused_elementwise_add_op.cc | 56 +++++ .../fused/fused_elementwise_div_op.cc | 56 +++++ .../fused/fused_elementwise_mul_op.cc | 56 +++++ .../operators/fused/fused_elementwise_op.h | 119 ++++++++++ .../fused/fused_elementwise_sub_op.cc | 56 +++++ .../fusion/onednn/fused_elementwise_kernel.cc | 213 ++++++++++++++++++ .../phi/kernels/onednn/elementwise_kernel.cc | 29 +-- .../phi/ops/compat/fused_elementwise_sig.cc | 145 ++++++++++++ .../test_mkldnn_elt_act_fuse_pass.py | 2 +- ...nn_elementwise_add_activation_fuse_pass.py | 6 +- 19 files changed, 921 insertions(+), 35 deletions(-) rename paddle/fluid/framework/ir/mkldnn/{elt_act_mkldnn_fuse_pass.cc => elt_act_onednn_fuse_pass.cc} (95%) rename paddle/fluid/framework/ir/mkldnn/{elt_act_mkldnn_fuse_pass.h => elt_act_onednn_fuse_pass.h} (100%) create mode 100644 paddle/fluid/operators/compat/fused_elementwise_add.pbtxt create mode 100644 paddle/fluid/operators/compat/fused_elementwise_div.pbtxt create mode 100644 paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt create mode 100644 paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt create mode 100644 paddle/fluid/operators/fused/fused_elementwise_add_op.cc create mode 100644 paddle/fluid/operators/fused/fused_elementwise_div_op.cc create mode 100644 paddle/fluid/operators/fused/fused_elementwise_mul_op.cc create mode 100644 paddle/fluid/operators/fused/fused_elementwise_op.h create mode 100644 paddle/fluid/operators/fused/fused_elementwise_sub_op.cc create mode 100644 paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc create mode 100644 paddle/phi/ops/compat/fused_elementwise_sig.cc diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index e602b899fe62e6..13266b12a3b9ff 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -175,7 +175,7 @@ if(WITH_MKLDNN) pass_library(softplus_activation_mkldnn_fuse_pass inference DIR mkldnn) pass_library(shuffle_channel_mkldnn_detect_pass inference DIR mkldnn) pass_library(fc_act_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(elt_act_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(elt_act_onednn_fuse_pass inference DIR mkldnn) pass_library(matmul_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) pass_library(matmul_activation_mkldnn_fuse_pass inference DIR mkldnn) pass_library(layer_norm_onednn_optimization_pass inference DIR mkldnn) diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc rename to paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.cc index 4b3f6a95d6dd17..9999ee3e09a3d0 100644 --- a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc +++ 
b/paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" @@ -62,6 +62,7 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( GET_IR_NODE_FROM_SUBGRAPH( activation_out, activation_out, elementwise_act_pattern); + ConvertToFusedOp(elementwise->Op()); SetActivationAttrs(elementwise->Op(), activation->Op(), act_type); elementwise->Op()->SetOutput("Out", {activation_out->Name()}); @@ -84,9 +85,9 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( } // namespace framework } // namespace paddle -REGISTER_PASS(elt_act_mkldnn_fuse_pass, +REGISTER_PASS(elt_act_onednn_fuse_pass, paddle::framework::ir::ElementwiseActivationOneDNNPass); -REGISTER_PASS_CAPABILITY(elt_act_mkldnn_fuse_pass) +REGISTER_PASS_CAPABILITY(elt_act_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("elementwise_add", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.h rename to paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.h diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h index 142bb9adb68dca..f946948428a4a2 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h @@ -160,7 +160,11 @@ inline void ConvertToFusedOp(OpDesc* op) { {"conv2d", "fused_conv2d"}, {"depthwise_conv2d", "fused_conv2d"}, {"matmul", "fused_matmul"}, - {"matmul_v2", "fused_matmul"}}; + {"matmul_v2", "fused_matmul"}, + {"elementwise_add", "fused_elementwise_add"}, + {"elementwise_sub", "fused_elementwise_sub"}, + {"elementwise_mul", "fused_elementwise_mul"}, + {"elementwise_mul", "fused_elementwise_div"}}; if (op->Type() == "matmul") { op->SetAttr("trans_x", op->GetAttr("transpose_X")); diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 23fdaf3ddff334..562c3502ce6d32 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -377,7 +377,7 @@ void CpuPassStrategy::EnableMKLDNN() { "batch_norm_act_fuse_pass", // "softplus_activation_mkldnn_fuse_pass", // "shuffle_channel_mkldnn_detect_pass", // - "elt_act_mkldnn_fuse_pass", // + "elt_act_onednn_fuse_pass", // "layer_norm_onednn_optimization_pass", // "operator_scale_onednn_fuse_pass", // "operator_unsqueeze2_onednn_fuse_pass", // diff --git a/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt new file mode 100644 index 00000000000000..3514e06839bdc1 --- /dev/null +++ b/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt @@ -0,0 +1,50 @@ +type: "fused_elementwise_add" +def { + inputs { + name: "X" + } + inputs { + name: "Y" + } + outputs { + name: "Out" + } + attrs { + name: "axis" + type: INT + } +} +extra { + attrs { + name: "x_data_format" + type: STRING + } + attrs { + name: "y_data_format" + type: STRING + } + attrs 
{ + name: "alpha" + type: FLOAT + } + attrs { + name: "beta" + type: FLOAT + } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } + attrs { + name: "fuse_activation" + type: STRING + } +} diff --git a/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt new file mode 100644 index 00000000000000..0690a2983e9688 --- /dev/null +++ b/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt @@ -0,0 +1,50 @@ +type: "fused_elementwise_div" +def { + inputs { + name: "X" + } + inputs { + name: "Y" + } + outputs { + name: "Out" + } + attrs { + name: "axis" + type: INT + } +} +extra { + attrs { + name: "x_data_format" + type: STRING + } + attrs { + name: "y_data_format" + type: STRING + } + attrs { + name: "alpha" + type: FLOAT + } + attrs { + name: "beta" + type: FLOAT + } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } + attrs { + name: "fuse_activation" + type: STRING + } +} diff --git a/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt new file mode 100644 index 00000000000000..5a1a27d38fc0fd --- /dev/null +++ b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt @@ -0,0 +1,50 @@ +type: "fused_elementwise_mul" +def { + inputs { + name: "X" + } + inputs { + name: "Y" + } + outputs { + name: "Out" + } + attrs { + name: "axis" + type: INT + } +} +extra { + attrs { + name: "x_data_format" + type: STRING + } + attrs { + name: "y_data_format" + type: STRING + } + attrs { + name: "alpha" + type: FLOAT + } + attrs { + name: "beta" + type: FLOAT + } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } + attrs { + name: "fuse_activation" + type: STRING + } +} diff --git a/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt new file mode 100644 index 00000000000000..c12b3ca5f3a41b --- /dev/null +++ b/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt @@ -0,0 +1,51 @@ +type: "fused_elementwise_sub" +def { + inputs { + name: "X" + } + inputs { + name: "Y" + } + outputs { + name: "Out" + } + attrs { + name: "axis" + type: INT + } +} +extra { + attrs { + name: "x_data_format" + type: STRING + } + attrs { + name: "y_data_format" + type: STRING + } + attrs { + name: "alpha" + type: FLOAT + } + attrs { + name: "beta" + type: FLOAT + } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } + attrs { + name: "fuse_activation" + type: STRING + } + } +} diff --git a/paddle/fluid/operators/fused/fused_elementwise_add_op.cc b/paddle/fluid/operators/fused/fused_elementwise_add_op.cc new file mode 100644 index 00000000000000..e8b689a9eb1d37 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_elementwise_add_op.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" +#include "paddle/fluid/operators/fused/fused_elementwise_op.h" + +namespace paddle { +namespace operators { + +class FusedElementwiseAddOpMaker : public FusedElementwiseOpMaker { + protected: + std::string GetName() const override { return "Add"; } + std::string GetEquation() const override { return "Out = X + Y"; } + + void AddInputX() override { + AddInput( + "X", + "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " + "should be int32, int64, float32, float64."); + } + + void AddInputY() override { + AddInput( + "Y", + "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " + "should be int32, int64, float32, float64."); + } + + std::string GetOpFuntionality() const override { + return "Add two tensors element-wise"; + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OPERATOR( + fused_elementwise_add, + ops::ElementwiseOp, + ops::FusedElementwiseAddOpMaker, + ops::ElementwiseOpInferVarType, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/fused/fused_elementwise_div_op.cc b/paddle/fluid/operators/fused/fused_elementwise_div_op.cc new file mode 100644 index 00000000000000..3f36ab85e73718 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_elementwise_div_op.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" +#include "paddle/fluid/operators/fused/fused_elementwise_op.h" + +namespace paddle { +namespace operators { + +class FusedElementwiseDivOpMaker : public FusedElementwiseOpMaker { + protected: + std::string GetName() const override { return "Div"; } + std::string GetEquation() const override { return "Out = X / Y"; } + + void AddInputX() override { + AddInput( + "X", + "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " + "should be int32, int64, float32, float64."); + } + + void AddInputY() override { + AddInput( + "Y", + "(Variable), Tensor or phi::DenseTensor of any dimensions. 
Its dtype " + "should be int32, int64, float32, float64."); + } + + std::string GetOpFuntionality() const override { + return "Divide two tensors element-wise"; + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OPERATOR( + fused_elementwise_div, + ops::ElementwiseOp, + ops::FusedElementwiseDivOpMaker, + ops::ElementwiseOpInferVarType, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/fused/fused_elementwise_mul_op.cc b/paddle/fluid/operators/fused/fused_elementwise_mul_op.cc new file mode 100644 index 00000000000000..92a73e1df56d14 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_elementwise_mul_op.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" +#include "paddle/fluid/operators/fused/fused_elementwise_op.h" + +namespace paddle { +namespace operators { + +class FusedElementwiseMulOpMaker : public FusedElementwiseOpMaker { + protected: + std::string GetName() const override { return "Mul"; } + std::string GetEquation() const override { return "Out = X \\\\odot Y"; } + + void AddInputX() override { + AddInput( + "X", + "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " + "should be int32, int64, float32, float64."); + } + + void AddInputY() override { + AddInput( + "Y", + "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " + "should be int32, int64, float32, float64."); + } + + std::string GetOpFuntionality() const override { + return "Multiply two tensors element-wise"; + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OPERATOR( + fused_elementwise_mul, + ops::ElementwiseOp, + ops::FusedElementwiseMulOpMaker, + ops::ElementwiseOpInferVarType, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/fused/fused_elementwise_op.h b/paddle/fluid/operators/fused/fused_elementwise_op.h new file mode 100644 index 00000000000000..ca1e368fd06625 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_elementwise_op.h @@ -0,0 +1,119 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" + +namespace paddle { +namespace operators { + +class FusedElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() final { + AddInputX(); + AddInputY(); + AddOpOutput(); + AddAttr("axis", + "(int, default -1). If X.dimension != Y.dimension," + "Y.dimension must be a subsequence of x.dimension. And axis " + "is the start dimension index " + "for broadcasting Y onto X. ") + .SetDefault(-1); + + AddAttr("Alfa", "Alfa value for the elementwise operator") + .SetDefault(1.0f); + AddAttr("Beta", "Beta value for the elementwise operator") + .SetDefault(1.0f); + + AddAttr("fuse_activation", + "Activation type from elt_act_onednn_fuse_pass") + .SetDefault(""); + + AddAttr("Scale_x", "Obtained from cpu_quantize_pass") + .SetDefault(1.0f); + AddAttr("Scale_y", "Obtained from cpu_quantize_pass") + .SetDefault(1.0f); + AddAttr("Scale_out", "Obtained from cpu_quantize_pass") + .SetDefault(1.0f); + AddOpComment(); + } + + protected: + virtual void AddInputX() { + AddInput("X", "(Tensor), The first input tensor of elementwise op."); + } + virtual void AddInputY() { + AddInput("Y", "(Tensor), The second input tensor of elementwise op."); + } + virtual void AddOpOutput() { + AddOutput("Out", + "N-dimension tensor. A location into which the result is stored. " + "It's dimension " + "equals with x"); + } + virtual void AddOpComment() { AddComment(GetCommentExamples()); } + + virtual std::string GetOpFuntionality() const { return ""; } + + virtual std::string GetName() const = 0; + virtual std::string GetEquation() const = 0; + + std::string GetCommentExamples() const { + return string::Sprintf(R"DOC( +Elementwise %s Operator. + +%s + +The equation is: + +$$%s$$ + +- $X$: a tensor of any dimension. +- $Y$: a tensor whose dimensions must be less than or equal to the dimensions of $X$. + +There are two cases for this operator: + +1. The shape of $Y$ is the same with $X$. +2. The shape of $Y$ is a continuous subsequence of $X$. + +For case 2: + +1. Broadcast $Y$ to match the shape of $X$, where $axis$ is the start dimension index + for broadcasting $Y$ onto $X$. +2. If $axis$ is -1 (default), $axis = rank(X) - rank(Y)$. +3. The trailing dimensions of size 1 for $Y$ will be ignored for the consideration of + subsequence, such as shape(Y) = (2, 1) => (2). + +For example: + + .. code-block:: text + + shape(X) = (2, 3, 4, 5), shape(Y) = (,) + shape(X) = (2, 3, 4, 5), shape(Y) = (5,) + shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5), with axis=-1(default) or axis=2 + shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1 + shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0 + shape(X) = (2, 3, 4, 5), shape(Y) = (2, 1), with axis=0 + +)DOC", + GetName(), + GetOpFuntionality(), + GetEquation()); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/fused/fused_elementwise_sub_op.cc b/paddle/fluid/operators/fused/fused_elementwise_sub_op.cc new file mode 100644 index 00000000000000..0b345134f663f7 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_elementwise_sub_op.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" +#include "paddle/fluid/operators/fused/fused_elementwise_op.h" + +namespace paddle { +namespace operators { + +class FusedElementwiseSubOpMaker : public FusedElementwiseOpMaker { + protected: + std::string GetName() const override { return "Sub"; } + std::string GetEquation() const override { return "Out = X - Y"; } + + void AddInputX() override { + AddInput( + "X", + "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " + "should be int32, int64, float32, float64."); + } + + void AddInputY() override { + AddInput( + "Y", + "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " + "should be int32, int64, float32, float64."); + } + + std::string GetOpFuntionality() const override { + return "Subtract two tensors element-wise"; + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OPERATOR( + fused_elementwise_sub, + ops::ElementwiseOp, + ops::FusedElementwiseSubOpMaker, + ops::ElementwiseOpInferVarType, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); diff --git a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc new file mode 100644 index 00000000000000..b10b3902b0bdba --- /dev/null +++ b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc @@ -0,0 +1,213 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/backends/onednn/onednn_reuse.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void FusedElementwiseKernel(const OneDNNContext& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out) { + const auto& onednn_engine = dev_ctx.GetEngine(); + + float scale_x = dev_ctx.HasDnnAttr("Scale_x") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_x")) + : 1.0f; + float scale_y = dev_ctx.HasDnnAttr("Scale_y") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_y")) + : 1.0f; + float scale_out = + dev_ctx.HasDnnAttr("Scale_out") + ? 
PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_out")) + : 1.0f; + + dnnl::post_ops post_operations; + funcs::AppendActivation(dev_ctx, post_operations); + if (dev_ctx.HasDnnAttr("fused_output_scale")) { + float scale_alpha = + PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fused_output_scale")); + post_operations.append_eltwise( + 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); + } + + auto* non_const_x = &x; + auto* non_const_y = &y; + + funcs::BinaryOneDNNHandler handler(BINARY_OP, + axis, + onednn_engine, + dev_ctx.GetPlace(), + non_const_x, + non_const_y, + out, + scale_x, + scale_y, + scale_out, + true, + post_operations); + + // oneDNN's binary is optimized for broadcasting y into x, so in other case + // we have to swap tensors to achieve optimal performance + if (x.numel() < y.numel()) { + std::swap(non_const_x, non_const_y); + } + + const auto src_x_memory = handler.AcquireSrcMemory(non_const_x); + const auto src_y_memory = handler.AcquireSecondSrcMemory(non_const_y); + // (jczaja) For Inplace src and dst should be the same memory object. + // So x should share buffer with z. But UT mechanics is testing inplace + // execution for this op not checking that x can be bradcasted to match in + // shape y tensor. + // This is wrong as when x is to be broadcasted then z(out) will match the + // shape of y which is bigger than x. Hence if x is smaller in shape than z + // and they share a buffer (of + // shape x) then this buffer is not big enough to hold result of elementwise + // operation. + const bool reuse_x_memory = non_const_x->numel() == out->numel() && + non_const_x->IsSharedBufferWith(*out); + std::shared_ptr dst_memory; + + if (reuse_x_memory) { + dst_memory = src_x_memory; + // NOTE(chenfeiyu): when the output reuses memory from other tensor rather + // than allocate its own, it's still need to take care of its data type. + // Unfortunately, paddle's operator only infers the output' shape, but not + // the data type. Alloc takes care of allocation and data type + // normally, but if the memory is already allocated and there is no need + // to re-allocate, it just set the data type. So this it added there to + // get the right data type. 
+ dev_ctx.template Alloc(out); + } else { + dst_memory = handler.AcquireDstMemory(out); + } + + const auto binary_prim = handler.AcquireForwardPrimitive(); + + auto& astream = OneDNNContext::tls().get_stream(); + + const std::unordered_map args = { + {DNNL_ARG_SRC_0, *src_x_memory}, + {DNNL_ARG_SRC_1, *src_y_memory}, + {DNNL_ARG_DST, *dst_memory}}; + + binary_prim->execute(astream, args); + astream.wait(); + + if (handler.use_broadcasting_hack == false) { + funcs::SetOutMemDescWithLogicalLayoutFusesSupport( + dev_ctx, out, dst_memory->get_desc()); + } else { + auto dims = dst_memory->get_desc().dims(); + dims.insert(dims.begin(), non_const_x->dims()[0]); + dims[1] /= dims[0]; + funcs::SetOutMemDescWithLogicalLayoutFusesSupport( + dev_ctx, out, dst_memory->get_desc().reshape(dims)); + } +} + +#define DEFINE_ONEDNN_ELEMENTWISE_KERNEL(name, algorithm) \ + template \ + void name##RawKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + FusedElementwiseKernel(dev_ctx, x, y, axis, out); \ + } \ + template \ + void name##Kernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + DenseTensor* out) { \ + FusedElementwiseKernel(dev_ctx, x, y, -1, out); \ + } + +DEFINE_ONEDNN_ELEMENTWISE_KERNEL(FusedAdd, dnnl::algorithm::binary_add) +DEFINE_ONEDNN_ELEMENTWISE_KERNEL(FusedSubtract, dnnl::algorithm::binary_sub) +DEFINE_ONEDNN_ELEMENTWISE_KERNEL(FusedMultiply, dnnl::algorithm::binary_mul) +DEFINE_ONEDNN_ELEMENTWISE_KERNEL(FusedDivide, dnnl::algorithm::binary_div) + +} // namespace phi + +PD_REGISTER_KERNEL(fused_add_raw, + OneDNN, + ONEDNN, + phi::FusedAddRawKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} + +PD_REGISTER_KERNEL(fused_add, + OneDNN, + ONEDNN, + phi::FusedAddKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} + +PD_REGISTER_KERNEL(fused_subtract_raw, + OneDNN, + ONEDNN, + phi::FusedSubtractRawKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} + +PD_REGISTER_KERNEL(fused_subtract, + OneDNN, + ONEDNN, + phi::FusedSubtractKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} + +PD_REGISTER_KERNEL(fused_multiply_raw, + OneDNN, + ONEDNN, + phi::FusedMultiplyRawKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} + +PD_REGISTER_KERNEL(fused_multiply, + OneDNN, + ONEDNN, + phi::FusedMultiplyKernel, + float, + phi::dtype::bfloat16, + int8_t, + uint8_t) {} + +PD_REGISTER_KERNEL(fused_divide_raw, + OneDNN, + ONEDNN, + phi::FusedDivideRawKernel, + float, + phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(fused_divide, + OneDNN, + ONEDNN, + phi::FusedDivideKernel, + float, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/onednn/elementwise_kernel.cc b/paddle/phi/kernels/onednn/elementwise_kernel.cc index b786da7a319156..fc1255c5b8cb27 100644 --- a/paddle/phi/kernels/onednn/elementwise_kernel.cc +++ b/paddle/phi/kernels/onednn/elementwise_kernel.cc @@ -30,26 +30,6 @@ void ElementwiseKernel(const OneDNNContext& dev_ctx, DenseTensor* out) { const auto& onednn_engine = dev_ctx.GetEngine(); - float scale_x = dev_ctx.HasDnnAttr("Scale_x") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_x")) - : 1.0f; - float scale_y = dev_ctx.HasDnnAttr("Scale_y") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_y")) - : 1.0f; - float scale_out = - dev_ctx.HasDnnAttr("Scale_out") - ? 
PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_out")) - : 1.0f; - - dnnl::post_ops post_operations; - funcs::AppendActivation(dev_ctx, post_operations); - if (dev_ctx.HasDnnAttr("fused_output_scale")) { - float scale_alpha = - PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fused_output_scale")); - post_operations.append_eltwise( - 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); - } - auto* non_const_x = &x; auto* non_const_y = &y; @@ -60,11 +40,10 @@ void ElementwiseKernel(const OneDNNContext& dev_ctx, non_const_x, non_const_y, out, - scale_x, - scale_y, - scale_out, - true, - post_operations); + 1.0f, + 1.0f, + 1.0f, + true); // oneDNN's binary is optimized for broadcasting y into x, so in other case // we have to swap tensors to achieve optimal performance diff --git a/paddle/phi/ops/compat/fused_elementwise_sig.cc b/paddle/phi/ops/compat/fused_elementwise_sig.cc new file mode 100644 index 00000000000000..4b1b9599ab32d7 --- /dev/null +++ b/paddle/phi/ops/compat/fused_elementwise_sig.cc @@ -0,0 +1,145 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/compat/op_utils.h" + +namespace phi { + +KernelSignature FusedElementwiseAddOpArgumentMapping( + const ArgumentMappingContext& ctx) { + int axis = paddle::any_cast(ctx.Attr("axis")); + if (axis == -1) { + return KernelSignature("fused_add", + {"X", "Y"}, + {"alpha", + "beta", + "fuse_activation", + "mkldnn_data_type", + "Scale_x", + "Scale_y", + "Scale_out"}, + {"Out"}); + } + return KernelSignature("fused_add_raw", + {"X", "Y"}, + {"axis", + "alpha", + "beta", + "fuse_activation", + "mkldnn_data_type", + "Scale_x", + "Scale_y", + "Scale_out"}, + {"Out"}); +} + +KernelSignature FusedElementwiseSubOpArgumentMapping( + const ArgumentMappingContext& ctx) { + int axis = paddle::any_cast(ctx.Attr("axis")); + if (axis == -1) { + return KernelSignature("fused_subtract", + {"X", "Y"}, + {"alpha", + "beta", + "fuse_activation", + "mkldnn_data_type", + "Scale_x", + "Scale_y", + "Scale_out"}, + {"Out"}); + } + return KernelSignature("fused_subtract_raw", + {"X", "Y"}, + {"axis", + "alpha", + "beta", + "fuse_activation", + "mkldnn_data_type", + "Scale_x", + "Scale_y", + "Scale_out"}, + {"Out"}); +} + +KernelSignature FusedElementwiseMulOpArgumentMapping( + const ArgumentMappingContext& ctx) { + int axis = paddle::any_cast(ctx.Attr("axis")); + if (axis == -1) { + return KernelSignature("fused_multiply", + {"X", "Y"}, + {"alpha", + "beta", + "fuse_activation", + "mkldnn_data_type", + "Scale_x", + "Scale_y", + "Scale_out"}, + {"Out"}); + } + return KernelSignature("fused_multiply_raw", + {"X", "Y"}, + {"axis", + "alpha", + "beta", + "fuse_activation", + "mkldnn_data_type", + "Scale_x", + "Scale_y", + "Scale_out"}, + {"Out"}); +} + +KernelSignature FusedElementwiseDivOpArgumentMapping( + const ArgumentMappingContext& ctx) { + int axis = paddle::any_cast(ctx.Attr("axis")); + if (axis == -1) { + return KernelSignature("fused_divide", + {"X", "Y"}, 
+ {"alpha", + "beta", + "fuse_activation", + "mkldnn_data_type", + "Scale_x", + "Scale_y", + "Scale_out"}, + {"Out"}); + } + return KernelSignature("fused_divide_raw", + {"X", "Y"}, + {"axis", + "alpha", + "beta", + "fuse_activation", + "mkldnn_data_type", + "Scale_x", + "Scale_y", + "Scale_out"}, + {"Out"}); +} + +} // namespace phi + +PD_REGISTER_BASE_KERNEL_NAME(fused_elementwise_add, fused_add); +PD_REGISTER_BASE_KERNEL_NAME(fused_elementwise_sub, fused_subtract); +PD_REGISTER_BASE_KERNEL_NAME(fused_elementwise_mul, fused_multiply); +PD_REGISTER_BASE_KERNEL_NAME(fused_elementwise_div, fused_divide); + +PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_add, + phi::FusedElementwiseAddOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_sub, + phi::FusedElementwiseSubOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_mul, + phi::FusedElementwiseMulOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_div, + phi::FusedElementwiseDivOpArgumentMapping); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py index 24a63751cfec43..3315534821dc02 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py @@ -26,7 +26,7 @@ class ElementwiseActivationMkldnnFusePassTest(InferencePassTest): act_alpha = None act_beta = None - pass_name = 'elt_act_mkldnn_fuse_pass' + pass_name = 'elt_act_onednn_fuse_pass' def setUp(self): self.set_params() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py index 93165f829b1c8a..92341facaaf217 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py @@ -118,17 +118,17 @@ def sample_predictor_configs(self, program_config): config = self.create_inference_config( use_mkldnn=True, passes=[ - 'elt_act_mkldnn_fuse_pass', + 'elt_act_onednn_fuse_pass', 'operator_scale_onednn_fuse_pass', ], ) - yield config, ['elementwise_add'], (1e-5, 1e-5) + yield config, ['fused_elementwise_add'], (1e-5, 1e-5) def test(self): self.run_and_statis( quant=False, passes=[ - 'elt_act_mkldnn_fuse_pass', + 'elt_act_onednn_fuse_pass', 'operator_scale_onednn_fuse_pass', ], ) From 25256810c8b49dc583cbf5f2414ece223ff5fe67 Mon Sep 17 00:00:00 2001 From: Silv3S Date: Thu, 9 Mar 2023 14:09:08 +0100 Subject: [PATCH 02/23] change fuse pass name --- paddle/fluid/framework/ir/CMakeLists.txt | 2 +- ...elementwise_activation_onednn_fuse_pass.cc} | 6 +++--- ... 
elementwise_activation_onednn_fuse_pass.h} | 0 .../fluid/inference/api/paddle_pass_builder.cc | 18 +++++++++--------- .../operators/fused/fused_elementwise_op.h | 5 +++-- .../inference/test_mkldnn_elt_act_fuse_pass.py | 2 +- ...dnn_elementwise_add_activation_fuse_pass.py | 4 ++-- 7 files changed, 19 insertions(+), 18 deletions(-) rename paddle/fluid/framework/ir/mkldnn/{elt_act_onednn_fuse_pass.cc => elementwise_activation_onednn_fuse_pass.cc} (94%) rename paddle/fluid/framework/ir/mkldnn/{elt_act_onednn_fuse_pass.h => elementwise_activation_onednn_fuse_pass.h} (100%) diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index aa57c267c0c0f3..6f19b642a34be7 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -175,7 +175,7 @@ if(WITH_MKLDNN) pass_library(softplus_activation_mkldnn_fuse_pass inference DIR mkldnn) pass_library(shuffle_channel_mkldnn_detect_pass inference DIR mkldnn) pass_library(fc_act_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(elt_act_onednn_fuse_pass inference DIR mkldnn) + pass_library(elementwise_activation_onednn_fuse_pass inference DIR mkldnn) pass_library(matmul_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) pass_library(matmul_activation_mkldnn_fuse_pass inference DIR mkldnn) pass_library(layer_norm_onednn_optimization_pass inference DIR mkldnn) diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/elementwise_activation_onednn_fuse_pass.cc similarity index 94% rename from paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/mkldnn/elementwise_activation_onednn_fuse_pass.cc index 9999ee3e09a3d0..2725dae2820572 100644 --- a/paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/elementwise_activation_onednn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/elementwise_activation_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" @@ -85,9 +85,9 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( } // namespace framework } // namespace paddle -REGISTER_PASS(elt_act_onednn_fuse_pass, +REGISTER_PASS(elementwise_activation_onednn_fuse_pass, paddle::framework::ir::ElementwiseActivationOneDNNPass); -REGISTER_PASS_CAPABILITY(elt_act_onednn_fuse_pass) +REGISTER_PASS_CAPABILITY(elementwise_activation_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("elementwise_add", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/elementwise_activation_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/mkldnn/elementwise_activation_onednn_fuse_pass.h diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index c1fa7654005239..1fa42cbbe5fa80 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -373,15 +373,15 @@ void CpuPassStrategy::EnableMKLDNN() { // Disabled due to topology-dependent speed-up "fc_mkldnn_pass", "fc_act_mkldnn_fuse_pass", - "fc_elementwise_add_mkldnn_fuse_pass", // - "batch_norm_act_fuse_pass", // - "softplus_activation_mkldnn_fuse_pass", // - "shuffle_channel_mkldnn_detect_pass", // - "elt_act_onednn_fuse_pass", // - "layer_norm_onednn_optimization_pass", // - "operator_scale_onednn_fuse_pass", // - "operator_unsqueeze2_onednn_fuse_pass", // - "operator_reshape2_onednn_fuse_pass", // + "fc_elementwise_add_mkldnn_fuse_pass", // + "batch_norm_act_fuse_pass", // + "softplus_activation_mkldnn_fuse_pass", // + "shuffle_channel_mkldnn_detect_pass", // + "elementwise_activation_onednn_fuse_pass", // + "layer_norm_onednn_optimization_pass", // + "operator_scale_onednn_fuse_pass", // + "operator_unsqueeze2_onednn_fuse_pass", // + "operator_reshape2_onednn_fuse_pass", // })) { passes_.push_back(pass); } diff --git a/paddle/fluid/operators/fused/fused_elementwise_op.h b/paddle/fluid/operators/fused/fused_elementwise_op.h index ca1e368fd06625..730623d078ad4f 100644 --- a/paddle/fluid/operators/fused/fused_elementwise_op.h +++ b/paddle/fluid/operators/fused/fused_elementwise_op.h @@ -38,8 +38,9 @@ class FusedElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("Beta", "Beta value for the elementwise operator") .SetDefault(1.0f); - AddAttr("fuse_activation", - "Activation type from elt_act_onednn_fuse_pass") + AddAttr( + "fuse_activation", + "Activation type from elementwise_activation_onednn_fuse_pass") .SetDefault(""); AddAttr("Scale_x", "Obtained from cpu_quantize_pass") diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py index 3315534821dc02..721c2f19455fbe 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py @@ -26,7 +26,7 @@ class ElementwiseActivationMkldnnFusePassTest(InferencePassTest): act_alpha = None act_beta = None 
- pass_name = 'elt_act_onednn_fuse_pass' + pass_name = 'elementwise_activation_onednn_fuse_pass' def setUp(self): self.set_params() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py index 92341facaaf217..4565560f49b9d9 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py @@ -118,7 +118,7 @@ def sample_predictor_configs(self, program_config): config = self.create_inference_config( use_mkldnn=True, passes=[ - 'elt_act_onednn_fuse_pass', + 'elementwise_activation_onednn_fuse_pass', 'operator_scale_onednn_fuse_pass', ], ) @@ -128,7 +128,7 @@ def test(self): self.run_and_statis( quant=False, passes=[ - 'elt_act_onednn_fuse_pass', + 'elementwise_activation_onednn_fuse_pass', 'operator_scale_onednn_fuse_pass', ], ) From 3c8b7d3d72d9f9190d38633a562e9e26d29f5e41 Mon Sep 17 00:00:00 2001 From: Silv3S Date: Thu, 9 Mar 2023 14:27:27 +0100 Subject: [PATCH 03/23] adjust .pbtxt files --- .../operators/compat/elementwise_add.pbtxt | 20 ---------- .../operators/compat/elementwise_div.pbtxt | 20 ---------- .../operators/compat/elementwise_mul.pbtxt | 22 ----------- .../operators/compat/elementwise_pow.pbtxt | 20 ---------- .../operators/compat/elementwise_sub.pbtxt | 20 ---------- .../compat/fused_elementwise_add.pbtxt | 22 +++++------ .../compat/fused_elementwise_div.pbtxt | 30 +------------- .../compat/fused_elementwise_mul.pbtxt | 22 +++++------ .../compat/fused_elementwise_sub.pbtxt | 23 +++++------ paddle/fluid/operators/feed_forward_test.cu | 10 +---- paddle/fluid/operators/ops_extra_info.h | 2 - paddle/phi/api/yaml/op_compat.yaml | 39 +++++++------------ 12 files changed, 42 insertions(+), 208 deletions(-) diff --git a/paddle/fluid/operators/compat/elementwise_add.pbtxt b/paddle/fluid/operators/compat/elementwise_add.pbtxt index 5b55f3981c77da..fdafc47dcd12ac 100644 --- a/paddle/fluid/operators/compat/elementwise_add.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_add.pbtxt @@ -22,25 +22,5 @@ extra { attrs { name: "Out0_threshold" type: FLOAT - } - attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING - } - attrs { - name: "Scale_x" - type: FLOAT - } - attrs { - name: "Scale_y" - type: FLOAT - } - attrs { - name: "Scale_out" - type: FLOAT } } diff --git a/paddle/fluid/operators/compat/elementwise_div.pbtxt b/paddle/fluid/operators/compat/elementwise_div.pbtxt index a73d2072029a3a..4baa9dea4fcdd5 100644 --- a/paddle/fluid/operators/compat/elementwise_div.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_div.pbtxt @@ -15,26 +15,6 @@ def { } } extra { - attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING - } - attrs { - name: "Scale_x" - type: FLOAT - } - attrs { - name: "Scale_y" - type: FLOAT - } - attrs { - name: "Scale_out" - type: FLOAT - } attrs { name: "act" type: STRING diff --git a/paddle/fluid/operators/compat/elementwise_mul.pbtxt b/paddle/fluid/operators/compat/elementwise_mul.pbtxt index 22289e2689c10a..0db1c9c782ae2d 100644 --- a/paddle/fluid/operators/compat/elementwise_mul.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_mul.pbtxt @@ -14,25 +14,3 @@ def { type: INT } } -extra { - attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: 
"y_data_format" - type: STRING - } - attrs { - name: "Scale_x" - type: FLOAT - } - attrs { - name: "Scale_y" - type: FLOAT - } - attrs { - name: "Scale_out" - type: FLOAT - } -} diff --git a/paddle/fluid/operators/compat/elementwise_pow.pbtxt b/paddle/fluid/operators/compat/elementwise_pow.pbtxt index a2ab73f409b780..018ca70bcbca19 100644 --- a/paddle/fluid/operators/compat/elementwise_pow.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_pow.pbtxt @@ -15,26 +15,6 @@ def { } } extra { - attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING - } - attrs { - name: "Scale_x" - type: FLOAT - } - attrs { - name: "Scale_y" - type: FLOAT - } - attrs { - name: "Scale_out" - type: FLOAT - } attrs { name: "act" type: STRING diff --git a/paddle/fluid/operators/compat/elementwise_sub.pbtxt b/paddle/fluid/operators/compat/elementwise_sub.pbtxt index 9f38601f585ea8..ae8a1352452d27 100644 --- a/paddle/fluid/operators/compat/elementwise_sub.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_sub.pbtxt @@ -15,26 +15,6 @@ def { } } extra { - attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING - } - attrs { - name: "Scale_x" - type: FLOAT - } - attrs { - name: "Scale_y" - type: FLOAT - } - attrs { - name: "Scale_out" - type: FLOAT - } attrs { name: "act" type: STRING diff --git a/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt index 3514e06839bdc1..374170aa4e97fe 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt @@ -16,35 +16,31 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" + name: "fuse_activation" type: STRING } attrs { - name: "alpha" + name: "fuse_alpha" type: FLOAT } attrs { - name: "beta" + name: "fuse_beta" type: FLOAT - } + } attrs { - name: "Scale_x" + name: "fused_output_scale" type: FLOAT } attrs { - name: "Scale_y" + name: "scale_x" type: FLOAT } attrs { - name: "Scale_out" + name: "scale_y" type: FLOAT } attrs { - name: "fuse_activation" - type: STRING + name: "scale_out" + type: FLOAT } } diff --git a/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt index 0690a2983e9688..1eb6230dfc36d0 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt @@ -16,35 +16,7 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING - } - attrs { - name: "alpha" - type: FLOAT - } - attrs { - name: "beta" - type: FLOAT - } - attrs { - name: "Scale_x" + name: "fused_output_scale" type: FLOAT } - attrs { - name: "Scale_y" - type: FLOAT - } - attrs { - name: "Scale_out" - type: FLOAT - } - attrs { - name: "fuse_activation" - type: STRING - } } diff --git a/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt index 5a1a27d38fc0fd..14b099ab9f8fcb 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt @@ -16,35 +16,31 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" + name: "fuse_activation" type: STRING } attrs { - name: "alpha" + name: "fuse_alpha" type: FLOAT } attrs { - name: "beta" + name: "fuse_beta" type: 
FLOAT - } + } attrs { - name: "Scale_x" + name: "fused_output_scale" type: FLOAT } attrs { - name: "Scale_y" + name: "scale_x" type: FLOAT } attrs { - name: "Scale_out" + name: "scale_y" type: FLOAT } attrs { - name: "fuse_activation" - type: STRING + name: "scale_out" + type: FLOAT } } diff --git a/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt index c12b3ca5f3a41b..1f59922cff1556 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt @@ -16,36 +16,31 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" + name: "fuse_activation" type: STRING } attrs { - name: "alpha" + name: "fuse_alpha" type: FLOAT } attrs { - name: "beta" + name: "fuse_beta" type: FLOAT - } + } attrs { - name: "Scale_x" + name: "fused_output_scale" type: FLOAT } attrs { - name: "Scale_y" + name: "scale_x" type: FLOAT } attrs { - name: "Scale_out" + name: "scale_y" type: FLOAT } attrs { - name: "fuse_activation" - type: STRING - } + name: "scale_out" + type: FLOAT } } diff --git a/paddle/fluid/operators/feed_forward_test.cu b/paddle/fluid/operators/feed_forward_test.cu index 6c4bc4d562cd78..b45c72cd2333bd 100644 --- a/paddle/fluid/operators/feed_forward_test.cu +++ b/paddle/fluid/operators/feed_forward_test.cu @@ -245,21 +245,13 @@ void GetElementwiseAddOpGrad(const std::vector &dout_vec, dout_ptr, dout_vec.data(), size_z * sizeof(T), cudaMemcpyHostToDevice); int axis = -1; - bool use_mkldnn = false, use_quantizer = false; + bool use_mkldnn = false; std::string mkldnn_data_type = "float32"; - std::string x_data_format = "", y_data_format = ""; - float Scale_x = 1.0, Scale_y = 1.0, Scale_out = 1.0; framework::AttributeMap attrs; attrs.insert({"axis", axis}); attrs.insert({"use_mkldnn", use_mkldnn}); - attrs.insert({"x_data_format", x_data_format}); - attrs.insert({"y_data_format", y_data_format}); - attrs.insert({"use_quantizer", use_quantizer}); attrs.insert({"mkldnn_data_type", mkldnn_data_type}); - attrs.insert({"Scale_x", Scale_x}); - attrs.insert({"Scale_y", Scale_y}); - attrs.insert({"Scale_out", Scale_out}); auto op = framework::OpRegistry::CreateOp( "elementwise_add_grad", diff --git a/paddle/fluid/operators/ops_extra_info.h b/paddle/fluid/operators/ops_extra_info.h index 0f7f6d8b21c238..ca481045c06569 100644 --- a/paddle/fluid/operators/ops_extra_info.h +++ b/paddle/fluid/operators/ops_extra_info.h @@ -112,8 +112,6 @@ const std::unordered_map {"Scale_y", ExtraAttrProperty::ONEDNN}, {"Scale_out", ExtraAttrProperty::ONEDNN}, {"Scale_weights", ExtraAttrProperty::ONEDNN}, - {"x_data_format", ExtraAttrProperty::ONEDNN}, - {"y_data_format", ExtraAttrProperty::ONEDNN}, {"fused_squeeze2_axes", ExtraAttrProperty::ONEDNN}, {"fused_unsqueeze2_axes", ExtraAttrProperty::ONEDNN}, {"fused_reshape2_shape", ExtraAttrProperty::ONEDNN}, diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 13131bd345bd1a..0ce2ac2c090baf 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -41,8 +41,7 @@ - op : add (elementwise_add) backward : add_grad (elementwise_add_grad) extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : addmm 
backward : addmm_grad @@ -452,8 +451,7 @@ outputs : out: Out extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : dot inputs : @@ -505,8 +503,7 @@ - op : elementwise_pow backward : elementwise_pow_grad extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : elu backward : elu_grad, elu_double_grad (elu_grad_grad) @@ -667,20 +664,17 @@ - op : floor_divide (elementwise_floordiv) extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : fmax (elementwise_fmax) backward : fmax_grad (elementwise_fmax_grad) extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : fmin (elementwise_fmin) backward : fmin_grad (elementwise_fmin_grad) extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : fold inputs : @@ -746,8 +740,7 @@ - op : grad_add extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : greater_equal inputs : @@ -824,8 +817,7 @@ - op : heaviside (elementwise_heaviside) backward : heaviside_grad (elementwise_heaviside_grad) extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : histogram inputs : @@ -1109,14 +1101,12 @@ - op : maximum (elementwise_max) backward : maximum_grad (elementwise_max_grad) extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : maximum (elementwise_min) backward : maximum_grad (elementwise_min_grad) extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : maxout inputs : @@ -1188,8 +1178,7 @@ outputs : out 
: Out extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : mv inputs : @@ -1386,8 +1375,7 @@ - op : remainder (elementwise_mod) extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : renorm backward : renorm_grad @@ -1684,8 +1672,7 @@ outputs : out : Out extra : - attrs : [bool use_mkldnn = false, str x_data_format = "", str y_data_format = "", str mkldnn_data_type = "float32", - bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] - op : sum (reduce_sum) backward : (sum_grad) reduce_sum_grad From 462f006961dedde51e4d76558720cc11a462c87d Mon Sep 17 00:00:00 2001 From: Silv3S Date: Thu, 9 Mar 2023 14:36:50 +0100 Subject: [PATCH 04/23] adjust quantization attributes --- .../fluid/framework/ir/mkldnn/cpu_quantize_pass.cc | 12 ++++++------ .../ir/mkldnn/cpu_quantize_placement_pass.cc | 3 +++ paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h | 2 +- .../operators/compat/fused_elementwise_mul.pbtxt | 4 ++++ 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index b25b17125363f4..310ba0b81f49e8 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -1045,14 +1045,14 @@ void CPUQuantizePass::QuantizeElementwise( "X", input_x_scale, is_x_unsigned, - "Scale_x"); + "scale_x"); QuantizeInput(g, elementwise_op, elementwise_y, "Y", input_y_scale, is_y_unsigned, - "Scale_y"); + "scale_y"); bool is_output_unsigned{false}; auto output_scale = @@ -1064,7 +1064,7 @@ void CPUQuantizePass::QuantizeElementwise( "Out", output_scale, is_output_unsigned, - "Scale_out"); + "scale_out"); ++quantize_elementwise_count; }; @@ -1310,9 +1310,9 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const { QuantizeImmutable(graph, "nearest_interp", "X"); QuantizeImmutable(graph, "nearest_interp_v2", "X"); QuantizeImmutable(graph, "split", "X"); - QuantizeElementwise(graph, "elementwise_add"); - QuantizeElementwise(graph, "elementwise_mul"); - QuantizeElementwise(graph, "elementwise_sub"); + QuantizeElementwise(graph, "fused_elementwise_add"); + QuantizeElementwise(graph, "fused_elementwise_mul"); + QuantizeElementwise(graph, "fused_elementwise_sub"); QuantizeFusionGru(graph); QuantizeMultiGru(graph); QuantizeFusionLSTM(graph); diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc index 2f6b5e4c14ecc8..c8f40e900e757a 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc @@ -32,6 +32,9 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const { "fused_conv2d", "fused_conv3d", "fused_matmul", + "fused_elementwise_add", + "fused_elementwise_mul", + "fused_elementwise_sub", "elementwise_add", "elementwise_mul", "elementwise_sub", diff --git 
a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h index f946948428a4a2..b008b4cccc39a7 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h @@ -164,7 +164,7 @@ inline void ConvertToFusedOp(OpDesc* op) { {"elementwise_add", "fused_elementwise_add"}, {"elementwise_sub", "fused_elementwise_sub"}, {"elementwise_mul", "fused_elementwise_mul"}, - {"elementwise_mul", "fused_elementwise_div"}}; + {"elementwise_div", "fused_elementwise_div"}}; if (op->Type() == "matmul") { op->SetAttr("trans_x", op->GetAttr("transpose_X")); diff --git a/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt index 14b099ab9f8fcb..9c46e8b7c7e643 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt @@ -31,6 +31,10 @@ extra { name: "fused_output_scale" type: FLOAT } + attrs { + name: "fused_unsqueeze2_axes" + type: INTS + } attrs { name: "scale_x" type: FLOAT From b72bd52dff574bdf2a0485fcd12298e3b382cba1 Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Thu, 9 Mar 2023 14:57:48 +0100 Subject: [PATCH 05/23] add missing arguments and fix others, review fixed --- paddle/fluid/framework/ir/CMakeLists.txt | 2 +- ...cc => elementwise_act_onednn_fuse_pass.cc} | 6 +- ...s.h => elementwise_act_onednn_fuse_pass.h} | 0 .../inference/api/paddle_pass_builder.cc | 2 +- .../operators/compat/elementwise_add.pbtxt | 14 +- .../operators/compat/elementwise_div.pbtxt | 14 +- .../operators/compat/elementwise_mul.pbtxt | 14 +- .../operators/compat/elementwise_sub.pbtxt | 14 +- .../compat/fused_elementwise_add.pbtxt | 28 ++-- .../compat/fused_elementwise_div.pbtxt | 28 ++-- .../compat/fused_elementwise_mul.pbtxt | 28 ++-- .../compat/fused_elementwise_sub.pbtxt | 29 ++-- .../elementwise/elementwise_sub_op.cc | 4 +- .../operators/fused/fused_elementwise_op.h | 27 ++-- .../fusion/onednn/fused_elementwise_kernel.cc | 12 +- .../phi/ops/compat/fused_elementwise_sig.cc | 90 ++++++----- ... 
test_onednn_elementwise_act_fuse_pass.py} | 152 +++++++++--------- ...nn_elementwise_add_activation_fuse_pass.py | 4 +- 18 files changed, 221 insertions(+), 247 deletions(-) rename paddle/fluid/framework/ir/mkldnn/{elt_act_onednn_fuse_pass.cc => elementwise_act_onednn_fuse_pass.cc} (95%) rename paddle/fluid/framework/ir/mkldnn/{elt_act_onednn_fuse_pass.h => elementwise_act_onednn_fuse_pass.h} (100%) rename python/paddle/fluid/tests/unittests/ir/inference/{test_mkldnn_elt_act_fuse_pass.py => test_onednn_elementwise_act_fuse_pass.py} (64%) diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 13266b12a3b9ff..bbc7a861264a35 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -175,7 +175,7 @@ if(WITH_MKLDNN) pass_library(softplus_activation_mkldnn_fuse_pass inference DIR mkldnn) pass_library(shuffle_channel_mkldnn_detect_pass inference DIR mkldnn) pass_library(fc_act_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(elt_act_onednn_fuse_pass inference DIR mkldnn) + pass_library(elementwise_act_onednn_fuse_pass inference DIR mkldnn) pass_library(matmul_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) pass_library(matmul_activation_mkldnn_fuse_pass inference DIR mkldnn) pass_library(layer_norm_onednn_optimization_pass inference DIR mkldnn) diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc similarity index 95% rename from paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.cc rename to paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc index 9999ee3e09a3d0..b6e84145aebffd 100644 --- a/paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/mkldnn/activation_onednn_fuse_pass.h" @@ -85,9 +85,9 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( } // namespace framework } // namespace paddle -REGISTER_PASS(elt_act_onednn_fuse_pass, +REGISTER_PASS(elementwise_act_onednn_fuse_pass, paddle::framework::ir::ElementwiseActivationOneDNNPass); -REGISTER_PASS_CAPABILITY(elt_act_onednn_fuse_pass) +REGISTER_PASS_CAPABILITY(elementwise_act_onednn_fuse_pass) .AddCombination( paddle::framework::compatible::OpVersionComparatorCombination() .LE("elementwise_add", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h similarity index 100% rename from paddle/fluid/framework/ir/mkldnn/elt_act_onednn_fuse_pass.h rename to paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.h diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 562c3502ce6d32..79c3a4503af516 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -377,7 +377,7 @@ void CpuPassStrategy::EnableMKLDNN() { "batch_norm_act_fuse_pass", // "softplus_activation_mkldnn_fuse_pass", // "shuffle_channel_mkldnn_detect_pass", // - "elt_act_onednn_fuse_pass", // + "elementwise_act_onednn_fuse_pass", // "layer_norm_onednn_optimization_pass", // "operator_scale_onednn_fuse_pass", // "operator_unsqueeze2_onednn_fuse_pass", // diff --git a/paddle/fluid/operators/compat/elementwise_add.pbtxt b/paddle/fluid/operators/compat/elementwise_add.pbtxt index 5b55f3981c77da..5691bcc8fa9e1c 100644 --- a/paddle/fluid/operators/compat/elementwise_add.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_add.pbtxt @@ -22,25 +22,17 @@ extra { attrs { name: "Out0_threshold" type: FLOAT - } - attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING } attrs { - name: "Scale_x" + name: "scale_x" type: FLOAT } attrs { - name: "Scale_y" + name: "scale_y" type: FLOAT } attrs { - name: "Scale_out" + name: "scale_out" type: FLOAT } } diff --git a/paddle/fluid/operators/compat/elementwise_div.pbtxt b/paddle/fluid/operators/compat/elementwise_div.pbtxt index a73d2072029a3a..48bc2adcc26377 100644 --- a/paddle/fluid/operators/compat/elementwise_div.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_div.pbtxt @@ -16,23 +16,15 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING - } - attrs { - name: "Scale_x" + name: "scale_x" type: FLOAT } attrs { - name: "Scale_y" + name: "scale_y" type: FLOAT } attrs { - name: "Scale_out" + name: "scale_out" type: FLOAT } attrs { diff --git a/paddle/fluid/operators/compat/elementwise_mul.pbtxt b/paddle/fluid/operators/compat/elementwise_mul.pbtxt index 22289e2689c10a..a24491430b2aa1 100644 --- a/paddle/fluid/operators/compat/elementwise_mul.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_mul.pbtxt @@ -16,23 +16,15 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING - } - attrs { - name: "Scale_x" + name: "scale_x" type: FLOAT } attrs { - name: "Scale_y" + name: "scale_y" type: FLOAT } attrs { - name: "Scale_out" + name: 
"scale_out" type: FLOAT } } diff --git a/paddle/fluid/operators/compat/elementwise_sub.pbtxt b/paddle/fluid/operators/compat/elementwise_sub.pbtxt index 9f38601f585ea8..a7aca8ca86973a 100644 --- a/paddle/fluid/operators/compat/elementwise_sub.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_sub.pbtxt @@ -16,23 +16,15 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING - } - attrs { - name: "Scale_x" + name: "scale_x" type: FLOAT } attrs { - name: "Scale_y" + name: "scale_y" type: FLOAT } attrs { - name: "Scale_out" + name: "scale_out" type: FLOAT } attrs { diff --git a/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt index 3514e06839bdc1..375970171dd57d 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt @@ -16,35 +16,31 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING + name: "scale_x" + type: FLOAT } attrs { - name: "alpha" + name: "scale_y" type: FLOAT } attrs { - name: "beta" + name: "scale_out" type: FLOAT - } + } attrs { - name: "Scale_x" - type: FLOAT + name: "fuse_activation" + type: STRING } attrs { - name: "Scale_y" + name: "fuse_alpha" type: FLOAT } attrs { - name: "Scale_out" + name: "fuse_beta" type: FLOAT - } + } attrs { - name: "fuse_activation" - type: STRING + name: "fused_output_scale" + type: FLOAT } } diff --git a/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt index 0690a2983e9688..9aefd0c58ca582 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt @@ -16,35 +16,31 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING + name: "scale_x" + type: FLOAT } attrs { - name: "alpha" + name: "scale_y" type: FLOAT } attrs { - name: "beta" + name: "scale_out" type: FLOAT - } + } attrs { - name: "Scale_x" - type: FLOAT + name: "fuse_activation" + type: STRING } attrs { - name: "Scale_y" + name: "fuse_alpha" type: FLOAT } attrs { - name: "Scale_out" + name: "fuse_beta" type: FLOAT - } + } attrs { - name: "fuse_activation" - type: STRING + name: "fused_output_scale" + type: FLOAT } } diff --git a/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt index 5a1a27d38fc0fd..44d3cff55bd86e 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt @@ -16,35 +16,35 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING + name: "scale_x" + type: FLOAT } attrs { - name: "y_data_format" - type: STRING + name: "scale_y" + type: FLOAT } attrs { - name: "alpha" + name: "scale_out" type: FLOAT } attrs { - name: "beta" - type: FLOAT - } + name: "fuse_activation" + type: STRING + } attrs { - name: "Scale_x" + name: "fuse_alpha" type: FLOAT } attrs { - name: "Scale_y" + name: "fuse_beta" type: FLOAT - } + } attrs { - name: "Scale_out" + name: "fused_output_scale" type: FLOAT } attrs { - name: "fuse_activation" - type: STRING + name: "fused_unsqueeze2_axes" + type: INTS } } diff --git a/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt index c12b3ca5f3a41b..db540aa9fb1cb7 100644 --- 
a/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt @@ -16,36 +16,31 @@ def { } extra { attrs { - name: "x_data_format" - type: STRING - } - attrs { - name: "y_data_format" - type: STRING + name: "scale_x" + type: FLOAT } attrs { - name: "alpha" + name: "scale_y" type: FLOAT } attrs { - name: "beta" + name: "scale_out" type: FLOAT - } + } attrs { - name: "Scale_x" - type: FLOAT + name: "fuse_activation" + type: STRING } attrs { - name: "Scale_y" + name: "fuse_alpha" type: FLOAT } attrs { - name: "Scale_out" + name: "fuse_beta" type: FLOAT - } + } attrs { - name: "fuse_activation" - type: STRING - } + name: "fused_output_scale" + type: FLOAT } } diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc index 6088ac3d01945c..73eab8b878755f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc @@ -132,9 +132,9 @@ REGISTER_OPERATOR(elementwise_sub_grad_grad, REGISTER_OP_VERSION(elementwise_sub) .AddCheckpoint( - R"ROC(Register elementwise_sub for adding the attribute of Scale_y)ROC", + R"ROC(Register elementwise_sub for adding the attribute of scale_y)ROC", paddle::framework::compatible::OpVersionDesc().NewAttr( - "Scale_y", + "scale_y", "In order to support the function of scaling the input Y when " "using the operator of elementwise_sub.", 1.0f)); diff --git a/paddle/fluid/operators/fused/fused_elementwise_op.h b/paddle/fluid/operators/fused/fused_elementwise_op.h index ca1e368fd06625..45c7d89f2fb3aa 100644 --- a/paddle/fluid/operators/fused/fused_elementwise_op.h +++ b/paddle/fluid/operators/fused/fused_elementwise_op.h @@ -33,21 +33,30 @@ class FusedElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { "for broadcasting Y onto X. 
") .SetDefault(-1); - AddAttr("Alfa", "Alfa value for the elementwise operator") + AddAttr( + "fuse_activation", + "Activation type from elementwise_act_onednn_fuse_pass") + .SetDefault(""); + AddAttr("fuse_alpha", "Alfa value for the elementwise operator") .SetDefault(1.0f); - AddAttr("Beta", "Beta value for the elementwise operator") + AddAttr("fuse_beta", "Beta value for the elementwise operator") .SetDefault(1.0f); - AddAttr("fuse_activation", - "Activation type from elt_act_onednn_fuse_pass") - .SetDefault(""); - - AddAttr("Scale_x", "Obtained from cpu_quantize_pass") + AddAttr("scale_x", "Obtained from cpu_quantize_pass") .SetDefault(1.0f); - AddAttr("Scale_y", "Obtained from cpu_quantize_pass") + AddAttr("scale_y", "Obtained from cpu_quantize_pass") .SetDefault(1.0f); - AddAttr("Scale_out", "Obtained from cpu_quantize_pass") + AddAttr("scale_out", "Obtained from cpu_quantize_pass") + .SetDefault(1.0f); + + AddAttr("fused_output_scale", + "Obtained from operator_scale_onednn_fuse_pass") .SetDefault(1.0f); + AddAttr>( + "fused_unsqueeze2_axes", + "Obtained from operator_unsqueeze2_onednn_fuse_pass for " + "elementwise_mul") + .SetDefault({}); AddOpComment(); } diff --git a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc index b10b3902b0bdba..1f1a6b7be76fc8 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc @@ -25,15 +25,15 @@ void FusedElementwiseKernel(const OneDNNContext& dev_ctx, DenseTensor* out) { const auto& onednn_engine = dev_ctx.GetEngine(); - float scale_x = dev_ctx.HasDnnAttr("Scale_x") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_x")) + float scale_x = dev_ctx.HasDnnAttr("scale_x") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("scale_x")) : 1.0f; - float scale_y = dev_ctx.HasDnnAttr("Scale_y") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_y")) + float scale_y = dev_ctx.HasDnnAttr("scale_y") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("scale_y")) : 1.0f; float scale_out = - dev_ctx.HasDnnAttr("Scale_out") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_out")) + dev_ctx.HasDnnAttr("scale_out") + ? 
PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("scale_out")) : 1.0f; dnnl::post_ops post_operations; diff --git a/paddle/phi/ops/compat/fused_elementwise_sig.cc b/paddle/phi/ops/compat/fused_elementwise_sig.cc index 4b1b9599ab32d7..f7ed8474ef44b3 100644 --- a/paddle/phi/ops/compat/fused_elementwise_sig.cc +++ b/paddle/phi/ops/compat/fused_elementwise_sig.cc @@ -22,25 +22,27 @@ KernelSignature FusedElementwiseAddOpArgumentMapping( if (axis == -1) { return KernelSignature("fused_add", {"X", "Y"}, - {"alpha", - "beta", + {"fuse_alpha", + "fuse_beta", "fuse_activation", "mkldnn_data_type", - "Scale_x", - "Scale_y", - "Scale_out"}, + "scale_x", + "scale_y", + "scale_out", + "fused_output_scale"}, {"Out"}); } return KernelSignature("fused_add_raw", {"X", "Y"}, {"axis", - "alpha", - "beta", + "fuse_alpha", + "fuse_beta", "fuse_activation", "mkldnn_data_type", - "Scale_x", - "Scale_y", - "Scale_out"}, + "scale_x", + "scale_y", + "scale_out", + "fused_output_scale"}, {"Out"}); } @@ -50,25 +52,27 @@ KernelSignature FusedElementwiseSubOpArgumentMapping( if (axis == -1) { return KernelSignature("fused_subtract", {"X", "Y"}, - {"alpha", - "beta", + {"fuse_alpha", + "fuse_beta", "fuse_activation", "mkldnn_data_type", - "Scale_x", - "Scale_y", - "Scale_out"}, + "scale_x", + "scale_y", + "scale_out", + "fused_output_scale"}, {"Out"}); } return KernelSignature("fused_subtract_raw", {"X", "Y"}, {"axis", - "alpha", - "beta", + "fuse_alpha", + "fuse_beta", "fuse_activation", "mkldnn_data_type", - "Scale_x", - "Scale_y", - "Scale_out"}, + "scale_x", + "scale_y", + "scale_out", + "fused_output_scale"}, {"Out"}); } @@ -78,25 +82,29 @@ KernelSignature FusedElementwiseMulOpArgumentMapping( if (axis == -1) { return KernelSignature("fused_multiply", {"X", "Y"}, - {"alpha", - "beta", + {"fuse_alpha", + "fuse_beta", "fuse_activation", "mkldnn_data_type", - "Scale_x", - "Scale_y", - "Scale_out"}, + "scale_x", + "scale_y", + "scale_out", + "fused_output_scale", + "fused_unsqueeze2_axes"}, {"Out"}); } return KernelSignature("fused_multiply_raw", {"X", "Y"}, {"axis", - "alpha", - "beta", + "fuse_alpha", + "fuse_beta", "fuse_activation", "mkldnn_data_type", - "Scale_x", - "Scale_y", - "Scale_out"}, + "scale_x", + "scale_y", + "scale_out", + "fused_output_scale", + "fused_unsqueeze2_axes"}, {"Out"}); } @@ -106,25 +114,27 @@ KernelSignature FusedElementwiseDivOpArgumentMapping( if (axis == -1) { return KernelSignature("fused_divide", {"X", "Y"}, - {"alpha", - "beta", + {"fuse_alpha", + "fuse_beta", "fuse_activation", "mkldnn_data_type", - "Scale_x", - "Scale_y", - "Scale_out"}, + "scale_x", + "scale_y", + "scale_out", + "fused_output_scale"}, {"Out"}); } return KernelSignature("fused_divide_raw", {"X", "Y"}, {"axis", - "alpha", - "beta", + "fuse_fuse_alpha", + "fuse_beta", "fuse_activation", "mkldnn_data_type", - "Scale_x", - "Scale_y", - "Scale_out"}, + "scale_x", + "scale_y", + "scale_out", + "fused_output_scale"}, {"Out"}); } diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_act_fuse_pass.py similarity index 64% rename from python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py rename to python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_act_fuse_pass.py index 3315534821dc02..0572a87b8a6531 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_elt_act_fuse_pass.py +++ 
b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_act_fuse_pass.py @@ -23,10 +23,10 @@ from paddle.fluid.core import PassVersionChecker -class ElementwiseActivationMkldnnFusePassTest(InferencePassTest): +class ElementwiseActivationOneDNNFusePassTest(InferencePassTest): act_alpha = None act_beta = None - pass_name = 'elt_act_onednn_fuse_pass' + pass_name = 'elementwise_act_onednn_fuse_pass' def setUp(self): self.set_params() @@ -65,24 +65,24 @@ def test_pass_compatible(self): self.assertTrue(PassVersionChecker.IsCompatible(self.pass_name)) -class ElementwiseActivationMkldnnFusePassTest_Add_Relu( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_Relu( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add self.act = F.relu -class ElementwiseActivationMkldnnFusePassTest_Add_Tanh( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_Tanh( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add self.act = paddle.tanh -class ElementwiseActivationMkldnnFusePassTest_Add_LeakyRelu( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_LeakyRelu( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add @@ -90,40 +90,40 @@ def set_params(self): self.act = paddle.nn.functional.leaky_relu -class ElementwiseActivationMkldnnFusePassTest_Add_Swish( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_Swish( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add self.act = paddle.nn.functional.swish -class ElementwiseActivationMkldnnFusePassTest_Add_HardSwish( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_HardSwish( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add self.act = paddle.nn.functional.hardswish -class ElementwiseActivationMkldnnFusePassTest_Add_SQRT( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_SQRT( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add self.act = paddle.sqrt -class ElementwiseActivationMkldnnFusePassTest_Add_ABS( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_ABS( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add self.act = paddle.abs -class ElementwiseActivationMkldnnFusePassTest_Add_Clip( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_Clip( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add @@ -132,16 +132,16 @@ def set_params(self): self.act_beta = 10.0 -class ElementwiseActivationMkldnnFusePassTest_Add_Gelu( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_Gelu( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add self.act = paddle.nn.functional.gelu -class ElementwiseActivationMkldnnFusePassTest_Add_Gelu_Tanh( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_Gelu_Tanh( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add @@ -149,40 +149,40 @@ def set_params(self): self.act_alpha = True -class 
ElementwiseActivationMkldnnFusePassTest_Add_Relu6( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_Relu6( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add self.act = paddle.nn.functional.relu6 -class ElementwiseActivationMkldnnFusePassTest_Add_Sigmoid( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Add_Sigmoid( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add self.act = paddle.nn.functional.sigmoid -class ElementwiseActivationMkldnnFusePassTest_Sub_Relu( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_Relu( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract self.act = F.relu -class ElementwiseActivationMkldnnFusePassTest_Sub_Tanh( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_Tanh( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract self.act = paddle.tanh -class ElementwiseActivationMkldnnFusePassTest_Sub_LeakyRelu( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_LeakyRelu( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract @@ -190,32 +190,32 @@ def set_params(self): self.act = paddle.nn.functional.leaky_relu -class ElementwiseActivationMkldnnFusePassTest_Sub_Swish( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_Swish( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract self.act = paddle.nn.functional.swish -class ElementwiseActivationMkldnnFusePassTest_Sub_HardSwish( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_HardSwish( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract self.act = paddle.nn.functional.hardswish -class ElementwiseActivationMkldnnFusePassTest_Sub_ABS( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_ABS( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract self.act = paddle.abs -class ElementwiseActivationMkldnnFusePassTest_Sub_Clip( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_Clip( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract @@ -224,16 +224,16 @@ def set_params(self): self.act_beta = 10.0 -class ElementwiseActivationMkldnnFusePassTest_Sub_Gelu( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_Gelu( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract self.act = paddle.nn.functional.gelu -class ElementwiseActivationMkldnnFusePassTest_Sub_Gelu_Tanh( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_Gelu_Tanh( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract @@ -241,40 +241,40 @@ def set_params(self): self.act_alpha = True -class ElementwiseActivationMkldnnFusePassTest_Sub_Relu6( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_Relu6( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract 
self.act = paddle.nn.functional.relu6 -class ElementwiseActivationMkldnnFusePassTest_Sub_Sigmoid( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Sub_Sigmoid( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract self.act = paddle.nn.functional.sigmoid -class ElementwiseActivationMkldnnFusePassTest_Mul_Relu( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_Relu( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply self.act = F.relu -class ElementwiseActivationMkldnnFusePassTest_Mul_Tanh( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_Tanh( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply self.act = paddle.tanh -class ElementwiseActivationMkldnnFusePassTest_Mul_LeakyRelu( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_LeakyRelu( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply @@ -282,40 +282,40 @@ def set_params(self): self.act = paddle.nn.functional.leaky_relu -class ElementwiseActivationMkldnnFusePassTest_Mul_Swish( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_Swish( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply self.act = paddle.nn.functional.swish -class ElementwiseActivationMkldnnFusePassTest_Mul_HardSwish( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_HardSwish( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply self.act = paddle.nn.functional.hardswish -class ElementwiseActivationMkldnnFusePassTest_Mul_SQRT( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_SQRT( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply self.act = paddle.sqrt -class ElementwiseActivationMkldnnFusePassTest_Mul_ABS( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_ABS( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply self.act = paddle.abs -class ElementwiseActivationMkldnnFusePassTest_Mul_Clip( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_Clip( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply @@ -324,16 +324,16 @@ def set_params(self): self.act_beta = 10.0 -class ElementwiseActivationMkldnnFusePassTest_Mul_Gelu( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_Gelu( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply self.act = paddle.nn.functional.gelu -class ElementwiseActivationMkldnnFusePassTest_Mul_Gelu_Tanh( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_Gelu_Tanh( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply @@ -341,16 +341,16 @@ def set_params(self): self.act_alpha = True -class ElementwiseActivationMkldnnFusePassTest_Mul_Relu6( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_Relu6( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): 
self.operand = paddle.multiply self.act = paddle.nn.functional.relu6 -class ElementwiseActivationMkldnnFusePassTest_Mul_Sigmoid( - ElementwiseActivationMkldnnFusePassTest +class ElementwiseActivationOneDNNFusePassTest_Mul_Sigmoid( + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply @@ -358,7 +358,7 @@ def set_params(self): class ElementwiseScaleOneDNNFusePassTest_Add( - ElementwiseActivationMkldnnFusePassTest + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.add @@ -367,7 +367,7 @@ def set_params(self): class ElementwiseScaleOneDNNFusePassTest_Sub( - ElementwiseActivationMkldnnFusePassTest + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.subtract @@ -376,7 +376,7 @@ def set_params(self): class ElementwiseScaleOneDNNFusePassTest_Mul( - ElementwiseActivationMkldnnFusePassTest + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.multiply @@ -385,7 +385,7 @@ def set_params(self): class ElementwiseScaleOneDNNFusePassTest_Div( - ElementwiseActivationMkldnnFusePassTest + ElementwiseActivationOneDNNFusePassTest ): def set_params(self): self.operand = paddle.divide diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py index 92341facaaf217..e4a4809971739a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_elementwise_add_activation_fuse_pass.py @@ -118,7 +118,7 @@ def sample_predictor_configs(self, program_config): config = self.create_inference_config( use_mkldnn=True, passes=[ - 'elt_act_onednn_fuse_pass', + 'elementwise_act_onednn_fuse_pass', 'operator_scale_onednn_fuse_pass', ], ) @@ -128,7 +128,7 @@ def test(self): self.run_and_statis( quant=False, passes=[ - 'elt_act_onednn_fuse_pass', + 'elementwise_act_onednn_fuse_pass', 'operator_scale_onednn_fuse_pass', ], ) From 190e7976bf5f32f2600eccec8c37e5fd39263634 Mon Sep 17 00:00:00 2001 From: Silv3S Date: Thu, 9 Mar 2023 16:58:58 +0100 Subject: [PATCH 06/23] simplify fused kernel registration --- .../elementwise_act_onednn_fuse_pass.cc | 54 +++---- .../operators/fused/fused_elementwise_op.h | 18 ++- .../fusion/onednn/fused_elementwise_kernel.cc | 136 ++++++++---------- .../phi/ops/compat/fused_elementwise_sig.cc | 105 +++----------- 4 files changed, 108 insertions(+), 205 deletions(-) diff --git a/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc index 8df33719676f18..b6e84145aebffd 100644 --- a/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/elementwise_act_onednn_fuse_pass.cc @@ -85,36 +85,24 @@ void ElementwiseActivationOneDNNPass::FuseElementwiseAct( } // namespace framework } // namespace paddle -< < < < < < < < - HEAD : paddle / fluid / framework / ir / mkldnn / - elementwise_activation_onednn_fuse_pass.cc REGISTER_PASS( - elementwise_activation_onednn_fuse_pass, - paddle::framework::ir::ElementwiseActivationOneDNNPass); -REGISTER_PASS_CAPABILITY(elementwise_activation_onednn_fuse_pass) -== == == == - REGISTER_PASS(elementwise_act_onednn_fuse_pass, - paddle::framework::ir::ElementwiseActivationOneDNNPass); 
-REGISTER_PASS_CAPABILITY(elementwise_act_onednn_fuse_pass) >>>>>>>> - jhulek / - Fused_Elementwise_Kernel_And_Op - : paddle - / - fluid / framework / ir / mkldnn / - elementwise_act_onednn_fuse_pass.cc.AddCombination( - paddle::framework::compatible::OpVersionComparatorCombination() - .LE("elementwise_add", 1) - .LE("elementwise_sub", 1) - .LE("elementwise_mul", 1) - .EQ("abs", 0) - .LE("clip", 1) - .EQ("gelu", 0) - .EQ("hard_sigmoid", 0) - .LE("hard_swish", 0) - .LE("leaky_relu", 1) - .LE("mish", 1) - .EQ("relu", 0) - .EQ("relu6", 0) - .EQ("sigmoid", 0) - .EQ("sqrt", 0) - .EQ("swish", 0) - .EQ("tanh", 0)); +REGISTER_PASS(elementwise_act_onednn_fuse_pass, + paddle::framework::ir::ElementwiseActivationOneDNNPass); +REGISTER_PASS_CAPABILITY(elementwise_act_onednn_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .LE("elementwise_add", 1) + .LE("elementwise_sub", 1) + .LE("elementwise_mul", 1) + .EQ("abs", 0) + .LE("clip", 1) + .EQ("gelu", 0) + .EQ("hard_sigmoid", 0) + .LE("hard_swish", 0) + .LE("leaky_relu", 1) + .LE("mish", 1) + .EQ("relu", 0) + .EQ("relu6", 0) + .EQ("sigmoid", 0) + .EQ("sqrt", 0) + .EQ("swish", 0) + .EQ("tanh", 0)); diff --git a/paddle/fluid/operators/fused/fused_elementwise_op.h b/paddle/fluid/operators/fused/fused_elementwise_op.h index 0a9010bd14fbd1..21bd80d1b7d1f4 100644 --- a/paddle/fluid/operators/fused/fused_elementwise_op.h +++ b/paddle/fluid/operators/fused/fused_elementwise_op.h @@ -37,17 +37,9 @@ class FusedElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { "Activation type from elementwise_act_onednn_fuse_pass") .SetDefault(""); AddAttr("fuse_alpha", "Alfa value for the elementwise operator") - .SetDefault(1.0f); + .SetDefault(0.0f); AddAttr("fuse_beta", "Beta value for the elementwise operator") - .SetDefault(1.0f); - - AddAttr("scale_x", "Obtained from cpu_quantize_pass") - .SetDefault(1.0f); - AddAttr("scale_y", "Obtained from cpu_quantize_pass") - .SetDefault(1.0f); - AddAttr("scale_out", "Obtained from cpu_quantize_pass") - .SetDefault(1.0f); - + .SetDefault(0.0f); AddAttr("fused_output_scale", "Obtained from operator_scale_onednn_fuse_pass") .SetDefault(1.0f); @@ -56,6 +48,12 @@ class FusedElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { "Obtained from operator_unsqueeze2_onednn_fuse_pass for " "elementwise_mul") .SetDefault({}); + AddAttr("scale_x", "Obtained from cpu_quantize_pass") + .SetDefault(1.0f); + AddAttr("scale_y", "Obtained from cpu_quantize_pass") + .SetDefault(1.0f); + AddAttr("scale_out", "Obtained from cpu_quantize_pass") + .SetDefault(1.0f); AddOpComment(); } diff --git a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc index 1f1a6b7be76fc8..185b9797e7ec7f 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -21,28 +21,24 @@ template void FusedElementwiseKernel(const OneDNNContext& dev_ctx, const DenseTensor& x, const DenseTensor& y, - int axis, + const int axis, + const std::string& fuse_activation, + const float fuse_alpha, + const float fuse_beta, + const float fused_output_scale, + const std::vector& fused_unsqueeze2_axes, + const float scale_x, + const float scale_y, + const float scale_out, DenseTensor* out) { const auto& onednn_engine = dev_ctx.GetEngine(); - float scale_x = dev_ctx.HasDnnAttr("scale_x") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("scale_x")) - : 1.0f; - float scale_y = dev_ctx.HasDnnAttr("scale_y") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("scale_y")) - : 1.0f; - float scale_out = - dev_ctx.HasDnnAttr("scale_out") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("scale_out")) - : 1.0f; - dnnl::post_ops post_operations; - funcs::AppendActivation(dev_ctx, post_operations); - if (dev_ctx.HasDnnAttr("fused_output_scale")) { - float scale_alpha = - PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fused_output_scale")); + funcs::AppendActivation( + dev_ctx, post_operations, 1.0f, fuse_activation, fuse_alpha, fuse_beta); + if (fused_output_scale != 1.0) { post_operations.append_eltwise( - 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); + 1.0, dnnl::algorithm::eltwise_linear, fused_output_scale, 0.0f); } auto* non_const_x = &x; @@ -108,33 +104,51 @@ void FusedElementwiseKernel(const OneDNNContext& dev_ctx, binary_prim->execute(astream, args); astream.wait(); - if (handler.use_broadcasting_hack == false) { - funcs::SetOutMemDescWithLogicalLayoutFusesSupport( - dev_ctx, out, dst_memory->get_desc()); - } else { - auto dims = dst_memory->get_desc().dims(); + auto out_md = dst_memory->get_desc(); + + if (handler.use_broadcasting_hack) { + auto dims = out_md.dims(); dims.insert(dims.begin(), non_const_x->dims()[0]); dims[1] /= dims[0]; - funcs::SetOutMemDescWithLogicalLayoutFusesSupport( - dev_ctx, out, dst_memory->get_desc().reshape(dims)); + out_md = out_md.reshape(dims); + } + + if (fused_unsqueeze2_axes.empty()) { + out->set_mem_desc(out_md); + } else { + funcs::SetOutMemDescWithUnsqueeze2FuseSupport( + fused_unsqueeze2_axes, out, out_md); } } -#define DEFINE_ONEDNN_ELEMENTWISE_KERNEL(name, algorithm) \ - template \ - void name##RawKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - FusedElementwiseKernel(dev_ctx, x, y, axis, out); \ - } \ - template \ - void name##Kernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - DenseTensor* out) { \ - FusedElementwiseKernel(dev_ctx, x, y, -1, out); \ +#define DEFINE_ONEDNN_ELEMENTWISE_KERNEL(name, algorithm) \ + template \ + void name##Kernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + const int axis, \ + const std::string& fuse_activation, \ + const float fuse_alpha, \ + const float fuse_beta, \ + const float fused_output_scale, \ + const std::vector& fused_unsqueeze2_axes, \ + const float scale_x, \ + const float scale_y, \ + const float scale_out, \ + DenseTensor* out) { \ + FusedElementwiseKernel(dev_ctx, \ + x, \ + y, \ + axis, \ + fuse_activation, \ + fuse_alpha, \ + fuse_beta, \ + fused_output_scale, \ + fused_unsqueeze2_axes, \ + scale_x, \ + scale_y, \ + scale_out, \ + out); \ } DEFINE_ONEDNN_ELEMENTWISE_KERNEL(FusedAdd, dnnl::algorithm::binary_add) @@ -144,16 +158,7 @@ DEFINE_ONEDNN_ELEMENTWISE_KERNEL(FusedDivide, dnnl::algorithm::binary_div) } // namespace phi 
-PD_REGISTER_KERNEL(fused_add_raw, - OneDNN, - ONEDNN, - phi::FusedAddRawKernel, - float, - phi::dtype::bfloat16, - int8_t, - uint8_t) {} - -PD_REGISTER_KERNEL(fused_add, +PD_REGISTER_KERNEL(fused_elementwise_add, OneDNN, ONEDNN, phi::FusedAddKernel, @@ -162,16 +167,7 @@ PD_REGISTER_KERNEL(fused_add, int8_t, uint8_t) {} -PD_REGISTER_KERNEL(fused_subtract_raw, - OneDNN, - ONEDNN, - phi::FusedSubtractRawKernel, - float, - phi::dtype::bfloat16, - int8_t, - uint8_t) {} - -PD_REGISTER_KERNEL(fused_subtract, +PD_REGISTER_KERNEL(fused_elementwise_sub, OneDNN, ONEDNN, phi::FusedSubtractKernel, @@ -180,16 +176,7 @@ PD_REGISTER_KERNEL(fused_subtract, int8_t, uint8_t) {} -PD_REGISTER_KERNEL(fused_multiply_raw, - OneDNN, - ONEDNN, - phi::FusedMultiplyRawKernel, - float, - phi::dtype::bfloat16, - int8_t, - uint8_t) {} - -PD_REGISTER_KERNEL(fused_multiply, +PD_REGISTER_KERNEL(fused_elementwise_mul, OneDNN, ONEDNN, phi::FusedMultiplyKernel, @@ -198,14 +185,7 @@ PD_REGISTER_KERNEL(fused_multiply, int8_t, uint8_t) {} -PD_REGISTER_KERNEL(fused_divide_raw, - OneDNN, - ONEDNN, - phi::FusedDivideRawKernel, - float, - phi::dtype::bfloat16) {} - -PD_REGISTER_KERNEL(fused_divide, +PD_REGISTER_KERNEL(fused_elementwise_div, OneDNN, ONEDNN, phi::FusedDivideKernel, diff --git a/paddle/phi/ops/compat/fused_elementwise_sig.cc b/paddle/phi/ops/compat/fused_elementwise_sig.cc index f7ed8474ef44b3..5494546de9804f 100644 --- a/paddle/phi/ops/compat/fused_elementwise_sig.cc +++ b/paddle/phi/ops/compat/fused_elementwise_sig.cc @@ -18,133 +18,70 @@ namespace phi { KernelSignature FusedElementwiseAddOpArgumentMapping( const ArgumentMappingContext& ctx) { - int axis = paddle::any_cast(ctx.Attr("axis")); - if (axis == -1) { - return KernelSignature("fused_add", - {"X", "Y"}, - {"fuse_alpha", - "fuse_beta", - "fuse_activation", - "mkldnn_data_type", - "scale_x", - "scale_y", - "scale_out", - "fused_output_scale"}, - {"Out"}); - } - return KernelSignature("fused_add_raw", + return KernelSignature("fused_elementwise_add", {"X", "Y"}, {"axis", + "fuse_activation", "fuse_alpha", "fuse_beta", - "fuse_activation", - "mkldnn_data_type", + "fused_output_scale", + "fused_unsqueeze2_axes", "scale_x", "scale_y", - "scale_out", - "fused_output_scale"}, + "scale_out"}, {"Out"}); } KernelSignature FusedElementwiseSubOpArgumentMapping( const ArgumentMappingContext& ctx) { - int axis = paddle::any_cast(ctx.Attr("axis")); - if (axis == -1) { - return KernelSignature("fused_subtract", - {"X", "Y"}, - {"fuse_alpha", - "fuse_beta", - "fuse_activation", - "mkldnn_data_type", - "scale_x", - "scale_y", - "scale_out", - "fused_output_scale"}, - {"Out"}); - } - return KernelSignature("fused_subtract_raw", + return KernelSignature("fused_elementwise_sub", {"X", "Y"}, {"axis", + "fuse_activation", "fuse_alpha", "fuse_beta", - "fuse_activation", - "mkldnn_data_type", + "fused_output_scale", + "fused_unsqueeze2_axes", "scale_x", "scale_y", - "scale_out", - "fused_output_scale"}, + "scale_out"}, {"Out"}); } KernelSignature FusedElementwiseMulOpArgumentMapping( const ArgumentMappingContext& ctx) { - int axis = paddle::any_cast(ctx.Attr("axis")); - if (axis == -1) { - return KernelSignature("fused_multiply", - {"X", "Y"}, - {"fuse_alpha", - "fuse_beta", - "fuse_activation", - "mkldnn_data_type", - "scale_x", - "scale_y", - "scale_out", - "fused_output_scale", - "fused_unsqueeze2_axes"}, - {"Out"}); - } - return KernelSignature("fused_multiply_raw", + return KernelSignature("fused_elementwise_mul", {"X", "Y"}, {"axis", + "fuse_activation", "fuse_alpha", 
"fuse_beta", - "fuse_activation", - "mkldnn_data_type", + "fused_output_scale", + "fused_unsqueeze2_axes", "scale_x", "scale_y", - "scale_out", - "fused_output_scale", - "fused_unsqueeze2_axes"}, + "scale_out"}, {"Out"}); } KernelSignature FusedElementwiseDivOpArgumentMapping( const ArgumentMappingContext& ctx) { - int axis = paddle::any_cast(ctx.Attr("axis")); - if (axis == -1) { - return KernelSignature("fused_divide", - {"X", "Y"}, - {"fuse_alpha", - "fuse_beta", - "fuse_activation", - "mkldnn_data_type", - "scale_x", - "scale_y", - "scale_out", - "fused_output_scale"}, - {"Out"}); - } - return KernelSignature("fused_divide_raw", + return KernelSignature("fused_elementwise_div", {"X", "Y"}, {"axis", - "fuse_fuse_alpha", - "fuse_beta", "fuse_activation", - "mkldnn_data_type", + "fuse_alpha", + "fuse_beta", + "fused_output_scale", + "fused_unsqueeze2_axes", "scale_x", "scale_y", - "scale_out", - "fused_output_scale"}, + "scale_out"}, {"Out"}); } } // namespace phi -PD_REGISTER_BASE_KERNEL_NAME(fused_elementwise_add, fused_add); -PD_REGISTER_BASE_KERNEL_NAME(fused_elementwise_sub, fused_subtract); -PD_REGISTER_BASE_KERNEL_NAME(fused_elementwise_mul, fused_multiply); -PD_REGISTER_BASE_KERNEL_NAME(fused_elementwise_div, fused_divide); - PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_add, phi::FusedElementwiseAddOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(fused_elementwise_sub, From 4f729433733e7ab38c85b51dffdfee1c879da21d Mon Sep 17 00:00:00 2001 From: Silv3S Date: Thu, 9 Mar 2023 17:10:13 +0100 Subject: [PATCH 07/23] fix elementwise unit tests --- .../ir/mkldnn/operator_scale_onednn_fuse_pass.cc | 13 +++++++++---- .../mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc | 5 +++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc index 6c060e125372fd..086de37ab42996 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/operator_scale_onednn_fuse_pass.cc @@ -31,6 +31,10 @@ void FuseOperatorScaleOneDNNPass::ApplyImpl(Graph *graph) const { "fused_matmul", "matmul", "matmul_v2", + "fused_elementwise_add", + "fused_elementwise_sub", + "fused_elementwise_mul", + "fused_elementwise_div", "elementwise_add", "elementwise_sub", "elementwise_mul", @@ -87,10 +91,7 @@ void FuseOperatorScaleOneDNNPass::FuseScale(Graph *graph, scale = *(scale_tensor->data()); } - if (op_type == "matmul" || op_type == "matmul_v2") { - ConvertToFusedOp(operator_op->Op()); - } - + ConvertToFusedOp(operator_op->Op()); operator_op->Op()->SetAttr("fused_output_scale", scale); operator_op->Op()->SetOutput("Out", {scale_out->Name()}); @@ -120,6 +121,10 @@ REGISTER_PASS_CAPABILITY(operator_scale_onednn_fuse_pass) .EQ("fused_matmul", 0) .LE("matmul", 1) .EQ("matmul_v2", 0) + .EQ("fused_elementwise_add", 0) + .EQ("fused_elementwise_sub", 0) + .EQ("fused_elementwise_mul", 0) + .EQ("fused_elementwise_div", 0) .LE("elementwise_add", 1) .LE("elementwise_sub", 1) .LE("elementwise_mul", 1) diff --git a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc index 716d14886327a0..0d908bd358ece6 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc @@ -14,8 +14,8 @@ #include 
"paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.h" +#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { @@ -26,7 +26,7 @@ using string::PrettyLogDetail; void FuseOperatorUnsqueeze2OneDNNPass::ApplyImpl(Graph *graph) const { std::vector> ops_and_outputs = { - {"transpose2", 2}, {"elementwise_mul", 1}}; + {"transpose2", 2}, {"fused_elementwise_mul", 1}, {"elementwise_mul", 1}}; for (const auto &op_and_outputs : ops_and_outputs) FuseUnsqueeze2(graph, op_and_outputs.first, op_and_outputs.second); @@ -55,6 +55,7 @@ void FuseOperatorUnsqueeze2OneDNNPass::FuseUnsqueeze2( GET_IR_NODE_FROM_SUBGRAPH( unsqueeze2_out, unsqueeze2_out, op_unsqueeze2_pattern); + ConvertToFusedOp(operator_op->Op()); if (!operator_op->Op()->HasAttr("use_mkldnn") || (operator_op->Op()->HasAttr("use_mkldnn") && !(PADDLE_GET_CONST(bool, operator_op->Op()->GetAttr("use_mkldnn"))))) { From 1ad3f16a23d5b3e9dbc6c3db2de9aa94885ef50a Mon Sep 17 00:00:00 2001 From: Silv3S Date: Fri, 10 Mar 2023 13:29:22 +0100 Subject: [PATCH 08/23] reuse one fused elementwise op --- .../fused/fused_elementwise_add_op.cc | 56 -------- .../fused/fused_elementwise_div_op.cc | 56 -------- .../fused/fused_elementwise_mul_op.cc | 56 -------- .../operators/fused/fused_elementwise_op.cc | 95 +++++++++++++ .../operators/fused/fused_elementwise_op.h | 125 ------------------ .../fused/fused_elementwise_sub_op.cc | 56 -------- 6 files changed, 95 insertions(+), 349 deletions(-) delete mode 100644 paddle/fluid/operators/fused/fused_elementwise_add_op.cc delete mode 100644 paddle/fluid/operators/fused/fused_elementwise_div_op.cc delete mode 100644 paddle/fluid/operators/fused/fused_elementwise_mul_op.cc create mode 100644 paddle/fluid/operators/fused/fused_elementwise_op.cc delete mode 100644 paddle/fluid/operators/fused/fused_elementwise_op.h delete mode 100644 paddle/fluid/operators/fused/fused_elementwise_sub_op.cc diff --git a/paddle/fluid/operators/fused/fused_elementwise_add_op.cc b/paddle/fluid/operators/fused/fused_elementwise_add_op.cc deleted file mode 100644 index e8b689a9eb1d37..00000000000000 --- a/paddle/fluid/operators/fused/fused_elementwise_add_op.cc +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/elementwise/elementwise_op.h" -#include "paddle/fluid/operators/fused/fused_elementwise_op.h" - -namespace paddle { -namespace operators { - -class FusedElementwiseAddOpMaker : public FusedElementwiseOpMaker { - protected: - std::string GetName() const override { return "Add"; } - std::string GetEquation() const override { return "Out = X + Y"; } - - void AddInputX() override { - AddInput( - "X", - "(Variable), Tensor or phi::DenseTensor of any dimensions. 
Its dtype " - "should be int32, int64, float32, float64."); - } - - void AddInputY() override { - AddInput( - "Y", - "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " - "should be int32, int64, float32, float64."); - } - - std::string GetOpFuntionality() const override { - return "Add two tensors element-wise"; - } -}; - -} // namespace operators -} // namespace paddle -namespace ops = paddle::operators; - -REGISTER_OPERATOR( - fused_elementwise_add, - ops::ElementwiseOp, - ops::FusedElementwiseAddOpMaker, - ops::ElementwiseOpInferVarType, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/fused/fused_elementwise_div_op.cc b/paddle/fluid/operators/fused/fused_elementwise_div_op.cc deleted file mode 100644 index 3f36ab85e73718..00000000000000 --- a/paddle/fluid/operators/fused/fused_elementwise_div_op.cc +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/elementwise/elementwise_op.h" -#include "paddle/fluid/operators/fused/fused_elementwise_op.h" - -namespace paddle { -namespace operators { - -class FusedElementwiseDivOpMaker : public FusedElementwiseOpMaker { - protected: - std::string GetName() const override { return "Div"; } - std::string GetEquation() const override { return "Out = X / Y"; } - - void AddInputX() override { - AddInput( - "X", - "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " - "should be int32, int64, float32, float64."); - } - - void AddInputY() override { - AddInput( - "Y", - "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " - "should be int32, int64, float32, float64."); - } - - std::string GetOpFuntionality() const override { - return "Divide two tensors element-wise"; - } -}; - -} // namespace operators -} // namespace paddle -namespace ops = paddle::operators; - -REGISTER_OPERATOR( - fused_elementwise_div, - ops::ElementwiseOp, - ops::FusedElementwiseDivOpMaker, - ops::ElementwiseOpInferVarType, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/fused/fused_elementwise_mul_op.cc b/paddle/fluid/operators/fused/fused_elementwise_mul_op.cc deleted file mode 100644 index 92a73e1df56d14..00000000000000 --- a/paddle/fluid/operators/fused/fused_elementwise_mul_op.cc +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/elementwise/elementwise_op.h" -#include "paddle/fluid/operators/fused/fused_elementwise_op.h" - -namespace paddle { -namespace operators { - -class FusedElementwiseMulOpMaker : public FusedElementwiseOpMaker { - protected: - std::string GetName() const override { return "Mul"; } - std::string GetEquation() const override { return "Out = X \\\\odot Y"; } - - void AddInputX() override { - AddInput( - "X", - "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " - "should be int32, int64, float32, float64."); - } - - void AddInputY() override { - AddInput( - "Y", - "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " - "should be int32, int64, float32, float64."); - } - - std::string GetOpFuntionality() const override { - return "Multiply two tensors element-wise"; - } -}; - -} // namespace operators -} // namespace paddle -namespace ops = paddle::operators; - -REGISTER_OPERATOR( - fused_elementwise_mul, - ops::ElementwiseOp, - ops::FusedElementwiseMulOpMaker, - ops::ElementwiseOpInferVarType, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/fused/fused_elementwise_op.cc b/paddle/fluid/operators/fused/fused_elementwise_op.cc new file mode 100644 index 00000000000000..e6c2743e9385d2 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_elementwise_op.cc @@ -0,0 +1,95 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" + +namespace paddle { +namespace operators { + +class FusedElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() final { + AddInput("X", "The first input tensor of elementwise op."); + AddInput("Y", "The second input tensor of elementwise op."); + AddOutput("Out", "A location into which the result is stored."); + AddAttr( + "axis", + "If X.dimension != Y.dimension, Y.dimension must be a " + "subsequence of X.dimension. 
And axis is the start dimension index " + "for broadcasting Y onto X.") + .SetDefault(-1); + AddAttr( + "fuse_activation", + "Activation type from elementwise_act_onednn_fuse_pass") + .SetDefault(""); + AddAttr("fuse_alpha", + "Activation alpha from elementwise_act_onednn_fuse_pass") + .SetDefault(0.0f); + AddAttr("fuse_beta", + "Activation beta from elementwise_act_onednn_fuse_pass") + .SetDefault(0.0f); + AddAttr("fused_output_scale", + "Obtained from operator_scale_onednn_fuse_pass") + .SetDefault(1.0f); + AddAttr>( + "fused_unsqueeze2_axes", + "Obtained from operator_unsqueeze2_onednn_fuse_pass") + .SetDefault({}); + AddAttr("scale_x", "Elementwise X input quantization scale") + .SetDefault(1.0f); + AddAttr("scale_y", "Elementwise Y input quantization scale") + .SetDefault(1.0f); + AddAttr("scale_out", "Elementwise Out output quantization scale") + .SetDefault(1.0f); + AddComment( + R"DOC(Elementwise operator extended with oneDNN-specific fusion logic.)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + fused_elementwise_add, + ops::ElementwiseOp, + ops::FusedElementwiseOpMaker, + ops::ElementwiseOpInferVarType, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); + +REGISTER_OPERATOR( + fused_elementwise_sub, + ops::ElementwiseOp, + ops::FusedElementwiseOpMaker, + ops::ElementwiseOpInferVarType, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); + +REGISTER_OPERATOR( + fused_elementwise_mul, + ops::ElementwiseOp, + ops::FusedElementwiseOpMaker, + ops::ElementwiseOpInferVarType, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); + +REGISTER_OPERATOR( + fused_elementwise_div, + ops::ElementwiseOp, + ops::FusedElementwiseOpMaker, + ops::ElementwiseOpInferVarType, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/fused/fused_elementwise_op.h b/paddle/fluid/operators/fused/fused_elementwise_op.h deleted file mode 100644 index 21bd80d1b7d1f4..00000000000000 --- a/paddle/fluid/operators/fused/fused_elementwise_op.h +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/elementwise/elementwise_op.h" - -namespace paddle { -namespace operators { - -class FusedElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() final { - AddInputX(); - AddInputY(); - AddOpOutput(); - AddAttr("axis", - "(int, default -1). If X.dimension != Y.dimension," - "Y.dimension must be a subsequence of x.dimension. And axis " - "is the start dimension index " - "for broadcasting Y onto X. 
") - .SetDefault(-1); - AddAttr( - "fuse_activation", - "Activation type from elementwise_act_onednn_fuse_pass") - .SetDefault(""); - AddAttr("fuse_alpha", "Alfa value for the elementwise operator") - .SetDefault(0.0f); - AddAttr("fuse_beta", "Beta value for the elementwise operator") - .SetDefault(0.0f); - AddAttr("fused_output_scale", - "Obtained from operator_scale_onednn_fuse_pass") - .SetDefault(1.0f); - AddAttr>( - "fused_unsqueeze2_axes", - "Obtained from operator_unsqueeze2_onednn_fuse_pass for " - "elementwise_mul") - .SetDefault({}); - AddAttr("scale_x", "Obtained from cpu_quantize_pass") - .SetDefault(1.0f); - AddAttr("scale_y", "Obtained from cpu_quantize_pass") - .SetDefault(1.0f); - AddAttr("scale_out", "Obtained from cpu_quantize_pass") - .SetDefault(1.0f); - AddOpComment(); - } - - protected: - virtual void AddInputX() { - AddInput("X", "(Tensor), The first input tensor of elementwise op."); - } - virtual void AddInputY() { - AddInput("Y", "(Tensor), The second input tensor of elementwise op."); - } - virtual void AddOpOutput() { - AddOutput("Out", - "N-dimension tensor. A location into which the result is stored. " - "It's dimension " - "equals with x"); - } - virtual void AddOpComment() { AddComment(GetCommentExamples()); } - - virtual std::string GetOpFuntionality() const { return ""; } - - virtual std::string GetName() const = 0; - virtual std::string GetEquation() const = 0; - - std::string GetCommentExamples() const { - return string::Sprintf(R"DOC( -Elementwise %s Operator. - -%s - -The equation is: - -$$%s$$ - -- $X$: a tensor of any dimension. -- $Y$: a tensor whose dimensions must be less than or equal to the dimensions of $X$. - -There are two cases for this operator: - -1. The shape of $Y$ is the same with $X$. -2. The shape of $Y$ is a continuous subsequence of $X$. - -For case 2: - -1. Broadcast $Y$ to match the shape of $X$, where $axis$ is the start dimension index - for broadcasting $Y$ onto $X$. -2. If $axis$ is -1 (default), $axis = rank(X) - rank(Y)$. -3. The trailing dimensions of size 1 for $Y$ will be ignored for the consideration of - subsequence, such as shape(Y) = (2, 1) => (2). - -For example: - - .. code-block:: text - - shape(X) = (2, 3, 4, 5), shape(Y) = (,) - shape(X) = (2, 3, 4, 5), shape(Y) = (5,) - shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5), with axis=-1(default) or axis=2 - shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1 - shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0 - shape(X) = (2, 3, 4, 5), shape(Y) = (2, 1), with axis=0 - -)DOC", - GetName(), - GetOpFuntionality(), - GetEquation()); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/fused/fused_elementwise_sub_op.cc b/paddle/fluid/operators/fused/fused_elementwise_sub_op.cc deleted file mode 100644 index 0b345134f663f7..00000000000000 --- a/paddle/fluid/operators/fused/fused_elementwise_sub_op.cc +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/elementwise/elementwise_op.h" -#include "paddle/fluid/operators/fused/fused_elementwise_op.h" - -namespace paddle { -namespace operators { - -class FusedElementwiseSubOpMaker : public FusedElementwiseOpMaker { - protected: - std::string GetName() const override { return "Sub"; } - std::string GetEquation() const override { return "Out = X - Y"; } - - void AddInputX() override { - AddInput( - "X", - "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " - "should be int32, int64, float32, float64."); - } - - void AddInputY() override { - AddInput( - "Y", - "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype " - "should be int32, int64, float32, float64."); - } - - std::string GetOpFuntionality() const override { - return "Subtract two tensors element-wise"; - } -}; - -} // namespace operators -} // namespace paddle -namespace ops = paddle::operators; - -REGISTER_OPERATOR( - fused_elementwise_sub, - ops::ElementwiseOp, - ops::FusedElementwiseSubOpMaker, - ops::ElementwiseOpInferVarType, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); From f93b1a7e469e4f24745312104fba92c7de504b48 Mon Sep 17 00:00:00 2001 From: Silv3S Date: Fri, 10 Mar 2023 14:10:09 +0100 Subject: [PATCH 09/23] adjust proto --- .../compat/fused_elementwise_add.pbtxt | 4 +++ .../compat/fused_elementwise_div.pbtxt | 28 +++++++++++++++++++ .../compat/fused_elementwise_mul.pbtxt | 2 +- .../compat/fused_elementwise_sub.pbtxt | 4 +++ .../fusion/onednn/fused_elementwise_kernel.cc | 7 ++--- 5 files changed, 40 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt index 374170aa4e97fe..6c589b7244ca47 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_add.pbtxt @@ -31,6 +31,10 @@ extra { name: "fused_output_scale" type: FLOAT } + attrs { + name: "fused_unsqueeze2_axes" + type: INTS + } attrs { name: "scale_x" type: FLOAT diff --git a/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt index 1eb6230dfc36d0..136edf14d3ef6a 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_div.pbtxt @@ -15,8 +15,36 @@ def { } } extra { + attrs { + name: "fuse_activation" + type: STRING + } + attrs { + name: "fuse_alpha" + type: FLOAT + } + attrs { + name: "fuse_beta" + type: FLOAT + } attrs { name: "fused_output_scale" type: FLOAT } + attrs { + name: "fused_unsqueeze2_axes" + type: INTS + } + attrs { + name: "scale_x" + type: FLOAT + } + attrs { + name: "scale_y" + type: FLOAT + } + attrs { + name: "scale_out" + type: FLOAT + } } diff --git a/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt index 9c46e8b7c7e643..05e70950d3a212 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_mul.pbtxt @@ -33,7 +33,7 @@ extra { } attrs { name: "fused_unsqueeze2_axes" - type: INTS + type: INTS } attrs { name: "scale_x" diff --git a/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt b/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt index 
1f59922cff1556..b9d4acad8cbc75 100644 --- a/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt +++ b/paddle/fluid/operators/compat/fused_elementwise_sub.pbtxt @@ -31,6 +31,10 @@ extra { name: "fused_output_scale" type: FLOAT } + attrs { + name: "fused_unsqueeze2_axes" + type: INTS + } attrs { name: "scale_x" type: FLOAT diff --git a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc index 185b9797e7ec7f..e06c5860a6718c 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc @@ -65,15 +65,14 @@ void FusedElementwiseKernel(const OneDNNContext& dev_ctx, const auto src_x_memory = handler.AcquireSrcMemory(non_const_x); const auto src_y_memory = handler.AcquireSecondSrcMemory(non_const_y); - // (jczaja) For Inplace src and dst should be the same memory object. + // For Inplace src and dst should be the same memory object. // So x should share buffer with z. But UT mechanics is testing inplace // execution for this op not checking that x can be bradcasted to match in // shape y tensor. // This is wrong as when x is to be broadcasted then z(out) will match the // shape of y which is bigger than x. Hence if x is smaller in shape than z - // and they share a buffer (of - // shape x) then this buffer is not big enough to hold result of elementwise - // operation. + // and they share a buffer (of shape x) then this buffer is not big enough + // to hold result of elementwise operation. const bool reuse_x_memory = non_const_x->numel() == out->numel() && non_const_x->IsSharedBufferWith(*out); std::shared_ptr dst_memory; From 079fb837631476daa2dfabea7eb06c294c73b39d Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Mon, 13 Mar 2023 13:53:49 +0100 Subject: [PATCH 10/23] Add supported datatypes --- .../fusion/onednn/fused_elementwise_kernel.cc | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc index e06c5860a6718c..81ccd12bda078c 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc @@ -162,31 +162,46 @@ PD_REGISTER_KERNEL(fused_elementwise_add, ONEDNN, phi::FusedAddKernel, float, - phi::dtype::bfloat16, - int8_t, - uint8_t) {} + double, + int16_t, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} PD_REGISTER_KERNEL(fused_elementwise_sub, OneDNN, ONEDNN, phi::FusedSubtractKernel, float, - phi::dtype::bfloat16, - int8_t, - uint8_t) {} + double, + int16_t, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(fused_elementwise_mul, OneDNN, ONEDNN, phi::FusedMultiplyKernel, float, - phi::dtype::bfloat16, - int8_t, - uint8_t) {} + double, + int, + int64_t, + bool, + phi::dtype::complex, + phi::dtype::complex, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(fused_elementwise_div, OneDNN, ONEDNN, phi::FusedDivideKernel, float, - phi::dtype::bfloat16) {} + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} From 3651b8eadc659ea879fe9f733f7268eb7345642f Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Tue, 14 Mar 2023 17:37:32 +0100 Subject: [PATCH 11/23] Change 'Scale' to 'scale' in tests, change some tests to onednn --- .../framework/ir/graph_pattern_detector.cc | 8 ++- .../ir/mkldnn/cpu_quantize_pass_tester.cc | 34 
++++++------ .../test_trt_convert_multihead_matmul.py | 6 +-- ...p.py => test_elementwise_add_onednn_op.py} | 28 +++++----- ...p.py => test_elementwise_mul_onednn_op.py} | 22 ++++---- ...p.py => test_elementwise_sub_onednn_op.py} | 28 +++++----- python/paddle/incubate/autograd/primrules.py | 54 ++----------------- tools/parallel_UT_rule.py | 12 ++--- tools/static_mode_white_list.py | 6 +-- 9 files changed, 79 insertions(+), 119 deletions(-) rename python/paddle/fluid/tests/unittests/mkldnn/{test_elementwise_add_mkldnn_op.py => test_elementwise_add_onednn_op.py} (87%) rename python/paddle/fluid/tests/unittests/mkldnn/{test_elementwise_mul_mkldnn_op.py => test_elementwise_mul_onednn_op.py} (89%) rename python/paddle/fluid/tests/unittests/mkldnn/{test_elementwise_sub_mkldnn_op.py => test_elementwise_sub_onednn_op.py} (91%) diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 858725f601079c..92c84ff50ef9cf 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -2254,7 +2254,8 @@ PDNode *patterns::OpRequant::operator()() { auto any_op = pattern->NewNode(any_op_repr()) ->assert_is_op() ->assert_more([&](Node *node) { - return node->Op()->HasAttr("Scale_out") ? true : false; + return (node->Op()->HasAttr("Scale_out") || + node->Op()->HasAttr("scale_out")); }); auto requant_in = pattern->NewNode(requant_in_repr()) ->assert_is_op_input("requantize", "Input"); @@ -2282,7 +2283,10 @@ PDNode *patterns::RequantOp::operator()() { ->assert_more([&](Node *node) { return (node->Op()->HasAttr("Scale_in") || node->Op()->HasAttr("Scale_x") || - node->Op()->HasAttr("Scale_y")); + node->Op()->HasAttr("Scale_y") || + node->Op()->HasAttr("scale_in") || + node->Op()->HasAttr("scale_x") || + node->Op()->HasAttr("scale_y")); }); requant_op->LinksFrom({requant_in}).LinksTo({requant_out}); diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index 195aa2728acd57..9c32e564c3722b 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -96,14 +96,15 @@ void SetOp(ProgramDesc* prog, op->SetAttr("Scale_x", 1.0f); op->SetAttr("Scale_y", 1.0f); op->SetAttr("Scale_out", 1.0f); - } else if (type == "elementwise_add" || type == "elementwise_mul" || - type == "elementwise_sub") { + } else if (type == "fused_elementwise_add" || + type == "fused_elementwise_sub" || + type == "fused_elementwise_mul") { op->SetInput("X", {inputs[0]}); if (inputs.size() > 1) op->SetInput("Y", {inputs[1]}); op->SetOutput("Out", {outputs[0]}); - op->SetAttr("Scale_x", 1.0f); - op->SetAttr("Scale_y", 1.0f); - op->SetAttr("Scale_out", 1.0f); + op->SetAttr("scale_x", 1.0f); + op->SetAttr("scale_y", 1.0f); + op->SetAttr("scale_out", 1.0f); } else if (type == "fusion_gru") { op->SetInput("X", {inputs[0]}); op->SetInput("Bias", {inputs[1]}); @@ -177,16 +178,19 @@ void CheckScales(const OpDesc* op, float scale, float shift) { scale); scale_names.push_back("Scale_in"); scale_names.push_back("Scale_out"); - } else if (type == "fused_matmul" || type == "elementwise_add" || - type == "elementwise_mul" || type == "elementwise_sub") { + } else if (type == "fused_matmul") { scale_names.push_back("Scale_x"); scale_names.push_back("Scale_y"); scale_names.push_back("Scale_out"); - if (type == "fused_matmul") { - auto const& names = op->InputNames(); - if 
(std::find(names.begin(), names.end(), "ResidualData") != names.end()) - scale_names.push_back("Scale_in_eltwise"); - } + auto const& names = op->InputNames(); + if (std::find(names.begin(), names.end(), "ResidualData") != names.end()) + scale_names.push_back("Scale_in_eltwise"); + } else if (type == "fused_elementwise_add" || + type == "fused_elementwise_sub" || + type == "fused_elementwise_mul") { + scale_names.push_back("scale_x"); + scale_names.push_back("scale_y"); + scale_names.push_back("scale_out"); } else if (type == "fusion_gru" || type == "fusion_lstm") { EXPECT_EQ(op->GetAttrIfExists("Shift_data"), shift); EXPECT_EQ(op->GetAttrIfExists>("Scale_weights")[0], @@ -709,9 +713,9 @@ void TestElementwiseUnsignedAndSignedInput( } const std::vector> elementwises = { - {"elementwise_add", "ElementwiseAdd"}, - {"elementwise_mul", "ElementwiseMul"}, - {"elementwise_sub", "ElementwiseSub"}}; + {"fused_elementwise_add", "FusedElementwiseAdd"}, + {"fused_elementwise_mul", "FusedElementwiseMul"}, + {"fused_elementwise_sub", "FusedElementwiseSub"}}; class TestElementwises : public testing::TestWithParam> {}; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py index cca7fcddcdabbf..f5658c172a890a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py @@ -834,9 +834,9 @@ def generate_weight2(): }, "op_outputs": {"Out": ["elementwise_add1_output"]}, "op_attrs": { - "Scale_out": 1.0, - "Scale_x": 1.0, - "Scale_y": 1.0, + "scale_out": 1.0, + "scale_x": 1.0, + "scale_y": 1.0, "axis": 2, "Out": 1.0, }, diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py similarity index 87% rename from python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py index a50289de1eebfe..95b2f0f7b4ed75 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py @@ -23,7 +23,7 @@ ) -class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp): +class TestOneDNNElementwiseAddOp(TestElementwiseAddOp): def init_kernel_type(self): self.use_mkldnn = True @@ -31,21 +31,21 @@ def init_dtype(self): self.dtype = np.float32 -class TestMKLDNNElementwiseAddOp2(TestMKLDNNElementwiseAddOp): +class TestOneDNNElementwiseAddOp2(TestOneDNNElementwiseAddOp): def init_input_output(self): self.x = np.random.random((100,)).astype(self.dtype) self.y = np.random.random((100,)).astype(self.dtype) self.out = np.add(self.x, self.y) -class TestMKLDNNElementwiseAddOp3(TestMKLDNNElementwiseAddOp): +class TestOneDNNElementwiseAddOp3(TestOneDNNElementwiseAddOp): def init_input_output(self): self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) self.out = np.add(self.x, self.y) -class TestMKLDNNElementwiseAddOp4(TestMKLDNNElementwiseAddOp): +class TestOneDNNElementwiseAddOp4(TestOneDNNElementwiseAddOp): def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) @@ -59,21 +59,21 @@ def 
test_check_grad_ingore_y(self): pass -class TestMKLDNNElementwiseAddOp5(TestMKLDNNElementwiseAddOp): +class TestOneDNNElementwiseAddOp5(TestOneDNNElementwiseAddOp): def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) self.out = np.add(self.x, self.y) -class TestMKLDNNElementwiseAddOpBroadcastXintoY(TestMKLDNNElementwiseAddOp): +class TestOneDNNElementwiseAddOpBroadcastXintoY(TestOneDNNElementwiseAddOp): def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 50, 1]).astype(self.dtype) self.y = np.random.uniform(1, 2, [2, 50, 160]).astype(self.dtype) self.out = np.add(self.x, self.y) -class TestMKLDNNElementwiseAddOp_broadcast_3(TestMKLDNNElementwiseAddOp): +class TestOneDNNElementwiseAddOp_broadcast_3(TestOneDNNElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -83,7 +83,7 @@ def init_axis(self): self.axis = 1 -class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestMKLDNNElementwiseAddOp): +class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestOneDNNElementwiseAddOp): def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) @@ -123,9 +123,9 @@ def init_input_output(self): self.out = np.add(self.x, self.y) def init_scales(self): - self.attrs['Scale_x'] = 1.0 - self.attrs['Scale_y'] = 1.0 - self.attrs['Scale_out'] = 1.0 + self.attrs['scale_x'] = 1.0 + self.attrs['scale_y'] = 1.0 + self.attrs['scale_out'] = 1.0 def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode @@ -159,9 +159,9 @@ def init_input_output(self): self.scale_o, self.out = self.quantize(self.out_f) def init_scales(self): - self.attrs['Scale_x'] = self.scale_x - self.attrs['Scale_y'] = self.scale_y - self.attrs['Scale_out'] = self.scale_o + self.attrs['scale_x'] = self.scale_x + self.attrs['scale_y'] = self.scale_y + self.attrs['scale_out'] = self.scale_o def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py similarity index 89% rename from python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py index a558b05196dcf1..9d3afeaa40925f 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py @@ -23,7 +23,7 @@ ) -class TestMKLDNNElementwiseMulOp(ElementwiseMulOp): +class TestOneDNNElementwiseMulOp(ElementwiseMulOp): def init_kernel_type(self): self.use_mkldnn = True @@ -31,21 +31,21 @@ def init_dtype(self): self.dtype = np.float32 -class TestMKLDNNElementwiseMulOp2(TestMKLDNNElementwiseMulOp): +class TestOneDNNElementwiseMulOp2(TestOneDNNElementwiseMulOp): def init_input_output(self): self.x = np.random.random((100,)).astype(self.dtype) self.y = np.random.random((100,)).astype(self.dtype) self.out = np.multiply(self.x, self.y) -class TestMKLDNNElementwiseMulOp3(TestMKLDNNElementwiseMulOp): +class TestOneDNNElementwiseMulOp3(TestOneDNNElementwiseMulOp): def init_input_output(self): self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [2, 3, 4, 
5]).astype(self.dtype) self.out = np.multiply(self.x, self.y) -class TestMKLDNNElementwiseMulOp4(TestMKLDNNElementwiseMulOp): +class TestOneDNNElementwiseMulOp4(TestOneDNNElementwiseMulOp): def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) @@ -59,7 +59,7 @@ def test_check_grad_ingore_y(self): pass -class TestMKLDNNElementwiseMulOp5(TestMKLDNNElementwiseMulOp): +class TestOneDNNElementwiseMulOp5(TestOneDNNElementwiseMulOp): def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) @@ -96,9 +96,9 @@ def init_input_output(self): self.out = np.multiply(self.x, self.y) def init_scales(self): - self.attrs['Scale_x'] = 1.0 - self.attrs['Scale_y'] = 1.0 - self.attrs['Scale_out'] = 1.0 + self.attrs['scale_x'] = 1.0 + self.attrs['scale_y'] = 1.0 + self.attrs['scale_out'] = 1.0 def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode @@ -132,9 +132,9 @@ def init_input_output(self): self.scale_o, self.out = self.quantize(self.out_f) def init_scales(self): - self.attrs['Scale_x'] = self.scale_x - self.attrs['Scale_y'] = self.scale_y - self.attrs['Scale_out'] = self.scale_o + self.attrs['scale_x'] = self.scale_x + self.attrs['scale_y'] = self.scale_y + self.attrs['scale_out'] = self.scale_o def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py similarity index 91% rename from python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_mkldnn_op.py rename to python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py index 685e8cc648a5e3..5f97197a683ece 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py @@ -30,7 +30,7 @@ not (isinstance(_current_expected_place(), core.CPUPlace)), "GPU is not supported", ) -class TestMKLDNNElementwiseSubOp(OpTest): +class TestOneDNNElementwiseSubOp(OpTest): def setUp(self): self.op_type = "elementwise_sub" self.init_dtype() @@ -71,49 +71,49 @@ def test_check_output(self): self.check_output() -class TestMKLDNNElementwiseSubOp2(TestMKLDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOp2(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.random.random((100,)).astype(self.dtype) self.y = np.random.random((100,)).astype(self.dtype) self.out = np.subtract(self.x, self.y) -class TestMKLDNNElementwiseSubOp3(TestMKLDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOp3(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) self.out = np.subtract(self.x, self.y) -class TestMKLDNNElementwiseSubOp4(TestMKLDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOp4(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) self.out = np.subtract(self.x, self.y) -class TestMKLDNNElementwiseSubOp5(TestMKLDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOp5(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = 
np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) self.out = np.subtract(self.x, self.y) -class TestMKLDNNElementwiseSubOp6(TestMKLDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOp6(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.random.uniform(0.1, 2, [180, 1]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [1, 256]).astype(self.dtype) self.out = np.subtract(self.x, self.y) -class TestMKLDNNElementwiseSubOp7(TestMKLDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOp7(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.random.uniform(0.1, 2, [1, 180]).astype(self.dtype) self.y = np.random.uniform(0.1, 1, [256, 1]).astype(self.dtype) self.out = np.subtract(self.x, self.y) -class TestMKLDNNElementwiseSubOp_broadcast(TestMKLDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOp_broadcast(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) @@ -123,7 +123,7 @@ def init_axis(self): self.axis = 1 -class TestElementwiseSubOp_xsize_lessthan_ysize_sub(TestMKLDNNElementwiseSubOp): +class TestElementwiseSubOp_xsize_lessthan_ysize_sub(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.random.rand(10, 12).astype(self.dtype) self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) @@ -134,7 +134,7 @@ def init_axis(self): @OpTestTool.skip_if_not_cpu_bf16() -class TestBf16(TestMKLDNNElementwiseSubOp): +class TestBf16(TestOneDNNElementwiseSubOp): def setUp(self): self.op_type = "elementwise_sub" self.init_dtype() @@ -223,7 +223,7 @@ def test_check_grad_ignore_x(self): ) -class TestInt8(TestMKLDNNElementwiseSubOp): +class TestInt8(TestOneDNNElementwiseSubOp): def init_kernel_type(self): self.use_mkldnn = True self._cpu_only = True @@ -237,9 +237,9 @@ def init_input_output(self): self.out = np.subtract(self.x, self.y) def init_scales(self): - self.attrs['Scale_x'] = 1.0 - self.attrs['Scale_y'] = 1.0 - self.attrs['Scale_out'] = 1.0 + self.attrs['scale_x'] = 1.0 + self.attrs['scale_y'] = 1.0 + self.attrs['scale_out'] = 1.0 def test_check_output(self): self.init_scales() diff --git a/python/paddle/incubate/autograd/primrules.py b/python/paddle/incubate/autograd/primrules.py index 137747e75da15e..06dad16b9bf5bc 100644 --- a/python/paddle/incubate/autograd/primrules.py +++ b/python/paddle/incubate/autograd/primrules.py @@ -132,69 +132,21 @@ def linear_jvp(op, *args, **kwargs): def elementwise_add_orig2prim(op, x, y): if x.shape != y.shape: y = broadcast(y, shape=x.shape) - if op.attr('Scale_x') - 1.0 > 1e-5: - scale_x = fill_const( - shape=x.shape, dtype=x.dtype, value=op.attr('Scale_x') - ) - x = mul(x, scale_x) - if op.attr('Scale_y') - 1.0 > 1e-5: - scale_y = fill_const( - shape=y.shape, dtype=y.dtype, value=op.attr('Scale_y') - ) - y = mul(y, scale_y) - z = add(x, y) - if op.attr('Scale_out') - 1.0 > 1e-5: - scale_out = fill_const( - shape=z.shape, dtype=z.dtype, value=op.attr('Scale_out') - ) - z = mul(z, scale_out) - return z + return add(x, y) @REGISTER_ORIG2PRIM('elementwise_sub') def elementwise_sub_orig2prim(op, x, y): if x.shape != y.shape: y = broadcast(y, shape=x.shape) - if op.attr('Scale_x') - 1.0 > 1e-5: - scale_x = fill_const( - shape=x.shape, dtype=x.dtype, value=op.attr('Scale_x') - ) - x = mul(x, scale_x) - if op.attr('Scale_y') - 1.0 > 1e-5: - scale_y = fill_const( - shape=y.shape, dtype=y.dtype, value=op.attr('Scale_y') - ) - y = mul(y, 
scale_y) - z = sub(x, y) - if op.attr('Scale_out') - 1.0 > 1e-5: - scale_out = fill_const( - shape=z.shape, dtype=z.dtype, value=op.attr('Scale_out') - ) - z = mul(z, scale_out) - return z + return sub(x, y) @REGISTER_ORIG2PRIM('elementwise_mul') def elementwise_mul_orig2prim(op, x, y): if x.shape != y.shape: y = broadcast(y, shape=x.shape) - if op.attr('Scale_x') - 1.0 > 1e-5: - scale_x = fill_const( - shape=x.shape, dtype=x.dtype, value=op.attr('Scale_x') - ) - x = mul(x, scale_x) - if op.attr('Scale_y') - 1.0 > 1e-5: - scale_y = fill_const( - shape=y.shape, dtype=y.dtype, value=op.attr('Scale_y') - ) - y = mul(y, scale_y) - z = mul(x, y) - if op.attr('Scale_out') - 1.0 > 1e-5: - scale_out = fill_const( - shape=z.shape, dtype=z.dtype, value=op.attr('Scale_out') - ) - z = mul(z, scale_out) - return z + return mul(x, y) @REGISTER_ORIG2PRIM('elementwise_div') diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 90f6f3600e3fdd..05a6ebe4e3e7d7 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -370,7 +370,7 @@ 'test_adaptive_pool2d_convert_global_pass', 'test_lookup_table_v2_bf16_op', 'test_operator_desc', - 'test_elementwise_mul_mkldnn_op', + 'test_elementwise_mul_onednn_op', 'test_fetch_handler', 'test_cpu_bfloat16_placement_pass', 'test_match_matrix_tensor_op', @@ -649,7 +649,7 @@ 'test_auto_parallel_reshard_serial', 'test_auto_parallel_reshard_dpmppp', 'test_clip_mkldnn_op', - 'test_elementwise_sub_mkldnn_op', + 'test_elementwise_sub_onednn_op', 'test_flatten_mkldnn_op', 'test_slice_mkldnn_op', 'test_ir_generate_pass', @@ -717,7 +717,7 @@ 'test_quant2_int8_resnet50_channelwise_mkldnn', 'test_analyzer_bert', 'test_directory_migration', - 'test_elementwise_add_mkldnn_op', + 'test_elementwise_add_onednn_op', 'test_quant_int8_googlenet_mkldnn', 'test_callback_early_stop', ] @@ -2259,7 +2259,7 @@ 'test_scale_bf16_mkldnn_op', 'test_ir_generate_pass', 'test_expand_v2_mkldnn_op', - 'test_elementwise_sub_mkldnn_op', + 'test_elementwise_sub_onednn_op', ] # It run 4 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, @@ -2702,7 +2702,7 @@ 'test_fusion_transpose_flatten_concat_op', 'test_elementwise_nn_grad', 'test_hinge_loss_op', - 'test_elementwise_add_mkldnn_op', + 'test_elementwise_add_onednn_op', 'test_optimizer', 'test_deformable_conv_op', 'test_py_reader_push_pop', @@ -2861,7 +2861,7 @@ 'test_queue', 'test_cross_entropy_op', 'test_detection', - 'test_elementwise_mul_mkldnn_op', + 'test_elementwise_mul_onednn_op', 'test_grid_generator', 'test_functional_conv2d', 'test_fit_a_line', diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index 3d7e2654a4f639..f83d21c1fca076 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -594,11 +594,11 @@ 'test_conv2d_transpose_bf16_mkldnn_op', 'test_conv3d_mkldnn_op', 'test_dequantize_mkldnn_op', - 'test_elementwise_add_mkldnn_op', + 'test_elementwise_add_onednn_op', 'test_elementwise_add_bf16_mkldnn_op', 'test_elementwise_div_mkldnn_op', - 'test_elementwise_sub_mkldnn_op', - 'test_elementwise_mul_mkldnn_op', + 'test_elementwise_sub_onednn_op', + 'test_elementwise_mul_onednn_op', 'test_elementwise_mul_bf16_mkldnn_op', 'test_fc_mkldnn_op', 'test_fc_bf16_mkldnn_op', From f4909e8cbf50b3a8561b0ed6f148a88331666d35 Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Thu, 16 Mar 2023 19:03:37 +0100 Subject: [PATCH 12/23] Revert breaking changes --- .../operators/compat/elementwise_add.pbtxt | 12 ++++++ 
.../operators/compat/elementwise_div.pbtxt | 12 ++++++ .../operators/compat/elementwise_mul.pbtxt | 12 ++++++ .../operators/compat/elementwise_pow.pbtxt | 12 ++++++ .../operators/compat/elementwise_sub.pbtxt | 12 ++++++ paddle/phi/api/yaml/op_compat.yaml | 39 ++++++++++++------- .../fusion/onednn/fused_elementwise_kernel.cc | 37 ++++++------------ .../phi/kernels/onednn/elementwise_kernel.cc | 29 ++++++++++++-- ...st_onednn_operator_unsqueeze2_fuse_pass.py | 2 +- tools/parallel_UT_rule.py | 12 +++--- tools/static_mode_white_list.py | 6 +-- 11 files changed, 133 insertions(+), 52 deletions(-) diff --git a/paddle/fluid/operators/compat/elementwise_add.pbtxt b/paddle/fluid/operators/compat/elementwise_add.pbtxt index fdafc47dcd12ac..ee2f1d7c6861e3 100644 --- a/paddle/fluid/operators/compat/elementwise_add.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_add.pbtxt @@ -23,4 +23,16 @@ extra { name: "Out0_threshold" type: FLOAT } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } } diff --git a/paddle/fluid/operators/compat/elementwise_div.pbtxt b/paddle/fluid/operators/compat/elementwise_div.pbtxt index 4baa9dea4fcdd5..0b46bfb7484013 100644 --- a/paddle/fluid/operators/compat/elementwise_div.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_div.pbtxt @@ -19,4 +19,16 @@ extra { name: "act" type: STRING } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } } diff --git a/paddle/fluid/operators/compat/elementwise_mul.pbtxt b/paddle/fluid/operators/compat/elementwise_mul.pbtxt index 0db1c9c782ae2d..94cec09ace8f32 100644 --- a/paddle/fluid/operators/compat/elementwise_mul.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_mul.pbtxt @@ -13,4 +13,16 @@ def { name: "axis" type: INT } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } } diff --git a/paddle/fluid/operators/compat/elementwise_pow.pbtxt b/paddle/fluid/operators/compat/elementwise_pow.pbtxt index 018ca70bcbca19..e9427104dbfa7c 100644 --- a/paddle/fluid/operators/compat/elementwise_pow.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_pow.pbtxt @@ -19,4 +19,16 @@ extra { name: "act" type: STRING } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } } diff --git a/paddle/fluid/operators/compat/elementwise_sub.pbtxt b/paddle/fluid/operators/compat/elementwise_sub.pbtxt index ae8a1352452d27..f73b92fb95f87c 100644 --- a/paddle/fluid/operators/compat/elementwise_sub.pbtxt +++ b/paddle/fluid/operators/compat/elementwise_sub.pbtxt @@ -19,4 +19,16 @@ extra { name: "act" type: STRING } + attrs { + name: "Scale_x" + type: FLOAT + } + attrs { + name: "Scale_y" + type: FLOAT + } + attrs { + name: "Scale_out" + type: FLOAT + } } diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index e1430844dea87b..98fbee81627c38 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -41,7 +41,8 @@ - op : add (elementwise_add) backward : add_grad (elementwise_add_grad) extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : addmm backward : 
addmm_grad @@ -459,7 +460,8 @@ outputs : out: Out extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : dot inputs : @@ -511,7 +513,8 @@ - op : elementwise_pow backward : elementwise_pow_grad extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : elu backward : elu_grad, elu_double_grad (elu_grad_grad) @@ -672,17 +675,20 @@ - op : floor_divide (elementwise_floordiv) extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : fmax (elementwise_fmax) backward : fmax_grad (elementwise_fmax_grad) extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : fmin (elementwise_fmin) backward : fmin_grad (elementwise_fmin_grad) extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : fold inputs : @@ -748,7 +754,8 @@ - op : grad_add extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : greater_equal inputs : @@ -825,7 +832,8 @@ - op : heaviside (elementwise_heaviside) backward : heaviside_grad (elementwise_heaviside_grad) extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : histogram inputs : @@ -1113,12 +1121,14 @@ - op : maximum (elementwise_max) backward : maximum_grad (elementwise_max_grad) extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : maximum (elementwise_min) backward : maximum_grad (elementwise_min_grad) extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : maxout inputs : @@ -1190,7 +1200,8 @@ outputs : out : Out extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : mv inputs : @@ -1391,7 +1402,8 @@ - op : remainder (elementwise_mod) extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, 
str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : renorm backward : renorm_grad @@ -1687,7 +1699,8 @@ outputs : out : Out extra : - attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32"] + attrs : [bool use_mkldnn = false, str mkldnn_data_type = "float32", + bool use_quantizer = false, float Scale_x = 1.0f, float Scale_y = 1.0f, float Scale_out = 1.0f] - op : sum (reduce_sum) backward : (sum_grad) reduce_sum_grad diff --git a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc index 81ccd12bda078c..92e1132f9930a7 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc @@ -162,46 +162,33 @@ PD_REGISTER_KERNEL(fused_elementwise_add, ONEDNN, phi::FusedAddKernel, float, - double, - int16_t, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} + phi::dtype::bfloat16, + int8_t, + uint8_t) {} PD_REGISTER_KERNEL(fused_elementwise_sub, OneDNN, ONEDNN, phi::FusedSubtractKernel, float, - double, - int16_t, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex, - phi::dtype::bfloat16) {} + phi::dtype::bfloat16, + int8_t, + uint8_t) {} PD_REGISTER_KERNEL(fused_elementwise_mul, OneDNN, ONEDNN, phi::FusedMultiplyKernel, float, - double, - int, - int64_t, - bool, - phi::dtype::complex, - phi::dtype::complex, - phi::dtype::bfloat16) {} + phi::dtype::bfloat16, + int8_t, + uint8_t) {} PD_REGISTER_KERNEL(fused_elementwise_div, OneDNN, ONEDNN, phi::FusedDivideKernel, float, - double, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} + phi::dtype::bfloat16, + int8_t, + uint8_t) {} diff --git a/paddle/phi/kernels/onednn/elementwise_kernel.cc b/paddle/phi/kernels/onednn/elementwise_kernel.cc index 1105a4d80b9ee6..6aa478756006f1 100644 --- a/paddle/phi/kernels/onednn/elementwise_kernel.cc +++ b/paddle/phi/kernels/onednn/elementwise_kernel.cc @@ -30,6 +30,26 @@ void ElementwiseKernel(const OneDNNContext& dev_ctx, DenseTensor* out) { const auto& onednn_engine = dev_ctx.GetEngine(); + float scale_x = dev_ctx.HasDnnAttr("Scale_x") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_x")) + : 1.0f; + float scale_y = dev_ctx.HasDnnAttr("Scale_y") + ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_y")) + : 1.0f; + float scale_out = + dev_ctx.HasDnnAttr("Scale_out") + ? 
PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_out")) + : 1.0f; + + dnnl::post_ops post_operations; + funcs::AppendActivation(dev_ctx, post_operations); + if (dev_ctx.HasDnnAttr("fused_output_scale")) { + float scale_alpha = + PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fused_output_scale")); + post_operations.append_eltwise( + 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); + } + auto* non_const_x = &x; auto* non_const_y = &y; @@ -40,10 +60,11 @@ void ElementwiseKernel(const OneDNNContext& dev_ctx, non_const_x, non_const_y, out, - 1.0f, - 1.0f, - 1.0f, - true); + scale_x, + scale_y, + scale_out, + true, + post_operations); // oneDNN's binary is optimized for broadcasting y into x, so in other case // we have to swap tensors to achieve optimal performance diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_operator_unsqueeze2_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_operator_unsqueeze2_fuse_pass.py index 0c92a845f06c2c..5b23c545fe402b 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_operator_unsqueeze2_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_onednn_operator_unsqueeze2_fuse_pass.py @@ -143,7 +143,7 @@ def sample_predictor_configs(self, program_config): "operator_unsqueeze2_onednn_fuse_pass", ], ) - yield config, ["elementwise_mul"], (1e-5, 1e-5) + yield config, ["fused_elementwise_mul"], (1e-5, 1e-5) def test(self): self.run_and_statis( diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 05a6ebe4e3e7d7..90f6f3600e3fdd 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -370,7 +370,7 @@ 'test_adaptive_pool2d_convert_global_pass', 'test_lookup_table_v2_bf16_op', 'test_operator_desc', - 'test_elementwise_mul_onednn_op', + 'test_elementwise_mul_mkldnn_op', 'test_fetch_handler', 'test_cpu_bfloat16_placement_pass', 'test_match_matrix_tensor_op', @@ -649,7 +649,7 @@ 'test_auto_parallel_reshard_serial', 'test_auto_parallel_reshard_dpmppp', 'test_clip_mkldnn_op', - 'test_elementwise_sub_onednn_op', + 'test_elementwise_sub_mkldnn_op', 'test_flatten_mkldnn_op', 'test_slice_mkldnn_op', 'test_ir_generate_pass', @@ -717,7 +717,7 @@ 'test_quant2_int8_resnet50_channelwise_mkldnn', 'test_analyzer_bert', 'test_directory_migration', - 'test_elementwise_add_onednn_op', + 'test_elementwise_add_mkldnn_op', 'test_quant_int8_googlenet_mkldnn', 'test_callback_early_stop', ] @@ -2259,7 +2259,7 @@ 'test_scale_bf16_mkldnn_op', 'test_ir_generate_pass', 'test_expand_v2_mkldnn_op', - 'test_elementwise_sub_onednn_op', + 'test_elementwise_sub_mkldnn_op', ] # It run 4 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, @@ -2702,7 +2702,7 @@ 'test_fusion_transpose_flatten_concat_op', 'test_elementwise_nn_grad', 'test_hinge_loss_op', - 'test_elementwise_add_onednn_op', + 'test_elementwise_add_mkldnn_op', 'test_optimizer', 'test_deformable_conv_op', 'test_py_reader_push_pop', @@ -2861,7 +2861,7 @@ 'test_queue', 'test_cross_entropy_op', 'test_detection', - 'test_elementwise_mul_onednn_op', + 'test_elementwise_mul_mkldnn_op', 'test_grid_generator', 'test_functional_conv2d', 'test_fit_a_line', diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index f83d21c1fca076..3d7e2654a4f639 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -594,11 +594,11 @@ 'test_conv2d_transpose_bf16_mkldnn_op', 'test_conv3d_mkldnn_op', 'test_dequantize_mkldnn_op', - 
'test_elementwise_add_onednn_op', + 'test_elementwise_add_mkldnn_op', 'test_elementwise_add_bf16_mkldnn_op', 'test_elementwise_div_mkldnn_op', - 'test_elementwise_sub_onednn_op', - 'test_elementwise_mul_onednn_op', + 'test_elementwise_sub_mkldnn_op', + 'test_elementwise_mul_mkldnn_op', 'test_elementwise_mul_bf16_mkldnn_op', 'test_fc_mkldnn_op', 'test_fc_bf16_mkldnn_op', From b587ed0c47b38eb227517d6852e9a327fe8bdf4a Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Fri, 17 Mar 2023 13:32:22 +0100 Subject: [PATCH 13/23] Fix unit tests --- .../phi/kernels/onednn/elementwise_kernel.cc | 42 +++---------------- .../mkldnn/test_elementwise_add_onednn_op.py | 21 +++++++++- .../mkldnn/test_elementwise_mul_onednn_op.py | 22 +++++++++- .../mkldnn/test_elementwise_sub_onednn_op.py | 6 +-- 4 files changed, 49 insertions(+), 42 deletions(-) diff --git a/paddle/phi/kernels/onednn/elementwise_kernel.cc b/paddle/phi/kernels/onednn/elementwise_kernel.cc index 6aa478756006f1..02b148e54f547b 100644 --- a/paddle/phi/kernels/onednn/elementwise_kernel.cc +++ b/paddle/phi/kernels/onednn/elementwise_kernel.cc @@ -30,26 +30,6 @@ void ElementwiseKernel(const OneDNNContext& dev_ctx, DenseTensor* out) { const auto& onednn_engine = dev_ctx.GetEngine(); - float scale_x = dev_ctx.HasDnnAttr("Scale_x") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_x")) - : 1.0f; - float scale_y = dev_ctx.HasDnnAttr("Scale_y") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_y")) - : 1.0f; - float scale_out = - dev_ctx.HasDnnAttr("Scale_out") - ? PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("Scale_out")) - : 1.0f; - - dnnl::post_ops post_operations; - funcs::AppendActivation(dev_ctx, post_operations); - if (dev_ctx.HasDnnAttr("fused_output_scale")) { - float scale_alpha = - PADDLE_GET_CONST(float, dev_ctx.GetDnnAttr("fused_output_scale")); - post_operations.append_eltwise( - 1.0, dnnl::algorithm::eltwise_linear, scale_alpha, 0.0f); - } - auto* non_const_x = &x; auto* non_const_y = &y; @@ -60,11 +40,10 @@ void ElementwiseKernel(const OneDNNContext& dev_ctx, non_const_x, non_const_y, out, - scale_x, - scale_y, - scale_out, - true, - post_operations); + 1.0f, + 1.0f, + 1.0f, + true); // oneDNN's binary is optimized for broadcasting y into x, so in other case // we have to swap tensors to achieve optimal performance @@ -122,18 +101,7 @@ void ElementwiseKernel(const OneDNNContext& dev_ctx, out_md = out_md.reshape(dims); } - const auto fused_unsqueeze2_axes = - dev_ctx.HasDnnAttr("fused_unsqueeze2_axes") - ? 
PADDLE_GET_CONST(std::vector, - dev_ctx.GetDnnAttr("fused_unsqueeze2_axes")) - : std::vector(); - - if (!fused_unsqueeze2_axes.empty()) { - funcs::SetOutMemDescWithUnsqueeze2FuseSupport( - fused_unsqueeze2_axes, out, out_md); - } else { - out->set_mem_desc(out_md); - } + out->set_mem_desc(out_md); } #define DEFINE_ONEDNN_ELEMENTWISE_KERNEL(name, algorithm) \ diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py index 95b2f0f7b4ed75..c439feda1f77b0 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py @@ -16,8 +16,9 @@ import numpy as np +import paddle from paddle import enable_static -from paddle.fluid.tests.unittests.op_test import skip_check_grad_ci +from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci from paddle.fluid.tests.unittests.test_elementwise_add_op import ( TestElementwiseAddOp, ) @@ -110,6 +111,24 @@ def test_check_grad_ingore_x(self): reason="oneDNN's int8 elementwise_ops don't implemend grad kernel." ) class TestInt8(TestElementwiseAddOp): + def setUp(self): + self.op_type = "fused_elementwise_add" + self.python_api = paddle.add + self.prim_op_type = "prim" + self.init_dtype() + self.init_input_output() + self.init_kernel_type() + self.init_axis() + self.if_check_prim() + self.if_enable_cinn() + + self.inputs = { + 'X': OpTest.np_dtype_to_fluid_dtype(self.x), + 'Y': OpTest.np_dtype_to_fluid_dtype(self.y), + } + self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} + self.outputs = {'Out': self.out} + def init_kernel_type(self): self.use_mkldnn = True self._cpu_only = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py index 9d3afeaa40925f..fb85a3d2eb9595 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py @@ -16,8 +16,9 @@ import numpy as np +import paddle from paddle import enable_static -from paddle.fluid.tests.unittests.op_test import skip_check_grad_ci +from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci from paddle.fluid.tests.unittests.test_elementwise_mul_op import ( ElementwiseMulOp, ) @@ -83,6 +84,25 @@ def test_check_grad_ingore_x(self): reason="oneDNN's int8 elementwise_ops don't implemend grad kernel." 
) class TestInt8(ElementwiseMulOp): + def setUp(self): + self.op_type = "fused_elementwise_mul" + self.prim_op_type = "prim" + self.python_api = paddle.multiply + self.dtype = np.float64 + self.axis = -1 + self.init_dtype() + self.init_input_output() + self.init_kernel_type() + self.init_axis() + self.if_enable_cinn() + + self.inputs = { + 'X': OpTest.np_dtype_to_fluid_dtype(self.x), + 'Y': OpTest.np_dtype_to_fluid_dtype(self.y), + } + self.outputs = {'Out': self.out} + self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} + def init_kernel_type(self): self.use_mkldnn = True self._cpu_only = True diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py index 5f97197a683ece..4b88cc2b2137dc 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py @@ -237,9 +237,9 @@ def init_input_output(self): self.out = np.subtract(self.x, self.y) def init_scales(self): - self.attrs['scale_x'] = 1.0 - self.attrs['scale_y'] = 1.0 - self.attrs['scale_out'] = 1.0 + self.attrs['Scale_x'] = 1.0 + self.attrs['Scale_y'] = 1.0 + self.attrs['Scale_out'] = 1.0 def test_check_output(self): self.init_scales() From 43224096e3d8a2ffe250c1700708efab47f78559 Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Mon, 20 Mar 2023 11:26:36 +0100 Subject: [PATCH 14/23] Delete obsolete test cases --- .../mkldnn/test_elementwise_add_onednn_op.py | 63 +------------- .../mkldnn/test_elementwise_mul_onednn_op.py | 86 +++++++------------ 2 files changed, 34 insertions(+), 115 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py index c439feda1f77b0..9b2d4aec1a369c 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py @@ -16,9 +16,8 @@ import numpy as np -import paddle from paddle import enable_static -from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci +from paddle.fluid.tests.unittests.op_test import skip_check_grad_ci from paddle.fluid.tests.unittests.test_elementwise_add_op import ( TestElementwiseAddOp, ) @@ -111,24 +110,6 @@ def test_check_grad_ingore_x(self): reason="oneDNN's int8 elementwise_ops don't implemend grad kernel." 
) class TestInt8(TestElementwiseAddOp): - def setUp(self): - self.op_type = "fused_elementwise_add" - self.python_api = paddle.add - self.prim_op_type = "prim" - self.init_dtype() - self.init_input_output() - self.init_kernel_type() - self.init_axis() - self.if_check_prim() - self.if_enable_cinn() - - self.inputs = { - 'X': OpTest.np_dtype_to_fluid_dtype(self.x), - 'Y': OpTest.np_dtype_to_fluid_dtype(self.y), - } - self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} - self.outputs = {'Out': self.out} - def init_kernel_type(self): self.use_mkldnn = True self._cpu_only = True @@ -161,48 +142,6 @@ def test_check_grad_ingore_y(self): pass -class TestInt8Scales(TestInt8): - def quantize(self, tensor, dt="int8"): - max_int = 127.0 if dt == "int8" else 255.0 - scale = max_int / np.abs(np.amax(tensor)) - quantized = np.round(scale * tensor).astype(dt) - return scale, quantized - - def init_input_output(self): - self.x_f = np.random.random((100,)).astype("float") - self.y_f = np.random.random((100,)).astype("float") - self.out_f = np.add(self.x_f, self.y_f) - - self.scale_x, self.x = self.quantize(self.x_f) - self.scale_y, self.y = self.quantize(self.y_f) - self.scale_o, self.out = self.quantize(self.out_f) - - def init_scales(self): - self.attrs['scale_x'] = self.scale_x - self.attrs['scale_y'] = self.scale_y - self.attrs['scale_out'] = self.scale_o - - def test_check_output(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.init_scales() - int_atol = 1 # different quantization techniques - self.check_output(check_dygraph=(not self.use_mkldnn), atol=int_atol) - - -class TestUint8Scales(TestInt8Scales): - def init_input_output(self): - self.x_f = np.random.random((100,)).astype("float") - self.y_f = np.random.random((100,)).astype("float") - self.out_f = np.add(self.x_f, self.y_f) - - self.scale_x, self.x = self.quantize(self.x_f, "uint8") - self.scale_y, self.y = self.quantize(self.y_f, "uint8") - self.scale_o, self.out = self.quantize(self.out_f, "uint8") - - def init_dtype(self): - self.dtype = np.uint8 - - if __name__ == '__main__': enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py index fb85a3d2eb9595..fea3c8ac464848 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py @@ -16,9 +16,8 @@ import numpy as np -import paddle from paddle import enable_static -from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci +from paddle.fluid.tests.unittests.op_test import skip_check_grad_ci from paddle.fluid.tests.unittests.test_elementwise_mul_op import ( ElementwiseMulOp, ) @@ -84,25 +83,6 @@ def test_check_grad_ingore_x(self): reason="oneDNN's int8 elementwise_ops don't implemend grad kernel." 
) class TestInt8(ElementwiseMulOp): - def setUp(self): - self.op_type = "fused_elementwise_mul" - self.prim_op_type = "prim" - self.python_api = paddle.multiply - self.dtype = np.float64 - self.axis = -1 - self.init_dtype() - self.init_input_output() - self.init_kernel_type() - self.init_axis() - self.if_enable_cinn() - - self.inputs = { - 'X': OpTest.np_dtype_to_fluid_dtype(self.x), - 'Y': OpTest.np_dtype_to_fluid_dtype(self.y), - } - self.outputs = {'Out': self.out} - self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} - def init_kernel_type(self): self.use_mkldnn = True self._cpu_only = True @@ -135,46 +115,46 @@ def test_check_grad_ingore_y(self): pass -class TestInt8Scales(TestInt8): - def quantize(self, tensor, dt="int8"): - max_int = 127.0 if dt == "int8" else 255.0 - scale = max_int / np.abs(np.amax(tensor)) - quantized = np.round(scale * tensor).astype(dt) - return scale, quantized +# class TestInt8Scales(TestInt8): +# def quantize(self, tensor, dt="int8"): +# max_int = 127.0 if dt == "int8" else 255.0 +# scale = max_int / np.abs(np.amax(tensor)) +# quantized = np.round(scale * tensor).astype(dt) +# return scale, quantized - def init_input_output(self): - self.x_f = np.random.random((100,)).astype("float") - self.y_f = np.random.random((100,)).astype("float") - self.out_f = np.multiply(self.x_f, self.y_f) +# def init_input_output(self): +# self.x_f = np.random.random((100,)).astype("float") +# self.y_f = np.random.random((100,)).astype("float") +# self.out_f = np.multiply(self.x_f, self.y_f) - self.scale_x, self.x = self.quantize(self.x_f) - self.scale_y, self.y = self.quantize(self.y_f) - self.scale_o, self.out = self.quantize(self.out_f) +# self.scale_x, self.x = self.quantize(self.x_f) +# self.scale_y, self.y = self.quantize(self.y_f) +# self.scale_o, self.out = self.quantize(self.out_f) - def init_scales(self): - self.attrs['scale_x'] = self.scale_x - self.attrs['scale_y'] = self.scale_y - self.attrs['scale_out'] = self.scale_o +# def init_scales(self): +# self.attrs['scale_x'] = self.scale_x +# self.attrs['scale_y'] = self.scale_y +# self.attrs['scale_out'] = self.scale_o - def test_check_output(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.init_scales() - int_atol = 1 # different quantization techniques - self.check_output(check_dygraph=(not self.use_mkldnn), atol=int_atol) +# def test_check_output(self): +# # TODO(wangzhongpu): support mkldnn op in dygraph mode +# self.init_scales() +# int_atol = 1 # different quantization techniques +# self.check_output(check_dygraph=(not self.use_mkldnn), atol=int_atol) -class TestUint8Scales(TestInt8Scales): - def init_input_output(self): - self.x_f = np.random.random((100,)).astype("float") - self.y_f = np.random.random((100,)).astype("float") - self.out_f = np.multiply(self.x_f, self.y_f) +# class TestUint8Scales(TestInt8Scales): +# def init_input_output(self): +# self.x_f = np.random.random((100,)).astype("float") +# self.y_f = np.random.random((100,)).astype("float") +# self.out_f = np.multiply(self.x_f, self.y_f) - self.scale_x, self.x = self.quantize(self.x_f, "uint8") - self.scale_y, self.y = self.quantize(self.y_f, "uint8") - self.scale_o, self.out = self.quantize(self.out_f, "uint8") +# self.scale_x, self.x = self.quantize(self.x_f, "uint8") +# self.scale_y, self.y = self.quantize(self.y_f, "uint8") +# self.scale_o, self.out = self.quantize(self.out_f, "uint8") - def init_dtype(self): - self.dtype = np.uint8 +# def init_dtype(self): +# self.dtype = np.uint8 if __name__ == '__main__': From 
6d21d733e5d08b18949f1e5865e09ca4813c7e96 Mon Sep 17 00:00:00 2001 From: Hulek Date: Tue, 21 Mar 2023 09:36:50 +0100 Subject: [PATCH 15/23] Delete commented out code --- .../mkldnn/test_elementwise_mul_onednn_op.py | 42 ------------------- 1 file changed, 42 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py index fea3c8ac464848..b05f407dc33b46 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py @@ -115,48 +115,6 @@ def test_check_grad_ingore_y(self): pass -# class TestInt8Scales(TestInt8): -# def quantize(self, tensor, dt="int8"): -# max_int = 127.0 if dt == "int8" else 255.0 -# scale = max_int / np.abs(np.amax(tensor)) -# quantized = np.round(scale * tensor).astype(dt) -# return scale, quantized - -# def init_input_output(self): -# self.x_f = np.random.random((100,)).astype("float") -# self.y_f = np.random.random((100,)).astype("float") -# self.out_f = np.multiply(self.x_f, self.y_f) - -# self.scale_x, self.x = self.quantize(self.x_f) -# self.scale_y, self.y = self.quantize(self.y_f) -# self.scale_o, self.out = self.quantize(self.out_f) - -# def init_scales(self): -# self.attrs['scale_x'] = self.scale_x -# self.attrs['scale_y'] = self.scale_y -# self.attrs['scale_out'] = self.scale_o - -# def test_check_output(self): -# # TODO(wangzhongpu): support mkldnn op in dygraph mode -# self.init_scales() -# int_atol = 1 # different quantization techniques -# self.check_output(check_dygraph=(not self.use_mkldnn), atol=int_atol) - - -# class TestUint8Scales(TestInt8Scales): -# def init_input_output(self): -# self.x_f = np.random.random((100,)).astype("float") -# self.y_f = np.random.random((100,)).astype("float") -# self.out_f = np.multiply(self.x_f, self.y_f) - -# self.scale_x, self.x = self.quantize(self.x_f, "uint8") -# self.scale_y, self.y = self.quantize(self.y_f, "uint8") -# self.scale_o, self.out = self.quantize(self.out_f, "uint8") - -# def init_dtype(self): -# self.dtype = np.uint8 - - if __name__ == '__main__': enable_static() unittest.main() From f0d836c8d5c2da624b7e689320fcd30d2882cb38 Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Wed, 22 Mar 2023 18:49:24 +0100 Subject: [PATCH 16/23] Fix codestyle --- .../ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc index 74ac23e6a1ad07..96da2a4785a682 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc @@ -26,7 +26,10 @@ using string::PrettyLogDetail; void FuseOperatorUnsqueeze2OneDNNPass::ApplyImpl(Graph *graph) const { std::vector> ops_and_outputs = { - {"fused_transpose", 2}, {"transpose2", 2}, {"fused_elementwise_mul", 1}, {"elementwise_mul", 1}}; + {"fused_transpose", 2}, + {"transpose2", 2}, + {"fused_elementwise_mul", 1}, + {"elementwise_mul", 1}}; for (const auto &op_and_outputs : ops_and_outputs) FuseUnsqueeze2(graph, op_and_outputs.first, op_and_outputs.second); From 0f36edfdcfb6a747b3e09dc4d357829ddbc58e1e Mon Sep 17 00:00:00 2001 From: Hulek Date: Mon, 27 Mar 2023 12:54:53 +0200 Subject: [PATCH 17/23] delete temporary condition --- 
.../framework/ir/mkldnn/cpu_quantize_placement_pass.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc index 43c697f44c3976..2071f284126b79 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc @@ -90,12 +90,7 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const { return; } - // Remove this condition when all fused_elementwise ops are merged - if (!(op->Op()->Type() == "elementwise_add" || - op->Op()->Type() == "elementwise_sub" || - op->Op()->Type() == "elementwise_mul")) { - ConvertToFusedOp(op->Op()); - } + ConvertToFusedOp(op->Op()); op->Op()->SetAttr("mkldnn_data_type", std::string("int8")); }; gpd(graph, handler); From 833c2f13c1173af99311a25b036b123913778e7f Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Tue, 28 Mar 2023 12:20:15 +0200 Subject: [PATCH 18/23] fix conflicts and delete duplicate fusing --- .../framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc | 1 - paddle/fluid/operators/ops_extra_info.h | 1 - 2 files changed, 2 deletions(-) diff --git a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc index 96da2a4785a682..7726206e84ea41 100644 --- a/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/operator_unsqueeze2_onednn_fuse_pass.cc @@ -58,7 +58,6 @@ void FuseOperatorUnsqueeze2OneDNNPass::FuseUnsqueeze2( GET_IR_NODE_FROM_SUBGRAPH( unsqueeze2_out, unsqueeze2_out, op_unsqueeze2_pattern); - ConvertToFusedOp(operator_op->Op()); if (!operator_op->Op()->HasAttr("use_mkldnn") || (operator_op->Op()->HasAttr("use_mkldnn") && !(PADDLE_GET_CONST(bool, operator_op->Op()->GetAttr("use_mkldnn"))))) { diff --git a/paddle/fluid/operators/ops_extra_info.h b/paddle/fluid/operators/ops_extra_info.h index 852e5abfbf2aab..e58c3f5a80aa66 100644 --- a/paddle/fluid/operators/ops_extra_info.h +++ b/paddle/fluid/operators/ops_extra_info.h @@ -107,7 +107,6 @@ const std::unordered_map {"Scale_y", ExtraAttrProperty::ONEDNN}, {"Scale_out", ExtraAttrProperty::ONEDNN}, {"Scale_weights", ExtraAttrProperty::ONEDNN}, - {"fused_unsqueeze2_axes", ExtraAttrProperty::ONEDNN}, {"x_data_format", ExtraAttrProperty::ONEDNN}, {"y_data_format", ExtraAttrProperty::ONEDNN}, {"fused_reshape2_shape", ExtraAttrProperty::ONEDNN}, From 2c2093287351d94c41863f5d34c757e3a9d5d28a Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Tue, 11 Apr 2023 18:14:26 +0200 Subject: [PATCH 19/23] Fix code after merge --- paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc index 87c64817bdb3ec..d19bca440fbbfd 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_elementwise_kernel.cc @@ -162,7 +162,7 @@ DEFINE_ONEDNN_ELEMENTWISE_KERNEL(FusedDivide, dnnl::algorithm::binary_div) PD_REGISTER_KERNEL(fused_elementwise_add, OneDNN, ONEDNN, - phi::FusedAddKernel, + phi::fusion::FusedAddKernel, float, phi::dtype::bfloat16, int8_t, @@ -171,7 +171,7 @@ PD_REGISTER_KERNEL(fused_elementwise_add, PD_REGISTER_KERNEL(fused_elementwise_sub, OneDNN, ONEDNN, - 
phi::FusedSubtractKernel, + phi::fusion::FusedSubtractKernel, float, phi::dtype::bfloat16, int8_t, From 4e600b4b8854ca71b9fb7db123865db2226d4420 Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Wed, 12 Apr 2023 17:37:24 +0200 Subject: [PATCH 20/23] Move tests to new directory --- test/mkldnn/test_elementwise_add_mkldnn_op.py | 210 ------------ .../mkldnn/test_elementwise_add_onednn_op.py | 6 +- test/mkldnn/test_elementwise_mul_mkldnn_op.py | 210 ------------ .../mkldnn/test_elementwise_mul_onednn_op.py | 6 +- test/mkldnn/test_elementwise_sub_mkldnn_op.py | 308 ------------------ .../mkldnn/test_elementwise_sub_onednn_op.py | 6 +- 6 files changed, 9 insertions(+), 737 deletions(-) delete mode 100644 test/mkldnn/test_elementwise_add_mkldnn_op.py rename {python/paddle/fluid/tests/unittests => test}/mkldnn/test_elementwise_add_onednn_op.py (96%) delete mode 100644 test/mkldnn/test_elementwise_mul_mkldnn_op.py rename {python/paddle/fluid/tests/unittests => test}/mkldnn/test_elementwise_mul_onednn_op.py (96%) delete mode 100644 test/mkldnn/test_elementwise_sub_mkldnn_op.py rename {python/paddle/fluid/tests/unittests => test}/mkldnn/test_elementwise_sub_onednn_op.py (97%) diff --git a/test/mkldnn/test_elementwise_add_mkldnn_op.py b/test/mkldnn/test_elementwise_add_mkldnn_op.py deleted file mode 100644 index 6e190b3183125c..00000000000000 --- a/test/mkldnn/test_elementwise_add_mkldnn_op.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np - -from paddle import enable_static -from paddle.fluid.tests.unittests.eager_op_test import skip_check_grad_ci -from paddle.fluid.tests.unittests.test_elementwise_add_op import ( - TestElementwiseAddOp, -) - - -class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp): - def init_kernel_type(self): - self.use_mkldnn = True - - def init_dtype(self): - self.dtype = np.float32 - - -class TestMKLDNNElementwiseAddOp2(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.random.random((100,)).astype(self.dtype) - self.y = np.random.random((100,)).astype(self.dtype) - self.out = np.add(self.x, self.y) - - -class TestMKLDNNElementwiseAddOp3(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) - self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) - self.out = np.add(self.x, self.y) - - -class TestMKLDNNElementwiseAddOp4(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) - self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) - self.out = np.add(self.x, self.y) - - # TODO(jczaja): Enable when grad is ready - def test_check_grad_normal(self): - pass - - def test_check_grad_ingore_y(self): - pass - - -class TestMKLDNNElementwiseAddOp5(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) - self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) - self.out = np.add(self.x, self.y) - - -class TestMKLDNNElementwiseAddOpBroadcastXintoY(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.random.uniform(1, 2, [2, 50, 1]).astype(self.dtype) - self.y = np.random.uniform(1, 2, [2, 50, 160]).astype(self.dtype) - self.out = np.add(self.x, self.y) - - -class TestMKLDNNElementwiseAddOp_broadcast_3(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) - self.y = np.random.rand(10, 12).astype(self.dtype) - self.out = self.x + self.y.reshape(1, 10, 12, 1) - - def init_axis(self): - self.axis = 1 - - -class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.random.rand(10, 12).astype(self.dtype) - self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) - self.out = self.x + self.y - - def init_axis(self): - self.axis = 2 - - # TODO(jczaja): Enable when grad is ready - def test_check_grad_normal(self): - pass - - def test_check_grad_ingore_y(self): - pass - - def test_check_grad_ingore_x(self): - pass - - -class TestMKLDNNElementwiseAddOpZeroDim(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.random.random((100,)).astype(self.dtype) - self.y = np.array(3.0).astype(self.dtype) - self.out = np.add(self.x, self.y) - - -class TestMKLDNNElementwiseAddOpZeroDim2(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.array(3.0).astype(self.dtype) - self.y = np.random.random((100,)).astype(self.dtype) - self.out = np.add(self.x, self.y) - - -class TestMKLDNNElementwiseAddOpZeroDim3(TestMKLDNNElementwiseAddOp): - def init_input_output(self): - self.x = np.array(3.0).astype(self.dtype) - self.y = np.array(3.0).astype(self.dtype) - self.out = np.add(self.x, self.y) - - -''' INT8 Tests ''' - - -@skip_check_grad_ci( - reason="oneDNN's int8 elementwise_ops don't implemend grad kernel." 
-) -class TestInt8(TestElementwiseAddOp): - def init_kernel_type(self): - self.use_mkldnn = True - self._cpu_only = True - - def init_dtype(self): - self.dtype = np.int8 - - def init_input_output(self): - self.x = np.random.randint(0, 3, (12, 9)).astype("int8") - self.y = np.random.randint(0, 3, (12, 9)).astype("int8") - self.out = np.add(self.x, self.y) - - def init_scales(self): - self.attrs['Scale_x'] = 1.0 - self.attrs['Scale_y'] = 1.0 - self.attrs['Scale_out'] = 1.0 - - def test_check_output(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.init_scales() - self.check_output(check_dygraph=(not self.use_mkldnn)) - - def test_check_grad_normal(self): - pass - - def test_check_grad_ingore_x(self): - pass - - def test_check_grad_ingore_y(self): - pass - - -class TestInt8Scales(TestInt8): - def quantize(self, tensor, dt="int8"): - max_int = 127.0 if dt == "int8" else 255.0 - scale = max_int / np.abs(np.amax(tensor)) - quantized = np.round(scale * tensor).astype(dt) - return scale, quantized - - def init_input_output(self): - self.x_f = np.random.random((100,)).astype("float") - self.y_f = np.random.random((100,)).astype("float") - self.out_f = np.add(self.x_f, self.y_f) - - self.scale_x, self.x = self.quantize(self.x_f) - self.scale_y, self.y = self.quantize(self.y_f) - self.scale_o, self.out = self.quantize(self.out_f) - - def init_scales(self): - self.attrs['Scale_x'] = self.scale_x - self.attrs['Scale_y'] = self.scale_y - self.attrs['Scale_out'] = self.scale_o - - def test_check_output(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.init_scales() - int_atol = 1 # different quantization techniques - self.check_output(check_dygraph=(not self.use_mkldnn), atol=int_atol) - - -class TestUint8Scales(TestInt8Scales): - def init_input_output(self): - self.x_f = np.random.random((100,)).astype("float") - self.y_f = np.random.random((100,)).astype("float") - self.out_f = np.add(self.x_f, self.y_f) - - self.scale_x, self.x = self.quantize(self.x_f, "uint8") - self.scale_y, self.y = self.quantize(self.y_f, "uint8") - self.scale_o, self.out = self.quantize(self.out_f, "uint8") - - def init_dtype(self): - self.dtype = np.uint8 - - -if __name__ == '__main__': - enable_static() - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py b/test/mkldnn/test_elementwise_add_onednn_op.py similarity index 96% rename from python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py rename to test/mkldnn/test_elementwise_add_onednn_op.py index 1ce623258530e7..0ad1c40021d508 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_add_onednn_op.py +++ b/test/mkldnn/test_elementwise_add_onednn_op.py @@ -103,21 +103,21 @@ def test_check_grad_ingore_x(self): pass -class TestMKLDNNElementwiseAddOpZeroDim(TestOneDNNElementwiseAddOp): +class TestOneDNNlementwiseAddOpZeroDim(TestOneDNNElementwiseAddOp): def init_input_output(self): self.x = np.random.random((100,)).astype(self.dtype) self.y = np.array(3.0).astype(self.dtype) self.out = np.add(self.x, self.y) -class TestMKLDNNElementwiseAddOpZeroDim2(TestOneDNNElementwiseAddOp): +class TestOneDNNlementwiseAddOpZeroDim2(TestOneDNNElementwiseAddOp): def init_input_output(self): self.x = np.array(3.0).astype(self.dtype) self.y = np.random.random((100,)).astype(self.dtype) self.out = np.add(self.x, self.y) -class TestMKLDNNElementwiseAddOpZeroDim3(TestOneDNNElementwiseAddOp): +class TestOneDNNlementwiseAddOpZeroDim3(TestOneDNNElementwiseAddOp): def 
init_input_output(self): self.x = np.array(3.0).astype(self.dtype) self.y = np.array(3.0).astype(self.dtype) diff --git a/test/mkldnn/test_elementwise_mul_mkldnn_op.py b/test/mkldnn/test_elementwise_mul_mkldnn_op.py deleted file mode 100644 index 8d60282abcbdd0..00000000000000 --- a/test/mkldnn/test_elementwise_mul_mkldnn_op.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np - -from paddle import enable_static -from paddle.fluid.tests.unittests.eager_op_test import skip_check_grad_ci -from paddle.fluid.tests.unittests.test_elementwise_mul_op import ( - ElementwiseMulOp, -) - - -class TestMKLDNNElementwiseMulOp(ElementwiseMulOp): - def init_kernel_type(self): - self.use_mkldnn = True - - def init_dtype(self): - self.dtype = np.float32 - - -class TestMKLDNNElementwiseMulOp2(TestMKLDNNElementwiseMulOp): - def init_input_output(self): - self.x = np.random.random((100,)).astype(self.dtype) - self.y = np.random.random((100,)).astype(self.dtype) - self.out = np.multiply(self.x, self.y) - - -class TestMKLDNNElementwiseMulOp3(TestMKLDNNElementwiseMulOp): - def init_input_output(self): - self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) - self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) - self.out = np.multiply(self.x, self.y) - - -class TestMKLDNNElementwiseMulOp4(TestMKLDNNElementwiseMulOp): - def init_input_output(self): - self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) - self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) - self.out = np.multiply(self.x, self.y) - - # TODO(jczaja): Enable when grad is ready - def test_check_grad_normal(self): - pass - - def test_check_grad_ingore_y(self): - pass - - -class TestMKLDNNElementwiseMulOp5(TestMKLDNNElementwiseMulOp): - def init_input_output(self): - self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) - self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) - self.out = np.multiply(self.x, self.y) - - # TODO(jczaja): Enable when grad is ready - def test_check_grad_normal(self): - pass - - def test_check_grad_ingore_y(self): - pass - - def test_check_grad_ingore_x(self): - pass - - -class TestMKLDNNElementwiseMulOpZeroDim(TestMKLDNNElementwiseMulOp): - def init_input_output(self): - self.x = np.random.random((100,)).astype(self.dtype) - self.y = np.array(3.0).astype(self.dtype) - self.out = np.multiply(self.x, self.y) - - def test_check_grad_normal(self): - pass - - def test_check_grad_ingore_y(self): - pass - - def test_check_grad_ingore_x(self): - pass - - -class TestMKLDNNElementwiseMulOpZeroDim2(TestMKLDNNElementwiseMulOp): - def init_input_output(self): - self.x = np.array(3.0).astype(self.dtype) - self.y = np.random.random((100,)).astype(self.dtype) - self.out = np.multiply(self.x, self.y) - - def test_check_grad_normal(self): - pass - - def test_check_grad_ingore_y(self): - pass - - def test_check_grad_ingore_x(self): - pass - - 
-class TestMKLDNNElementwiseMulOpZeroDim3(TestMKLDNNElementwiseMulOp): - def init_input_output(self): - self.x = np.array(3.0).astype(self.dtype) - self.y = np.array(3.0).astype(self.dtype) - self.out = np.multiply(self.x, self.y) - - def test_check_grad_normal(self): - pass - - def test_check_grad_ingore_y(self): - pass - - def test_check_grad_ingore_x(self): - pass - - -''' INT8 Tests ''' - - -@skip_check_grad_ci( - reason="oneDNN's int8 elementwise_ops don't implemend grad kernel." -) -class TestInt8(ElementwiseMulOp): - def init_kernel_type(self): - self.use_mkldnn = True - self._cpu_only = True - - def init_dtype(self): - self.dtype = np.int8 - - def init_input_output(self): - self.x = np.random.randint(0, 3, (12, 9)).astype("int8") - self.y = np.random.randint(0, 3, (12, 9)).astype("int8") - self.out = np.multiply(self.x, self.y) - - def init_scales(self): - self.attrs['Scale_x'] = 1.0 - self.attrs['Scale_y'] = 1.0 - self.attrs['Scale_out'] = 1.0 - - def test_check_output(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.init_scales() - self.check_output(check_dygraph=(not self.use_mkldnn)) - - def test_check_grad_normal(self): - pass - - def test_check_grad_ingore_x(self): - pass - - def test_check_grad_ingore_y(self): - pass - - -class TestInt8Scales(TestInt8): - def quantize(self, tensor, dt="int8"): - max_int = 127.0 if dt == "int8" else 255.0 - scale = max_int / np.abs(np.amax(tensor)) - quantized = np.round(scale * tensor).astype(dt) - return scale, quantized - - def init_input_output(self): - self.x_f = np.random.random((100,)).astype("float") - self.y_f = np.random.random((100,)).astype("float") - self.out_f = np.multiply(self.x_f, self.y_f) - - self.scale_x, self.x = self.quantize(self.x_f) - self.scale_y, self.y = self.quantize(self.y_f) - self.scale_o, self.out = self.quantize(self.out_f) - - def init_scales(self): - self.attrs['Scale_x'] = self.scale_x - self.attrs['Scale_y'] = self.scale_y - self.attrs['Scale_out'] = self.scale_o - - def test_check_output(self): - # TODO(wangzhongpu): support mkldnn op in dygraph mode - self.init_scales() - int_atol = 1 # different quantization techniques - self.check_output(check_dygraph=(not self.use_mkldnn), atol=int_atol) - - -class TestUint8Scales(TestInt8Scales): - def init_input_output(self): - self.x_f = np.random.random((100,)).astype("float") - self.y_f = np.random.random((100,)).astype("float") - self.out_f = np.multiply(self.x_f, self.y_f) - - self.scale_x, self.x = self.quantize(self.x_f, "uint8") - self.scale_y, self.y = self.quantize(self.y_f, "uint8") - self.scale_o, self.out = self.quantize(self.out_f, "uint8") - - def init_dtype(self): - self.dtype = np.uint8 - - -if __name__ == '__main__': - enable_static() - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py b/test/mkldnn/test_elementwise_mul_onednn_op.py similarity index 96% rename from python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py rename to test/mkldnn/test_elementwise_mul_onednn_op.py index b3f62f14b76568..09ba96995d3f57 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_mul_onednn_op.py +++ b/test/mkldnn/test_elementwise_mul_onednn_op.py @@ -76,7 +76,7 @@ def test_check_grad_ingore_x(self): pass -class TestMKLDNNElementwiseMulOpZeroDim(TestOneDNNElementwiseMulOp): +class TestOneDNNElementwiseMulOpZeroDim(TestOneDNNElementwiseMulOp): def init_input_output(self): self.x = np.random.random((100,)).astype(self.dtype) self.y = 
np.array(3.0).astype(self.dtype) @@ -92,7 +92,7 @@ def test_check_grad_ingore_x(self): pass -class TestMKLDNNElementwiseMulOpZeroDim2(TestOneDNNElementwiseMulOp): +class TestOneDNNElementwiseMulOpZeroDim2(TestOneDNNElementwiseMulOp): def init_input_output(self): self.x = np.array(3.0).astype(self.dtype) self.y = np.random.random((100,)).astype(self.dtype) @@ -108,7 +108,7 @@ def test_check_grad_ingore_x(self): pass -class TestMKLDNNElementwiseMulOpZeroDim3(TestOneDNNElementwiseMulOp): +class TestOneDNNElementwiseMulOpZeroDim3(TestOneDNNElementwiseMulOp): def init_input_output(self): self.x = np.array(3.0).astype(self.dtype) self.y = np.array(3.0).astype(self.dtype) diff --git a/test/mkldnn/test_elementwise_sub_mkldnn_op.py b/test/mkldnn/test_elementwise_sub_mkldnn_op.py deleted file mode 100644 index 2f74c0da3180a6..00000000000000 --- a/test/mkldnn/test_elementwise_sub_mkldnn_op.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np - -from paddle import enable_static -from paddle.fluid import core -from paddle.fluid.framework import _current_expected_place -from paddle.fluid.tests.unittests.eager_op_test import ( - OpTest, - OpTestTool, - convert_float_to_uint16, -) - - -@OpTestTool.skip_if( - not (isinstance(_current_expected_place(), core.CPUPlace)), - "GPU is not supported", -) -class TestMKLDNNElementwiseSubOp(OpTest): - def setUp(self): - self.op_type = "elementwise_sub" - self.init_dtype() - self.init_input_output() - self.init_kernel_type() - self.init_axis() - self.inputs = { - 'X': OpTest.np_dtype_to_fluid_dtype(self.x), - 'Y': OpTest.np_dtype_to_fluid_dtype(self.y), - } - self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} - self.outputs = {'Out': self.out} - - def init_input_output(self): - self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) - self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - def test_check_grad_normal(self): - self.check_grad(['X', 'Y'], 'Out') - - def test_check_grad_ignore_x(self): - self.check_grad(['Y'], 'Out', no_grad_set=set("X")) - - def test_check_grad_ignore_y(self): - self.check_grad(['X'], 'Out', no_grad_set=set('Y')) - - def init_axis(self): - self.axis = -1 - - def init_kernel_type(self): - self.use_mkldnn = True - - def init_dtype(self): - self.dtype = np.float32 - - def test_check_output(self): - self.check_output() - - -class TestMKLDNNElementwiseSubOp2(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.random.random((100,)).astype(self.dtype) - self.y = np.random.random((100,)).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - -class TestMKLDNNElementwiseSubOp3(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) - self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) - self.out = np.subtract(self.x, self.y) 
- - -class TestMKLDNNElementwiseSubOp4(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.random.uniform(1, 2, [2, 3, 4, 32]).astype(self.dtype) - self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - -class TestMKLDNNElementwiseSubOp5(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) - self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - -class TestMKLDNNElementwiseSubOp6(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.random.uniform(0.1, 2, [180, 1]).astype(self.dtype) - self.y = np.random.uniform(0.1, 1, [1, 256]).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - -class TestMKLDNNElementwiseSubOp7(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.random.uniform(0.1, 2, [1, 180]).astype(self.dtype) - self.y = np.random.uniform(0.1, 1, [256, 1]).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - -class TestMKLDNNElementwiseSubOp_broadcast(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) - self.y = np.random.rand(10, 12).astype(self.dtype) - self.out = self.x - self.y.reshape(1, 10, 12, 1) - - def init_axis(self): - self.axis = 1 - - -class TestElementwiseSubOp_xsize_lessthan_ysize_sub(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.random.rand(10, 12).astype(self.dtype) - self.y = np.random.rand(2, 2, 10, 12).astype(self.dtype) - self.out = self.x - self.y - - def init_axis(self): - self.axis = 2 - - -class TestMKLDNNElementwiseSubOpZeroDim(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.random.random((100,)).astype(self.dtype) - self.y = np.array(3.0).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - def test_check_grad_normal(self): - pass - - def test_check_grad_ignore_x(self): - pass - - def test_check_grad_ignore_y(self): - pass - - -class TestMKLDNNElementwiseSubOpZeroDim2(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.array(3.0).astype(self.dtype) - self.y = np.random.random((100,)).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - def test_check_grad_normal(self): - pass - - def test_check_grad_ignore_x(self): - pass - - def test_check_grad_ignore_y(self): - pass - - -class TestMKLDNNElementwiseSubOpZeroDim3(TestMKLDNNElementwiseSubOp): - def init_input_output(self): - self.x = np.array(3.0).astype(self.dtype) - self.y = np.array(3.0).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - def test_check_grad_normal(self): - pass - - def test_check_grad_ignore_x(self): - pass - - def test_check_grad_ignore_y(self): - pass - - -@OpTestTool.skip_if_not_cpu_bf16() -class TestBf16(TestMKLDNNElementwiseSubOp): - def setUp(self): - self.op_type = "elementwise_sub" - self.init_dtype() - self.init_input_output() - self.init_kernel_type() - self.init_axis() - - self.x_bf16 = convert_float_to_uint16(self.x) - self.y_bf16 = convert_float_to_uint16(self.y) - self.inputs = {'X': self.x_bf16, 'Y': self.y_bf16} - self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} - self.outputs = {'Out': convert_float_to_uint16(self.out)} - - def init_dtype(self): - self.dtype = np.float32 - self.mkldnn_data_type = "bfloat16" - - def init_input_output(self): - self.x = np.random.random( - 100, - ).astype(self.dtype) - self.y = 
np.random.random( - 100, - ).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - def test_check_output(self): - self.check_output_with_place(core.CPUPlace()) - - def test_check_grad_normal(self): - self.check_grad_with_place( - core.CPUPlace(), - ["X", "Y"], - "Out", - user_defined_grads=[self.x, -self.x], - user_defined_grad_outputs=[self.x_bf16], - ) - - def test_check_grad_ignore_x(self): - self.check_grad_with_place( - core.CPUPlace(), - ["Y"], - "Out", - user_defined_grads=[-self.y], - user_defined_grad_outputs=[self.y_bf16], - ) - - def test_check_grad_ignore_y(self): - self.check_grad_with_place( - core.CPUPlace(), - ["X"], - "Out", - user_defined_grads=[self.x], - user_defined_grad_outputs=[self.x_bf16], - ) - - -class TestBf16Broadcasting(TestBf16): - def init_input_output(self): - self.x = np.random.uniform(1, 2, [2, 3, 4, 100]).astype(self.dtype) - self.y = np.random.uniform(1, 2, [100]).astype(self.dtype) - self.out = np.subtract(self.x, self.y) - - def compute_reduced_gradients(self, out_grads): - part_sum = np.add.reduceat(out_grads, [0], axis=0) - part_sum = np.add.reduceat(part_sum, [0], axis=1) - part_sum = np.add.reduceat(part_sum, [0], axis=2) - return -part_sum.flatten() - - def test_check_grad_normal(self): - self.check_grad_with_place( - core.CPUPlace(), - ["X", "Y"], - "Out", - user_defined_grads=[self.x, self.compute_reduced_gradients(self.x)], - user_defined_grad_outputs=[self.x_bf16], - ) - - def test_check_grad_ignore_x(self): - self.check_grad_with_place( - core.CPUPlace(), - ["Y"], - "Out", - user_defined_grads=[self.compute_reduced_gradients(self.x)], - user_defined_grad_outputs=[self.x_bf16], - ) - - -class TestInt8(TestMKLDNNElementwiseSubOp): - def init_kernel_type(self): - self.use_mkldnn = True - self._cpu_only = True - - def init_dtype(self): - self.dtype = np.int8 - - def init_input_output(self): - self.x = np.random.randint(0, 3, (12, 9)).astype("int8") - self.y = np.random.randint(0, 3, (12, 9)).astype("int8") - self.out = np.subtract(self.x, self.y) - - def init_scales(self): - self.attrs['Scale_x'] = 1.0 - self.attrs['Scale_y'] = 1.0 - self.attrs['Scale_out'] = 1.0 - - def test_check_output(self): - self.init_scales() - self.check_output() - - def test_check_grad_normal(self): - pass - - def test_check_grad_ignore_x(self): - pass - - def test_check_grad_ignore_y(self): - pass - - -if __name__ == '__main__': - enable_static() - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py b/test/mkldnn/test_elementwise_sub_onednn_op.py similarity index 97% rename from python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py rename to test/mkldnn/test_elementwise_sub_onednn_op.py index bdc2d4a6d10bb5..35dac7b091ed01 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_sub_onednn_op.py +++ b/test/mkldnn/test_elementwise_sub_onednn_op.py @@ -133,7 +133,7 @@ def init_axis(self): self.axis = 2 -class TestMKLDNNElementwiseSubOpZeroDim(TestOneDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOpZeroDim(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.random.random((100,)).astype(self.dtype) self.y = np.array(3.0).astype(self.dtype) @@ -149,7 +149,7 @@ def test_check_grad_ignore_y(self): pass -class TestMKLDNNElementwiseSubOpZeroDim2(TestOneDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOpZeroDim2(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.array(3.0).astype(self.dtype) self.y = 
np.random.random((100,)).astype(self.dtype) @@ -165,7 +165,7 @@ def test_check_grad_ignore_y(self): pass -class TestMKLDNNElementwiseSubOpZeroDim3(TestOneDNNElementwiseSubOp): +class TestOneDNNElementwiseSubOpZeroDim3(TestOneDNNElementwiseSubOp): def init_input_output(self): self.x = np.array(3.0).astype(self.dtype) self.y = np.array(3.0).astype(self.dtype) From 8d601609421b9b1848e14203a33cf21a0891b570 Mon Sep 17 00:00:00 2001 From: JakubHulek Date: Mon, 24 Apr 2023 11:57:43 +0200 Subject: [PATCH 21/23] fix tests volatility --- test/mkldnn/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/mkldnn/CMakeLists.txt b/test/mkldnn/CMakeLists.txt index d3da5f2897da39..3740c3f618a65a 100644 --- a/test/mkldnn/CMakeLists.txt +++ b/test/mkldnn/CMakeLists.txt @@ -24,6 +24,8 @@ endif() set_tests_properties(test_concat_mkldnn_op PROPERTIES TIMEOUT 120) set_tests_properties(test_conv3d_mkldnn_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_elementwise_mul_onednn_op PROPERTIES TIMEOUT 60) +set_tests_properties(test_elementwise_add_onednn_op PROPERTIES TIMEOUT 60) if(WITH_MKLDNN AND NOT WIN32) set_tests_properties(test_onnx_format_quantization_mobilenetv1 PROPERTIES TIMEOUT 300) From 7d2fa1a1f8d14ad39dbb4c524b64e23174a385a4 Mon Sep 17 00:00:00 2001 From: Hulek Date: Tue, 16 May 2023 12:58:34 +0200 Subject: [PATCH 22/23] Rename test_elementwise_add_onednn_op.py to test_elementwise_add_mkldnn_op.py --- ...entwise_add_onednn_op.py => test_elementwise_add_mkldnn_op.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/mkldnn/{test_elementwise_add_onednn_op.py => test_elementwise_add_mkldnn_op.py} (100%) diff --git a/test/mkldnn/test_elementwise_add_onednn_op.py b/test/mkldnn/test_elementwise_add_mkldnn_op.py similarity index 100% rename from test/mkldnn/test_elementwise_add_onednn_op.py rename to test/mkldnn/test_elementwise_add_mkldnn_op.py From 20c3c3335781fd863db220377cdee39064a7cd34 Mon Sep 17 00:00:00 2001 From: Hulek Date: Tue, 16 May 2023 14:13:51 +0200 Subject: [PATCH 23/23] Update CMakeLists.txt add mkldnn op test --- test/mkldnn/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mkldnn/CMakeLists.txt b/test/mkldnn/CMakeLists.txt index 3740c3f618a65a..16030200222e83 100644 --- a/test/mkldnn/CMakeLists.txt +++ b/test/mkldnn/CMakeLists.txt @@ -25,7 +25,7 @@ endif() set_tests_properties(test_concat_mkldnn_op PROPERTIES TIMEOUT 120) set_tests_properties(test_conv3d_mkldnn_op PROPERTIES TIMEOUT 120) set_tests_properties(test_elementwise_mul_onednn_op PROPERTIES TIMEOUT 60) -set_tests_properties(test_elementwise_add_onednn_op PROPERTIES TIMEOUT 60) +set_tests_properties(test_elementwise_add_mkldnn_op PROPERTIES TIMEOUT 60) if(WITH_MKLDNN AND NOT WIN32) set_tests_properties(test_onnx_format_quantization_mobilenetv1 PROPERTIES TIMEOUT 300)
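Reference sketch (not a patch in this series): the int8/uint8 scale tests that are removed or commented out in the patches above all rely on the same small symmetric-quantization helper. The standalone Python sketch below restates that helper outside the OpTest harness so the removed behaviour stays documented; only the quantize() body, the scale attribute idea, and the atol=1 tolerance come from the test code itself, while the __main__ driver, the float64 inputs, and the printout are illustrative assumptions.

import numpy as np

def quantize(tensor, dt="int8"):
    # Symmetric scale taken from the tensor's largest value, as in the tests'
    # quantize() helper: int8 targets a [-127, 127] range, uint8 [0, 255].
    max_int = 127.0 if dt == "int8" else 255.0
    scale = max_int / np.abs(np.amax(tensor))
    quantized = np.round(scale * tensor).astype(dt)
    return scale, quantized

if __name__ == "__main__":
    x_f = np.random.random((100,)).astype("float64")
    y_f = np.random.random((100,)).astype("float64")
    out_f = x_f + y_f

    scale_x, x_q = quantize(x_f)
    scale_y, y_q = quantize(y_f)
    scale_out, out_q = quantize(out_f)

    # The tests feed these values to the elementwise operators' scale
    # attributes and compare outputs with atol=1, absorbing the rounding
    # differences between the independently quantized inputs and reference.
    print(scale_x, scale_y, scale_out)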