diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index f4b88178499be..fa0fa7fccfd61 100755
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -174,7 +174,6 @@ if(WITH_MKLDNN)
   pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(int8_scale_calculation_mkldnn_pass inference DIR mkldnn)
   pass_library(params_quantization_mkldnn_pass inference DIR mkldnn)
-  pass_library(fc_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
   pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
   pass_library(cpu_bfloat16_pass inference DIR mkldnn)
diff --git a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc
deleted file mode 100644
index ef01acd88c0b7..0000000000000
--- a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc
+++ /dev/null
@@ -1,165 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h"
-
-#include "paddle/fluid/framework/ir/graph_traits.h"
-#include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/utils/string/pretty_log.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-FCResidualConnectionMKLDNNFusePass::FCResidualConnectionMKLDNNFusePass() {
-  AddOpCompat(OpCompat("fc"))
-      .AddInput("Input")
-      .IsTensor()
-      .End()
-      .AddInput("W")
-      .IsTensor()
-      .End()
-      .AddInput("Bias")
-      .IsTensor()
-      .End()
-      .AddOutput("Out")
-      .IsTensor()
-      .End()
-      .AddAttr("in_num_col_dims")
-      .IsNumGE(1)
-      .End();
-
-  AddOpCompat(OpCompat("elementwise_add"))
-      .AddInput("X")
-      .IsTensor()
-      .End()
-      .AddInput("Y")
-      .IsTensor()
-      .End()
-      .AddOutput("Out")
-      .IsTensor()
-      .End()
-      .AddAttr("axis")
-      .IsIntIn({-1, 0, 1})
-      .End();
-}
-
-GraphWithStats FCResidualConnectionMKLDNNFusePass::FuseFC(
-    const std::string& name_scope,
-    const GraphWithStats& graph_with_stats,
-    bool fc_as_x) const {
-  GraphPatternDetector gpd;
-  auto pattern = gpd.mutable_pattern();
-  patterns::FCMKLDNN fc_pattern{pattern, name_scope};
-  auto fc_output = fc_pattern(false /* with residual */);
-
-  patterns::ResidualElementwise elementwise_pattern{
-      pattern, name_scope, fc_as_x};
-  elementwise_pattern(
-      fc_output,
-      pattern->NewNode(elementwise_pattern.residual_data_repr()),
-      "elementwise_add",
-      fc_as_x);
-  fc_output->AsIntermediate();
-
-  int found_fc_count = 0;
-
-  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
-                     Graph* g) {
-    VLOG(4) << "Fuse fc + elementwise_add as residual";
-    GET_IR_NODE_FROM_SUBGRAPH(fc_op, fc, fc_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(fc_input, input, fc_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(fc_weights, weights, fc_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(fc_output, output, fc_pattern);
-
-    GET_IR_NODE_FROM_SUBGRAPH(
-        elementwise_op, elementwise_op, elementwise_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(
-        residual_data, residual_data, elementwise_pattern);
-    GET_IR_NODE_FROM_SUBGRAPH(
-        elementwise_out, elementwise_out, elementwise_pattern);
-
-    if (FindFuseOption(*fc_op, *elementwise_op) != FUSE_MKLDNN) {
-      VLOG(4) << "Skipping fusion for " << fc_op->Name() << "(" << fc_op->id()
-              << ") with " << elementwise_op->Name() << "("
-              << elementwise_op->id()
-              << ") because not both ops have use_mkldnn";
-      return;
-    }
-    if (!IsReachable(g, residual_data, fc_output)) {
-      VLOG(4) << "Skipping fusion for " << fc_op->Name() << "(" << fc_op->id()
-              << ") with " << elementwise_op->Name() << "("
-              << elementwise_op->id() << ") because residual input "
-              << residual_data->Name() << "(" << residual_data->id()
-              << ") is not "
-                 "reachable";
-      return;
-    }
-    if (HasFusedActivation(fc_op)) {
-      VLOG(4) << "Skipping fusion for " << fc_op->Name() << "(" << fc_op->id()
-              << ") with " << elementwise_op->Name() << "("
-              << elementwise_op->id() << ") because fc has activation fused";
-      return;
-    }
-
-    if (!IsCompat(subgraph, g)) {
-      LOG(WARNING)
-          << "op compat for fc_elementwise_add_mkldnn_fuse_pass failed.";
-      return;
-    }
-
-    fc_op->Op()->SetInput("ResidualData", {residual_data->Name()});
-    fc_op->Op()->SetOutput("Out", {elementwise_out->Name()});
-    fc_op->Op()->SetAttr("fuse_residual_connection", true);
-
-    GraphSafeRemoveNodes(g, {fc_output, elementwise_op});
-
-    IR_NODE_LINK_TO(residual_data, fc_op);
-    IR_NODE_LINK_TO(fc_op, elementwise_out);
-
-    found_fc_count++;
-  };
-
-  gpd(graph_with_stats.first, handler);
-  if ((!Has("disable_logs") || !Get<bool>("disable_logs")) &&
-      (found_fc_count > 0)) {
-    std::stringstream msg_ss;
-    std::string fusionMode = fc_as_x ? "x" : "y";
-    msg_ss << "--- Fused " << found_fc_count << " fc (as " << fusionMode
-           << ") + elementwise_add patterns";
-    paddle::string::PrettyLogDetail(msg_ss.str().c_str());
-  }
-
-  return std::make_pair(graph_with_stats.first,
-                        found_fc_count + graph_with_stats.second);
-}
-
-void FCResidualConnectionMKLDNNFusePass::ApplyImpl(ir::Graph* graph) const {
-  FusePassBase::Init(name_scope_, graph);
-  auto graph_with_stats = FuseFC(name_scope_, std::make_pair(graph, 0), true);
-  graph_with_stats = FuseFC(name_scope_, graph_with_stats, false);
-
-  AddStatis(graph_with_stats.second);
-}
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
-
-REGISTER_PASS(fc_elementwise_add_mkldnn_fuse_pass,
-              paddle::framework::ir::FCResidualConnectionMKLDNNFusePass);
-REGISTER_PASS_CAPABILITY(fc_elementwise_add_mkldnn_fuse_pass)
-    .AddCombination(
-        paddle::framework::compatible::OpVersionComparatorCombination()
-            .LE("fc", 0)
-            .LE("elementwise_add", 1));
diff --git a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h
deleted file mode 100644
index f92ce5bfc7044..0000000000000
--- a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
-
-namespace paddle {
-namespace framework {
-namespace ir {
-
-using GraphWithStats = std::pair<ir::Graph*, int>;
-
-class FCResidualConnectionMKLDNNFusePass : public FusePassBase {
- private:
-  GraphWithStats FuseFC(const std::string& name_scope,
-                        const GraphWithStats& graph_with_stats,
-                        bool fc_as_x) const;
-
- public:
-  FCResidualConnectionMKLDNNFusePass();
-  virtual ~FCResidualConnectionMKLDNNFusePass() {}
-
- protected:
-  void ApplyImpl(ir::Graph* graph) const;
-
-  static bool HasFusedActivation(Node* fc_node) {
-    return !(
-        fc_node->Op()->GetAttrIfExists<std::string>("activation_type").empty());
-  }
-
-  const std::string name_scope_{"fc_elementwise_add_mkldnn_fuse"};
-};
-}  // namespace ir
-}  // namespace framework
-}  // namespace paddle
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index b2a8fc86c2305..18cdf2c624545 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -372,7 +372,6 @@ void CpuPassStrategy::EnableMKLDNN() {
            // Disabled due to topology-dependent speed-up
            "fc_mkldnn_pass",
            "fc_act_mkldnn_fuse_pass",
-           "fc_elementwise_add_mkldnn_fuse_pass",   //
            "self_attention_fuse_pass",              //
            "batch_norm_act_fuse_pass",              //
            "softplus_activation_onednn_fuse_pass",  //
@@ -407,7 +406,6 @@ void CpuPassStrategy::EnableMkldnnBfloat16() {
   if (!use_mkldnn_bfloat16_) {
     passes_.push_back("fc_mkldnn_pass");
     passes_.push_back("fc_act_mkldnn_fuse_pass");
-    passes_.push_back("fc_elementwise_add_mkldnn_fuse_pass");
     passes_.push_back("cpu_bfloat16_placement_pass");
     passes_.push_back("cpu_bfloat16_pass");
@@ -463,7 +461,6 @@ void CpuPassStrategy::EnableMkldnnInt8() {
     passes_.push_back("repeated_fc_relu_fuse_pass");
     passes_.push_back("fc_mkldnn_pass");
     passes_.push_back("fc_act_mkldnn_fuse_pass");
-    passes_.push_back("fc_elementwise_add_mkldnn_fuse_pass");
     passes_.push_back("matmul_transpose_reshape_mkldnn_fuse_pass");
     passes_.push_back("batch_norm_act_fuse_pass");
     passes_.push_back("softplus_activation_onednn_fuse_pass");
@@ -498,9 +495,7 @@ void CpuPassStrategy::DisableMkldnnFcPasses() {
 void CpuPassStrategy::EraseFcMkldnnPasses() {
   std::vector<std::string> fc_passes_to_erase(
-      {"fc_mkldnn_pass",
-       "fc_act_mkldnn_fuse_pass",
-       "fc_elementwise_add_mkldnn_fuse_pass"});
+      {"fc_mkldnn_pass", "fc_act_mkldnn_fuse_pass"});
   for (const auto &pass : fc_passes_to_erase) {
     int idx = GetPassIndex(pass);
     if (idx != -1) {
diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
index 5cf06bcbbdcf0..18680fe678b5d 100644
--- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
@@ -46,9 +46,6 @@ GetDNNLScales(const ExecutionContext& ctx) {
   auto scale_in_data = ctx.Attr<float>("Scale_in");
   auto scale_out = ctx.Attr<float>("Scale_out");
   auto scale_weights_data = ctx.Attr<std::vector<float>>("Scale_weights");
-  auto scale_in_eltwise_data = ctx.HasAttr("Scale_in_eltwise")
ctx.Attr("Scale_in_eltwise") - : 1.0f; std::vector dnnl_src_scales = {1.f / scale_in_data}; size_t count = scale_weights_data.size(); @@ -57,7 +54,7 @@ GetDNNLScales(const ExecutionContext& ctx) { for (size_t i = 0; i < count; i++) { dnnl_wei_scales[i] = 1.f / scale_weights_data[i]; } - std::vector dnnl_psum_scales = {1.f / scale_in_eltwise_data}; + std::vector dnnl_psum_scales = {1.f}; std::vector dnnl_dst_scales = {1.f / scale_out}; return std::make_tuple( @@ -127,7 +124,6 @@ class FCMKLDNNHandler dnnl::primitive_attr attributes; dnnl::post_ops post_operations; - float sum_scale = 1.0f; float activation_scale = 1.0f; if (phi::funcs::is_int8()) { std::vector src_scales, wei_scales, psum_scales, dst_scales; @@ -168,13 +164,6 @@ class FCMKLDNNHandler dst_scales.data(), dst_scales.size() * sizeof(float)); } - - sum_scale = psum_scales[0]; - } - - if (ctx.HasAttr("fuse_residual_connection") && - ctx.Attr("fuse_residual_connection")) { - post_operations.append_sum(sum_scale); } // ReLU from "fc_fuse_pass" @@ -332,22 +321,6 @@ class FCMKLDNNHandler std::shared_ptr AcquireCustomDstMemory( const ExecutionContext& ctx, phi::DenseTensor* out) { - if (ctx.HasAttr("fuse_residual_connection") && - ctx.Attr("fuse_residual_connection")) { - auto* residual_param = ctx.Input("ResidualData"); - - PADDLE_ENFORCE_EQ( - out->dims(), - residual_param->dims(), - phi::errors::InvalidArgument( - "Output and elementwise parameter need to have the " - "same dimension sizes, but got output's dimension = %d" - " and residual param's dimension =%d .", - out->dims().size(), - residual_param->dims().size())); - - out->ShareDataWith(*residual_param); - } return this->template AcquireDstMemory(out); } // namespace operators @@ -458,11 +431,7 @@ class FCMKLDNNKernel : public framework::OpKernel { dst_memory_p = std::make_shared(inner_product_cache->dst_mem); - if (ctx.HasAttr("fuse_residual_connection") && - ctx.Attr("fuse_residual_connection")) { - auto* residual_param = ctx.Input("ResidualData"); - out->ShareDataWith(*residual_param); - } + auto out_ptr = out->mutable_data( ctx.GetPlace(), dst_memory_p->get_desc().get_size()); dst_memory_p->set_data_handle(out_ptr); diff --git a/test/cpp/inference/api/analysis_predictor_tester.cc b/test/cpp/inference/api/analysis_predictor_tester.cc index 6e3497d14a0dd..0827c3c196c19 100644 --- a/test/cpp/inference/api/analysis_predictor_tester.cc +++ b/test/cpp/inference/api/analysis_predictor_tester.cc @@ -370,9 +370,7 @@ TEST(AnalysisPredictor, mkldnn_fc_passes_cpu_pass_strategy) { CpuPassStrategy cpuPassStrategy; cpuPassStrategy.EnableMKLDNN(); const std::vector fc_passes_to_erase( - {"fc_mkldnn_pass", - "fc_act_mkldnn_fuse_pass", - "fc_elementwise_add_mkldnn_fuse_pass"}); + {"fc_mkldnn_pass", "fc_act_mkldnn_fuse_pass"}); for (const auto& pass : fc_passes_to_erase) { ASSERT_NE(cpuPassStrategy.GetPassIndex(pass), (size_t)-1); } diff --git a/test/cpp/inference/api/analyzer_ernie_int8_tester.cc b/test/cpp/inference/api/analyzer_ernie_int8_tester.cc index a93bbb1ab7e24..f8e7eb9ee8ad5 100644 --- a/test/cpp/inference/api/analyzer_ernie_int8_tester.cc +++ b/test/cpp/inference/api/analyzer_ernie_int8_tester.cc @@ -34,7 +34,7 @@ void SetInt8Config(AnalysisConfig *cfg, pass_builder->DeletePass("constant_folding_pass"); auto warmup_data = std::make_shared>(data); cfg->mkldnn_quantizer_config()->SetEnabledOpTypes( - {"elementwise_add", "matmul", "matmul_v2", "fused_matmul"}); + {"matmul", "matmul_v2", "fused_matmul"}); // Exclusion of several matmules that should not be quantized due to the fact 
   // that they reduce the accuracy of the model
   cfg->mkldnn_quantizer_config()->SetExcludedOpIds(
diff --git a/test/cpp/inference/api/analyzer_vit_ocr_tester.cc b/test/cpp/inference/api/analyzer_vit_ocr_tester.cc
index 3582fc22c9cc2..43457a05b4b48 100644
--- a/test/cpp/inference/api/analyzer_vit_ocr_tester.cc
+++ b/test/cpp/inference/api/analyzer_vit_ocr_tester.cc
@@ -104,7 +104,6 @@ TEST(Analyzer_vit_ocr, fuse_status) {
   CHECK_EQ(fuse_statis.at("fc_mkldnn_pass"), 33);
   CHECK_EQ(fuse_statis.at("fused_conv2d_gelu_mkldnn_fuse_pass"), 2);
-  CHECK_EQ(fuse_statis.at("fc_elementwise_add_mkldnn_fuse"), 16);
 }
 
 #endif
diff --git a/test/ir/inference/CMakeLists.txt b/test/ir/inference/CMakeLists.txt
index 1932ad37490ff..1e7fcfa6a1005 100755
--- a/test/ir/inference/CMakeLists.txt
+++ b/test/ir/inference/CMakeLists.txt
@@ -301,8 +301,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
   set_tests_properties(test_mkldnn_conv_mish_fuse_pass PROPERTIES TIMEOUT 300)
   set_tests_properties(test_onednn_fc_activation_fuse_pass PROPERTIES TIMEOUT
                                                                      300)
-  set_tests_properties(test_onednn_fc_elementwise_add_fuse_pass
-                       PROPERTIES TIMEOUT 120)
   set_tests_properties(test_mkldnn_conv_affine_channel_fuse_pass
                        PROPERTIES TIMEOUT 60)
 endif()
diff --git a/test/ir/inference/test_onednn_fc_elementwise_add_fuse_pass.py b/test/ir/inference/test_onednn_fc_elementwise_add_fuse_pass.py
deleted file mode 100644
index 9c27452aff5c4..0000000000000
--- a/test/ir/inference/test_onednn_fc_elementwise_add_fuse_pass.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-from functools import partial
-
-import hypothesis.strategies as st
-import numpy as np
-from auto_scan_test import PassAutoScanTest
-from program_config import OpConfig, ProgramConfig, TensorConfig
-
-
-class TestFCElementwiseAddOneDNNFusePass(PassAutoScanTest):
-    def sample_program_config(self, draw):
-        axis = draw(st.sampled_from([-1, 0, 1]))
-        fc_as_x = draw(st.booleans())
-        fc_in = draw(st.sampled_from([32, 64]))
-        fc_wei = draw(st.sampled_from([32, 64]))
-
-        def generate_data(shape):
-            return np.random.random(shape).astype(np.float32)
-
-        relu_op = OpConfig(
-            type='relu',
-            inputs={'X': ['input_data']},
-            outputs={'Out': ['relu_out']},
-            attrs={},
-        )
-
-        fc_op = OpConfig(
-            type='fc',
-            inputs={
-                'Input': ['relu_out'],
-                'W': ['fc_weight'],
-                'Bias': ['fc_bias'],
-            },
-            outputs={'Out': ['fc_output']},
-            attrs={
-                'use_mkldnn': True,
-                'padding_weights': False,
-                'activation_type': '',
-                'in_num_col_dims': 1,
-            },
-        )
-
-        if fc_as_x:
-            inputs = {'X': ['fc_output'], 'Y': ['input_data']}
-        else:
-            inputs = {'X': ['input_data'], 'Y': ['fc_output']}
-
-        elt_add_op = OpConfig(
-            type='elementwise_add',
-            inputs=inputs,
-            outputs={'Out': ['elementwise_output']},
-            attrs={'axis': axis, 'use_mkldnn': True},
-        )
-
-        model_net = [relu_op, fc_op, elt_add_op]
-
-        program_config = ProgramConfig(
-            ops=model_net,
-            weights={
-                'fc_weight': TensorConfig(
-                    data_gen=partial(generate_data, [fc_wei, fc_wei])
-                ),
-                'fc_bias': TensorConfig(
-                    data_gen=partial(generate_data, [fc_wei])
-                ),
-            },
-            inputs={
-                'input_data': TensorConfig(
-                    data_gen=partial(generate_data, [fc_in, fc_wei])
-                )
-            },
-            outputs=['elementwise_output'],
-        )
-
-        return program_config
-
-    def sample_predictor_configs(self, program_config):
-        config = self.create_inference_config(
-            use_mkldnn=True, passes=['fc_elementwise_add_mkldnn_fuse_pass']
-        )
-        yield config, ['relu', 'fc'], (1e-5, 1e-5)
-
-    def test(self):
-        self.run_and_statis(
-            quant=False, passes=['fc_elementwise_add_mkldnn_fuse_pass']
-        )
-
-
-if __name__ == '__main__':
-    unittest.main()
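Reviewer note, not part of the patch: the pass removed above rewrote the fc -> elementwise_add pair into a single fc carrying a ResidualData input, which the oneDNN kernel realized with a sum post-op accumulating into an output buffer shared with the residual tensor. A minimal numpy sketch of the algebraic equivalence the fusion relied on; all names below are illustrative, not Paddle APIs:

import numpy as np

def fc(x, w, b):
    # Paddle's fc: Out = Input @ W + Bias
    return x @ w + b

def fused_fc_with_residual(x, w, b, residual):
    # What fuse_residual_connection computed: the oneDNN sum post-op added
    # the fc result into an output buffer pre-filled with the residual tensor
    # (out->ShareDataWith(residual) in the removed kernel code).
    return residual + fc(x, w, b)

rng = np.random.default_rng(0)
x = rng.standard_normal((4, 32)).astype(np.float32)
w = rng.standard_normal((32, 32)).astype(np.float32)
b = rng.standard_normal(32).astype(np.float32)
residual = rng.standard_normal((4, 32)).astype(np.float32)

# Unfused graph (fc -> elementwise_add) vs. fused single fc with ResidualData.
assert np.allclose(fc(x, w, b) + residual,
                   fused_fc_with_residual(x, w, b, residual))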