onednn: remove fc_elementwise_add fusion (PaddlePaddle#55504)
* onednn: remove fc+eltwiseadd fusion pass
* onednn: remove post-sum fusion in fc kernel
* onednn: tests: make unfused add run in f32
xinyu-intel authored and wyf committed Aug 30, 2023
1 parent 5ffd59a commit 6e8cd2a
Showing 10 changed files with 5 additions and 364 deletions.
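Reviewer context: the fusion being removed worked by attaching oneDNN's "sum" post-op to the FC (inner product) primitive, so the FC result was accumulated on top of a destination buffer pre-filled with the residual tensor. Below is a minimal standalone sketch of that mechanism, assuming the oneDNN v3 C++ API; shapes and variable names are illustrative, not Paddle code.

#include <unordered_map>
#include "oneapi/dnnl/dnnl.hpp"

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  const dnnl::memory::dim N = 2, IC = 4, OC = 3;
  dnnl::memory::desc src_md({N, IC}, dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::nc);
  dnnl::memory::desc wei_md({OC, IC}, dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::oi);
  dnnl::memory::desc dst_md({N, OC}, dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::nc);

  // The "sum" post-op: dst := FC(src, wei) + 1.0f * (previous dst contents).
  // This is the effect of the post_operations.append_sum(sum_scale) call
  // deleted from fc_mkldnn_op.cc below.
  dnnl::post_ops ops;
  ops.append_sum(1.0f);
  dnnl::primitive_attr attr;
  attr.set_post_ops(ops);

  auto pd = dnnl::inner_product_forward::primitive_desc(
      eng, dnnl::prop_kind::forward_inference, src_md, wei_md, dst_md, attr);

  dnnl::memory src_m(pd.src_desc(), eng), wei_m(pd.weights_desc(), eng),
      dst_m(pd.dst_desc(), eng);
  // In the fused kernel the residual tensor had to occupy dst_m before
  // execution; that is why the deleted code aliased the buffers with
  // out->ShareDataWith(*residual_param).

  dnnl::inner_product_forward(pd).execute(
      strm,
      {{DNNL_ARG_SRC, src_m}, {DNNL_ARG_WEIGHTS, wei_m}, {DNNL_ARG_DST, dst_m}});
  strm.wait();
  return 0;
}

With the fusion gone, this append_sum path and the buffer aliasing it required disappear; the addition runs as a separate elementwise_add op instead.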
1 change: 0 additions & 1 deletion paddle/fluid/framework/ir/CMakeLists.txt
@@ -174,7 +174,6 @@ if(WITH_MKLDNN)
   pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(int8_scale_calculation_mkldnn_pass inference DIR mkldnn)
   pass_library(params_quantization_mkldnn_pass inference DIR mkldnn)
-  pass_library(fc_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn)
   pass_library(scale_matmul_fuse_pass inference DIR mkldnn)
   pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn)
   pass_library(cpu_bfloat16_pass inference DIR mkldnn)

This file was deleted.

This file was deleted.

7 changes: 1 addition & 6 deletions paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -372,7 +372,6 @@ void CpuPassStrategy::EnableMKLDNN() {
             // Disabled due to topology-dependent speed-up
             "fc_mkldnn_pass",
             "fc_act_mkldnn_fuse_pass",
-            "fc_elementwise_add_mkldnn_fuse_pass",   //
             "self_attention_fuse_pass",              //
             "batch_norm_act_fuse_pass",              //
             "softplus_activation_onednn_fuse_pass",  //
@@ -407,7 +406,6 @@ void CpuPassStrategy::EnableMkldnnBfloat16() {
   if (!use_mkldnn_bfloat16_) {
     passes_.push_back("fc_mkldnn_pass");
     passes_.push_back("fc_act_mkldnn_fuse_pass");
-    passes_.push_back("fc_elementwise_add_mkldnn_fuse_pass");
 
     passes_.push_back("cpu_bfloat16_placement_pass");
     passes_.push_back("cpu_bfloat16_pass");
@@ -463,7 +461,6 @@ void CpuPassStrategy::EnableMkldnnInt8() {
     passes_.push_back("repeated_fc_relu_fuse_pass");
     passes_.push_back("fc_mkldnn_pass");
     passes_.push_back("fc_act_mkldnn_fuse_pass");
-    passes_.push_back("fc_elementwise_add_mkldnn_fuse_pass");
     passes_.push_back("matmul_transpose_reshape_mkldnn_fuse_pass");
     passes_.push_back("batch_norm_act_fuse_pass");
     passes_.push_back("softplus_activation_onednn_fuse_pass");
@@ -498,9 +495,7 @@ void CpuPassStrategy::DisableMkldnnFcPasses() {
 
 void CpuPassStrategy::EraseFcMkldnnPasses() {
   std::vector<std::string> fc_passes_to_erase(
-      {"fc_mkldnn_pass",
-       "fc_act_mkldnn_fuse_pass",
-       "fc_elementwise_add_mkldnn_fuse_pass"});
+      {"fc_mkldnn_pass", "fc_act_mkldnn_fuse_pass"});
   for (const auto &pass : fc_passes_to_erase) {
     int idx = GetPassIndex(pass);
     if (idx != -1) {
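The user-visible effect of the pass-list changes above: EnableMKLDNN() now registers only the two remaining FC passes, so disabling oneDNN FC fusion per-topology needs one fewer DeletePass call. A minimal usage sketch with the public inference API; the header path varies by packaging and is an assumption here:

#include "paddle_inference_api.h"  // header name/path varies by install

void ConfigureCpuOneDnn(paddle_infer::Config* config) {
  config->EnableMKLDNN();
  // fc_elementwise_add_mkldnn_fuse_pass no longer exists, so deleting
  // the two remaining FC passes fully disables oneDNN FC fusion:
  config->pass_builder()->DeletePass("fc_mkldnn_pass");
  config->pass_builder()->DeletePass("fc_act_mkldnn_fuse_pass");
}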
35 changes: 2 additions & 33 deletions paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
@@ -46,9 +46,6 @@ GetDNNLScales(const ExecutionContext& ctx) {
   auto scale_in_data = ctx.Attr<float>("Scale_in");
   auto scale_out = ctx.Attr<float>("Scale_out");
   auto scale_weights_data = ctx.Attr<std::vector<float>>("Scale_weights");
-  auto scale_in_eltwise_data = ctx.HasAttr("Scale_in_eltwise")
-                                   ? ctx.Attr<float>("Scale_in_eltwise")
-                                   : 1.0f;
 
   std::vector<float> dnnl_src_scales = {1.f / scale_in_data};
   size_t count = scale_weights_data.size();
@@ -57,7 +54,7 @@ GetDNNLScales(const ExecutionContext& ctx) {
   for (size_t i = 0; i < count; i++) {
     dnnl_wei_scales[i] = 1.f / scale_weights_data[i];
   }
-  std::vector<float> dnnl_psum_scales = {1.f / scale_in_eltwise_data};
+  std::vector<float> dnnl_psum_scales = {1.f};
   std::vector<float> dnnl_dst_scales = {1.f / scale_out};
 
   return std::make_tuple(
@@ -127,7 +124,6 @@ class FCMKLDNNHandler
     dnnl::primitive_attr attributes;
     dnnl::post_ops post_operations;
 
-    float sum_scale = 1.0f;
     float activation_scale = 1.0f;
     if (phi::funcs::is_int8<T_w>()) {
       std::vector<float> src_scales, wei_scales, psum_scales, dst_scales;
@@ -168,13 +164,6 @@ class FCMKLDNNHandler
                   dst_scales.data(),
                   dst_scales.size() * sizeof(float));
       }
-
-      sum_scale = psum_scales[0];
     }
-
-    if (ctx.HasAttr("fuse_residual_connection") &&
-        ctx.Attr<bool>("fuse_residual_connection")) {
-      post_operations.append_sum(sum_scale);
-    }
 
     // ReLU from "fc_fuse_pass"
@@ -332,22 +321,6 @@ class FCMKLDNNHandler
 
   std::shared_ptr<dnnl::memory> AcquireCustomDstMemory(
       const ExecutionContext& ctx, phi::DenseTensor* out) {
-    if (ctx.HasAttr("fuse_residual_connection") &&
-        ctx.Attr<bool>("fuse_residual_connection")) {
-      auto* residual_param = ctx.Input<phi::DenseTensor>("ResidualData");
-
-      PADDLE_ENFORCE_EQ(
-          out->dims(),
-          residual_param->dims(),
-          phi::errors::InvalidArgument(
-              "Output and elementwise parameter need to have the "
-              "same dimension sizes, but got output's dimension = %d"
-              " and residual param's dimension =%d .",
-              out->dims().size(),
-              residual_param->dims().size()));
-
-      out->ShareDataWith(*residual_param);
-    }
     return this->template AcquireDstMemory<T_out>(out);
   }  // namespace operators
 
@@ -458,11 +431,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
 
       dst_memory_p =
           std::make_shared<dnnl::memory>(inner_product_cache->dst_mem);
-      if (ctx.HasAttr("fuse_residual_connection") &&
-          ctx.Attr<bool>("fuse_residual_connection")) {
-        auto* residual_param = ctx.Input<phi::DenseTensor>("ResidualData");
-        out->ShareDataWith(*residual_param);
-      }
+
       auto out_ptr = out->mutable_data<T_out>(
           ctx.GetPlace(), dst_memory_p->get_desc().get_size());
       dst_memory_p->set_data_handle(out_ptr);
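Two things happen in this file: GetDNNLScales stops reading Scale_in_eltwise (the partial-sum scale is pinned to 1.f because there is no fused residual input left to dequantize), and the kernel stops aliasing the output buffer onto ResidualData. For reference, a hypothetical helper (my naming, not Paddle's) showing the scale convention the remaining code follows: Paddle attributes hold quantization factors (int8 ≈ fp32 × scale), while oneDNN consumes dequantization factors, hence the reciprocals.

#include <vector>

// Hypothetical helper mirroring GetDNNLScales' conversion step.
std::vector<float> ToDnnlDequantScales(const std::vector<float>& paddle_scales) {
  std::vector<float> dnnl_scales(paddle_scales.size());
  for (size_t i = 0; i < paddle_scales.size(); ++i) {
    // e.g. a Paddle scale of 50.8 (127 / absmax of 2.5) becomes ~0.0197
    dnnl_scales[i] = 1.f / paddle_scales[i];
  }
  return dnnl_scales;
}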
4 changes: 1 addition & 3 deletions test/cpp/inference/api/analysis_predictor_tester.cc
@@ -370,9 +370,7 @@ TEST(AnalysisPredictor, mkldnn_fc_passes_cpu_pass_strategy) {
   CpuPassStrategy cpuPassStrategy;
   cpuPassStrategy.EnableMKLDNN();
   const std::vector<std::string> fc_passes_to_erase(
-      {"fc_mkldnn_pass",
-       "fc_act_mkldnn_fuse_pass",
-       "fc_elementwise_add_mkldnn_fuse_pass"});
+      {"fc_mkldnn_pass", "fc_act_mkldnn_fuse_pass"});
   for (const auto& pass : fc_passes_to_erase) {
     ASSERT_NE(cpuPassStrategy.GetPassIndex(pass), (size_t)-1);
   }
2 changes: 1 addition & 1 deletion test/cpp/inference/api/analyzer_ernie_int8_tester.cc
@@ -34,7 +34,7 @@ void SetInt8Config(AnalysisConfig *cfg,
   pass_builder->DeletePass("constant_folding_pass");
   auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(data);
   cfg->mkldnn_quantizer_config()->SetEnabledOpTypes(
-      {"elementwise_add", "matmul", "matmul_v2", "fused_matmul"});
+      {"matmul", "matmul_v2", "fused_matmul"});
   // Exclusion of several matmuls that should not be quantized due to the fact
   // that they reduce the accuracy of the model
   cfg->mkldnn_quantizer_config()->SetExcludedOpIds(
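Dropping "elementwise_add" from the quantizer's enabled op set matches the third bullet of the commit message: without the FC fusion to absorb it, the add executes as a standalone op and is deliberately kept in f32. A sketch of the resulting setup, using the same AnalysisConfig API as the test above (the function name is illustrative; the include mirrors the public API header):

#include "paddle_inference_api.h"  // header name/path varies by install

void EnableInt8WithoutAddQuantization(paddle::AnalysisConfig* cfg) {
  cfg->EnableMkldnnQuantizer();
  cfg->mkldnn_quantizer_config()->SetEnabledOpTypes(
      {"matmul", "matmul_v2", "fused_matmul"});  // no "elementwise_add"
}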
1 change: 0 additions & 1 deletion test/cpp/inference/api/analyzer_vit_ocr_tester.cc
@@ -104,7 +104,6 @@ TEST(Analyzer_vit_ocr, fuse_status) {
 
   CHECK_EQ(fuse_statis.at("fc_mkldnn_pass"), 33);
   CHECK_EQ(fuse_statis.at("fused_conv2d_gelu_mkldnn_fuse_pass"), 2);
-  CHECK_EQ(fuse_statis.at("fc_elementwise_add_mkldnn_fuse"), 16);
 }
 #endif
2 changes: 0 additions & 2 deletions test/ir/inference/CMakeLists.txt
@@ -301,8 +301,6 @@ if(WITH_GPU AND TENSORRT_FOUND)
   set_tests_properties(test_mkldnn_conv_mish_fuse_pass PROPERTIES TIMEOUT 300)
   set_tests_properties(test_onednn_fc_activation_fuse_pass PROPERTIES TIMEOUT
                        300)
-  set_tests_properties(test_onednn_fc_elementwise_add_fuse_pass
-                       PROPERTIES TIMEOUT 120)
   set_tests_properties(test_mkldnn_conv_affine_channel_fuse_pass
                        PROPERTIES TIMEOUT 60)
 endif()