Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OneDNN] Enable oneDNN by default in Paddle Inference API #58560

Merged
merged 28 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
cb557dd
pr for oneDNN default
zhanglirong1999 Nov 1, 2023
578edf7
first commit for default
zhanglirong1999 Nov 2, 2023
6e6b485
add disable MKLDNN in CpuPassStrategy
zhanglirong1999 Nov 6, 2023
8f619bf
Add condition for compile without MKLDNN
zhanglirong1999 Nov 6, 2023
766aba6
fix CI error
zhanglirong1999 Nov 6, 2023
4f76140
for format
zhanglirong1999 Nov 6, 2023
8ee1d00
filter win vs2017
zhanglirong1999 Nov 6, 2023
888279a
disable mldnn in faster tokenizer
zhanglirong1999 Nov 7, 2023
f350967
skip or disable mkldnn when GPU,XPU, so on
zhanglirong1999 Nov 7, 2023
979fd50
fix some ut error
zhanglirong1999 Nov 8, 2023
9b81b1d
disable FLGA_mkldnn when NOT CPU place
zhanglirong1999 Nov 8, 2023
04c11d6
fix use_mkldnn for CI ut
zhanglirong1999 Nov 9, 2023
ad1f2c3
find why ut failed
zhanglirong1999 Nov 9, 2023
18e7e35
clear pass if not use_mkl
zhanglirong1999 Nov 9, 2023
75b0b6b
clear pass and add original cpu pass
zhanglirong1999 Nov 10, 2023
16e9e46
disable GLAGS_use_mkldnn
zhanglirong1999 Nov 10, 2023
af8aaf8
add ifdef PADDLE_WITH_DNNL
zhanglirong1999 Nov 10, 2023
c14c96f
fix test_conv2d_transpose_op and test_pad_op bug
zhanglirong1999 Nov 12, 2023
6fa2a81
enable mkldnn in FLAGS
zhanglirong1999 Nov 13, 2023
3172eea
disable mkldnn in three case
zhanglirong1999 Nov 13, 2023
24a5a4f
disable flags
zhanglirong1999 Nov 13, 2023
be74b27
change disable mkldnn position
zhanglirong1999 Nov 13, 2023
5bba802
set FLAGS_use_mkldnn in static.exe
zhanglirong1999 Nov 13, 2023
a394087
add PADDLE_WITH_DNNL
zhanglirong1999 Nov 14, 2023
422ff79
skip training defunct OneDNN
zhanglirong1999 Nov 14, 2023
8249efd
delete useless sentence
zhanglirong1999 Nov 14, 2023
9bc9260
unify passes to avoid duplicate
zhanglirong1999 Nov 14, 2023
99f5ac9
fix code style
zhanglirong1999 Nov 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 27 additions & 6 deletions paddle/fluid/inference/api/analysis_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,11 @@ void AnalysisConfig::EnableMKLDNN() {
Update();
}

// Turn off oneDNN (MKLDNN) acceleration for this config.
// Counterpart of EnableMKLDNN(); needed now that oneDNN is on by default.
void AnalysisConfig::DisableMKLDNN() {
// Clear the flag first so the Update() below rebuilds the pass strategy
// with oneDNN excluded.
use_mkldnn_ = false;
Update();
}

void AnalysisConfig::SetMkldnnCacheCapacity(int capacity) {
#ifdef PADDLE_WITH_DNNL
mkldnn_cache_capacity_ = capacity;
Expand Down Expand Up @@ -933,6 +938,24 @@ void AnalysisConfig::Update() {
}
}

#ifdef PADDLE_WITH_DNNL
// Since MKLDNN is enabled by default, pass_builder_ has already been
// created by the time this code runs. Three situations matter here:
// Case 1: the user manually disables MKLDNN after pass_builder_ creation
//         (config.disable_mkldnn()).
// Case 2: the device is GPU/IPU/XPU — EnableXpu(), EnableCUDNN(), etc.
//         have already reset the PassStrategy in the code block above.
// Case 3: pass_builder_ already exists and is a GpuPassStrategy (or
//         IpuPassStrategy), in which case neither enabling nor disabling
//         MKLDNN takes effect.
if (!use_gpu() && !use_xpu() && !use_ipu() && !use_custom_device() &&
!use_mkldnn_) {
// User manually disable mkldnn
pass_builder()->DisableMKLDNN();
}
#endif

if (use_tensorrt_) {
pass_builder()->ClearPasses();
for (const auto &pass : kTRTSubgraphPasses) {
Expand Down Expand Up @@ -976,15 +999,13 @@ void AnalysisConfig::Update() {
#endif
}

if (use_mkldnn_) {
if (!use_gpu() && !use_xpu() && !use_ipu()) {
if (use_mkldnn_ && enable_ir_optim_) {
#ifdef PADDLE_WITH_DNNL
if (!enable_ir_optim_) {
LOG(ERROR)
<< "EnableMKLDNN() only works when IR optimization is enabled.";
} else {
// default enable mkldnn when device is cpu and enable_ir_optim
pass_builder()->EnableMKLDNN();
}
#endif
}
}

// Quantization passes must come after all other optimization passes
Expand Down
16 changes: 15 additions & 1 deletion paddle/fluid/inference/api/paddle_analysis_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@
#include <vector>

#include "paddle_infer_declare.h" // NOLINT

/*! \file */
// Here we include some header files with relative paths, for that in deploy,
// the abstract path of this header file will be changed.
#include "paddle_api.h" // NOLINT
#include "paddle_pass_builder.h" // NOLINT
#ifdef PADDLE_WITH_DNNL
#include "paddle/phi/backends/cpu/cpu_info.h"
#include "paddle_mkldnn_quantizer_config.h" // NOLINT
#endif

Expand Down Expand Up @@ -929,6 +929,13 @@ struct PD_INFER_DECL AnalysisConfig {
///
///
void EnableMKLDNN();

///
/// \brief Turn off MKLDNN.
///
///
void DisableMKLDNN();

///
/// \brief Set the cache capacity of different input shapes for MKLDNN.
/// Default value 0 means not caching any shape.
Expand Down Expand Up @@ -1294,7 +1301,14 @@ struct PD_INFER_DECL AnalysisConfig {

std::unordered_set<std::string> trt_ops_run_float_;

#ifdef PADDLE_WITH_DNNL
// oneDNN is enabled by default, but only when the host CPU supports AVX2;
// MayIUse() probes the ISA at runtime. (The redundant `? true : false` on a
// boolean expression was dropped.)
bool use_mkldnn_{
    phi::backends::cpu::MayIUse(phi::backends::cpu::cpu_isa_t::avx2)};
#else
// Built without oneDNN support: it can never be enabled.
bool use_mkldnn_{false};
#endif

std::unordered_set<std::string> mkldnn_enabled_op_types_;

bool model_from_memory_{false};
Expand Down
69 changes: 39 additions & 30 deletions paddle/fluid/inference/api/paddle_pass_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,39 @@ const std::vector<std::string> kCINNCompilerPasses{
"build_cinn_pass",
};

// Baseline IR passes for plain-CPU inference (no oneDNN-specific passes).
// Shared by CpuPassStrategy's constructor and its DisableMKLDNN() so the two
// stay in sync. NOTE: the large fusions must stay at the front so they are
// not damaged by the smaller ones.
const std::vector<std::string> CpuBasicPasses{
    "simplify_with_basic_ops_pass",
    "layer_norm_fuse_pass",
    "attention_lstm_fuse_pass",
    "seqconv_eltadd_relu_fuse_pass",
    // "seqpool_concat_fuse_pass",
    "seqpool_cvm_concat_fuse_pass",
    // "embedding_fc_lstm_fuse_pass",
    // TODO(wilber): fix correctness problem.
    // "fc_lstm_fuse_pass",
    "mul_lstm_fuse_pass",
    "fc_gru_fuse_pass",
    "mul_gru_fuse_pass",
    "seq_concat_fc_fuse_pass",
    "gpu_cpu_squeeze2_matmul_fuse_pass",
    "gpu_cpu_reshape2_matmul_fuse_pass",
    "gpu_cpu_flatten2_matmul_fuse_pass",
    "matmul_v2_scale_fuse_pass",
    "gpu_cpu_map_matmul_v2_to_mul_pass",
    "gpu_cpu_map_matmul_v2_to_matmul_pass",
    "matmul_scale_fuse_pass",
    "gpu_cpu_map_matmul_to_mul_pass",
    "fc_fuse_pass",
    "repeated_fc_relu_fuse_pass",
    "squared_mat_sub_fuse_pass",
    "conv_bn_fuse_pass",
    "conv_eltwiseadd_bn_fuse_pass",
    "conv_transpose_bn_fuse_pass",
    "conv_transpose_eltwiseadd_bn_fuse_pass",
    "is_test_pass",
    "constant_folding_pass",
};

GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
passes_.assign({
"map_op_to_another_pass", //
Expand Down Expand Up @@ -309,36 +342,7 @@ void GpuPassStrategy::DisableMkldnnFcPasses() {
CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
// NOTE the large fusions should be located in the front, so that they will
// not be damaged by smaller ones.
passes_.assign({"simplify_with_basic_ops_pass", //
"layer_norm_fuse_pass",
"attention_lstm_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", //
// "seqpool_concat_fuse_pass", //
"seqpool_cvm_concat_fuse_pass", //
// "embedding_fc_lstm_fuse_pass", //
// TODO(wilber): fix correctness problem.
// "fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", //
"fc_gru_fuse_pass", //
"mul_gru_fuse_pass", //
"seq_concat_fc_fuse_pass", //
"gpu_cpu_squeeze2_matmul_fuse_pass", //
"gpu_cpu_reshape2_matmul_fuse_pass", //
"gpu_cpu_flatten2_matmul_fuse_pass", //
"matmul_v2_scale_fuse_pass", //
"gpu_cpu_map_matmul_v2_to_mul_pass", //
"gpu_cpu_map_matmul_v2_to_matmul_pass", //
"matmul_scale_fuse_pass", //
"gpu_cpu_map_matmul_to_mul_pass", //
"fc_fuse_pass", //
"repeated_fc_relu_fuse_pass", //
"squared_mat_sub_fuse_pass", //
"conv_bn_fuse_pass", //
"conv_eltwiseadd_bn_fuse_pass", //
"conv_transpose_bn_fuse_pass", //
"conv_transpose_eltwiseadd_bn_fuse_pass", //
"is_test_pass", //
"constant_folding_pass"});
passes_.assign(CpuBasicPasses.begin(), CpuBasicPasses.end());

use_gpu_ = false;
}
Expand Down Expand Up @@ -391,6 +395,11 @@ void CpuPassStrategy::EnableMKLDNN() {
#endif
}

// Drop every pass (including any oneDNN ones added by EnableMKLDNN) and
// restore the plain-CPU baseline pass list.
// NOTE(review): this does not reset any use_mkldnn_-style state on the
// strategy; if EnableMKLDNN() guards on such a flag, re-enabling after a
// disable could silently no-op — verify against EnableMKLDNN()'s body.
void CpuPassStrategy::DisableMKLDNN() {
ClearPasses();
passes_.assign(CpuBasicPasses.begin(), CpuBasicPasses.end());
}

void CpuPassStrategy::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_DNNL
if (!use_mkldnn_quantizer_) {
Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/inference/api/paddle_pass_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
/// still be some CPU kernels running in GPU mode.
virtual void EnableMKLDNN() {}

/// \brief Disable the use of MKLDNN.
virtual void DisableMKLDNN() {}

/// \brief Enable MKLDNN quantize optimization.
virtual void EnableMkldnnQuantizer() {}

Expand Down Expand Up @@ -212,6 +215,9 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy {
/// \brief Enable the use of MKLDNN.
void EnableMKLDNN() override;

/// \brief Disable the use of MKLDNN.
void DisableMKLDNN() override;

/// \brief Enable MKLDNN quantize optimization.
void EnableMkldnnQuantizer() override;

Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/pybind/inference_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,7 @@ void BindAnalysisConfig(py::module *m) {
&AnalysisConfig::SwitchIrDebug,
py::arg("x") = true)
.def("enable_mkldnn", &AnalysisConfig::EnableMKLDNN)
.def("disable_mkldnn", &AnalysisConfig::DisableMKLDNN)
.def("mkldnn_enabled", &AnalysisConfig::mkldnn_enabled)
.def("set_cpu_math_library_num_threads",
&AnalysisConfig::SetCpuMathLibraryNumThreads)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void SetConfig(AnalysisConfig *cfg) {
cfg->SwitchIrOptim();
cfg->SwitchSpecifyInputNames();
cfg->SetCpuMathLibraryNumThreads(FLAGS_num_threads);
if (FLAGS_enable_mkldnn) cfg->EnableMKLDNN();
if (!FLAGS_enable_mkldnn) cfg->DisableMKLDNN();
}

TEST(Analyzer_bfloat16_image_classification, bfloat16) {
Expand Down
4 changes: 2 additions & 2 deletions test/cpp/inference/api/analyzer_transformer_compare_tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ namespace transformer_tester {
void compare(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg);
if (use_mkldnn) {
cfg.EnableMKLDNN();
if (!use_mkldnn) {
cfg.DisableMKLDNN();
}

std::vector<std::vector<PaddleTensor>> input_slots_all;
Expand Down
4 changes: 2 additions & 2 deletions test/ir/inference/auto_scan_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ def create_inference_config(
config.switch_ir_optim(ir_optim)
if use_gpu:
config.enable_use_gpu(100, 0)
if use_mkldnn:
config.enable_mkldnn()
if not use_mkldnn:
config.disable_mkldnn()
if use_xpu:
config.enable_xpu()
if passes is not None:
Expand Down
1 change: 1 addition & 0 deletions test/ir/inference/inference_pass_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def _get_analysis_config(
self.path + ".pdmodel", self.path + ".pdiparams"
)
config.disable_gpu()
config.disable_mkldnn()
config.switch_specify_input_names(True)
config.switch_ir_optim(True)
config.switch_use_feed_fetch_ops(False)
Expand Down
1 change: 1 addition & 0 deletions test/ir/inference/quant_dequant_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def _get_analysis_config(
self.path + ".pdmodel", self.path + ".pdiparams"
)
config.disable_gpu()
config.disable_mkldnn()
config.switch_specify_input_names(True)
config.switch_ir_optim(True)
config.switch_use_feed_fetch_ops(False)
Expand Down
3 changes: 1 addition & 2 deletions test/ir/inference/test_conv_act_onednn_fuse_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@

class TestConvActOneDNNFusePass(PassAutoScanTest):
def sample_predictor_configs(self, program_config):
config = self.create_inference_config(use_gpu=False)
config.enable_mkldnn()
config = self.create_inference_config(use_gpu=False, use_mkldnn=True)
yield config, ['fused_conv2d'], (1e-4, 1e-5)

def is_program_valid(self, prog_config):
Expand Down
3 changes: 1 addition & 2 deletions test/ir/inference/test_conv_bn_fuse_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,7 @@ def generate_bn_Var():
def sample_predictor_configs(self, program_config):
# for mkldnn
if program_config.ops[0].attrs['use_mkldnn']:
config = self.create_inference_config()
config.enable_mkldnn()
config = self.create_inference_config(use_mkldnn=True)
yield config, ['fused_conv2d'], (1e-5, 1e-5)
else:
config = self.create_inference_config()
Expand Down
4 changes: 2 additions & 2 deletions test/ir/inference/test_conv_transpose_bn_fuse_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,12 @@ def generate_batch_norm_Variance():

def sample_predictor_configs(self, program_config):
# for mkldnn
config = self.create_inference_config()
if program_config.ops[0].attrs['use_mkldnn']:
config.enable_mkldnn()
config = self.create_inference_config(use_mkldnn=True)
yield config, ['conv2d_transpose'], (1e-5, 1e-5)
# for cpu
else:
config = self.create_inference_config()
yield config, ['conv2d_transpose', 'elementwise_add'], (1e-5, 1e-5)

def is_program_valid(self, program_config: ProgramConfig) -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,12 @@ def generate_batch_norm_Variance():

def sample_predictor_configs(self, program_config):
# for mkldnn
config = self.create_inference_config()
if program_config.ops[2].attrs['use_mkldnn']:
config.enable_mkldnn()
config = self.create_inference_config(use_mkldnn=True)
yield config, ['conv2d_transpose', 'elementwise_add'], (1e-5, 1e-5)
# cpu
else:
config = self.create_inference_config()
yield config, ['conv2d_transpose', 'elementwise_add'], (1e-5, 1e-5)

def is_program_valid(self, program_config: ProgramConfig) -> bool:
Expand Down
3 changes: 1 addition & 2 deletions test/ir/inference/test_onednn_conv_bias_fuse_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@

class TestConvBiasOneDNNFusePass(PassAutoScanTest):
def sample_predictor_configs(self, program_config):
config = self.create_inference_config(use_gpu=False)
config.enable_mkldnn()
config = self.create_inference_config(use_gpu=False, use_mkldnn=True)
yield config, ['fused_conv2d'], (1e-4, 1e-5)

def is_program_valid(self, prog_config):
Expand Down
3 changes: 1 addition & 2 deletions test/ir/inference/test_onednn_conv_bn_fuse_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,7 @@ def generate_data(shape):
return program_config

def sample_predictor_configs(self, program_config):
config = self.create_inference_config()
config.enable_mkldnn()
config = self.create_inference_config(use_mkldnn=True)
yield config, ['fused_conv2d'], (1e-5, 1e-5)

def test(self):
Expand Down
5 changes: 5 additions & 0 deletions test/legacy_test/op_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1971,6 +1971,8 @@ def check_output_with_place(
):
core._set_prim_all_enabled(False)
core.set_prim_eager_enabled(False)
if not self.is_mkldnn_op():
set_flags({"FLAGS_use_mkldnn": False})

if hasattr(self, "use_custom_device") and self.use_custom_device:
check_dygraph = False
Expand Down Expand Up @@ -2917,6 +2919,9 @@ def check_grad_with_place(
if hasattr(self, "use_custom_device") and self.use_custom_device:
check_dygraph = False

if not self.is_mkldnn_op():
set_flags({"FLAGS_use_mkldnn": False})

core._set_prim_all_enabled(False)
core.set_prim_eager_enabled(False)
if check_prim:
Expand Down
1 change: 1 addition & 0 deletions test/legacy_test/test_attribute_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def infer_prog(self):
config = paddle_infer.Config(
self.save_path + '.pdmodel', self.save_path + '.pdiparams'
)
config.disable_mkldnn()
predictor = paddle_infer.create_predictor(config)
input_names = predictor.get_input_names()
for i, shape in enumerate(self.shapes):
Expand Down
1 change: 1 addition & 0 deletions test/tokenizer/test_faster_tokenizer_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def __init__(self, model_dir):

# fast_tokenizer op only support cpu.
config.disable_gpu()
config.disable_mkldnn()
config.set_cpu_math_library_num_threads(10)

config.switch_use_feed_fetch_ops(False)
Expand Down