[CPU] Change precision configuration by ov::pass::ConvertPrecision (o…
alvoron authored and allnes committed Nov 23, 2023
1 parent d8dcc81 commit cd9a2e1
Showing 3 changed files with 39 additions and 14 deletions.
10 changes: 8 additions & 2 deletions src/plugins/intel_cpu/src/config.cpp
@@ -255,10 +255,16 @@ void Config::readProperties(const std::map<std::string, std::string> &prop, cons
// when both execution_mode and inference_precision are specified
if (!inferencePrecisionSetExplicitly) {
if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
inferencePrecision = ov::element::f32;
#if defined(OV_CPU_ARM_ENABLE_FP16)
//fp16 precision is used as default precision on ARM for non-convolution networks
//fp16 ACL convolution is slower than fp32
if (modelType != ModelType::CNN)
inferencePrecision = ov::element::f16;
#else
if (mayiuse(avx512_core_bf16))
inferencePrecision = ov::element::bf16;
else
inferencePrecision = ov::element::f32;
#endif
} else {
inferencePrecision = ov::element::f32;
}
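
For context, here is a minimal sketch (not part of this commit) of how the default-selection logic above surfaces through the public OpenVINO 2023.x C++ API; the model path is a placeholder. Under ExecutionMode::PERFORMANCE with no explicit precision hint, the plugin now defaults to f16 on ARM for non-convolution models, bf16 on avx512_core_bf16-capable x86, and f32 otherwise.

#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Placeholder model path; any IR will do for this sketch.
    auto model = core.read_model("model.xml");

    // No explicit inference_precision: the CPU plugin picks the default
    // described in the diff above, depending on platform and model type.
    auto compiled = core.compile_model(
        model, "CPU",
        ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE));
    std::cout << "selected precision: "
              << compiled.get_property(ov::hint::inference_precision) << '\n';

    // An explicit hint bypasses the default selection entirely.
    auto compiled_f32 = core.compile_model(
        model, "CPU", ov::hint::inference_precision(ov::element::f32));
    return 0;
}
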
5 changes: 4 additions & 1 deletion src/plugins/intel_cpu/src/graph.cpp
@@ -1709,7 +1709,10 @@ void Graph::EnforceInferencePrecision() {

if (inferPrec == Precision::FP32)
return; // nothing to do, only precision reduction is currently allowed

#if defined(OV_CPU_ARM_ENABLE_FP16)
if (inferPrec == Precision::FP16)
return; // precision is configured by ov::pass::ConvertPrecision
#endif
std::function<void(const NodePtr&, std::unordered_set<NodePtr>& skipNodes)> searchForNodesToSkip;
searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set<NodePtr>& skipNodes) -> void {
for (size_t i = 0; i < node->getParentEdges().size(); i++) {
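
The early return above works because, for FP16 on ARM, the down-conversion now happens up front in the transformation pipeline instead of per-node in the graph. A minimal standalone sketch of such a ConvertPrecision invocation, assuming OpenVINO 2023.x headers (an illustration, not code from this commit):

#include <openvino/core/model.hpp>
#include <openvino/pass/convert_precision.hpp>
#include <openvino/pass/manager.hpp>

void convert_model_to_fp16(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    // Map every f32 value to f16 ...
    const precisions_map fp16_map = {{ov::element::f32, ov::element::f16}};
    const type_to_fuse_map empty_fuse_map = {};
    // ... but keep precision-sensitive subgraphs in f32, as the pipeline does.
    const bool keep_precision_sensitive_in_fp32 = true;
    manager.register_pass<ov::pass::ConvertPrecision>(
        fp16_map, empty_fuse_map, keep_precision_sensitive_in_fp32);
    manager.run_passes(model);
}
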
38 changes: 27 additions & 11 deletions src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -195,14 +195,6 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
CPU_REGISTER_PASS_COMMON(manager, ov::pass::InitNodeInfo);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkShapeOfSubgraphs);

CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
CPU_SET_CALLBACK_COMMON(manager,
[](const_node_ptr &node) -> bool {
const auto outputs = node->get_output_target_inputs(0);
return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
},
ov::pass::KeepConstAndDecompression);

const bool useLpt = !defaultPrecisions.empty();
if (useLpt) {
CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions);
@@ -243,28 +235,52 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
}, ov::pass::MarkDequantizationSubgraph);
}

auto get_convert_precisions = []() {
auto get_convert_precisions = [&]() {
precisions_map map = {
{ov::element::i64, ov::element::i32},
{ov::element::u64, ov::element::i32},
{ov::element::i16, ov::element::i32},
{ov::element::u16, ov::element::i32},
{ov::element::u32, ov::element::i32},
{ov::element::f64, ov::element::f32},
{ov::element::f16, ov::element::f32},
{ov::element::boolean, ov::element::u8},
{ov::element::i4, ov::element::i8},
{ov::element::u4, ov::element::u8}
};
// @todo should we always convert to f32 regardless of hardware support, as it is done for f16?
if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
map.insert({ov::element::bf16, ov::element::f32});

#if defined(OV_CPU_ARM_ENABLE_FP16)
if (inferencePrecision != ov::element::f16)
map.insert({ov::element::f16, ov::element::f32});
#else
map.insert({ov::element::f16, ov::element::f32});
#endif
return map;
};
static const auto precisions = get_convert_precisions();
type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};

#if defined(OV_CPU_ARM_ENABLE_FP16)
if (inferencePrecision == ov::element::f16) {
precisions_map fp_convert_precision_map = {
{ov::element::f32, ov::element::f16}
};
type_to_fuse_map empty_fuse_map = {};
const bool keep_precision_sensitive_in_fp32 = true;
CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, fp_convert_precision_map,
empty_fuse_map,
keep_precision_sensitive_in_fp32);
}
#endif
CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
CPU_SET_CALLBACK_COMMON(manager,
[](const_node_ptr &node) -> bool {
const auto outputs = node->get_output_target_inputs(0);
return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
},
ov::pass::KeepConstAndDecompression);

CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::CommonOptimizations);
CPU_REGISTER_PASS_COMMON(manager, ov::pass::WrapInterpolateIntoTransposes);
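
One ordering note on the relocated KeepConstAndDecompression registration above: it now comes after the ARM-specific f32 to f16 ConvertPrecision block, so on ARM its marking runs on the already down-converted model. Below is a rough sketch of what the CPU_REGISTER_PASS_COMMON / CPU_SET_CALLBACK_COMMON pair expands to, using the plain pass-manager API; the include path for KeepConstAndDecompression is an assumption and may differ across OpenVINO versions.

#include <openvino/core/type.hpp>
#include <openvino/op/matmul.hpp>
#include <openvino/pass/manager.hpp>
// Assumed dev-API header for ov::pass::KeepConstAndDecompression:
#include <transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp>

void register_keep_const_and_decompression(ov::pass::Manager& manager) {
    manager.register_pass<ov::pass::KeepConstAndDecompression>();
    // Returning true from the callback disables the pass for that node, so
    // constants and their decompression Converts are kept only when the sole
    // consumer is a MatMul (the weight-decompression case).
    manager.get_pass_config()->set_callback<ov::pass::KeepConstAndDecompression>(
        [](const std::shared_ptr<const ov::Node>& node) -> bool {
            const auto outputs = node->get_output_target_inputs(0);
            return outputs.size() != 1 ||
                   !ov::is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
        });
}
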
