From 475ddb9283abd8040f0104572c0c1ace733fb126 Mon Sep 17 00:00:00 2001
From: Aleksandr Voron
Date: Mon, 16 Oct 2023 14:32:47 +0200
Subject: [PATCH] [CPU] Change precision configuration by
 ov::pass::ConvertPrecision (#19993)

---
 src/plugins/intel_cpu/src/config.cpp |u10 ++++-
 src/plugins/intel_cpu/src/graph.cpp  | 5 ++-
 .../transformation_pipeline.cpp      | 38 +++++++++++++------
 3 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 277bdb412563e7..90e65a15495719 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -255,10 +255,16 @@ void Config::readProperties(const std::map<std::string, std::string> &prop, cons
     // when both execution_mode and inference_precision are specified
     if (!inferencePrecisionSetExplicitly) {
         if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
+            inferencePrecision = ov::element::f32;
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+            // fp16 precision is used as the default precision on ARM for non-convolution networks
+            // fp16 ACL convolution is slower than fp32
+            if (modelType != ModelType::CNN)
+                inferencePrecision = ov::element::f16;
+#else
             if (mayiuse(avx512_core_bf16))
                 inferencePrecision = ov::element::bf16;
-            else
-                inferencePrecision = ov::element::f32;
+#endif
         } else {
             inferencePrecision = ov::element::f32;
         }
diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
index ed18c01c848af1..5275259886ed91 100644
--- a/src/plugins/intel_cpu/src/graph.cpp
+++ b/src/plugins/intel_cpu/src/graph.cpp
@@ -1709,7 +1709,10 @@ void Graph::EnforceInferencePrecision() {
     if (inferPrec == Precision::FP32)
         return; // nothing to do, only precision reduction is currently allowed
-
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    if (inferPrec == Precision::FP16)
+        return; // precision is configured by ov::pass::ConvertPrecision
+#endif
     std::function<void(const NodePtr&, std::unordered_set<NodePtr>& skipNodes)> searchForNodesToSkip;
     searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set<NodePtr>& skipNodes) -> void {
         for (size_t i = 0; i < node->getParentEdges().size(); i++) {
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index 28756ba21664e1..002123781b84df 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -195,14 +195,6 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::InitNodeInfo);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkShapeOfSubgraphs);
 
-    CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
-    CPU_SET_CALLBACK_COMMON(manager,
-        [](const_node_ptr &node) -> bool {
-            const auto outputs = node->get_output_target_inputs(0);
-            return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
-        },
-        ov::pass::KeepConstAndDecompression);
-
     const bool useLpt = !defaultPrecisions.empty();
     if (useLpt) {
         CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationSubgraph, defaultPrecisions);
@@ -243,7 +235,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
             },
             ov::pass::MarkDequantizationSubgraph);
     }
-    auto get_convert_precisions = []() {
+    auto get_convert_precisions = [&]() {
         precisions_map map = {
             {ov::element::i64, ov::element::i32},
             {ov::element::u64, ov::element::i32},
@@ -251,7 +243,6 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
             {ov::element::u16, ov::element::i32},
             {ov::element::u32, ov::element::i32},
             {ov::element::f64, ov::element::f32},
-            {ov::element::f16, ov::element::f32},
             {ov::element::boolean, ov::element::u8},
             {ov::element::i4, ov::element::i8},
             {ov::element::u4, ov::element::u8}
@@ -259,12 +250,37 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
         // @todo should we always convert to f32 regardless of hardware support, as it is done for f16?
         if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
             map.insert({ov::element::bf16, ov::element::f32});
-
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+        if (inferencePrecision != ov::element::f16)
+            map.insert({ov::element::f16, ov::element::f32});
+#else
+        map.insert({ov::element::f16, ov::element::f32});
+#endif
         return map;
     };
 
     static const auto precisions = get_convert_precisions();
     type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    if (inferencePrecision == ov::element::f16) {
+        precisions_map fp_convert_precision_map = {
+            {ov::element::f32, ov::element::f16}
+        };
+        type_to_fuse_map empty_fuse_map = {};
+        const bool keep_precision_sensitive_in_fp32 = true;
+        CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertPrecision, fp_convert_precision_map,
+                                          empty_fuse_map,
+                                          keep_precision_sensitive_in_fp32);
+    }
+#endif
+    CPU_REGISTER_PASS_COMMON(manager, ov::pass::KeepConstAndDecompression);
+    CPU_SET_CALLBACK_COMMON(manager,
+        [](const_node_ptr &node) -> bool {
+            const auto outputs = node->get_output_target_inputs(0);
+            return outputs.size() != 1 || !is_type<ov::op::v0::MatMul>(outputs.begin()->get_node());
+        },
+        ov::pass::KeepConstAndDecompression);
+
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::CommonOptimizations);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::WrapInterpolateIntoTransposes);
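
For context: the ARM fp16 path above moves precision lowering out of the
per-node logic in Graph::EnforceInferencePrecision() and into the common pass
pipeline. When inferencePrecision is f16, PreLpt() registers
ov::pass::ConvertPrecision with an f32 -> f16 map and
keep_precision_sensitive_in_fp32 = true, and the graph-level pass now returns
early for FP16. A minimal standalone sketch of the same registration on a bare
ov::Model follows; the helper name convert_model_to_fp16 and the include paths
are assumptions for illustration, not part of this patch.

    #include <memory>

    #include "openvino/core/model.hpp"
    #include "openvino/pass/manager.hpp"
    // Assumed header declaring ov::pass::ConvertPrecision, precisions_map and
    // type_to_fuse_map in the OpenVINO transformations dev API.
    #include "transformations/convert_precision.hpp"

    // Hypothetical helper mirroring the registration added in PreLpt() above.
    void convert_model_to_fp16(const std::shared_ptr<ov::Model>& model) {
        // Same map the patch builds as fp_convert_precision_map.
        precisions_map fp_convert_precision_map = {{ov::element::f32, ov::element::f16}};
        type_to_fuse_map empty_fuse_map = {};
        // Keep precision-sensitive subgraphs in f32, matching
        // keep_precision_sensitive_in_fp32 = true in the patch.
        const bool keep_precision_sensitive_in_fp32 = true;

        ov::pass::Manager manager;
        manager.register_pass<ov::pass::ConvertPrecision>(fp_convert_precision_map,
                                                          empty_fuse_map,
                                                          keep_precision_sensitive_in_fp32);
        manager.run_passes(model);
    }

Running ConvertPrecision once over the whole ov::Model lets the pass keep
precision-sensitive operations in f32 centrally, which the per-node
enforcement in Graph::EnforceInferencePrecision() had to approximate with its
searchForNodesToSkip skip list.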