openvinotoolkit · mg-intel · Mar 19, 2024 · Feb 14, 2024 · Feb 15, 2024 · Feb 15, 2024
@@ -369,10 +369,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
         if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
             inferencePrecision = ov::element::f32;
 #if defined(OV_CPU_ARM_ENABLE_FP16)
-            // fp16 precision is used as default precision on ARM for non-convolution networks
-            // fp16 ACL convolution is slower than fp32
-            if (modelType != ModelType::CNN)
-                inferencePrecision = ov::element::f16;
+            inferencePrecision = ov::element::f16;
 #else
             if (mayiuse(avx512_core_bf16))
                 inferencePrecision = ov::element::bf16;

@@ -71,6 +71,7 @@ class AutoBatching_Test : public OVPluginTestBase,
 
             if (target_device.find("CPU") != std::string::npos) {
                 config.insert(ov::num_streams(static_cast<int32_t>(num_streams)));
+                config.insert(ov::hint::inference_precision(ov::element::f32));
             }
             // minimize timeout to reduce test time
             config.insert(ov::auto_batch_timeout(1));

@@ -487,7 +487,8 @@ TEST_P(CoreThreadingTestsWithIter, smoke_CompileModel_Accuracy_SingleCore) {
             }
 
             auto getOutputBlob = [&](ov::Core& core) {
-                auto compiled_model = core.compile_model(model, target_device);
+                ov::AnyMap f32_precision_property = {{ov::hint::inference_precision.name(), ov::element::f32.to_string()}};
+                auto compiled_model = core.compile_model(model, target_device, f32_precision_property);
                 auto req = compiled_model.create_infer_request();
                 for (const auto& input : inputs) {
                     req.set_tensor(input.first, input.second);
@@ -530,7 +531,8 @@ TEST_P(CoreThreadingTestsWithIter, smoke_CompileModel_Accuracy_MultipleCores) {
             }
 
             auto getOutputBlob = [&](ov::Core& core) {
-                auto compiled_model = core.compile_model(model, target_device);
+                ov::AnyMap f32_precision_property = {{ov::hint::inference_precision.name(), ov::element::f32.to_string()}};
+                auto compiled_model = core.compile_model(model, target_device, f32_precision_property);
                 auto req = compiled_model.create_infer_request();
                 for (const auto& input : inputs) {
                     req.set_tensor(input.first, input.second);