[CPU] Enabled FP16 Compressed FC on models with PagedAttention (openvinotoolkit#26279)
bbielawx · Oct 14, 2024 · 7250c1e · 7250c1e
1 parent c714284
commit 7250c1e
Showing 1 changed file with 4 additions and 3 deletions.
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
@@ -19,6 +19,7 @@
 #include "utils/precision_support.h"
 #include "utils/serialize.hpp"
 #include "weights_cache.hpp"
+#include "openvino/op/paged_attention.hpp"
 
 #if defined(__linux__)
 #    include <signal.h>
@@ -197,9 +198,9 @@ static Config::ModelType getModelType(const std::shared_ptr<const Model>& model)
     if (op::util::has_op_with_type<op::v1::Convolution>(model) ||
         op::util::has_op_with_type<op::v1::ConvolutionBackpropData>(model))
         return Config::ModelType::CNN;
-    
-    if (op::util::has_op_with_type<op::v13::ScaledDotProductAttention>(model) &&
-        model->get_variables().size() > 0)
+
+    if ((op::util::has_op_with_type<op::v13::ScaledDotProductAttention>(model) && model->get_variables().size() > 0) ||
+         op::util::has_op_with_type<ov::op::PagedAttentionExtension>(model))
         return Config::ModelType::LLM;
 
     return Config::ModelType::Unknown;