Skip to content

Commit

Permalink
[CPU] Enabled FP16 Compressed FC on models with PagedAttention (openv…
Browse files (browse the repository at this point in the history)
  • Loading branch information
dmitry-gorokhov authored Oct 14, 2024
1 parent c714284 commit 7250c1e
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "utils/precision_support.h"
#include "utils/serialize.hpp"
#include "weights_cache.hpp"
#include "openvino/op/paged_attention.hpp"

#if defined(__linux__)
# include <signal.h>
Expand Down Expand Up @@ -197,9 +198,9 @@ static Config::ModelType getModelType(const std::shared_ptr<const Model>& model)
if (op::util::has_op_with_type<op::v1::Convolution>(model) ||
op::util::has_op_with_type<op::v1::ConvolutionBackpropData>(model))
return Config::ModelType::CNN;
if (op::util::has_op_with_type<op::v13::ScaledDotProductAttention>(model) &&
model->get_variables().size() > 0)

if ((op::util::has_op_with_type<op::v13::ScaledDotProductAttention>(model) && model->get_variables().size() > 0) ||
op::util::has_op_with_type<ov::op::PagedAttentionExtension>(model))
return Config::ModelType::LLM;

return Config::ModelType::Unknown;
Expand Down

0 comments on commit 7250c1e

Please sign in to comment.