Skip to content

Commit

Permalink
Review comments applied
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Nov 13, 2023
1 parent 0409f64 commit 9642a35
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 9 deletions.
8 changes: 0 additions & 8 deletions src/plugins/intel_cpu/src/graph_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,18 +371,10 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) {
continue;

// Precision limitations
if (multiplyConstNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32)
continue;
if (withPowerStatic && powerStaticNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32)
continue;
if (supportedDataPrecisions.find(fcNode->getOriginalInputPrecisionAtPort(0)) == supportedDataPrecisions.end())
continue;
if (supportedWeightsPrecisions.find(weightsNode->getOriginalOutputPrecisionAtPort(0)) == supportedWeightsPrecisions.end())
continue;
// Subtract constant can have either FP32 precision or the weights precision
if (withSubtract && subtractConstNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32 &&
subtractConstNode->getOriginalOutputPrecisionAtPort(0) != weightsNode->getOriginalOutputPrecisionAtPort(0))
continue;

// Shape limitations
const auto weightsShape = weightsNode->getOutputShapeAtPort(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
CPU_REGISTER_PASS_COMMON(decompression_handling_manager, ov::pass::InitNodeInfo);
CPU_REGISTER_PASS_COMMON(decompression_handling_manager, ov::pass::MarkShapeOfSubgraphs);
// We need to fuse Transpose to MatMul to have a simpler callback for the next transformation
CPU_REGISTER_PASS_COMMON(decompression_handling_manager, ov::pass::TransposeMatMul);
CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::TransposeMatMul);
ov::element::TypeVector decompression_precisions{ov::element::u8};
// We don't have BF16/FP16 FullyConnected kernels to work with 4-bit compressed weights
// Convert node doesn't support 4-bit precisions -> fall back to constant folding
Expand All @@ -254,6 +254,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
decompression_precisions.push_back(ov::element::i4);
decompression_precisions.push_back(ov::element::nf4);
}
// Ticket 124834: set fold_subtract_const to false when cpu_convert supports i4/u4/nf4 precisions
CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantizationSubgraph, decompression_precisions, true);
CPU_SET_CALLBACK_X64(decompression_handling_manager, [&](const_node_ptr &node) -> bool {
return !is_decompression_multiply(node);
Expand Down

0 comments on commit 9642a35

Please sign in to comment.