Skip to content

Commit

Permalink
[CPU] Weights decompression support for hybrid models (openvinotoolkit)
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev authored and byungilm committed Nov 17, 2023
1 parent 0b66f75 commit c9fd6af
Show file tree
Hide file tree
Showing 8 changed files with 212 additions and 126 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace low_precision {
/**
* @ingroup ie_transformation_common_api
* @brief FoldConvertTransformation evaluates Convert operation on Subtract constant subgraph.
* Important notice: this transformation ignores DisableConstantFolding runtime attribute.
*
* For more details about the transformation, refer to
* [FoldConvertTransformation](@ref openvino_docs_OV_UG_lpt_FoldConvertTransformation) page
Expand Down
35 changes: 25 additions & 10 deletions src/plugins/intel_cpu/src/graph_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,16 +325,22 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) {

const auto mulParent = multiplyNode->getParentEdgesAtPort(0)[0]->getParent();
const bool withSubtract = mulParent->getAlgorithm() == Algorithm::EltwiseSubtract;
NodePtr subtractNode, subtractConstNode;
NodePtr subtractNode, subtractConvertNode, subtractConstNode;
if (withSubtract) {
subtractNode = mulParent;
if (!expectedNode(subtractNode, Type::Eltwise))
continue;
subtractConstNode = subtractNode->getParentEdgesAtPort(1)[0]->getParent();
auto subtractParent = subtractNode->getParentEdgesAtPort(1)[0]->getParent();
if (expectedNode(subtractParent, Type::Convert)) {
subtractConvertNode = subtractParent;
subtractParent = subtractConvertNode->getParentEdgesAtPort(0)[0]->getParent();
}
subtractConstNode = subtractParent;
if (!expectedNode(subtractConstNode, Type::Input))
continue;
}

const bool withSubtractConvert = subtractConvertNode != nullptr;
const bool withPowerStatic = mulParent->getAlgorithm() == Algorithm::EltwisePowerStatic;
NodePtr powerStaticNode;
if (withPowerStatic) {
Expand Down Expand Up @@ -364,12 +370,6 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) {
continue;

// Precision limitations
if (multiplyConstNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32)
continue;
if (withSubtract && subtractConstNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32)
continue;
if (withPowerStatic && powerStaticNode->getOriginalOutputPrecisionAtPort(0) != Precision::FP32)
continue;
if (supportedDataPrecisions.find(fcNode->getOriginalInputPrecisionAtPort(0)) == supportedDataPrecisions.end())
continue;
if (supportedWeightsPrecisions.find(weightsNode->getOriginalOutputPrecisionAtPort(0)) == supportedWeightsPrecisions.end())
Expand Down Expand Up @@ -403,9 +403,17 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) {
decompressionConstShape = withTranspose ? VectorDims{N, 1, O} : VectorDims{O, N, 1};
groupNum = N;
}
if (multiplyConstNode->getOutputShapeAtPort(0).getDims() != decompressionConstShape)

auto check_decompression_shape = [&decompressionConstShape](const VectorDims& shape_to_check) {
if (shape_to_check.size() > decompressionConstShape.size())
return false;
const auto comparison_start_pos = decompressionConstShape.size() - shape_to_check.size();
// in case of different ranks shapes are compared taking into account ranks numpy broadcasting
return std::equal(shape_to_check.begin(), shape_to_check.end(), decompressionConstShape.begin() + comparison_start_pos);
};
if (!check_decompression_shape(multiplyConstNode->getOutputShapeAtPort(0).getDims()))
continue;
if (withSubtract && subtractConstNode->getOutputShapeAtPort(0).getDims() != decompressionConstShape)
if (withSubtract && !check_decompression_shape(subtractConstNode->getOutputShapeAtPort(0).getDims()))
continue;

// HW specific shape limitations
Expand Down Expand Up @@ -460,6 +468,11 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) {
fcNode->addOriginalLayer(multiplyNode->getOriginalLayers());
fcNode->addOriginalLayer(convertNode->getOriginalLayers());

if (withSubtractConvert) {
fcNode->addOriginalLayer(subtractConvertNode->getOriginalLayers());
auto subtractConvertEdge = subtractConvertNode->getChildEdges()[0].lock();
graph.RemoveEdge(subtractConvertEdge);
}
if (withSubtract) {
fcNode->addOriginalLayer(subtractNode->getOriginalLayers());
auto subtractConstEdge = subtractConstNode->getChildEdges()[0].lock();
Expand All @@ -473,6 +486,8 @@ void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) {
graph.RemoveEdge(multiplyConstEdge);

graph.DropNode(convertNode);
if (withSubtractConvert)
graph.DropNode(subtractConvertNode);
if (withSubtract)
graph.DropNode(subtractNode);
if (withPowerStatic)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,27 @@ ov::intel_cpu::MoveFCReshapeToWeights::MoveFCReshapeToWeights() {
MATCHER_SCOPE(MoveFCReshapeToWeights);
using namespace ov::pass::pattern;
auto weights_m = wrap_type<ov::op::v0::Constant>(consumers_count(1));
auto convert_m = wrap_type<ov::op::v0::Convert>({weights_m});
auto convert_m = wrap_type<ov::op::v0::Convert>({weights_m}, consumers_count(1));

auto one_consumer_rank_equals = [](const ov::Dimension& expected_rank) {
return [=](ov::Output<ov::Node> output) -> bool {
return consumers_count(1)(output) && rank_equals(expected_rank)(output);
};
};

auto sub_const_m = wrap_type<ov::op::v0::Constant>(consumers_count(1));
auto subtract_m = wrap_type<ov::op::v1::Subtract>({convert_m, sub_const_m});
auto subtract_wo_convert_m = wrap_type<ov::op::v1::Subtract>({convert_m, sub_const_m}, consumers_count(1));
auto sub_convert = wrap_type<ov::op::v0::Convert>({sub_const_m}, consumers_count(1));
auto subtract_w_convert_m = wrap_type<ov::op::v1::Subtract>({convert_m, sub_convert}, consumers_count(1));
auto subtract_m = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{subtract_wo_convert_m, subtract_w_convert_m});

auto mul_const_m = wrap_type<ov::op::v0::Constant>(consumers_count(1));
auto mul_with_sub_m = wrap_type<ov::op::v1::Multiply>({subtract_m, mul_const_m}, rank_equals(3));
auto mul_no_sub_m = wrap_type<ov::op::v1::Multiply>({convert_m, mul_const_m}, rank_equals(3));
auto mul_with_sub_m = wrap_type<ov::op::v1::Multiply>({subtract_m, mul_const_m}, one_consumer_rank_equals(3));
auto mul_no_sub_m = wrap_type<ov::op::v1::Multiply>({convert_m, mul_const_m}, one_consumer_rank_equals(3));
auto mul_m = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{mul_with_sub_m, mul_no_sub_m});

auto one_consumer_rank_2 = [](const ov::Output<ov::Node>& out) {
return consumers_count(1)(out) && rank_equals(2)(out);
};
auto reshape_const_m = wrap_type<ov::op::v0::Constant>(consumers_count(1));
auto reshape_m = wrap_type<ov::op::v1::Reshape>({mul_m, reshape_const_m}, one_consumer_rank_2);
auto reshape_m = wrap_type<ov::op::v1::Reshape>({mul_m, reshape_const_m}, one_consumer_rank_equals(2));

auto transpose_const_m = wrap_type<ov::op::v0::Constant>();
auto transpose_m = wrap_type<ov::op::v1::Transpose>({reshape_m, transpose_const_m});
Expand All @@ -58,21 +64,24 @@ ov::intel_cpu::MoveFCReshapeToWeights::MoveFCReshapeToWeights() {
const auto& fc_input_shape = fully_connected->get_input_shape(1);
const auto reshape = with_transpose ? weights_path->get_input_node_shared_ptr(0) : weights_path;

auto check_decompression_const = [&](const std::shared_ptr<ov::Node>& node) {
if (!ov::is_type<ov::op::v0::Constant>(node))
return false;
auto check_decompression_shape = [&](const std::shared_ptr<ov::Node>& node) {
ov::Shape expected_shape(3, 1);
const size_t out_channels_idx = with_transpose ? 2 : 1;
expected_shape[out_channels_idx] = fc_input_shape[0];
return node->get_output_shape(0) == expected_shape;
const auto& node_shape = node->get_output_shape(0);
if (node_shape.size() > expected_shape.size())
return false;

const auto comparison_start_pos = expected_shape.size() - node_shape.size();
return std::equal(node_shape.begin(), node_shape.end(), expected_shape.begin() + comparison_start_pos);
};

const auto mul = reshape->get_input_node_shared_ptr(0);
if (!check_decompression_const(mul->get_input_node_shared_ptr(1)))
if (!check_decompression_shape(mul->get_input_node_shared_ptr(1)))
return false;
const auto mul_parent = mul->get_input_node_shared_ptr(0);
const bool with_subtract = ov::is_type<ov::op::v1::Subtract>(mul_parent);
if (with_subtract && !check_decompression_const(mul_parent->get_input_node_shared_ptr(1)))
if (with_subtract && !check_decompression_shape(mul_parent->get_input_node_shared_ptr(1)))
return false;

const auto convert = with_subtract ? mul_parent->get_input_node_shared_ptr(0) : mul_parent;
Expand All @@ -83,22 +92,29 @@ ov::intel_cpu::MoveFCReshapeToWeights::MoveFCReshapeToWeights() {
if (weights->get_output_shape(0) != expected_weights_shape)
return false;

auto squeeze_constant = [](const std::shared_ptr<ov::Node>& node) {
auto squeeze_constant = [&](const std::shared_ptr<ov::Node>& node) {
const auto constant = ov::as_type_ptr<ov::op::v0::Constant>(node);
OPENVINO_ASSERT(constant, "squeeze_constant is called for non constant node");
auto shape = constant->get_shape();
shape.erase(shape.begin());
const auto new_constant = std::make_shared<ov::op::v0::Constant>(*constant, shape);
ov::replace_node(constant, new_constant);
ov::copy_runtime_info(constant, new_constant);
new_constant->set_friendly_name(constant->get_friendly_name());
if (shape.size() - fc_input_shape.size() == 1) {
shape.erase(shape.begin());
const auto new_constant = std::make_shared<ov::op::v0::Constant>(*constant, shape);
ov::replace_node(constant, new_constant);
ov::copy_runtime_info(constant, new_constant);
new_constant->set_friendly_name(constant->get_friendly_name());
}
};

// We can remove 3D->2D reshape if we manually reshape all constants in the weights subgraph
ov::replace_output_update_name(reshape->output(0), reshape->input_value(0));
squeeze_constant(mul->get_input_node_shared_ptr(1));
squeeze_constant(weights);
if (with_subtract)
squeeze_constant(mul_parent->get_input_node_shared_ptr(1));
if (with_subtract) {
auto sub_const = mul_parent->get_input_node_shared_ptr(1);
if (ov::is_type<ov::op::v0::Convert>(sub_const))
sub_const = sub_const->get_input_node_shared_ptr(0);
squeeze_constant(sub_const);
}
return true;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ namespace intel_cpu {
* This transformation is applied to the FC with compressed 3D u8 weights. It moves Reshape at the weights path to the constants
* in order to constant fold the Reshape node.
* Example:
* Weights(3D) Weights(2D)
* | |
* Convert Subtract_const(3D) Convert Subtract_const(2D)
* Weights(3D) Subtract_const(3D) Weights(2D) Subtract_const(2D)
* | / | /
* Convert Subtract_convert(opt) Convert Subtract_convert(opt)
* | / | /
* Subtract(opt) Subtract(opt)
* | Multiply_const(3D) ====> | Multiply_const(2D)
Expand Down
Loading

0 comments on commit c9fd6af

Please sign in to comment.