From e35479b53962aafa2a2ea51f1468b99c5cc038c0 Mon Sep 17 00:00:00 2001 From: bell song Date: Thu, 26 Sep 2024 21:24:05 +0800 Subject: [PATCH 1/2] Revert "[GPU] Fixed error on fused fc with fused bias when all the bias shapes are same (#26751)" This reverts commit 986f603507a78f043b25ca1798f2c579b2b3ef59. --- .../transformations/fc_horizontal_fusion.cpp | 72 +++++++++---------- .../dynamic/dynamic_fc_horizontal_fusion.cpp | 5 +- .../horizontal_fc_fusion_test.cpp | 14 ++-- 3 files changed, 42 insertions(+), 49 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp index fcb339531c1883..a0651b484e2431 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp @@ -84,7 +84,6 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { ov::NodeVector scale_nodes; ov::NodeVector bias_nodes; ov::NodeVector zp_nodes; - int32_t bias_rank = -1; for (auto user : input_node->get_users()) { auto fc_user = std::dynamic_pointer_cast(user); if (fc_user) { @@ -92,13 +91,8 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { fc_nodes.push_back(fc_user); fc_nodes_vec.push_back(fc_user); weight_nodes.push_back(fc_user->get_input_node_shared_ptr(1)); - if (!std::dynamic_pointer_cast(fc_user->get_input_node_shared_ptr(2))) { - if (bias_rank == -1) - bias_rank = static_cast(fc_user->get_input_partial_shape(2).size()); - if (bias_rank != static_cast(fc_user->get_input_partial_shape(2).size())) - return false; + if (!std::dynamic_pointer_cast(fc_user->get_input_node_shared_ptr(2))) bias_nodes.push_back(fc_user->get_input_node_shared_ptr(2)); - } scale_nodes.push_back(fc_user->get_input_node_shared_ptr(3)); if (fc_user->inputs().size() > 4) zp_nodes.push_back(fc_user->get_input_node_shared_ptr(4)); @@ -137,41 +131,39 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { auto fused_scale = std::make_shared(scales_as_output_vector, 0); fused_scale->set_friendly_name(scale_nodes[0]->get_friendly_name() + "_fused_scale"); ov::copy_runtime_info(scale_nodes, fused_scale); - // check if the FCs do not have bias inputs, but all of the fc has a bias add user, set them as bias inputs - // Currently horizontal fusing is applied only when fusing is applied for N dim - // Also, fuse biases for the last dimension too, if - // - Biases are constant - // - Rank of the bias shapes are same - // - all other dims except last dim is 1 (e.g., [1, 1, N]) + // check if all of the fc has a bias user, set it as bias input size_t n_bias_users = 0; - if (bias_nodes.empty()) { - for (auto fc : fc_nodes) { - if (fc->get_users().size() == 1 && - fc->get_users()[0]->get_type_info() == ov::opset1::Add::get_type_info_static() && - ov::is_type(fc->get_users()[0]->inputs()[1].get_source_output().get_node())) { - auto bias_input1_shape = fc->get_users()[0]->get_input_partial_shape(1).get_shape(); - if (bias_rank == -1) - bias_rank = static_cast(bias_input1_shape.size()); - if (bias_rank != static_cast(bias_input1_shape.size())) - break; - size_t ndim_size = bias_input1_shape.back(); - // allow only [1, 1, N] shape bias - if (std::accumulate(bias_input1_shape.begin(), - bias_input1_shape.end(), - static_cast(1), - std::multiplies()) != ndim_size) - break; + for (auto fc : fc_nodes) { + if (fc->get_users().size() == 1 + && fc->get_users()[0]->get_type_info() == ov::opset1::Add::get_type_info_static() + && ov::is_type(fc->get_users()[0]->inputs()[1].get_source_output().get_node())) { n_bias_users++; - } } + } - if (n_bias_users == fc_nodes.size()) { - for (size_t i = 0; i < fc_nodes.size(); ++i) { - auto orig_fc = fc_nodes[i]; - auto bias_node = orig_fc->get_users()[0]; - auto bias_const_ptr = orig_fc->get_users()[0]->get_input_node_shared_ptr(1); - bias_nodes.push_back(bias_const_ptr); + size_t bias_concat_axis = 0; + if (bias_nodes.empty() && n_bias_users == fc_nodes.size()) { + // Set Add user as bias input to FC + for (size_t i = 0; i < fc_nodes.size(); ++i) { + auto orig_fc = fc_nodes[i]; + auto bias_node = orig_fc->get_users()[0]; + auto bias_const_ptr = orig_fc->get_users()[0]->get_input_node_shared_ptr(1); + bias_nodes.push_back(bias_const_ptr); + } + // Check shape and find axis + const auto bias_rank = bias_nodes[0]->get_output_partial_shape(0).size(); + size_t non_zero_diffs = 0; + for (size_t i = 0; i < bias_rank; ++i) { + std::unordered_set dims; + for (size_t j = 0; j < bias_nodes.size(); ++j) { + dims.insert(bias_nodes[j]->get_output_partial_shape(0)[i].get_length()); + } + if (dims.size() > 1) { + bias_concat_axis = i; + non_zero_diffs++; } + } + if (non_zero_diffs <= 1) { for (size_t i = 0; i < fc_nodes.size(); ++i) { auto orig_fc = fc_nodes[i]; auto bias_node = orig_fc->get_users()[0]; @@ -195,16 +187,18 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { bias_node->clear_control_dependencies(); orig_fc->clear_control_dependencies(); } + } else { + // biases cannot be fusable. Not to set users as bias input + bias_nodes.clear(); } } - std::shared_ptr fused_bias; if (bias_nodes.size() == fc_nodes.size()) { ov::OutputVector bias_nodes_as_output_vector; for (size_t i = 0; i < bias_nodes.size(); ++i) { bias_nodes_as_output_vector.push_back(bias_nodes[i]); } - fused_bias = std::make_shared(bias_nodes_as_output_vector, bias_rank - 1); + fused_bias = std::make_shared(bias_nodes_as_output_vector, bias_concat_axis); fused_bias->set_friendly_name(bias_nodes[0]->get_friendly_name() + "_fused_bias"); ov::copy_runtime_info(bias_nodes, fused_bias); } else { diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_fc_horizontal_fusion.cpp index 7c9994b8235661..47dfb8ab3b80da 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_fc_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_fc_horizontal_fusion.cpp @@ -359,7 +359,8 @@ TEST_P(FullyConnectedHorizontalFusion, Inference) { check_results(); } -const std::vector activations_precisions = {ov::element::f32, ov::element::f16}; +// const std::vector activations_precisions = {ov::element::f32, ov::element::f16}; +const std::vector activations_precisions = {ov::element::f16}; const std::vector weights_precisions = {ov::element::u8, ov::element::u4, ov::element::i4}; const std::vector per_tensor_zp = {true, false}; const std::vector transpose_weights = {true, false}; @@ -367,13 +368,11 @@ const std::vector transpose_weights = {true, false}; std::vector weights1 = {{1, 16, 32}, {1, 16, 4}, {1, 16, 32}}; std::vector weights2 = {{16, 32}, {16, 4}, {16, 32}}; std::vector weights3 = {{28, 24}, {28, 18}, {28, 24}}; -std::vector weights4 = {{1, 16, 24}, {1, 16, 24}, {1, 16, 24}}; const std::vector input_shapes = { {{{-1, -1, -1}, {{1, 4, 16}}}, weights1}, {{{-1, -1, 16}, {{1, 4, 16}}}, weights2, 4}, {{{-1, 28}, {{16, 28}}}, weights3, 4}, - {{{-1, -1, -1}, {{1, 4, 16}}}, weights4}, }; INSTANTIATE_TEST_SUITE_P(smoke_FCHorizontalFusion_no_bias, diff --git a/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp index af7e6482002ae2..b94ba03a7565e8 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp @@ -106,9 +106,9 @@ TEST_F(TransformationTestsF, FullyConnectedHorizontalFusion_bias_zp) { auto weight3 = std::make_shared(ov::element::u4, ov::Shape{128, 4096}); weight3->set_friendly_name("weight1_3"); - auto bias1 = std::make_shared(ov::element::f16, ov::Shape{1, 1024}); - auto bias2 = std::make_shared(ov::element::f16, ov::Shape{1, 512}); - auto bias3 = std::make_shared(ov::element::f16, ov::Shape{1, 128}); + auto bias1 = std::make_shared(ov::element::f16, ov::Shape{1024, 1}); + auto bias2 = std::make_shared(ov::element::f16, ov::Shape{512, 1}); + auto bias3 = std::make_shared(ov::element::f16, ov::Shape{128, 1}); auto scale1 = std::make_shared(ov::element::f16, ov::Shape{1024, 32}); auto scale2 = std::make_shared(ov::element::f16, ov::Shape{512, 32}); @@ -137,11 +137,11 @@ TEST_F(TransformationTestsF, FullyConnectedHorizontalFusion_bias_zp) { weight3->set_friendly_name("weight2_3"); auto weights = ov::OutputVector{weight1, weight2, weight3}; auto weight_fused = std::make_shared(weights, 0); - auto bias1 = std::make_shared(ov::element::f16, ov::Shape{1, 1024}); - auto bias2 = std::make_shared(ov::element::f16, ov::Shape{1, 512}); - auto bias3 = std::make_shared(ov::element::f16, ov::Shape{1, 128}); + auto bias1 = std::make_shared(ov::element::f16, ov::Shape{1024, 1}); + auto bias2 = std::make_shared(ov::element::f16, ov::Shape{512, 1}); + auto bias3 = std::make_shared(ov::element::f16, ov::Shape{128, 1}); auto biases = ov::OutputVector{bias1, bias2, bias3}; - auto bias_fused = std::make_shared(biases, 1); + auto bias_fused = std::make_shared(biases, 0); auto scale1 = std::make_shared(ov::element::f16, ov::Shape{1024, 32}); auto scale2 = std::make_shared(ov::element::f16, ov::Shape{512, 32}); auto scale3 = std::make_shared(ov::element::f16, ov::Shape{128, 32}); From 3a867652c0f9963a59b594daa75d18c7af239a1f Mon Sep 17 00:00:00 2001 From: bell song Date: Thu, 26 Sep 2024 21:24:24 +0800 Subject: [PATCH 2/2] Revert "[GPU] Fuse more eltwises for horizontally fused FC (#26599)" This reverts commit bdc01107d989310c6efa7dd61c7fa5743c971f1d. --- .../transformations/fc_horizontal_fusion.cpp | 140 ++++-------------- .../horizontal_fc_fusion_test.cpp | 80 ---------- 2 files changed, 28 insertions(+), 192 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp index a0651b484e2431..a5ec6c0060ef81 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp @@ -13,7 +13,6 @@ #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/utils/utils.hpp" #include "intel_gpu/op/placeholder.hpp" -#include "intel_gpu/runtime/debug_configuration.hpp" namespace ov { namespace intel_gpu { @@ -41,12 +40,13 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { return std::dynamic_pointer_cast(node); }; // Three FCs connected to the same input - const int min_num_fcs_to_fuse = 3; - const int max_num_fcs_to_fuse = 3; + const int num_fcs_to_fuse = 3; const auto& fc = std::dynamic_pointer_cast(output.get_node_shared_ptr()); const auto& input = fc->get_input_node_shared_ptr(0); if (!fc->get_input_partial_shape(0).is_dynamic()) return false; + if (input->get_users().size() < num_fcs_to_fuse) + return false; size_t user_fc_count = 0; int32_t nodes_with_bias = 0; int32_t nodes_with_zp = 0; @@ -67,9 +67,8 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { } user_fc_count++; } - return (user_fc_count >= min_num_fcs_to_fuse) && (user_fc_count <= max_num_fcs_to_fuse) && - (nodes_with_bias == static_cast(user_fc_count) || nodes_with_bias == 0) && - (nodes_with_zp == static_cast(user_fc_count) || nodes_with_zp == 0); + return (user_fc_count == num_fcs_to_fuse) && (nodes_with_bias == num_fcs_to_fuse || nodes_with_bias == 0) && + (nodes_with_zp == num_fcs_to_fuse || nodes_with_zp == 0); }; auto target_fc = wrap_type(is_target_pattern); @@ -79,7 +78,6 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { auto m_fc = pattern_map.at(target_fc).get_node_shared_ptr(); auto input_node = m_fc->get_input_node_shared_ptr(0); std::vector> fc_nodes; - ov::NodeVector fc_nodes_vec; ov::NodeVector weight_nodes; ov::NodeVector scale_nodes; ov::NodeVector bias_nodes; @@ -89,7 +87,6 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { if (fc_user) { OPENVINO_ASSERT(fc_user->inputs().size() >= 4, "Compressed FC should have at least 4 inputs"); fc_nodes.push_back(fc_user); - fc_nodes_vec.push_back(fc_user); weight_nodes.push_back(fc_user->get_input_node_shared_ptr(1)); if (!std::dynamic_pointer_cast(fc_user->get_input_node_shared_ptr(2))) bias_nodes.push_back(fc_user->get_input_node_shared_ptr(2)); @@ -98,109 +95,33 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { zp_nodes.push_back(fc_user->get_input_node_shared_ptr(4)); } } - // fc weight is already transposed to [N, K] - const size_t weight_idx = 1; - if (fc_nodes[0]->get_input_shape(weight_idx).size() != 2) - return false; - const size_t n_axis = 0; - const size_t k_axis = 1; - auto weight_dtype = fc_nodes[0]->get_input_element_type(weight_idx); - auto k_size = fc_nodes[0]->get_input_shape(weight_idx)[k_axis]; + auto weight_dtype = fc_nodes[0]->get_input_element_type(1); + auto k_size = fc_nodes[0]->get_input_shape(1)[fc_nodes[0]->get_input_shape(1).size() - 1]; std::vector orig_n_sizes; // merge weights, scale, zp for (auto fc : fc_nodes) { - if (k_size != fc->get_input_shape(weight_idx)[k_axis]) + if (k_size != fc->get_input_shape(1)[fc->get_input_shape(1).size() - 1]) return false; - if (weight_dtype != fc->get_input_element_type(weight_idx)) + if (weight_dtype != fc->get_input_element_type(1)) return false; - orig_n_sizes.push_back(fc->get_input_shape(weight_idx)[n_axis]); - } - ov::OutputVector weight_nodes_as_output_vector; - for (size_t i = 0; i < weight_nodes.size(); ++i) { - weight_nodes_as_output_vector.push_back(weight_nodes[i]); + orig_n_sizes.push_back(fc->get_input_shape(1)[fc->get_input_shape(1).size() - 2]); } + auto weight_nodes_as_output_vector = ov::OutputVector{weight_nodes[0], weight_nodes[1], weight_nodes[2]}; auto fused_weight = std::make_shared(weight_nodes_as_output_vector, 0); - fused_weight->set_friendly_name(weight_nodes[0]->get_friendly_name() + "_fused_weight"); - ov::copy_runtime_info(weight_nodes, fused_weight); - - ov::OutputVector scales_as_output_vector; - for (size_t i = 0; i < scale_nodes.size(); ++i) { - scales_as_output_vector.push_back(scale_nodes[i]); - } + fused_weight->set_friendly_name(weight_nodes[0]->get_friendly_name() + "_fused"); + ov::copy_runtime_info({weight_nodes[0], weight_nodes[1], weight_nodes[2]}, fused_weight); - auto fused_scale = std::make_shared(scales_as_output_vector, 0); - fused_scale->set_friendly_name(scale_nodes[0]->get_friendly_name() + "_fused_scale"); - ov::copy_runtime_info(scale_nodes, fused_scale); - // check if all of the fc has a bias user, set it as bias input - size_t n_bias_users = 0; - for (auto fc : fc_nodes) { - if (fc->get_users().size() == 1 - && fc->get_users()[0]->get_type_info() == ov::opset1::Add::get_type_info_static() - && ov::is_type(fc->get_users()[0]->inputs()[1].get_source_output().get_node())) { - n_bias_users++; - } - } + auto scale_nodes_as_output_vector = ov::OutputVector{scale_nodes[0], scale_nodes[1], scale_nodes[2]}; + auto fused_scale = std::make_shared(scale_nodes_as_output_vector, 0); + fused_scale->set_friendly_name(scale_nodes[0]->get_friendly_name() + "_fused"); + ov::copy_runtime_info({scale_nodes[0], scale_nodes[1], scale_nodes[2]}, fused_scale); - size_t bias_concat_axis = 0; - if (bias_nodes.empty() && n_bias_users == fc_nodes.size()) { - // Set Add user as bias input to FC - for (size_t i = 0; i < fc_nodes.size(); ++i) { - auto orig_fc = fc_nodes[i]; - auto bias_node = orig_fc->get_users()[0]; - auto bias_const_ptr = orig_fc->get_users()[0]->get_input_node_shared_ptr(1); - bias_nodes.push_back(bias_const_ptr); - } - // Check shape and find axis - const auto bias_rank = bias_nodes[0]->get_output_partial_shape(0).size(); - size_t non_zero_diffs = 0; - for (size_t i = 0; i < bias_rank; ++i) { - std::unordered_set dims; - for (size_t j = 0; j < bias_nodes.size(); ++j) { - dims.insert(bias_nodes[j]->get_output_partial_shape(0)[i].get_length()); - } - if (dims.size() > 1) { - bias_concat_axis = i; - non_zero_diffs++; - } - } - if (non_zero_diffs <= 1) { - for (size_t i = 0; i < fc_nodes.size(); ++i) { - auto orig_fc = fc_nodes[i]; - auto bias_node = orig_fc->get_users()[0]; - GPU_DEBUG_TRACE_DETAIL << "Set Add op user " << bias_node->get_friendly_name() << " as the FC " - << orig_fc->get_friendly_name() << "'s bias input" << std::endl; - auto bias_const = orig_fc->get_users()[0]->input_value(1); - auto orig_users_of_bias_user = bias_node->get_users(); - ov::OutputVector fc_inputs = orig_fc->input_values(); - fc_inputs[2] = bias_const; - auto new_fc = orig_fc->clone_with_new_inputs(fc_inputs); - new_fc->set_friendly_name(orig_fc->get_friendly_name() + "_with_bias"); - ov::copy_runtime_info(orig_fc, new_fc); - for (auto u : orig_users_of_bias_user) { - for (size_t idx = 0; idx < u->inputs().size(); ++idx) { - if (u->get_input_node_shared_ptr(idx) == bias_node) { - u->input(idx).replace_source_output(new_fc->output(0)); - } - } - } - fc_nodes[i] = std::dynamic_pointer_cast(new_fc); - bias_node->clear_control_dependencies(); - orig_fc->clear_control_dependencies(); - } - } else { - // biases cannot be fusable. Not to set users as bias input - bias_nodes.clear(); - } - } std::shared_ptr fused_bias; - if (bias_nodes.size() == fc_nodes.size()) { - ov::OutputVector bias_nodes_as_output_vector; - for (size_t i = 0; i < bias_nodes.size(); ++i) { - bias_nodes_as_output_vector.push_back(bias_nodes[i]); - } - fused_bias = std::make_shared(bias_nodes_as_output_vector, bias_concat_axis); - fused_bias->set_friendly_name(bias_nodes[0]->get_friendly_name() + "_fused_bias"); - ov::copy_runtime_info(bias_nodes, fused_bias); + if (bias_nodes.size() == 3) { + auto bias_nodes_as_output_vector = ov::OutputVector{bias_nodes[0], bias_nodes[1], bias_nodes[2]}; + fused_bias = std::make_shared(bias_nodes_as_output_vector, 0); + fused_bias->set_friendly_name(bias_nodes[0]->get_friendly_name() + "_fused"); + ov::copy_runtime_info({bias_nodes[0], bias_nodes[1], bias_nodes[2]}, fused_bias); } else { fused_bias = std::make_shared(); } @@ -240,12 +161,9 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { return false; } } else { - ov::OutputVector zp_nodes_as_output_vector; - for (size_t i = 0; i < zp_nodes.size(); ++i) { - zp_nodes_as_output_vector.push_back(zp_nodes[i]); - } + auto zp_nodes_as_output_vector = ov::OutputVector{zp_nodes[0], zp_nodes[1], zp_nodes[2]}; fused_zps = std::make_shared(zp_nodes_as_output_vector, 0); - fused_zps->set_friendly_name(zp_nodes[0]->get_friendly_name() + "_fused_zps"); + fused_zps->set_friendly_name(zp_nodes[0]->get_friendly_name() + "_fused"); } } // Create new fc with merged weights, bias, scale, zp @@ -264,17 +182,16 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { fused_scale, fc_nodes[0]->get_output_type()); - auto new_fc_name = fc_nodes[0]->get_friendly_name() + "_fused_" + std::to_string(fc_nodes.size()) + "FCs"; + auto new_fc_name = fc_nodes[0]->get_friendly_name() + "_fused"; new_fc->set_friendly_name(new_fc_name); - copy_runtime_info(fc_nodes_vec, new_fc); + copy_runtime_info({fc_nodes[0], fc_nodes[1], fc_nodes[2]}, new_fc); // Split output and connect to the orig users auto split_name = fc_nodes[0]->get_friendly_name() + "_split"; auto axis_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {new_fc->get_output_partial_shape(0).size() - 1}); - auto split_size = fc_nodes.size(); - auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{split_size}, orig_n_sizes); + auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, orig_n_sizes); auto output_split = std::make_shared(new_fc, axis_const, split_const); - copy_runtime_info(fc_nodes_vec, output_split); + copy_runtime_info({fc_nodes[0], fc_nodes[1], fc_nodes[2]}, output_split); output_split->set_friendly_name(split_name); for (size_t i = 0; i < fc_nodes.size(); ++i) { auto org_fc = fc_nodes[i]; @@ -287,7 +204,6 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { } org_fc->clear_control_dependencies(); } - GPU_DEBUG_TRACE_DETAIL << "Created a new fused FC " << new_fc_name << std::endl; return true; }; diff --git a/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp index b94ba03a7565e8..fababa0c20df38 100644 --- a/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp @@ -17,7 +17,6 @@ #include "openvino/op/concat.hpp" #include "openvino/op/variadic_split.hpp" #include "openvino/op/reshape.hpp" -#include "openvino/op/add.hpp" #include "openvino/pass/manager.hpp" #include @@ -163,85 +162,6 @@ TEST_F(TransformationTestsF, FullyConnectedHorizontalFusion_bias_zp) { comparator.enable(FunctionsComparator::ATTRIBUTES); } } - -TEST_F(TransformationTestsF, FullyConnectedHorizontalFusion_eltwise_bias_zp) { - std::vector pattern = {7, -1}; - { - auto input = std::make_shared(ov::element::f16, ov::PartialShape{-1, 7, 4096}); - auto weight1 = std::make_shared(ov::element::u4, ov::Shape{1024, 4096}); - weight1->set_friendly_name("weight1_1"); - auto weight2 = std::make_shared(ov::element::u4, ov::Shape{512, 4096}); - weight2->set_friendly_name("weight1_2"); - auto weight3 = std::make_shared(ov::element::u4, ov::Shape{128, 4096}); - weight3->set_friendly_name("weight1_3"); - - auto bias1 = std::make_shared(); - auto bias2 = std::make_shared(); - auto bias3 = std::make_shared(); - - auto scale1 = std::make_shared(ov::element::f16, ov::Shape{1024, 32}); - auto scale2 = std::make_shared(ov::element::f16, ov::Shape{512, 32}); - auto scale3 = std::make_shared(ov::element::f16, ov::Shape{128, 32}); - auto fc1 = std::make_shared(input, weight1, bias1, scale1); - fc1->set_friendly_name("fc1"); - auto fc2 = std::make_shared(input, weight2, bias2, scale2); - auto fc3 = std::make_shared(input, weight3, bias3, scale3); - - auto add_input1 = std::make_shared(ov::element::f16, ov::Shape{1, 1024}); - auto add1 = std::make_shared(fc1, add_input1); - - auto add_input2 = std::make_shared(ov::element::f16, ov::Shape{1, 512}); - auto add2 = std::make_shared(fc2, add_input2); - - auto add_input3 = std::make_shared(ov::element::f16, ov::Shape{1, 128}); - auto add3 = std::make_shared(fc3, add_input3); - - auto reshape_pattern = std::make_shared(ov::element::i64, ov::Shape{2}, pattern); - auto reshape1 = std::make_shared(add1, reshape_pattern, true); - auto reshape2 = std::make_shared(add2, reshape_pattern, true); - auto reshape3 = std::make_shared(add3, reshape_pattern, true); - auto result1 = std::make_shared(reshape1); - auto result2 = std::make_shared(reshape2); - auto result3 = std::make_shared(reshape3); - model = std::make_shared(ov::ResultVector{result1, result2, result3}, ov::ParameterVector{input}); - manager.register_pass(); - } - { - auto input = std::make_shared(ov::element::f16, ov::PartialShape{-1, 7, 4096}); - auto weight1 = std::make_shared(ov::element::u4, ov::Shape{1024, 4096}); - weight1->set_friendly_name("weight2_1"); - auto weight2 = std::make_shared(ov::element::u4, ov::Shape{512, 4096}); - weight2->set_friendly_name("weight2_2"); - auto weight3 = std::make_shared(ov::element::u4, ov::Shape{128, 4096}); - weight3->set_friendly_name("weight2_3"); - auto weights = ov::OutputVector{weight1, weight2, weight3}; - auto weight_fused = std::make_shared(weights, 0); - auto bias1 = std::make_shared(ov::element::f16, ov::Shape{1, 1024}); - auto bias2 = std::make_shared(ov::element::f16, ov::Shape{1, 512}); - auto bias3 = std::make_shared(ov::element::f16, ov::Shape{1, 128}); - auto biases = ov::OutputVector{bias1, bias2, bias3}; - auto bias_fused = std::make_shared(biases, 1); - auto scale1 = std::make_shared(ov::element::f16, ov::Shape{1024, 32}); - auto scale2 = std::make_shared(ov::element::f16, ov::Shape{512, 32}); - auto scale3 = std::make_shared(ov::element::f16, ov::Shape{128, 32}); - auto scales = ov::OutputVector{scale1, scale2, scale3}; - auto scale_fused = std::make_shared(scales, 0); - auto fc_fused = std::make_shared(input, weight_fused, bias_fused, scale_fused); - auto axis_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {fc_fused->get_output_partial_shape(0).size() - 1}); - std::vector orig_n_sizes = {1024, 512, 128}; - auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, orig_n_sizes); - auto split = std::make_shared(fc_fused, axis_const, split_const); - auto reshape_pattern = std::make_shared(ov::element::i64, ov::Shape{2}, pattern); - auto reshape1 = std::make_shared(split->output(0), reshape_pattern, true); - auto reshape2 = std::make_shared(split->output(1), reshape_pattern, true); - auto reshape3 = std::make_shared(split->output(2), reshape_pattern, true); - auto result1 = std::make_shared(reshape1); - auto result2 = std::make_shared(reshape2); - auto result3 = std::make_shared(reshape3); - model_ref = std::make_shared(ov::ResultVector{result1, result2, result3}, ov::ParameterVector{input}); - comparator.enable(FunctionsComparator::ATTRIBUTES); - } -} } // namespace intel_gpu } // namespace test } // namespace ov \ No newline at end of file