From e35479b53962aafa2a2ea51f1468b99c5cc038c0 Mon Sep 17 00:00:00 2001
From: bell song <bell.song@intel.com>
Date: Thu, 26 Sep 2024 21:24:05 +0800
Subject: [PATCH 1/2] Revert "[GPU] Fixed error on fused fc with fused bias
 when all the bias shapes are same (#26751)"

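This reverts commit 986f603507a78f043b25ca1798f2c579b2b3ef59.

The FC horizontal fusion pass no longer tracks a common bias rank and no
longer concatenates the fused biases along their last dimension. Instead,
when constant Add users are taken as bias inputs, the concat axis is the
one dimension in which their shapes differ; in all other cases axis 0 is
used. The tests follow: the unit test goes back to [N, 1] biases
concatenated on axis 0, and the dynamic functional test drops the f32
activation precision and the case where all weights have the same shape.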
---
 .../transformations/fc_horizontal_fusion.cpp  | 72 +++++++++----------
 .../dynamic/dynamic_fc_horizontal_fusion.cpp  |  5 +-
 .../horizontal_fc_fusion_test.cpp             | 14 ++--
 3 files changed, 42 insertions(+), 49 deletions(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp
index fcb339531c1883..a0651b484e2431 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp
@@ -84,7 +84,6 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
         ov::NodeVector scale_nodes;
         ov::NodeVector bias_nodes;
         ov::NodeVector zp_nodes;
-        int32_t bias_rank = -1;
         for (auto user : input_node->get_users()) {
             auto fc_user = std::dynamic_pointer_cast<op::FullyConnectedCompressed>(user);
             if (fc_user) {
@@ -92,13 +91,8 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
                 fc_nodes.push_back(fc_user);
                 fc_nodes_vec.push_back(fc_user);
                 weight_nodes.push_back(fc_user->get_input_node_shared_ptr(1));
-                if (!std::dynamic_pointer_cast<op::Placeholder>(fc_user->get_input_node_shared_ptr(2))) {
-                    if (bias_rank == -1)
-                        bias_rank = static_cast<int32_t>(fc_user->get_input_partial_shape(2).size());
-                    if (bias_rank != static_cast<int32_t>(fc_user->get_input_partial_shape(2).size()))
-                        return false;
+                if (!std::dynamic_pointer_cast<op::Placeholder>(fc_user->get_input_node_shared_ptr(2)))
                     bias_nodes.push_back(fc_user->get_input_node_shared_ptr(2));
-                }
                 scale_nodes.push_back(fc_user->get_input_node_shared_ptr(3));
                 if (fc_user->inputs().size() > 4)
                     zp_nodes.push_back(fc_user->get_input_node_shared_ptr(4));
@@ -137,41 +131,39 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
         auto fused_scale = std::make_shared<ov::op::v0::Concat>(scales_as_output_vector, 0);
         fused_scale->set_friendly_name(scale_nodes[0]->get_friendly_name() + "_fused_scale");
         ov::copy_runtime_info(scale_nodes, fused_scale);
-        // check if the FCs do not have bias inputs, but all of the fc has a bias add user, set them as bias inputs
-        // Currently horizontal fusing is applied only when fusing is applied for N dim
-        // Also, fuse biases for the last dimension too, if
-        // - Biases are constant
-        // - Rank of the bias shapes are same
-        // - all other dims except last dim is 1 (e.g., [1, 1, N])
+        // check if all of the fc has a bias user, set it as bias input
         size_t n_bias_users = 0;
-        if (bias_nodes.empty()) {
-            for (auto fc : fc_nodes) {
-                if (fc->get_users().size() == 1 &&
-                    fc->get_users()[0]->get_type_info() == ov::opset1::Add::get_type_info_static() &&
-                    ov::is_type<ov::op::v0::Constant>(fc->get_users()[0]->inputs()[1].get_source_output().get_node())) {
-                    auto bias_input1_shape = fc->get_users()[0]->get_input_partial_shape(1).get_shape();
-                    if (bias_rank == -1)
-                        bias_rank = static_cast<int32_t>(bias_input1_shape.size());
-                    if (bias_rank != static_cast<int32_t>(bias_input1_shape.size()))
-                        break;
-                    size_t ndim_size = bias_input1_shape.back();
-                    // allow only [1, 1, N] shape bias
-                    if (std::accumulate(bias_input1_shape.begin(),
-                                        bias_input1_shape.end(),
-                                        static_cast<size_t>(1),
-                                        std::multiplies<size_t>()) != ndim_size)
-                        break;
+        for (auto fc : fc_nodes) {
+            if (fc->get_users().size() == 1
+                && fc->get_users()[0]->get_type_info() == ov::opset1::Add::get_type_info_static()
+                && ov::is_type<ov::op::v0::Constant>(fc->get_users()[0]->inputs()[1].get_source_output().get_node())) {
                     n_bias_users++;
-                }
             }
+        }
 
-            if (n_bias_users == fc_nodes.size()) {
-                for (size_t i = 0; i < fc_nodes.size(); ++i) {
-                    auto orig_fc = fc_nodes[i];
-                    auto bias_node = orig_fc->get_users()[0];
-                    auto bias_const_ptr = orig_fc->get_users()[0]->get_input_node_shared_ptr(1);
-                    bias_nodes.push_back(bias_const_ptr);
+        size_t bias_concat_axis = 0;
+        if (bias_nodes.empty() && n_bias_users == fc_nodes.size()) {
+            // Set Add user as bias input to FC
+            for (size_t i = 0; i < fc_nodes.size(); ++i) {
+                auto orig_fc = fc_nodes[i];
+                auto bias_node = orig_fc->get_users()[0];
+                auto bias_const_ptr = orig_fc->get_users()[0]->get_input_node_shared_ptr(1);
+                bias_nodes.push_back(bias_const_ptr);
+            }
+            // Check shape and find axis
+            const auto bias_rank = bias_nodes[0]->get_output_partial_shape(0).size();
+            size_t non_zero_diffs = 0;
+            for (size_t i = 0; i < bias_rank; ++i) {
+                std::unordered_set<size_t> dims;
+                for (size_t j = 0; j < bias_nodes.size(); ++j) {
+                    dims.insert(bias_nodes[j]->get_output_partial_shape(0)[i].get_length());
+                }
+                if (dims.size() > 1) {
+                    bias_concat_axis = i;
+                    non_zero_diffs++;
                 }
+            }
+            if (non_zero_diffs <= 1) {
                 for (size_t i = 0; i < fc_nodes.size(); ++i) {
                     auto orig_fc = fc_nodes[i];
                     auto bias_node = orig_fc->get_users()[0];
@@ -195,16 +187,18 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
                     bias_node->clear_control_dependencies();
                     orig_fc->clear_control_dependencies();
                 }
+            } else {
+                // biases cannot be fusable. Not to set users as bias input
+                bias_nodes.clear();
             }
         }
-
         std::shared_ptr<ov::Node> fused_bias;
         if (bias_nodes.size() == fc_nodes.size()) {
             ov::OutputVector bias_nodes_as_output_vector;
             for (size_t i = 0; i < bias_nodes.size(); ++i) {
                 bias_nodes_as_output_vector.push_back(bias_nodes[i]);
             }
-            fused_bias = std::make_shared<ov::op::v0::Concat>(bias_nodes_as_output_vector, bias_rank - 1);
+            fused_bias = std::make_shared<ov::op::v0::Concat>(bias_nodes_as_output_vector, bias_concat_axis);
             fused_bias->set_friendly_name(bias_nodes[0]->get_friendly_name() + "_fused_bias");
             ov::copy_runtime_info(bias_nodes, fused_bias);
         } else {
diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_fc_horizontal_fusion.cpp
index 7c9994b8235661..47dfb8ab3b80da 100644
--- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_fc_horizontal_fusion.cpp
+++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/dynamic_fc_horizontal_fusion.cpp
@@ -359,7 +359,8 @@ TEST_P(FullyConnectedHorizontalFusion, Inference) {
     check_results();
 }
 
-const std::vector<ov::element::Type> activations_precisions = {ov::element::f32, ov::element::f16};
+// const std::vector<ov::element::Type> activations_precisions = {ov::element::f32, ov::element::f16};
+const std::vector<ov::element::Type> activations_precisions = {ov::element::f16};
 const std::vector<ov::element::Type> weights_precisions = {ov::element::u8, ov::element::u4, ov::element::i4};
 const std::vector<bool> per_tensor_zp = {true, false};
 const std::vector<bool> transpose_weights = {true, false};
@@ -367,13 +368,11 @@ const std::vector<bool> transpose_weights = {true, false};
 std::vector<ov::Shape> weights1 = {{1, 16, 32}, {1, 16, 4}, {1, 16, 32}};
 std::vector<ov::Shape> weights2 = {{16, 32}, {16, 4}, {16, 32}};
 std::vector<ov::Shape> weights3 = {{28, 24}, {28, 18}, {28, 24}};
-std::vector<ov::Shape> weights4 = {{1, 16, 24}, {1, 16, 24}, {1, 16, 24}};
 
 const std::vector<ShapeParams> input_shapes = {
     {{{-1, -1, -1}, {{1, 4, 16}}}, weights1},
     {{{-1, -1, 16}, {{1, 4, 16}}}, weights2, 4},
     {{{-1, 28}, {{16, 28}}}, weights3, 4},
-    {{{-1, -1, -1}, {{1, 4, 16}}}, weights4},
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_FCHorizontalFusion_no_bias,
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp
index af7e6482002ae2..b94ba03a7565e8 100644
--- a/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp
@@ -106,9 +106,9 @@ TEST_F(TransformationTestsF, FullyConnectedHorizontalFusion_bias_zp) {
         auto weight3 = std::make_shared<ov::op::v0::Constant>(ov::element::u4, ov::Shape{128, 4096});
         weight3->set_friendly_name("weight1_3");
 
-        auto bias1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 1024});
-        auto bias2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 512});
-        auto bias3 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 128});
+        auto bias1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1024, 1});
+        auto bias2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{512, 1});
+        auto bias3 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{128, 1});
  
         auto scale1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1024, 32});
         auto scale2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{512, 32});
@@ -137,11 +137,11 @@ TEST_F(TransformationTestsF, FullyConnectedHorizontalFusion_bias_zp) {
         weight3->set_friendly_name("weight2_3");
         auto weights = ov::OutputVector{weight1, weight2, weight3};
         auto weight_fused = std::make_shared<ov::op::v0::Concat>(weights, 0);
-        auto bias1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 1024});
-        auto bias2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 512});
-        auto bias3 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 128});
+        auto bias1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1024, 1});
+        auto bias2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{512, 1});
+        auto bias3 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{128, 1});
         auto biases = ov::OutputVector{bias1, bias2, bias3};
-        auto bias_fused = std::make_shared<ov::op::v0::Concat>(biases, 1);
+        auto bias_fused = std::make_shared<ov::op::v0::Concat>(biases, 0);
         auto scale1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1024, 32});
         auto scale2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{512, 32});
         auto scale3 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{128, 32});

From 3a867652c0f9963a59b594daa75d18c7af239a1f Mon Sep 17 00:00:00 2001
From: bell song <bell.song@intel.com>
Date: Thu, 26 Sep 2024 21:24:24 +0800
Subject: [PATCH 2/2] Revert "[GPU] Fuse more eltwises for horizontally fused
 FC (#26599)"

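This reverts commit bdc01107d989310c6efa7dd61c7fa5743c971f1d.

The FC horizontal fusion pass goes back to a single hard-coded
num_fcs_to_fuse = 3 with fixed three-element concat inputs, and it no
longer turns constant Add users of the FCs into bias inputs. The
FullyConnectedHorizontalFusion_eltwise_bias_zp unit test is removed
together with the GPU_DEBUG_TRACE_DETAIL output and its include.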
---
 .../transformations/fc_horizontal_fusion.cpp  | 140 ++++--------------
 .../horizontal_fc_fusion_test.cpp             |  80 ----------
 2 files changed, 28 insertions(+), 192 deletions(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp
index a0651b484e2431..a5ec6c0060ef81 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp
@@ -13,7 +13,6 @@
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "transformations/utils/utils.hpp"
 #include "intel_gpu/op/placeholder.hpp"
-#include "intel_gpu/runtime/debug_configuration.hpp"
 
 namespace ov {
 namespace intel_gpu {
@@ -41,12 +40,13 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
             return std::dynamic_pointer_cast<op::Placeholder>(node);
         };
         // Three FCs connected to the same input
-        const int min_num_fcs_to_fuse = 3;
-        const int max_num_fcs_to_fuse = 3;
+        const int num_fcs_to_fuse = 3;
         const auto& fc = std::dynamic_pointer_cast<op::FullyConnectedCompressed>(output.get_node_shared_ptr());
         const auto& input = fc->get_input_node_shared_ptr(0);
         if (!fc->get_input_partial_shape(0).is_dynamic())
             return false;
+        if (input->get_users().size() < num_fcs_to_fuse)
+            return false;
         size_t user_fc_count = 0;
         int32_t nodes_with_bias = 0;
         int32_t nodes_with_zp = 0;
@@ -67,9 +67,8 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
             }
             user_fc_count++;
         }
-        return (user_fc_count >= min_num_fcs_to_fuse) && (user_fc_count <= max_num_fcs_to_fuse) &&
-               (nodes_with_bias == static_cast<int32_t>(user_fc_count) || nodes_with_bias == 0) &&
-               (nodes_with_zp == static_cast<int32_t>(user_fc_count) || nodes_with_zp == 0);
+        return (user_fc_count == num_fcs_to_fuse) && (nodes_with_bias == num_fcs_to_fuse || nodes_with_bias == 0) &&
+               (nodes_with_zp == num_fcs_to_fuse || nodes_with_zp == 0);
     };
 
     auto target_fc = wrap_type<op::FullyConnectedCompressed>(is_target_pattern);
@@ -79,7 +78,6 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
         auto m_fc = pattern_map.at(target_fc).get_node_shared_ptr();
         auto input_node = m_fc->get_input_node_shared_ptr(0);
         std::vector<std::shared_ptr<op::FullyConnectedCompressed>> fc_nodes;
-        ov::NodeVector fc_nodes_vec;
         ov::NodeVector weight_nodes;
         ov::NodeVector scale_nodes;
         ov::NodeVector bias_nodes;
@@ -89,7 +87,6 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
             if (fc_user) {
                 OPENVINO_ASSERT(fc_user->inputs().size() >= 4, "Compressed FC should have at least 4 inputs");
                 fc_nodes.push_back(fc_user);
-                fc_nodes_vec.push_back(fc_user);
                 weight_nodes.push_back(fc_user->get_input_node_shared_ptr(1));
                 if (!std::dynamic_pointer_cast<op::Placeholder>(fc_user->get_input_node_shared_ptr(2)))
                     bias_nodes.push_back(fc_user->get_input_node_shared_ptr(2));
@@ -98,109 +95,33 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
                     zp_nodes.push_back(fc_user->get_input_node_shared_ptr(4));
             }
         }
-        // fc weight is already transposed to [N, K]
-        const size_t weight_idx = 1;
-        if (fc_nodes[0]->get_input_shape(weight_idx).size() != 2)
-            return false;
-        const size_t n_axis = 0;
-        const size_t k_axis = 1;
-        auto weight_dtype = fc_nodes[0]->get_input_element_type(weight_idx);
-        auto k_size = fc_nodes[0]->get_input_shape(weight_idx)[k_axis];
+        auto weight_dtype = fc_nodes[0]->get_input_element_type(1);
+        auto k_size = fc_nodes[0]->get_input_shape(1)[fc_nodes[0]->get_input_shape(1).size() - 1];
         std::vector<int64_t> orig_n_sizes;
         // merge weights, scale, zp
         for (auto fc : fc_nodes) {
-            if (k_size != fc->get_input_shape(weight_idx)[k_axis])
+            if (k_size != fc->get_input_shape(1)[fc->get_input_shape(1).size() - 1])
                 return false;
-            if (weight_dtype != fc->get_input_element_type(weight_idx))
+            if (weight_dtype != fc->get_input_element_type(1))
                 return false;
-            orig_n_sizes.push_back(fc->get_input_shape(weight_idx)[n_axis]);
-        }
-        ov::OutputVector weight_nodes_as_output_vector;
-        for (size_t i = 0; i < weight_nodes.size(); ++i) {
-            weight_nodes_as_output_vector.push_back(weight_nodes[i]);
+            orig_n_sizes.push_back(fc->get_input_shape(1)[fc->get_input_shape(1).size() - 2]);
         }
+        auto weight_nodes_as_output_vector = ov::OutputVector{weight_nodes[0], weight_nodes[1], weight_nodes[2]};
         auto fused_weight = std::make_shared<ov::op::v0::Concat>(weight_nodes_as_output_vector, 0);
-        fused_weight->set_friendly_name(weight_nodes[0]->get_friendly_name() + "_fused_weight");
-        ov::copy_runtime_info(weight_nodes, fused_weight);
-
-        ov::OutputVector scales_as_output_vector;
-        for (size_t i = 0; i < scale_nodes.size(); ++i) {
-            scales_as_output_vector.push_back(scale_nodes[i]);
-        }
+        fused_weight->set_friendly_name(weight_nodes[0]->get_friendly_name() + "_fused");
+        ov::copy_runtime_info({weight_nodes[0], weight_nodes[1], weight_nodes[2]}, fused_weight);
 
-        auto fused_scale = std::make_shared<ov::op::v0::Concat>(scales_as_output_vector, 0);
-        fused_scale->set_friendly_name(scale_nodes[0]->get_friendly_name() + "_fused_scale");
-        ov::copy_runtime_info(scale_nodes, fused_scale);
-        // check if all of the fc has a bias user, set it as bias input
-        size_t n_bias_users = 0;
-        for (auto fc : fc_nodes) {
-            if (fc->get_users().size() == 1
-                && fc->get_users()[0]->get_type_info() == ov::opset1::Add::get_type_info_static()
-                && ov::is_type<ov::op::v0::Constant>(fc->get_users()[0]->inputs()[1].get_source_output().get_node())) {
-                    n_bias_users++;
-            }
-        }
+        auto scale_nodes_as_output_vector = ov::OutputVector{scale_nodes[0], scale_nodes[1], scale_nodes[2]};
+        auto fused_scale = std::make_shared<ov::op::v0::Concat>(scale_nodes_as_output_vector, 0);
+        fused_scale->set_friendly_name(scale_nodes[0]->get_friendly_name() + "_fused");
+        ov::copy_runtime_info({scale_nodes[0], scale_nodes[1], scale_nodes[2]}, fused_scale);
 
-        size_t bias_concat_axis = 0;
-        if (bias_nodes.empty() && n_bias_users == fc_nodes.size()) {
-            // Set Add user as bias input to FC
-            for (size_t i = 0; i < fc_nodes.size(); ++i) {
-                auto orig_fc = fc_nodes[i];
-                auto bias_node = orig_fc->get_users()[0];
-                auto bias_const_ptr = orig_fc->get_users()[0]->get_input_node_shared_ptr(1);
-                bias_nodes.push_back(bias_const_ptr);
-            }
-            // Check shape and find axis
-            const auto bias_rank = bias_nodes[0]->get_output_partial_shape(0).size();
-            size_t non_zero_diffs = 0;
-            for (size_t i = 0; i < bias_rank; ++i) {
-                std::unordered_set<size_t> dims;
-                for (size_t j = 0; j < bias_nodes.size(); ++j) {
-                    dims.insert(bias_nodes[j]->get_output_partial_shape(0)[i].get_length());
-                }
-                if (dims.size() > 1) {
-                    bias_concat_axis = i;
-                    non_zero_diffs++;
-                }
-            }
-            if (non_zero_diffs <= 1) {
-                for (size_t i = 0; i < fc_nodes.size(); ++i) {
-                    auto orig_fc = fc_nodes[i];
-                    auto bias_node = orig_fc->get_users()[0];
-                    GPU_DEBUG_TRACE_DETAIL << "Set Add op user " << bias_node->get_friendly_name() << " as the FC "
-                                           << orig_fc->get_friendly_name() << "'s bias input" << std::endl;
-                    auto bias_const = orig_fc->get_users()[0]->input_value(1);
-                    auto orig_users_of_bias_user = bias_node->get_users();
-                    ov::OutputVector fc_inputs = orig_fc->input_values();
-                    fc_inputs[2] = bias_const;
-                    auto new_fc = orig_fc->clone_with_new_inputs(fc_inputs);
-                    new_fc->set_friendly_name(orig_fc->get_friendly_name() + "_with_bias");
-                    ov::copy_runtime_info(orig_fc, new_fc);
-                    for (auto u : orig_users_of_bias_user) {
-                        for (size_t idx = 0; idx < u->inputs().size(); ++idx) {
-                            if (u->get_input_node_shared_ptr(idx) == bias_node) {
-                                u->input(idx).replace_source_output(new_fc->output(0));
-                            }
-                        }
-                    }
-                    fc_nodes[i] = std::dynamic_pointer_cast<op::FullyConnectedCompressed>(new_fc);
-                    bias_node->clear_control_dependencies();
-                    orig_fc->clear_control_dependencies();
-                }
-            } else {
-                // biases cannot be fusable. Not to set users as bias input
-                bias_nodes.clear();
-            }
-        }
         std::shared_ptr<ov::Node> fused_bias;
-        if (bias_nodes.size() == fc_nodes.size()) {
-            ov::OutputVector bias_nodes_as_output_vector;
-            for (size_t i = 0; i < bias_nodes.size(); ++i) {
-                bias_nodes_as_output_vector.push_back(bias_nodes[i]);
-            }
-            fused_bias = std::make_shared<ov::op::v0::Concat>(bias_nodes_as_output_vector, bias_concat_axis);
-            fused_bias->set_friendly_name(bias_nodes[0]->get_friendly_name() + "_fused_bias");
-            ov::copy_runtime_info(bias_nodes, fused_bias);
+        if (bias_nodes.size() == 3) {
+            auto bias_nodes_as_output_vector = ov::OutputVector{bias_nodes[0], bias_nodes[1], bias_nodes[2]};
+            fused_bias = std::make_shared<ov::op::v0::Concat>(bias_nodes_as_output_vector, 0);
+            fused_bias->set_friendly_name(bias_nodes[0]->get_friendly_name() + "_fused");
+            ov::copy_runtime_info({bias_nodes[0], bias_nodes[1], bias_nodes[2]}, fused_bias);
         } else {
             fused_bias = std::make_shared<op::Placeholder>();
         }
@@ -240,12 +161,9 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
                         return false;
                 }
             } else {
-                ov::OutputVector zp_nodes_as_output_vector;
-                for (size_t i = 0; i < zp_nodes.size(); ++i) {
-                    zp_nodes_as_output_vector.push_back(zp_nodes[i]);
-                }
+                auto zp_nodes_as_output_vector = ov::OutputVector{zp_nodes[0], zp_nodes[1], zp_nodes[2]};
                 fused_zps = std::make_shared<ov::op::v0::Concat>(zp_nodes_as_output_vector, 0);
-                fused_zps->set_friendly_name(zp_nodes[0]->get_friendly_name() + "_fused_zps");
+                fused_zps->set_friendly_name(zp_nodes[0]->get_friendly_name() + "_fused");
             }
         }
         // Create new fc with merged weights, bias, scale, zp
@@ -264,17 +182,16 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
                                                                     fused_scale,
                                                                     fc_nodes[0]->get_output_type());
 
-        auto new_fc_name = fc_nodes[0]->get_friendly_name() + "_fused_" + std::to_string(fc_nodes.size()) + "FCs";
+        auto new_fc_name = fc_nodes[0]->get_friendly_name() + "_fused";
         new_fc->set_friendly_name(new_fc_name);
-        copy_runtime_info(fc_nodes_vec, new_fc);
+        copy_runtime_info({fc_nodes[0], fc_nodes[1], fc_nodes[2]}, new_fc);
 
         // Split output and connect to the orig users
         auto split_name = fc_nodes[0]->get_friendly_name() + "_split";
         auto axis_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {new_fc->get_output_partial_shape(0).size() - 1});
-        auto split_size = fc_nodes.size();
-        auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{split_size}, orig_n_sizes);
+        auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, orig_n_sizes);
         auto output_split = std::make_shared<ov::op::v1::VariadicSplit>(new_fc, axis_const, split_const);
-        copy_runtime_info(fc_nodes_vec, output_split);
+        copy_runtime_info({fc_nodes[0], fc_nodes[1], fc_nodes[2]}, output_split);
         output_split->set_friendly_name(split_name);
         for (size_t i = 0; i < fc_nodes.size(); ++i) {
             auto org_fc = fc_nodes[i];
@@ -287,7 +204,6 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() {
             }
             org_fc->clear_control_dependencies();
         }
-        GPU_DEBUG_TRACE_DETAIL << "Created a new fused FC " << new_fc_name << std::endl;
         return true;
     };
 
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp
index b94ba03a7565e8..fababa0c20df38 100644
--- a/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/transformations/horizontal_fc_fusion_test.cpp
@@ -17,7 +17,6 @@
 #include "openvino/op/concat.hpp"
 #include "openvino/op/variadic_split.hpp"
 #include "openvino/op/reshape.hpp"
-#include "openvino/op/add.hpp"
 #include "openvino/pass/manager.hpp"
 
 #include <transformations/utils/utils.hpp>
@@ -163,85 +162,6 @@ TEST_F(TransformationTestsF, FullyConnectedHorizontalFusion_bias_zp) {
         comparator.enable(FunctionsComparator::ATTRIBUTES);
     }
 }
-
-TEST_F(TransformationTestsF, FullyConnectedHorizontalFusion_eltwise_bias_zp) {
-    std::vector<int64_t> pattern = {7, -1};
-    {
-        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{-1, 7, 4096});
-        auto weight1 = std::make_shared<ov::op::v0::Constant>(ov::element::u4, ov::Shape{1024, 4096});
-        weight1->set_friendly_name("weight1_1");
-        auto weight2 = std::make_shared<ov::op::v0::Constant>(ov::element::u4, ov::Shape{512, 4096});
-        weight2->set_friendly_name("weight1_2");
-        auto weight3 = std::make_shared<ov::op::v0::Constant>(ov::element::u4, ov::Shape{128, 4096});
-        weight3->set_friendly_name("weight1_3");
-
-        auto bias1 = std::make_shared<ov::intel_gpu::op::Placeholder>();
-        auto bias2 = std::make_shared<ov::intel_gpu::op::Placeholder>();
-        auto bias3 = std::make_shared<ov::intel_gpu::op::Placeholder>();
- 
-        auto scale1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1024, 32});
-        auto scale2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{512, 32});
-        auto scale3 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{128, 32});
-        auto fc1 = std::make_shared<ov::intel_gpu::op::FullyConnectedCompressed>(input, weight1, bias1, scale1);
-        fc1->set_friendly_name("fc1");
-        auto fc2 = std::make_shared<ov::intel_gpu::op::FullyConnectedCompressed>(input, weight2, bias2, scale2);
-        auto fc3 = std::make_shared<ov::intel_gpu::op::FullyConnectedCompressed>(input, weight3, bias3, scale3);
-
-        auto add_input1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 1024});
-        auto add1 = std::make_shared<ov::op::v1::Add>(fc1, add_input1);
-
-        auto add_input2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 512});
-        auto add2 = std::make_shared<ov::op::v1::Add>(fc2, add_input2);
-
-        auto add_input3 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 128});
-        auto add3 = std::make_shared<ov::op::v1::Add>(fc3, add_input3);
-
-        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{2}, pattern);
-        auto reshape1 = std::make_shared<ov::op::v1::Reshape>(add1, reshape_pattern, true);
-        auto reshape2 = std::make_shared<ov::op::v1::Reshape>(add2, reshape_pattern, true);
-        auto reshape3 = std::make_shared<ov::op::v1::Reshape>(add3, reshape_pattern, true);
-        auto result1 = std::make_shared<ov::op::v0::Result>(reshape1);
-        auto result2 = std::make_shared<ov::op::v0::Result>(reshape2);
-        auto result3 = std::make_shared<ov::op::v0::Result>(reshape3);
-        model = std::make_shared<ov::Model>(ov::ResultVector{result1, result2, result3}, ov::ParameterVector{input});
-        manager.register_pass<FullyConnectedHorizontalFusion>();
-    }
-    {
-        auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{-1, 7, 4096});
-        auto weight1 = std::make_shared<ov::op::v0::Constant>(ov::element::u4, ov::Shape{1024, 4096});
-        weight1->set_friendly_name("weight2_1");
-        auto weight2 = std::make_shared<ov::op::v0::Constant>(ov::element::u4, ov::Shape{512, 4096});
-        weight2->set_friendly_name("weight2_2");
-        auto weight3 = std::make_shared<ov::op::v0::Constant>(ov::element::u4, ov::Shape{128, 4096});
-        weight3->set_friendly_name("weight2_3");
-        auto weights = ov::OutputVector{weight1, weight2, weight3};
-        auto weight_fused = std::make_shared<ov::op::v0::Concat>(weights, 0);
-        auto bias1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 1024});
-        auto bias2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 512});
-        auto bias3 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1, 128});
-        auto biases = ov::OutputVector{bias1, bias2, bias3};
-        auto bias_fused = std::make_shared<ov::op::v0::Concat>(biases, 1);
-        auto scale1 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{1024, 32});
-        auto scale2 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{512, 32});
-        auto scale3 = std::make_shared<ov::op::v0::Constant>(ov::element::f16, ov::Shape{128, 32});
-        auto scales = ov::OutputVector{scale1, scale2, scale3};
-        auto scale_fused = std::make_shared<ov::op::v0::Concat>(scales, 0);
-        auto fc_fused = std::make_shared<ov::intel_gpu::op::FullyConnectedCompressed>(input, weight_fused, bias_fused, scale_fused);
-        auto axis_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {fc_fused->get_output_partial_shape(0).size() - 1});
-        std::vector<int64_t> orig_n_sizes = {1024, 512, 128};
-        auto split_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{3}, orig_n_sizes);
-        auto split = std::make_shared<ov::op::v1::VariadicSplit>(fc_fused, axis_const, split_const);
-        auto reshape_pattern = std::make_shared<ov::op::v0::Constant>(ov::element::i64, ov::Shape{2}, pattern);
-        auto reshape1 = std::make_shared<ov::op::v1::Reshape>(split->output(0), reshape_pattern, true);
-        auto reshape2 = std::make_shared<ov::op::v1::Reshape>(split->output(1), reshape_pattern, true);
-        auto reshape3 = std::make_shared<ov::op::v1::Reshape>(split->output(2), reshape_pattern, true);
-        auto result1 = std::make_shared<ov::op::v0::Result>(reshape1);
-        auto result2 = std::make_shared<ov::op::v0::Result>(reshape2);
-        auto result3 = std::make_shared<ov::op::v0::Result>(reshape3);
-        model_ref = std::make_shared<ov::Model>(ov::ResultVector{result1, result2, result3}, ov::ParameterVector{input});
-        comparator.enable(FunctionsComparator::ATTRIBUTES);
-    }
-}
 }  // namespace intel_gpu
 }  // namespace test
 }  // namespace ov
\ No newline at end of file