From 38dec7b8cffe5f900819f6a1be984a03b57991a0 Mon Sep 17 00:00:00 2001
From: Edward Shogulin <edward.shogulin@intel.com>
Date: Wed, 19 Jul 2023 08:30:21 +0100
Subject: [PATCH] [CPU] Fixed zero-point fusion transformation (#18435)

Proper handling for subgraphs where the ZP parent node has more than one output port
---
 src/plugins/intel_cpu/src/graph_optimizer.cpp |   6 +-
 .../include/conv_with_zero_point_fuse.hpp     |  56 +++++++
 .../src/conv_with_zero_point_fuse.cpp         | 152 ++++++++++++++++++
 .../functional/test_utils/cpu_test_utils.cpp  |   2 +-
 .../include/ngraph_functions/builders.hpp     |  13 ++
 .../ngraph_functions/src/convolution.cpp      |  30 ++++
 6 files changed, 256 insertions(+), 3 deletions(-)
 create mode 100644 src/plugins/intel_cpu/tests/functional/subgraph_tests/include/conv_with_zero_point_fuse.hpp
 create mode 100644 src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp
diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp
index 8952b09ea6f9af..ec4805a86406ee 100644
--- a/src/plugins/intel_cpu/src/graph_optimizer.cpp
+++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -756,8 +756,10 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) {
                 return false;
         }
 
-        auto subtractArg0 = parent0->getParentEdgesAtPort(0)[0]->getParent();
-        if (subtractArg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8)
+        const auto& parentEdge = parent0->getParentEdgeAt(0);
+        const auto& subtractArg0 = parentEdge->getParent();
+        const size_t portNum = parentEdge->getInputNum();
+        if (subtractArg0->getOriginalOutputPrecisionAtPort(portNum) != Precision::U8)
             return false;
 
         auto zeroPointsConstant = dynamic_cast<node::Input*>(subtractArg1.get());
diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/include/conv_with_zero_point_fuse.hpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/include/conv_with_zero_point_fuse.hpp
new file mode 100644
index 00000000000000..a91d572a00076f
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/include/conv_with_zero_point_fuse.hpp
@@ -0,0 +1,56 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include "test_utils/cpu_test_utils.hpp"
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+using namespace CPUTestUtils;
+
+namespace SubgraphTestsDefinitions {
+
+using convConcatCPUParams = std::tuple<
+    nodeType,                           // Ngraph convolution type
+    InferenceEngine::SizeVector         // Input shapes
+>;
+
+// Subgraph:
+/*
+ *           Parameter          Constant
+ *               |                 | i8
+ *               |                 |
+ *         FakeQuantize         Convert
+ *           /      \              | f32
+ *          /        \             |
+ *      MaxPool    FakeQuantize  Multiply
+ *         \           \         /
+ *          \           \       /
+ *           \        Convolution
+ *            \        /
+ *             \      /
+ *              Concat
+ *                |
+ *                |
+ *             Result
+ */
+
+class ConvWithZeroPointFuseSubgraphTest : public testing::WithParamInterface<convConcatCPUParams>,
+                                          public CPUTestsBase,
+                                          virtual public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<convConcatCPUParams> obj);  // name generator for each parameter set
+
+protected:
+    void SetUp() override;       // builds the subgraph sketched in the comment above this class
+    std::string pluginTypeNode;  // plugin node type string; NOTE(review): presumably set in SetUp from the nodeType parameter - confirm
+};
+
+} // namespace SubgraphTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp
new file mode 100644
index 00000000000000..430b1353afd3c7
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp
@@ -0,0 +1,152 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ngraph/opsets/opset1.hpp"
+#include "test_utils/convolution_params.hpp"
+#include "subgraph_tests/include/conv_with_zero_point_fuse.hpp"
+
+using namespace InferenceEngine;
+using namespace CPUTestUtils;
+
+namespace SubgraphTestsDefinitions {
+
+// Formats the parameter set as "Type=<node type>_IS=<input shape>_" for use as the test-case name.
+std::string ConvWithZeroPointFuseSubgraphTest::getTestCaseName(testing::TestParamInfo<convConcatCPUParams> obj) {
+    const nodeType convKind = std::get<0>(obj.param);
+    const SizeVector& shape = std::get<1>(obj.param);
+
+    std::ostringstream name;
+    name << "Type=" << nodeType2str(convKind) << "_"
+         << "IS=" << CommonTestUtils::vec2str(shape) << "_";
+
+    return name.str();
+}
+
+// Builds the subgraph under test: FakeQuantize -> {MaxPool, FakeQuantize -> (Group)Convolution} -> Concat.
+void ConvWithZeroPointFuseSubgraphTest::SetUp() {
+    targetDevice = CommonTestUtils::DEVICE_CPU;
+    nodeType type;
+    SizeVector inputShapes;
+    std::tie(type, inputShapes) = this->GetParam();
+    pluginTypeNode = nodeType2PluginType(type);
+
+    const ngraph::op::PadType paddingType { ngraph::op::PadType::EXPLICIT };
+    const size_t numOutChannels = 256;
+    const SizeVector dilation { 1, 1 };
+    const SizeVector kernelSize { 1, 1 };
+    const SizeVector strides { 1, 1 };
+    const std::vector<ptrdiff_t> padBegin { 0, 0 };
+    const std::vector<ptrdiff_t> padEnd { 0, 0 };
+    // Expected primitive type; CPUTestsBase::primTypeCheck matches it as a regular expression.
+    selectedType = ".*_I8";
+
+    auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, {inputShapes});
+    const auto fq = ngraph::builder::makeFakeQuantize(
+        inputParams[0],
+        ov::element::f32,
+        256,
+        {1, 1, 1, 1},
+        {-12.8f},
+        {12.7f},
+        {-12.8f},
+        {12.7f});
+
+    // Both branches consume the shared FakeQuantize output: [0] MaxPool, [1] (group) convolution.
+    std::vector<std::shared_ptr<ngraph::Node>> branches(2);
+    {
+        ngraph::Strides pool_strides{1, 1};
+        ngraph::Shape pads_begin{0, 0}, pads_end{0, 0}, kernel{1, 1};
+        branches[0] = std::make_shared<ngraph::opset1::MaxPool>(fq,
+                                                                        pool_strides,
+                                                                        pads_begin,
+                                                                        pads_end,
+                                                                        kernel);
+    }
+    {
+        const auto fq_conv_data = ngraph::builder::makeFakeQuantize(
+            fq,
+            ov::element::f32,
+            256,
+            {1, 1, 1, 1},
+            {-12.8f},
+            {12.7f},
+            {-12.8f},
+            {12.7f});
+        // Weights: i8 constant filled with ones, converted to f32, then multiplied by a per-output-channel constant of 1.0.
+        const InferenceEngine::SizeVector weights_const_shape = {numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]};
+        const auto weights_const_values = std::vector<int>(ngraph::shape_size(weights_const_shape), 1);
+        const auto weights_const = ngraph::builder::makeConstant(ov::element::i8, weights_const_shape, weights_const_values);
+
+        const auto weights_convert = ngraph::builder::makeConversion(
+            weights_const,
+            ov::element::f32,
+            ngraph::helpers::ConversionTypes::CONVERT);
+
+        const auto weights_multiply = std::make_shared<ov::opset10::Multiply>(
+            weights_convert,
+            ngraph::builder::makeConstant(ov::element::f32,
+                                            {numOutChannels, 1, 1, 1},
+                                            std::vector<float>(numOutChannels, 1.0)));
+
+        switch (type) {
+            case nodeType::convolution: {
+                branches[1] = ngraph::builder::makeConvolution(fq_conv_data,
+                                                               weights_multiply,
+                                                               ngraph::element::f32,
+                                                               kernelSize,
+                                                               strides,
+                                                               padBegin,
+                                                               padEnd,
+                                                               dilation,
+                                                               paddingType,
+                                                               numOutChannels);
+                break;
+            }
+            case nodeType::groupConvolution: {
+                branches[1] = ngraph::builder::makeGroupConvolution(
+                    fq_conv_data,
+                    std::make_shared<ov::opset10::Reshape>(
+                        weights_multiply,
+                        ngraph::builder::makeConstant(
+                            ov::element::i32,
+                            {5},
+                            std::vector<size_t>{1, numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]}),
+                        true),
+                    ngraph::element::f32,
+                    strides,
+                    padBegin,
+                    padEnd,
+                    dilation,
+                    paddingType);
+                break;
+            }
+            default: {
+                throw std::runtime_error("Subgraph concat test doesn't support this type of operation");
+            }
+        }
+    }
+
+    auto concat = ngraph::builder::makeConcat(ngraph::OutputVector{branches[0], branches[1]}, 1);
+
+    ngraph::ResultVector results{std::make_shared<ngraph::opset4::Result>(concat)};
+    function = std::make_shared<ngraph::Function>(results, inputParams, "ConvWithZeroPointFuseSubgraphTest");
+}
+
+TEST_P(ConvWithZeroPointFuseSubgraphTest, CompareWithRefs) {
+    Run();
+    // NOTE(review): presumably checks the executed graph's pluginTypeNode nodes against selectedType - confirm.
+    CheckPluginRelatedResults(executableNetwork, pluginTypeNode);
+}
+
+const SizeVector inputShapes2D = {1, 32, 136, 136};
+
+const auto params2DConv = ::testing::Combine(::testing::ValuesIn({nodeType::convolution, nodeType::groupConvolution}),
+                                             ::testing::Values(inputShapes2D));
+
+INSTANTIATE_TEST_SUITE_P(smoke_ConvWithZeroPointFuse,
+                         ConvWithZeroPointFuseSubgraphTest,
+                         params2DConv,
+                         ConvWithZeroPointFuseSubgraphTest::getTestCaseName);
+
+}  // namespace SubgraphTestsDefinitions
diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp
index 5593385d70bd71..5ac5f258db1dc5 100644
--- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp
+++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp
@@ -224,7 +224,7 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov:
 }
 
 bool CPUTestsBase::primTypeCheck(std::string primType) const {
-    return selectedType.find(CPUTestsBase::any_type) != std::string::npos || selectedType == primType;
+    return selectedType.find(CPUTestsBase::any_type) != std::string::npos || std::regex_match(primType, std::regex(selectedType));
 }
 
 std::string CPUTestsBase::getTestCaseName(CPUSpecificParams params) {
diff --git a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp
index 686775e53c2e20..2cdde2f51b9853 100644
--- a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp
+++ b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp
@@ -96,6 +96,19 @@ std::shared_ptr<ngraph::Node> makeConvolution(const ngraph::Output<Node> &in,
                                               const std::vector<float> &filterWeights = {},
                                               const std::vector<float> &biasesWeights = {});
 
+std::shared_ptr<ngraph::Node> makeConvolution(const ngraph::Output<Node>& in_data,
+                                              const ngraph::Output<Node>& in_weights,
+                                              const element::Type& type,
+                                              const std::vector<size_t>& filterSize,
+                                              const std::vector<size_t>& strides,
+                                              const std::vector<ptrdiff_t>& padsBegin,
+                                              const std::vector<ptrdiff_t>& padsEnd,
+                                              const std::vector<size_t>& dilations,
+                                              const op::PadType& autoPad,
+                                              size_t numOutChannels,
+                                              bool addBiases = false,
+                                              const std::vector<float>& biasesWeights = {});
+
 std::shared_ptr<ngraph::Node> makeGroupConvolution(const ngraph::Output<Node> &in,
                                                    const element::Type &type,
                                                    const std::vector<size_t> &filterSize,
diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/convolution.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/convolution.cpp
index 07fe0fa9e642af..4908a6119c495f 100644
--- a/src/tests/ngraph_helpers/ngraph_functions/src/convolution.cpp
+++ b/src/tests/ngraph_helpers/ngraph_functions/src/convolution.cpp
@@ -39,5 +39,35 @@ std::shared_ptr<Node> makeConvolution(const ngraph::Output<Node> &in,
     }
 }
 
+// Overload for caller-supplied data and weights outputs: builds an opset1 Convolution and, when
+// addBiases is set, adds a {1, numOutChannels, 1, 1} bias constant. filterSize is not used here.
+std::shared_ptr<Node> makeConvolution(const ngraph::Output<Node>& in_data,
+                                      const ngraph::Output<Node>& in_weights,
+                                      const element::Type &type,
+                                      const std::vector<size_t> &filterSize,
+                                      const std::vector<size_t> &strides,
+                                      const std::vector<ptrdiff_t> &padsBegin,
+                                      const std::vector<ptrdiff_t> &padsEnd,
+                                      const std::vector<size_t> &dilations,
+                                      const op::PadType &autoPad,
+                                      size_t numOutChannels,
+                                      bool addBiases,
+                                      const std::vector<float> &biasesWeights) {
+    auto conv = std::make_shared<opset1::Convolution>(in_data,
+                                                      in_weights,
+                                                      strides,
+                                                      padsBegin,
+                                                      padsEnd,
+                                                      dilations,
+                                                      autoPad);
+    if (!addBiases) {
+        return conv;
+    }
+    // Empty biasesWeights sets makeConstant's last flag (presumably a random fill - see makeConstant).
+    const bool randomBiases = biasesWeights.empty();
+    auto biasesWeightsNode = makeConstant(type, {1, numOutChannels , 1, 1}, biasesWeights, randomBiases);
+    return std::make_shared<ngraph::opset1::Add>(conv, biasesWeightsNode);
+}
+
 }  // namespace builder
 }  // namespace ngraph