From 38dec7b8cffe5f900819f6a1be984a03b57991a0 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Wed, 19 Jul 2023 08:30:21 +0100 Subject: [PATCH] [CPU] Fixed zero-point fusion transformation (#18435) Proper handling for subgraphs where ZP parent node has more than one output port --- src/plugins/intel_cpu/src/graph_optimizer.cpp | 6 +- .../include/conv_with_zero_point_fuse.hpp | 56 +++++++ .../src/conv_with_zero_point_fuse.cpp | 152 ++++++++++++++++++ .../functional/test_utils/cpu_test_utils.cpp | 2 +- .../include/ngraph_functions/builders.hpp | 13 ++ .../ngraph_functions/src/convolution.cpp | 30 ++++ 6 files changed, 256 insertions(+), 3 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/subgraph_tests/include/conv_with_zero_point_fuse.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 8952b09ea6f9af..ec4805a86406ee 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -756,8 +756,10 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { return false; } - auto subtractArg0 = parent0->getParentEdgesAtPort(0)[0]->getParent(); - if (subtractArg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8) + const auto& parentEdge = parent0->getParentEdgeAt(0); + const auto& subtractArg0 = parentEdge->getParent(); + const size_t portNum = parentEdge->getInputNum(); + if (subtractArg0->getOriginalOutputPrecisionAtPort(portNum) != Precision::U8) return false; auto zeroPointsConstant = dynamic_cast(subtractArg1.get()); diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/include/conv_with_zero_point_fuse.hpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/include/conv_with_zero_point_fuse.hpp new file mode 100644 index 00000000000000..a91d572a00076f --- /dev/null +++ 
b/src/plugins/intel_cpu/tests/functional/subgraph_tests/include/conv_with_zero_point_fuse.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "test_utils/cpu_test_utils.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +using namespace CPUTestUtils; + +namespace SubgraphTestsDefinitions { + +using convConcatCPUParams = std::tuple< + nodeType, // Ngraph convolution type + InferenceEngine::SizeVector // Input shapes +>; + +// Subgraph: +/* + * Parameter Constant + * | | i8 + * | | + * FakeQuantize Convert + * / \ | f32 + * / \ | + * MaxPool FakeQuantize Multiply + * \ \ / + * \ \ / + * \ Convolution + * \ / + * \ / + * Concat + * | + * | + * Result + */ + +class ConvWithZeroPointFuseSubgraphTest : public testing::WithParamInterface, + public CPUTestsBase, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + std::string pluginTypeNode; +}; + +} // namespace SubgraphTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp new file mode 100644 index 00000000000000..430b1353afd3c7 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/conv_with_zero_point_fuse.cpp @@ -0,0 +1,152 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/opsets/opset1.hpp" +#include "test_utils/convolution_params.hpp" +#include "subgraph_tests/include/conv_with_zero_point_fuse.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace SubgraphTestsDefinitions { + +std::string 
ConvWithZeroPointFuseSubgraphTest::getTestCaseName(testing::TestParamInfo obj) { + std::ostringstream result; + nodeType type; + SizeVector inputShapes; + std::tie(type, inputShapes) = obj.param; + + result << "Type=" << nodeType2str(type) << "_"; + result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; + + return result.str(); +} + +void ConvWithZeroPointFuseSubgraphTest::SetUp() { + targetDevice = CommonTestUtils::DEVICE_CPU; + nodeType type; + SizeVector inputShapes; + std::tie(type, inputShapes) = this->GetParam(); + pluginTypeNode = nodeType2PluginType(type); + + const ngraph::op::PadType paddingType { ngraph::op::PadType::EXPLICIT }; + const size_t numOutChannels = 256; + const SizeVector dilation { 1, 1 }; + const SizeVector kernelSize { 1, 1 }; + const SizeVector strides { 1, 1 }; + const std::vector padBegin { 0, 0 }; + const std::vector padEnd { 0, 0 }; + + selectedType = ".*_I8"; + + auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, {inputShapes}); + const auto fq = ngraph::builder::makeFakeQuantize( + inputParams[0], + ov::element::f32, + 256, + {1, 1, 1, 1}, + {-12.8f}, + {12.7f}, + {-12.8f}, + {12.7f}); + + auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); + + std::vector> branches(2); + { + ngraph::Strides strides{1, 1}; + ngraph::Shape pads_begin{0, 0}, pads_end{0, 0}, kernel{1, 1}; + branches[0] = std::make_shared(fq, + strides, + pads_begin, + pads_end, + kernel); + } + { + const auto fq_conv_data = ngraph::builder::makeFakeQuantize( + fq, + ov::element::f32, + 256, + {1, 1, 1, 1}, + {-12.8f}, + {12.7f}, + {-12.8f}, + {12.7f}); + + const InferenceEngine::SizeVector weights_const_shape = {numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]}; + const auto weights_const_values = std::vector(ngraph::shape_size(weights_const_shape), 1); + const auto weights_const = ngraph::builder::makeConstant(ov::element::i8, weights_const_shape, weights_const_values); + + const 
auto weights_convert = ngraph::builder::makeConversion( + weights_const, + ov::element::f32, + ngraph::helpers::ConversionTypes::CONVERT); + + const auto weights_multiply = std::make_shared( + weights_convert, + ngraph::builder::makeConstant(ov::element::f32, + {numOutChannels, 1, 1, 1}, + std::vector(numOutChannels, 1.0))); + + switch (type) { + case nodeType::convolution: { + branches[1] = ngraph::builder::makeConvolution(fq_conv_data, + weights_multiply, + ngraph::element::f32, + kernelSize, + strides, + padBegin, + padEnd, + dilation, + paddingType, + numOutChannels); + break; + } + case nodeType::groupConvolution: { + branches[1] = ngraph::builder::makeGroupConvolution( + fq_conv_data, + std::make_shared( + weights_multiply, + ngraph::builder::makeConstant( + ov::element::i32, + {5}, + std::vector{1, numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]}), + true), + ngraph::element::f32, + strides, + padBegin, + padEnd, + dilation, + paddingType); + break; + } + default: { + throw std::runtime_error("Subgraph concat test doesn't support this type of operation"); + } + } + } + + auto concat = ngraph::builder::makeConcat(ngraph::OutputVector{branches[0], branches[1]}, 1); + + ngraph::ResultVector results{std::make_shared(concat)}; + function = std::make_shared(results, inputParams, "ConvWithZeroPointFuseSubgraphTest"); +} + +TEST_P(ConvWithZeroPointFuseSubgraphTest, CompareWithRefs) { + Run(); + + CheckPluginRelatedResults(executableNetwork, pluginTypeNode); +}; + +const SizeVector inputShapes2D = {1, 32, 136, 136}; + +const auto params2DConv = ::testing::Combine(::testing::ValuesIn({nodeType::convolution, nodeType::groupConvolution}), + ::testing::Values(inputShapes2D)); + +INSTANTIATE_TEST_SUITE_P(smoke_ConvWithZeroPointFuse, + ConvWithZeroPointFuseSubgraphTest, + params2DConv, + ConvWithZeroPointFuseSubgraphTest::getTestCaseName); + +} // namespace SubgraphTestsDefinitions diff --git 
a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp index 5593385d70bd71..5ac5f258db1dc5 100644 --- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp +++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.cpp @@ -224,7 +224,7 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr makeConvolution(const ngraph::Output &in, const std::vector &filterWeights = {}, const std::vector &biasesWeights = {}); +std::shared_ptr makeConvolution(const ngraph::Output& in_data, + const ngraph::Output& in_weights, + const element::Type& type, + const std::vector& filterSize, + const std::vector& strides, + const std::vector& padsBegin, + const std::vector& padsEnd, + const std::vector& dilations, + const op::PadType& autoPad, + size_t numOutChannels, + bool addBiases = false, + const std::vector& biasesWeights = {}); + std::shared_ptr makeGroupConvolution(const ngraph::Output &in, const element::Type &type, const std::vector &filterSize, diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/convolution.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/convolution.cpp index 07fe0fa9e642af..4908a6119c495f 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/src/convolution.cpp +++ b/src/tests/ngraph_helpers/ngraph_functions/src/convolution.cpp @@ -39,5 +39,35 @@ std::shared_ptr makeConvolution(const ngraph::Output &in, } } +std::shared_ptr makeConvolution(const ngraph::Output& in_data, + const ngraph::Output& in_weights, + const element::Type &type, + const std::vector &filterSize, + const std::vector &strides, + const std::vector &padsBegin, + const std::vector &padsEnd, + const std::vector &dilations, + const op::PadType &autoPad, + size_t numOutChannels, + bool addBiases, + const std::vector &biasesWeights) { + auto shape = in_data.get_partial_shape(); + auto conv = std::make_shared(in_data, + in_weights, + strides, + padsBegin, 
+ padsEnd, + dilations, + autoPad); + if (addBiases) { + bool randomBiases = biasesWeights.empty(); + auto biasesWeightsNode = makeConstant(type, {1, numOutChannels , 1, 1}, biasesWeights, randomBiases); + auto add = std::make_shared(conv, biasesWeightsNode); + return add; + } else { + return conv; + } +} + } // namespace builder } // namespace ngraph