Skip to content

Commit

Permalink
[GNA] Add decompose 2D convolutions transformation (openvinotoolkit#6382
Browse files Browse the repository at this point in the history
)

* [GNA] Add decompose 2D convolutions transformation

* [GNA] Use constant folding instead of creating copies

* [GNA] Enable more tests

* [GNA] Align ngraph opset
  • Loading branch information
sirzabek authored and akuporos committed Sep 29, 2021
1 parent 9153001 commit a035c18
Show file tree
Hide file tree
Showing 12 changed files with 1,841 additions and 109 deletions.
4 changes: 4 additions & 0 deletions inference-engine/src/gna_plugin/backend/gna_limitations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ constexpr uint32_t noOfInputsLowPrecDivisor = 16;

// Maximum batch size accepted for affine (fully connected) layers — presumably a GNA HW limit; verify against HW spec
constexpr uint32_t affineMaxBatchSize = 8;

// Maximum pooling window size for max pooling — assumed GNA HW limit, TODO confirm
constexpr uint32_t maxPoolMaxWindowSize = 6;

// Maximum grouping factor for copy layers — assumed GNA HW limit, TODO confirm
constexpr uint32_t copyMaxGrouping = 8;

namespace Cnn2D {
struct RangeLimit {
uint32_t min;
Expand Down
18 changes: 15 additions & 3 deletions inference-engine/src/gna_plugin/gna_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
#include "transformations/swap_input_matmul_gna.hpp"
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
#include "transformations/decompose_2d_conv.hpp"
#include "transformations/convert_padded2valid_conv.hpp"

#include <ngraph/opsets/opset7.hpp>
Expand Down Expand Up @@ -673,6 +674,11 @@ void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer,
void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "LoadNetwork");
std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork;

if (!gnaFlags->sw_fp32) {
InitGNADevice();
}

if (_network.getFunction()) {
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
const auto& graph = clonedNetwork.getFunction();
Expand All @@ -682,6 +688,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<ngraph::pass::ConvertPriorBox>();
manager.register_pass<ngraph::pass::CommonOptimizations>();
manager.register_pass<ConvertPadded2ValidConv>();
if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
manager.register_pass<Decompose2DConvTransposedWithBiasAF>();
manager.register_pass<Decompose2DConvTransposedWithBias>();
manager.register_pass<Decompose2DConv>();
}
// TODO enable this transformation for networks with convolutions
if (!ngraph::op::util::has_op_with_type<ngraph::opset7::Convolution>(graph)) {
manager.register_pass<ConvertMatmulWithFqToPointWiseConvolution>();
Expand Down Expand Up @@ -870,15 +881,16 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// fill in extra storage with memory layers
graphCompiler.fillMemoryConnections(memoryPairs);

if (!graphCompiler.memory_connection.empty()) {
if (!graphCompiler.memory_connection.empty() && gnaFlags->gna_lib_async_threads_num != 1) {
// TODO: check if updating the number of threads is needed for sw_fp32
gnaFlags->gna_lib_async_threads_num = 1;
if (!gnaFlags->sw_fp32)
InitGNADevice();
}

if (gnaFlags->sw_fp32) {
gnamem.reset(new gna_memory_type(memory::make_polymorph<std::allocator<uint8_t>>()));
graphCompiler.setGNAMemoryPtr(gnamem);
} else {
InitGNADevice();
}

// keep inputs information and create input primitives
Expand Down
4 changes: 2 additions & 2 deletions inference-engine/src/gna_plugin/gna_plugin_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
}
}
auto scale_factor = InferenceEngine::CNNLayer::ie_parse_float(value);
if (fp32eq(scale_factor, 0.0f)) {
THROW_GNA_EXCEPTION << "input scale factor of 0.0f not supported";
if (fp32eq(scale_factor, 0.0f) || std::isinf(scale_factor)) {
THROW_GNA_EXCEPTION << "input scale factor of 0.0f or +-inf not supported";
}
// missing scale factors are set to be 1.0f
if (inputScaleFactors.size() <= input_index) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,72 +14,26 @@
#include <ngraph/rt_info.hpp>
#include <ngraph/pass/manager.hpp>
#include <ie_common.h>
#include "utils/transformation_helper.hpp"


using namespace GNAPluginNS;

NGRAPH_RTTI_DEFINITION(ConvertPadded2ValidConv, "ConvertPadded2ValidConv", 0);

// Parameters extracted from an ngraph::opset7::Convolution node.
// Field names assume NCHW input layout and OIHW filter layout — TODO confirm at the fill site.
struct ConvData {
size_t input_height;                 // input spatial height (dim 2)
size_t input_width;                  // input spatial width (dim 3)
size_t input_channel_count;          // number of input channels (dim 1)
size_t filter_count;                 // number of filters, i.e. output channels
size_t pads_begin_width;             // padding added on the left
size_t pads_begin_height;            // padding added on the top
size_t pads_end_width;               // padding added on the right
size_t pads_end_height;              // padding added on the bottom
ngraph::op::PadType padding_type;    // auto_pad mode of the convolution
ngraph::element::Type element_type;  // element type of the convolution output
};

// Verifies that the convolution is supported by this transformation and fills
// conv_data with its parameters via GetConvData().
// Only batch size 1 is supported.
// Returns true only when the convolution has any padding to convert, i.e. at
// least one of the begin/end pads is non-zero; a valid (unpadded) convolution
// needs no conversion.
// NOTE(review): the source span contained interleaved pre-/post-refactor diff
// lines; this is the reconstructed post-refactor version.
static bool VerifyAndGetConvData(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData& conv_data) {
    const auto& input = conv->input_value(0);

    // We support only batch 1
    if (input.get_shape()[0] != 1) {
        return false;
    }

    GetConvData(conv, conv_data);

    // Proceed only if there is some padding to remove
    return conv_data.pads_begin_height || conv_data.pads_end_height || conv_data.pads_begin_width || conv_data.pads_end_width;
}

// Returns true when the given Transpose node permutes its input exactly in the
// requested order, i.e. its order input is a 1D constant whose values equal
// `order` element-wise.
static bool TransposeOrderMatches(std::shared_ptr<ngraph::opset7::Transpose> transpose, std::vector<size_t> order) {
    if (!transpose)
        return false;

    const ngraph::Output<ngraph::Node>& order_input = transpose->input_value(1);
    const auto& order_shape = order_input.get_shape();

    // The order input must be a 1D tensor with one value per expected axis
    if (order_shape.size() != 1 || order_shape[0] != order.size())
        return false;

    // The order must be a compile-time constant to be comparable
    auto order_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(order_input.get_node_shared_ptr());
    if (!order_const)
        return false;

    const auto actual_order = order_const->cast_vector<size_t>();
    return !actual_order.empty() && std::equal(order.begin(), order.end(), actual_order.begin());
}

static bool VerifyBias(std::shared_ptr<ngraph::opset7::Add> bias, const size_t& filter_count) {
auto add_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias->input_value(0).get_node_shared_ptr());

Expand All @@ -91,16 +45,6 @@ static bool VerifyBias(std::shared_ptr<ngraph::opset7::Add> bias, const size_t&
return (add_const && shape_size(add_const->get_shape()) == filter_count);
}

// Builds a StridedSlice that crops `size` elements starting at `offset` along
// the second dimension of a 2D (flattened) input, leaving the first (batch)
// dimension untouched via the begin/end masks.
static std::shared_ptr<ngraph::opset7::StridedSlice> FlatCrop(ngraph::Output<ngraph::Node> input, size_t offset, size_t size) {
    auto begin = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset});
    auto end = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)0, offset + size});
    auto strides = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {(size_t)1, (size_t)1});
    const std::vector<int64_t> begin_mask{1, 0};  // mask set: ignore begin index on the batch axis
    const std::vector<int64_t> end_mask{1, 0};    // mask set: ignore end index on the batch axis
    return std::make_shared<ngraph::opset7::StridedSlice>(input, begin, end, strides, begin_mask, end_mask);
}

static void InsertPadding(ngraph::OutputVector& input_rows_to_concat, size_t size, const std::shared_ptr<ngraph::opset7::Convolution>& conv,
const std::shared_ptr<ngraph::opset7::Constant> padding_const, size_t biggest_padding) {

Expand Down Expand Up @@ -226,7 +170,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,

ConvData conv_data;

if (!VerifyAndGetConvParams(std::dynamic_pointer_cast<ngraph::opset7::Convolution>(conv), conv_data))
if (!VerifyAndGetConvData(std::dynamic_pointer_cast<ngraph::opset7::Convolution>(conv), conv_data))
return false;

// We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC)
Expand All @@ -246,7 +190,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
return true;
}

// Predicate combinator for pattern matching: the returned callable accepts an
// output only when it has exactly `expected_count` consumers AND its rank
// equals `expected_rank`.
static std::function<bool(ngraph::Output<ngraph::Node>)> consumers_and_rank(const size_t expected_count, const ngraph::Dimension& expected_rank) {
    return [=](ngraph::Output<ngraph::Node> output) -> bool {
        // Bug fix: consumers_count()/rank_equals() return predicate function
        // objects; they must be invoked on `output`. The previous code tested
        // the function objects themselves with &&, which contextually converts
        // them to bool (both non-empty) and therefore always returned true,
        // silently ignoring `output`.
        return ngraph::pattern::consumers_count(expected_count)(output) &&
               ngraph::pattern::rank_equals(expected_rank)(output);
    };
}
Expand Down Expand Up @@ -287,10 +231,8 @@ ConvertPadded2ValidConv::ConvertPadded2ValidConv() {

ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
const auto& pattern_map = m.get_pattern_value_map();
auto conv_output = conv->output(0).get_node_shared_ptr();
IE_ASSERT(conv_output != nullptr);

auto bias_node = std::dynamic_pointer_cast<ngraph::opset7::Add>(conv_output);
auto bias_it = pattern_map.find(bias);
auto bias_node = (bias_it == std::end(pattern_map) ? nullptr : bias_it->second.get_node_shared_ptr());

return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(),
pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ namespace GNAPluginNS {
* wrapped with transposes, to a valid convolution with padding added before the leading transpose,
* POT processed models are supported (fake quantized layers omitted below for clarity):
*
* Padding
* |
* Padding
* |
* Transpose (NHWC -> NCHW) Transpose (NHWC -> NCHW)
* | |
* Convolution with padding Convolution with padding
* Convolution with padding Valid convolution
* | |
* Broadcast Bias (optional) Broadcast Bias (optional)
* | |
Expand Down
Loading

0 comments on commit a035c18

Please sign in to comment.