[NVIDIA] Adds tests for fuse matmul with add transformation. Fixes bug when input node has several output ports. (#690)
nkogteva authored Jul 24, 2023
1 parent 149de40 commit 8e60111
Showing 5 changed files with 276 additions and 88 deletions.
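The new pass is exposed as ov::nvidia_gpu::pass::FullyConnectedTransformation. A minimal usage sketch, mirroring how the unit tests below drive it (the helper function name is hypothetical; the plugin's actual pipeline file is not part of this diff):

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/init_node_info.hpp"
#include "transformer/fuse_matmul_add.hpp"

// Hypothetical helper: runs the MatMul + Add fusion on a model, as the tests do.
void run_fuse_matmul_add(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager pass_manager;
    pass_manager.register_pass<ov::pass::InitNodeInfo>();
    pass_manager.register_pass<ov::nvidia_gpu::pass::FullyConnectedTransformation>();
    pass_manager.run_passes(model);
}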

This file was deleted.

@@ -27,7 +27,7 @@

#include "bidirectional_lstm_sequence_composition.hpp"
#include "concat_transformation.hpp"
#include "cuda_fullyconnected_transformation.hpp"
#include "fuse_matmul_add.hpp"
#include "matmul_transformations.hpp"
#include "noop_broadcast_transformation.hpp"
#include "remove_duplicated_results_transformation.hpp"
104 changes: 104 additions & 0 deletions modules/nvidia_plugin/src/transformer/fuse_matmul_add.cpp
@@ -0,0 +1,104 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/cc/pass/itt.hpp"
#include "fuse_matmul_add.hpp"

#include <exec_graph_info.hpp>
#include "openvino/core/rt_info.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "openvino/pass/pattern/op/or.hpp"
#include <openvino/op/add.hpp>
#include <openvino/op/matmul.hpp>
#include <ops/matmul.hpp>

using namespace ov::pass::pattern;

namespace {
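// Returns the MatMul node and the constant bias feeding the Add, in whichever order they appear on the Add inputs; returns {nullptr, nullptr} if the Add does not match that pattern.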
std::pair<std::shared_ptr<ov::op::v0::MatMul>, std::shared_ptr<ov::op::v0::Constant>> get_matmul_constant_nodes(const std::shared_ptr<ov::Node>& add_node) {
if (std::dynamic_pointer_cast<ov::op::v0::Constant>(add_node->get_input_node_shared_ptr(1))) {
return {std::dynamic_pointer_cast<ov::op::v0::MatMul>(add_node->get_input_node_shared_ptr(0)),
std::dynamic_pointer_cast<ov::op::v0::Constant>(add_node->get_input_node_shared_ptr(1))};
} else if (std::dynamic_pointer_cast<ov::op::v0::Constant>(add_node->get_input_node_shared_ptr(0))) {
return {std::dynamic_pointer_cast<ov::op::v0::MatMul>(add_node->get_input_node_shared_ptr(1)),
std::dynamic_pointer_cast<ov::op::v0::Constant>(add_node->get_input_node_shared_ptr(0))};
}
return {nullptr, nullptr};
}

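// An Add qualifies for fusion only when it pairs a MatMul with a constant bias whose (broadcast) shape covers the MatMul output exactly once and whose batch does not exceed the MatMul batch.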
bool is_add_to_be_fused(const ov::Output<ov::Node>& output) {
auto add_node = std::dynamic_pointer_cast<ov::op::v1::Add>(output.get_node_shared_ptr());
if (!add_node) {
return false;
}
std::shared_ptr<ov::op::v0::MatMul> matmul_node;
std::shared_ptr<ov::op::v0::Constant> constant_node;
std::tie(matmul_node, constant_node) = get_matmul_constant_nodes(add_node);
if (!matmul_node || !constant_node) {
return false;
}

auto matrix_A_shape = matmul_node->get_input_shape(0);
auto matrix_B_shape = matmul_node->get_input_shape(1);
const auto matrix_shape = matmul_node->get_output_shape(0);
ov::nvidia_gpu::MatMulOp::BroadcastToMatrix(matrix_A_shape);
ov::nvidia_gpu::MatMulOp::BroadcastToMatrix(matrix_B_shape);
const auto matmul_batch = std::max(ov::nvidia_gpu::MatMulOp::GetMatrixNumBatches(matrix_A_shape),
ov::nvidia_gpu::MatMulOp::GetMatrixNumBatches(matrix_B_shape));

auto const_shape = constant_node->get_output_shape(0);
ov::nvidia_gpu::MatMulOp::BroadcastToMatrix(const_shape);
const auto const_batch = ov::nvidia_gpu::MatMulOp::GetMatrixNumBatches(const_shape);
const auto const_shape_size = ov::shape_size(const_shape);
const auto matrix_shape_size = ov::shape_size(matrix_shape);
const auto num_auto_const_batch = matrix_shape_size / const_shape_size;
const auto matmul_shape_dividable = matrix_shape_size % const_shape_size;
if (matmul_batch < const_batch || matmul_shape_dividable != 0 || num_auto_const_batch > 1) {
return false;
}
return true;
}
} // namespace

namespace ov::nvidia_gpu::pass {
bool fuse_matmul_and_add(Matcher &m) {
// Replace the matched MatMul + Add(constant bias) pair with a single FullyConnected node
auto add_node = std::dynamic_pointer_cast<ov::op::v1::Add>(m.get_match_root());
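// Remember the Add consumers now so they can be re-attached to the fused node after it is created.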
auto consumers = add_node->output(0).get_target_inputs();
std::shared_ptr<ov::op::v0::MatMul> matmul_node;
std::shared_ptr<ov::op::v0::Constant> constant_node;
std::tie(matmul_node, constant_node) = get_matmul_constant_nodes(add_node);
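// get_input_source_output() preserves the producer's output port index, so a MatMul input coming from a node with several output ports (e.g. VariadicSplit) stays connected to the correct port.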
const auto fully_connected_node =
std::make_shared<ov::nvidia_gpu::nodes::FullyConnected>(matmul_node->get_input_source_output(0),
matmul_node->get_input_source_output(1),
constant_node,
matmul_node->get_transpose_a(),
matmul_node->get_transpose_b());
fully_connected_node->set_friendly_name(add_node->get_friendly_name());
ov::copy_runtime_info({matmul_node, add_node}, fully_connected_node);

const std::string original_layers = matmul_node->get_friendly_name() + "," + add_node->get_friendly_name();
fully_connected_node->get_rt_info()[ExecGraphInfoSerialization::ORIGINAL_NAMES] = original_layers;

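// Re-point every consumer of the Add output to the new FullyConnected node.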
for (auto input : consumers) {
input.replace_source_output(fully_connected_node);
}
return true;
}

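// Match MatMul -> Add with a constant bias on either Add input and fuse the pair into a single FullyConnected node.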
FullyConnectedTransformation::FullyConnectedTransformation() {
MATCHER_SCOPE(FullyConnectedTransformation);
auto matmul = wrap_type<ov::op::v0::MatMul>(consumers_count(1));
auto bias = wrap_type<ov::op::v0::Constant>();
auto add0 = wrap_type<ov::op::v1::Add>({matmul, bias}, is_add_to_be_fused);
auto add1 = wrap_type<ov::op::v1::Add>({bias, matmul}, is_add_to_be_fused);
auto result = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{add0, add1});

matcher_pass_callback callback = [](Matcher &m) { return fuse_matmul_and_add(m); };

auto m = std::make_shared<Matcher>(result, matcher_name);
register_matcher(m, callback);
}

} // namespace ov::nvidia_gpu::pass
171 changes: 171 additions & 0 deletions modules/nvidia_plugin/tests/unit/transformations/fuse_matmul_add.cpp
@@ -0,0 +1,171 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <tuple>

#include "transformer/fuse_matmul_add.hpp"
#include "transformer/nodes/fully_connected.hpp"

#include "common_test_utils/ngraph_test_utils.hpp"
#include "openvino/core/model.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/utils/utils.hpp"

using ov::nvidia_gpu::nodes::FullyConnected;
using namespace ov;
using namespace std;

namespace testing {

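// Each test builds a model, runs FullyConnectedTransformation via a pass manager, and compares the result against a manually constructed reference model.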
TEST(fuse_matmul_add, parameters_matmul_add_constant) {
shared_ptr<ov::Model> model, model_ref;
{
auto input0 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 512});
auto input1 = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 512});
auto matmul = make_shared<op::v0::MatMul>(input0, input1, false, true);
auto const_node = op::v0::Constant::create(element::f32, Shape{1, 1024}, {1});
auto add = make_shared<op::v1::Add>(matmul, const_node);
model = make_shared<Model>(add, ParameterVector{input0, input1});

pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
pass_manager.run_passes(model);

ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 0);
}
{
auto input0 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 512});
auto input1 = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 512});
auto const_node = op::v0::Constant::create(element::f32, Shape{1, 1024}, {1});
auto fc = make_shared<FullyConnected>(input0, input1, const_node, false, true);
model_ref = make_shared<Model>(fc, ParameterVector{input0, input1});
}

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

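// Negative case: the bias comes from a Parameter rather than a Constant, so the MatMul and Add must stay unfused.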
TEST(fuse_matmul_add, parameters_matmul_add_parameter_fail) {
shared_ptr<ov::Model> model, model_ref;
{
auto input0 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 512});
auto input1 = make_shared<op::v0::Parameter>(element::f32, Shape{512, 1024});
auto matmul = make_shared<op::v0::MatMul>(input0, input1, false, false);
auto input3 = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1024});
auto add = make_shared<op::v1::Add>(matmul, input3);
model = make_shared<Model>(add, ParameterVector{input0, input1, input3});
}
model_ref = model->clone();

pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
pass_manager.run_passes(model);

ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 1);

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

TEST(fuse_matmul_add, parameter_constant_matmul_add_constant) {
shared_ptr<ov::Model> model, model_ref;
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{512, 1});
auto const_node0 = op::v0::Constant::create(element::f32, Shape{1024, 512}, {1});
auto matmul = make_shared<op::v0::MatMul>(input, const_node0, true, true);
auto const_node1 = op::v0::Constant::create(element::f32, Shape{1, 1024}, {2});
auto add = make_shared<op::v1::Add>(matmul, const_node1);
model = make_shared<Model>(add, ParameterVector{input});

pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
pass_manager.run_passes(model);

ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 0);
}
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{512, 1});
auto const_node0 = op::v0::Constant::create(element::f32, Shape{1024, 512}, {1});
auto const_node1 = op::v0::Constant::create(element::f32, Shape{1, 1024}, {2});
auto fc = make_shared<FullyConnected>(input, const_node0, const_node1, true, true);
model_ref = make_shared<Model>(fc, ParameterVector{input});
}

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

TEST(fuse_matmul_add, constant_parameter_matmul_add_constant) {
shared_ptr<ov::Model> model, model_ref;
{
auto const_node0 = op::v0::Constant::create(element::f32, Shape{1024, 512}, {1});
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 1});
auto matmul = make_shared<op::v0::MatMul>(const_node0, input, true, false);
auto const_node1 = op::v0::Constant::create(element::f32, Shape{1, 512}, {2});
auto add = make_shared<op::v1::Add>(matmul, const_node1);
model = make_shared<Model>(add, ParameterVector{input});

pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
pass_manager.run_passes(model);

ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 0);
}
{
auto const_node0 = op::v0::Constant::create(element::f32, Shape{1024, 512}, {1});
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 1});
auto const_node1 = op::v0::Constant::create(element::f32, Shape{1, 512}, {2});
auto fc = make_shared<FullyConnected>(const_node0, input, const_node1, true, false);
model_ref = make_shared<Model>(fc, ParameterVector{input});
}

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}

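// Regression test for the bug fixed in this commit: the MatMul input is produced by output port 1 of a VariadicSplit, i.e. a node with several output ports.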
TEST(fuse_matmul_add, parameter_variadic_split_matmul_add_constant) {
shared_ptr<ov::Model> model, model_ref;
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{197, 128});
auto split = make_shared<op::v1::VariadicSplit>(input,
op::v0::Constant::create(element::i32, {}, {0}),
op::v0::Constant::create(element::i32, Shape{2}, {196, 1}));
auto const_node0 = op::v0::Constant::create(element::f32, Shape{128, 128}, {1});
auto matmul = make_shared<op::v0::MatMul>(split->output(1), const_node0, false, true);
auto const_node1 = op::v0::Constant::create(element::f32, Shape{1, 128}, {2});
auto add = make_shared<op::v1::Add>(matmul, const_node1);
auto concat = make_shared<op::v0::Concat>(OutputVector{split->output(0), add}, 0);
model = make_shared<Model>(concat, ParameterVector{input});

pass::Manager pass_manager;
pass_manager.register_pass<pass::InitNodeInfo>();
pass_manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
pass_manager.run_passes(model);

ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 0);
}
{
auto input = make_shared<op::v0::Parameter>(element::f32, Shape{197, 128});
auto split = make_shared<op::v1::VariadicSplit>(input,
op::v0::Constant::create(element::i32, {}, {0}),
op::v0::Constant::create(element::i32, Shape{2}, {196, 1}));
auto const_node0 = op::v0::Constant::create(element::f32, Shape{128, 128}, {1});
auto const_node1 = op::v0::Constant::create(element::f32, Shape{1, 128}, {2});
auto fc = make_shared<FullyConnected>(split->output(1), const_node0, const_node1, false, true);
auto concat = make_shared<op::v0::Concat>(OutputVector{split->output(0), fc}, 0);
model_ref = make_shared<Model>(concat, ParameterVector{input});
}

auto res = compare_functions(model, model_ref);
ASSERT_TRUE(res.first) << res.second;
}
} // namespace testing
