-
Notifications
You must be signed in to change notification settings - Fork 144
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[NVIDIA] Adds tests for the fuse-matmul-with-add transformation. Fixes a bug
that occurred when an input node has several output ports. (#690)
- Loading branch information
Showing
5 changed files
with
276 additions
and
88 deletions.
There are no files selected for viewing
87 changes: 0 additions & 87 deletions
87
modules/nvidia_plugin/src/transformer/cuda_fullyconnected_transformation.cpp
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
modules/nvidia_plugin/src/transformer/fuse_matmul_add.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "openvino/cc/pass/itt.hpp" | ||
#include "fuse_matmul_add.hpp" | ||
|
||
#include <exec_graph_info.hpp> | ||
#include "openvino/core/rt_info.hpp" | ||
#include "openvino/pass/pattern/op/wrap_type.hpp" | ||
#include "openvino/pass/pattern/op/or.hpp" | ||
#include <openvino/op/add.hpp> | ||
#include <openvino/op/matmul.hpp> | ||
#include <ops/matmul.hpp> | ||
|
||
using namespace ov::pass::pattern; | ||
|
||
namespace { | ||
std::pair<std::shared_ptr<ov::op::v0::MatMul>, std::shared_ptr<ov::op::v0::Constant>> get_matmul_constant_nodes(const std::shared_ptr<ov::Node>& add_node) {
    // Add is commutative, so the Constant bias may arrive on either input port
    // with the MatMul on the opposite one. Probe port 1 as the bias first,
    // then port 0. Either element of the result may be nullptr when the
    // corresponding input is not of the expected type; {nullptr, nullptr} is
    // returned when neither input is a Constant.
    for (const size_t const_port : {size_t{1}, size_t{0}}) {
        const auto constant =
            std::dynamic_pointer_cast<ov::op::v0::Constant>(add_node->get_input_node_shared_ptr(const_port));
        if (constant) {
            const size_t matmul_port = 1 - const_port;
            return {std::dynamic_pointer_cast<ov::op::v0::MatMul>(add_node->get_input_node_shared_ptr(matmul_port)),
                    constant};
        }
    }
    return {nullptr, nullptr};
}
|
||
bool is_add_to_be_fused(const ov::Output<ov::Node>& output) {
    // Matcher predicate: accept only Add nodes whose (MatMul, Constant bias)
    // pair can be folded into a single FullyConnected operation.
    const auto add_node = std::dynamic_pointer_cast<ov::op::v1::Add>(output.get_node_shared_ptr());
    if (!add_node) {
        return false;
    }
    const auto [matmul_node, constant_node] = get_matmul_constant_nodes(add_node);
    if (!matmul_node || !constant_node) {
        return false;
    }

    // Normalize both MatMul operands to matrix form and take the larger batch
    // count of the two as the effective batch count of the MatMul.
    auto shape_a = matmul_node->get_input_shape(0);
    auto shape_b = matmul_node->get_input_shape(1);
    const auto output_shape = matmul_node->get_output_shape(0);
    ov::nvidia_gpu::MatMulOp::BroadcastToMatrix(shape_a);
    ov::nvidia_gpu::MatMulOp::BroadcastToMatrix(shape_b);
    const auto matmul_batch = std::max(ov::nvidia_gpu::MatMulOp::GetMatrixNumBatches(shape_a),
                                       ov::nvidia_gpu::MatMulOp::GetMatrixNumBatches(shape_b));

    // The bias must not carry more batches than the MatMul, its size must
    // divide the MatMul output size evenly, and it must tile the output at
    // most once after auto-broadcast.
    auto bias_shape = constant_node->get_output_shape(0);
    ov::nvidia_gpu::MatMulOp::BroadcastToMatrix(bias_shape);
    const auto bias_batch = ov::nvidia_gpu::MatMulOp::GetMatrixNumBatches(bias_shape);
    const auto bias_size = ov::shape_size(bias_shape);
    const auto output_size = ov::shape_size(output_shape);
    return matmul_batch >= bias_batch && (output_size % bias_size) == 0 && (output_size / bias_size) <= 1;
}
} // namespace | ||
|
||
namespace ov::nvidia_gpu::pass { | ||
bool fuse_matmul_and_add(Matcher &m) { | ||
// Decompose Divide into Multiply with Power operations | ||
auto add_node = std::dynamic_pointer_cast<ov::op::v1::Add>(m.get_match_root()); | ||
auto consumers = add_node->output(0).get_target_inputs(); | ||
std::shared_ptr<ov::op::v0::MatMul> matmul_node; | ||
std::shared_ptr<ov::op::v0::Constant> constant_node; | ||
std::tie(matmul_node, constant_node) = get_matmul_constant_nodes(add_node); | ||
const auto fully_connected_node = | ||
std::make_shared<ov::nvidia_gpu::nodes::FullyConnected>(matmul_node->get_input_source_output(0), | ||
matmul_node->get_input_source_output(1), | ||
constant_node, | ||
matmul_node->get_transpose_a(), | ||
matmul_node->get_transpose_b()); | ||
fully_connected_node->set_friendly_name(add_node->get_friendly_name()); | ||
ov::copy_runtime_info({matmul_node, add_node}, fully_connected_node); | ||
|
||
const std::string original_layers = matmul_node->get_friendly_name() + "," + add_node->get_friendly_name(); | ||
fully_connected_node->get_rt_info()[ExecGraphInfoSerialization::ORIGINAL_NAMES] = original_layers; | ||
|
||
for (auto input : consumers) { | ||
input.replace_source_output(fully_connected_node); | ||
} | ||
return true; | ||
} | ||
|
||
FullyConnectedTransformation::FullyConnectedTransformation() {
    MATCHER_SCOPE(FullyConnectedTransformation);
    // The MatMul must feed only the Add (single consumer); the bias Constant
    // may sit on either Add input, so both argument orders are matched.
    const auto matmul_pattern = wrap_type<ov::op::v0::MatMul>(consumers_count(1));
    const auto bias_pattern = wrap_type<ov::op::v0::Constant>();
    const auto add_bias_rhs = wrap_type<ov::op::v1::Add>({matmul_pattern, bias_pattern}, is_add_to_be_fused);
    const auto add_bias_lhs = wrap_type<ov::op::v1::Add>({bias_pattern, matmul_pattern}, is_add_to_be_fused);
    const auto root = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{add_bias_rhs, add_bias_lhs});

    matcher_pass_callback callback = [](Matcher &m) { return fuse_matmul_and_add(m); };

    register_matcher(std::make_shared<Matcher>(root, matcher_name), callback);
}
|
||
} // namespace ov::nvidia_gpu::pass |
File renamed without changes.
171 changes: 171 additions & 0 deletions
171
modules/nvidia_plugin/tests/unit/transformations/fuse_matmul_add.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include <gtest/gtest.h> | ||
|
||
#include <tuple> | ||
|
||
#include "transformer/fuse_matmul_add.hpp" | ||
#include "transformer/nodes/fully_connected.hpp" | ||
|
||
#include "common_test_utils/ngraph_test_utils.hpp" | ||
#include "openvino/core/model.hpp" | ||
#include "openvino/op/constant.hpp" | ||
#include "openvino/pass/manager.hpp" | ||
#include "transformations/init_node_info.hpp" | ||
#include "transformations/utils/utils.hpp" | ||
|
||
using ov::nvidia_gpu::nodes::FullyConnected; | ||
using namespace ov; | ||
using namespace std; | ||
|
||
namespace testing { | ||
|
||
TEST(fuse_matmul_add, parameters_matmul_add_constant) {
    // MatMul(Parameter, Parameter) + Constant bias must fuse into FullyConnected.
    shared_ptr<ov::Model> model, model_ref;
    {
        auto data = make_shared<op::v0::Parameter>(element::f32, Shape{1, 512});
        auto weights = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 512});
        auto matmul = make_shared<op::v0::MatMul>(data, weights, false, true);
        auto bias = op::v0::Constant::create(element::f32, Shape{1, 1024}, {1});
        auto add = make_shared<op::v1::Add>(matmul, bias);
        model = make_shared<Model>(add, ParameterVector{data, weights});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
        manager.run_passes(model);

        // All MatMul nodes must have been consumed by the fusion.
        ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 0);
    }
    {
        auto data = make_shared<op::v0::Parameter>(element::f32, Shape{1, 512});
        auto weights = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 512});
        auto bias = op::v0::Constant::create(element::f32, Shape{1, 1024}, {1});
        auto fused = make_shared<FullyConnected>(data, weights, bias, false, true);
        model_ref = make_shared<Model>(fused, ParameterVector{data, weights});
    }

    const auto res = compare_functions(model, model_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||
TEST(fuse_matmul_add, parameters_matmul_add_parameter_fail) {
    // A Parameter (non-Constant) bias must NOT trigger the fusion: the model
    // is expected to stay untouched.
    shared_ptr<ov::Model> model, model_ref;
    {
        auto data = make_shared<op::v0::Parameter>(element::f32, Shape{1, 512});
        auto weights = make_shared<op::v0::Parameter>(element::f32, Shape{512, 1024});
        auto bias = make_shared<op::v0::Parameter>(element::f32, Shape{1, 1024});
        auto matmul = make_shared<op::v0::MatMul>(data, weights, false, false);
        auto add = make_shared<op::v1::Add>(matmul, bias);
        model = make_shared<Model>(add, ParameterVector{data, weights, bias});
    }
    model_ref = model->clone();

    pass::Manager manager;
    manager.register_pass<pass::InitNodeInfo>();
    manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
    manager.run_passes(model);

    // The MatMul must survive untouched.
    ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 1);

    const auto res = compare_functions(model, model_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||
TEST(fuse_matmul_add, parameter_constant_matmul_add_constant) {
    // MatMul(Parameter, Constant) with transpose_a and transpose_b set, plus a
    // Constant bias, must fuse into FullyConnected.
    shared_ptr<ov::Model> model, model_ref;
    {
        auto data = make_shared<op::v0::Parameter>(element::f32, Shape{512, 1});
        auto weights = op::v0::Constant::create(element::f32, Shape{1024, 512}, {1});
        auto matmul = make_shared<op::v0::MatMul>(data, weights, true, true);
        auto bias = op::v0::Constant::create(element::f32, Shape{1, 1024}, {2});
        auto add = make_shared<op::v1::Add>(matmul, bias);
        model = make_shared<Model>(add, ParameterVector{data});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
        manager.run_passes(model);

        ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 0);
    }
    {
        auto data = make_shared<op::v0::Parameter>(element::f32, Shape{512, 1});
        auto weights = op::v0::Constant::create(element::f32, Shape{1024, 512}, {1});
        auto bias = op::v0::Constant::create(element::f32, Shape{1, 1024}, {2});
        auto fused = make_shared<FullyConnected>(data, weights, bias, true, true);
        model_ref = make_shared<Model>(fused, ParameterVector{data});
    }

    const auto res = compare_functions(model, model_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||
TEST(fuse_matmul_add, constant_parameter_matmul_add_constant) {
    // Mirror case: the Constant is MatMul's first input and the Parameter the
    // second; fusion must still produce a FullyConnected node.
    shared_ptr<ov::Model> model, model_ref;
    {
        auto weights = op::v0::Constant::create(element::f32, Shape{1024, 512}, {1});
        auto data = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 1});
        auto matmul = make_shared<op::v0::MatMul>(weights, data, true, false);
        auto bias = op::v0::Constant::create(element::f32, Shape{1, 512}, {2});
        auto add = make_shared<op::v1::Add>(matmul, bias);
        model = make_shared<Model>(add, ParameterVector{data});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
        manager.run_passes(model);

        ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 0);
    }
    {
        auto weights = op::v0::Constant::create(element::f32, Shape{1024, 512}, {1});
        auto data = make_shared<op::v0::Parameter>(element::f32, Shape{1024, 1});
        auto bias = op::v0::Constant::create(element::f32, Shape{1, 512}, {2});
        auto fused = make_shared<FullyConnected>(weights, data, bias, true, false);
        model_ref = make_shared<Model>(fused, ParameterVector{data});
    }

    const auto res = compare_functions(model, model_ref);
    ASSERT_TRUE(res.first) << res.second;
}
|
||
TEST(fuse_matmul_add, parameter_variadic_split_matmul_add_constant) {
    // Regression test for the multi-output-port bug: the MatMul input comes
    // from output port 1 of a VariadicSplit, so the fusion must wire the
    // FullyConnected to that specific port rather than to port 0.
    shared_ptr<ov::Model> model, model_ref;
    {
        auto data = make_shared<op::v0::Parameter>(element::f32, Shape{197, 128});
        auto split = make_shared<op::v1::VariadicSplit>(data,
                                                        op::v0::Constant::create(element::i32, {}, {0}),
                                                        op::v0::Constant::create(element::i32, Shape{2}, {196, 1}));
        auto weights = op::v0::Constant::create(element::f32, Shape{128, 128}, {1});
        auto matmul = make_shared<op::v0::MatMul>(split->output(1), weights, false, true);
        auto bias = op::v0::Constant::create(element::f32, Shape{1, 128}, {2});
        auto add = make_shared<op::v1::Add>(matmul, bias);
        auto concat = make_shared<op::v0::Concat>(OutputVector{split->output(0), add}, 0);
        model = make_shared<Model>(concat, ParameterVector{data});

        pass::Manager manager;
        manager.register_pass<pass::InitNodeInfo>();
        manager.register_pass<nvidia_gpu::pass::FullyConnectedTransformation>();
        manager.run_passes(model);

        ASSERT_EQ(count_ops_of_type<op::v0::MatMul>(model), 0);
    }
    {
        auto data = make_shared<op::v0::Parameter>(element::f32, Shape{197, 128});
        auto split = make_shared<op::v1::VariadicSplit>(data,
                                                        op::v0::Constant::create(element::i32, {}, {0}),
                                                        op::v0::Constant::create(element::i32, Shape{2}, {196, 1}));
        auto weights = op::v0::Constant::create(element::f32, Shape{128, 128}, {1});
        auto bias = op::v0::Constant::create(element::f32, Shape{1, 128}, {2});
        auto fused = make_shared<FullyConnected>(split->output(1), weights, bias, false, true);
        auto concat = make_shared<op::v0::Concat>(OutputVector{split->output(0), fused}, 0);
        model_ref = make_shared<Model>(concat, ParameterVector{data});
    }

    const auto res = compare_functions(model, model_ref);
    ASSERT_TRUE(res.first) << res.second;
}
} // namespace testing |