-
Notifications
You must be signed in to change notification settings - Fork 2.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NPUW] Add Slice before last MatMul #27229
Changes from 3 commits
3b1d1c6
93afd14
28ee454
4601e42
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,18 +6,7 @@ | |
|
||
#include "../../logging.hpp" | ||
#include "../../util.hpp" | ||
#include "openvino/op/add.hpp" | ||
#include "openvino/op/broadcast.hpp" | ||
#include "openvino/op/concat.hpp" | ||
#include "openvino/op/convert.hpp" | ||
#include "openvino/op/gather.hpp" | ||
#include "openvino/op/matmul.hpp" | ||
#include "openvino/op/multiply.hpp" | ||
#include "openvino/op/reduce_sum.hpp" | ||
#include "openvino/op/reshape.hpp" | ||
#include "openvino/op/slice.hpp" | ||
#include "openvino/op/split.hpp" | ||
#include "openvino/op/subtract.hpp" | ||
#include "openvino/op/ops.hpp" | ||
#include "openvino/op/util/op_types.hpp" | ||
#include "openvino/pass/pattern/op/label.hpp" // any_input | ||
#include "openvino/pass/pattern/op/optional.hpp" | ||
|
@@ -1296,6 +1285,151 @@ CompressDictMatMulf32::CompressDictMatMulf32(Context::Ref ctx) { | |
register_matcher(std::make_shared<opp::Matcher>(res, "OptCompressDictMatMulf32"), std::move(callback)); | ||
} | ||
|
||
// Matcher pass for the tail `MatMul -> Result`.
//
// When the MatMul's first input has a static rank-3 shape with more than one
// element along axis 1, a Slice is inserted in front of that input. The Slice
// keeps index range [0, 1) on axis 0, only the last index on axis 1, and the
// whole of axis 2 (step 1 everywhere).
// NOTE(review): axis 1 is presumably the sequence/token axis - confirm.
SliceLastMatmul::SliceLastMatmul() {
    auto last_matmul = opp::wrap_type<ov::op::v0::MatMul>({opp::any_input(), opp::any_input()});
    auto model_output = opp::wrap_type<ov::op::v0::Result>({last_matmul});

    // Capture by value ([=]) so the pattern nodes declared above stay alive inside the callback.
    auto callback = [=](ov::pass::pattern::Matcher& m) {
        auto& pattern_map = m.get_pattern_value_map();
        const auto matmul_out = pattern_map.at(last_matmul);
        auto* matmul_node = matmul_out.get_node();

        const auto in_shape = matmul_node->input(0).get_shape();
        const bool sliceable = in_shape.size() == 3 && in_shape[1] > 1;
        if (!sliceable) {
            return false;  // nothing to rewrite
        }

        // Builds a 3-element i32 constant holding per-axis Slice arguments.
        const auto i32x3 = [](const std::vector<int32_t>& values) {
            return std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{3}, values);
        };

        // Dimensions are narrowed to int32_t, exactly as the constants' element type requires.
        auto begin = i32x3({0, static_cast<int32_t>(in_shape[1] - 1), 0});
        auto end = i32x3({1, static_cast<int32_t>(in_shape[1]), static_cast<int32_t>(in_shape[2])});
        auto stride = i32x3({1, 1, 1});

        auto last_slice = std::make_shared<ov::op::v8::Slice>(matmul_node->input_value(0), begin, end, stride);

        // Re-route the MatMul's first input through the new Slice.
        matmul_node->input(0).replace_source_output(last_slice);
        return true;  // graph was modified
    };
    register_matcher(std::make_shared<opp::Matcher>(model_output, "SliceLastMatmul"), std::move(callback));
}
|
||
SliceLastMatmulAdd::SliceLastMatmulAdd() { | ||
// Pattern to match: a Result fed by an Add, where the Add's pattern inputs are
// a two-input MatMul and any other input.
auto matmul = opp::wrap_type<ov::op::v0::MatMul>({opp::any_input(), opp::any_input()}); | ||
auto add = opp::wrap_type<ov::op::v1::Add>({matmul, opp::any_input()}); | ||
auto res = opp::wrap_type<ov::op::v0::Result>({add}); | ||
Comment on lines
+1325
to
+1326
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure if I've seen cases like this. @TolyaTalamanov, did you? @smirnov-alexey, what pattern worked for you most of the time? I think we can drop this pattern. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. phi2: https://huggingface.co/OpenVINO/phi-2-fp16-ov/raw/main/openvino_model.xml has There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I only saw the first one, but as discussed, let's keep all 3 for now (enabled via property). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I found the following patterns and applied them in the genai repo: chatglm2-6b, codegen2-1b, gemma-2-2b. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks @olpipi, appreciate your help. |
||
|
||
// Note: Use [=] to make sure the above objects stay alive in the callback | ||
auto callback = [=](ov::pass::pattern::Matcher& m) { | ||
auto& node_to_output = m.get_pattern_value_map(); | ||
|
||
auto matched_out_matmul = node_to_output.at(matmul); | ||
|
||
auto shape = matched_out_matmul.get_node()->input(0).get_shape(); | ||
|
||
if (shape.size() == 3 && shape[1] > 1) { | ||
auto start = std::make_shared<ov::op::v0::Constant>(ov::element::i32, | ||
ov::Shape{3}, | ||
std::vector<int32_t>{0, int32_t(shape[1] - 1), 0}); | ||
auto stop = | ||
std::make_shared<ov::op::v0::Constant>(ov::element::i32, | ||
ov::Shape{3}, | ||
std::vector<int32_t>{1, int32_t(shape[1]), int32_t(shape[2])}); | ||
auto step = | ||
std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{3}, std::vector<int32_t>{1, 1, 1}); | ||
|
||
auto slice = | ||
std::make_shared<ov::op::v8::Slice>(matched_out_matmul.get_node()->input_value(0), start, stop, step); | ||
|
||
matched_out_matmul.get_node()->input(0).replace_source_output(slice); | ||
|
||
return true; // root was changed | ||
} | ||
return false; // root hasn't changed | ||
}; | ||
register_matcher(std::make_shared<opp::Matcher>(res, "SliceLastMatmulAdd"), std::move(callback)); | ||
} | ||
|
||
SliceLastMatmulTranspose::SliceLastMatmulTranspose() { | ||
// Pattern to match: MatMul -> Transpose -> Result.
// The Result must be rooted on the Transpose node. Rooting it on the MatMul
// leaves the Transpose node out of the pattern entirely, so the matcher would
// only ever match a plain `MatMul -> Result` tail (the same as SliceLastMatmul).
auto matmul = opp::wrap_type<ov::op::v0::MatMul>({opp::any_input(), opp::any_input()});
auto transpose = opp::wrap_type<ov::op::v1::Transpose>({matmul, opp::any_input()});
auto res = opp::wrap_type<ov::op::v0::Result>({transpose});
Comment on lines
+1359
to
+1362
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also not sure about this one. My main concern is that we can alter more matmuls than we actually need. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As discussed, keeping for now. |
||
|
||
// Note: Use [=] to make sure the above objects stay alive in the callback | ||
auto callback = [=](ov::pass::pattern::Matcher& m) { | ||
auto& node_to_output = m.get_pattern_value_map(); | ||
|
||
auto matched_out_matmul = node_to_output.at(matmul); | ||
|
||
auto shape = matched_out_matmul.get_node()->input(0).get_shape(); | ||
|
||
if (shape.size() == 3 && shape[1] > 1) { | ||
auto start = std::make_shared<ov::op::v0::Constant>(ov::element::i32, | ||
ov::Shape{3}, | ||
std::vector<int32_t>{0, int32_t(shape[1] - 1), 0}); | ||
auto stop = | ||
std::make_shared<ov::op::v0::Constant>(ov::element::i32, | ||
ov::Shape{3}, | ||
std::vector<int32_t>{1, int32_t(shape[1]), int32_t(shape[2])}); | ||
auto step = | ||
std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{3}, std::vector<int32_t>{1, 1, 1}); | ||
|
||
auto slice = | ||
std::make_shared<ov::op::v8::Slice>(matched_out_matmul.get_node()->input_value(0), start, stop, step); | ||
|
||
matched_out_matmul.get_node()->input(0).replace_source_output(slice); | ||
|
||
return true; // root was changed | ||
} | ||
return false; // root hasn't changed | ||
}; | ||
register_matcher(std::make_shared<opp::Matcher>(res, "SliceLastMatmulTranspose"), std::move(callback)); | ||
} | ||
|
||
// Matcher pass for the tail `MatMul -> Divide -> Tanh -> Multiply -> Result`.
//
// When the MatMul's first input has a static rank-3 shape with more than one
// element along axis 1, a Slice is inserted in front of that input. The Slice
// keeps index range [0, 1) on axis 0, only the last index on axis 1, and the
// whole of axis 2 (step 1 everywhere).
// NOTE(review): axis 1 is presumably the sequence/token axis - confirm.
SliceLastMatmulMultiply::SliceLastMatmulMultiply() {
    auto last_matmul = opp::wrap_type<ov::op::v0::MatMul>({opp::any_input(), opp::any_input()});
    auto scaled = opp::wrap_type<ov::op::v1::Divide>({last_matmul, opp::any_input()});
    auto squashed = opp::wrap_type<ov::op::v0::Tanh>({scaled});
    auto rescaled = opp::wrap_type<ov::op::v1::Multiply>({squashed, opp::any_input()});
    auto model_output = opp::wrap_type<ov::op::v0::Result>({rescaled});

    // Capture by value ([=]) so the pattern nodes declared above stay alive inside the callback.
    auto callback = [=](ov::pass::pattern::Matcher& m) {
        auto& pattern_map = m.get_pattern_value_map();
        const auto matmul_out = pattern_map.at(last_matmul);
        auto* matmul_node = matmul_out.get_node();

        const auto in_shape = matmul_node->input(0).get_shape();
        if (in_shape.size() != 3 || in_shape[1] <= 1) {
            return false;  // nothing to rewrite
        }

        const auto last_index = static_cast<int32_t>(in_shape[1] - 1);
        const auto axis1_size = static_cast<int32_t>(in_shape[1]);
        const auto axis2_size = static_cast<int32_t>(in_shape[2]);

        auto begin = std::make_shared<ov::op::v0::Constant>(ov::element::i32,
                                                            ov::Shape{3},
                                                            std::vector<int32_t>{0, last_index, 0});
        auto end = std::make_shared<ov::op::v0::Constant>(ov::element::i32,
                                                          ov::Shape{3},
                                                          std::vector<int32_t>{1, axis1_size, axis2_size});
        auto stride =
            std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{3}, std::vector<int32_t>{1, 1, 1});

        auto last_slice = std::make_shared<ov::op::v8::Slice>(matmul_node->input_value(0), begin, end, stride);

        // Re-route the MatMul's first input through the new Slice.
        matmul_node->input(0).replace_source_output(last_slice);
        return true;  // graph was modified
    };
    register_matcher(std::make_shared<opp::Matcher>(model_output, "SliceLastMatmulMultiply"), std::move(callback));
}
Comment on lines
+1395
to
+1431
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same concern here. Not sure which topologies it serves. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As discussed, keeping for now. |
||
|
||
} // namespace opt | ||
} // namespace patterns | ||
} // namespace npuw | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure if all those additional patterns could be simplified with `optional` nodes.