From 18c852e7a49c658f5850dcc0cdc1c65538b07949 Mon Sep 17 00:00:00 2001 From: Andrey Babushkin Date: Wed, 2 Oct 2024 10:14:42 +0100 Subject: [PATCH] Revert "[NPUW] Add initial unpack and partitioning unit tests on NPUW" (#26884) Reverts openvinotoolkit/openvino#25780 Causes failures on master branch and in pull requests --- .../npuw/partitioning/online/snapshot.hpp | 16 +- src/plugins/intel_npu/tests/CMakeLists.txt | 1 - .../intel_npu/tests/unit/CMakeLists.txt | 49 -- .../tests/unit/npuw/online_partitioning.cpp | 692 ------------------ .../intel_npu/tests/unit/npuw/unpack.cpp | 100 --- .../intel_npu/tests/unit/npuw/unpack.hpp | 628 ---------------- 6 files changed, 8 insertions(+), 1478 deletions(-) delete mode 100644 src/plugins/intel_npu/tests/unit/CMakeLists.txt delete mode 100644 src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp delete mode 100644 src/plugins/intel_npu/tests/unit/npuw/unpack.cpp delete mode 100644 src/plugins/intel_npu/tests/unit/npuw/unpack.hpp diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp index e7e5121b1240e7..72a62781580cda 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp @@ -16,6 +16,8 @@ namespace ov { namespace npuw { namespace online { +class Group; // forward declaration + namespace detail { // At partitioning level we exclude some "non-Ops" to not interfere with the passes. // We include some of them back to properly link everything at plugin level @@ -31,8 +33,6 @@ class Snapshot : public std::enable_shared_from_this { m_node_to_prod_cons(std::make_shared()), m_node_to_gr(std::make_shared()) {} - friend class Group; // forward declaration - // Simple passes void singleGroup(); @@ -49,27 +49,27 @@ class Snapshot : public std::enable_shared_from_this { void repeatedBlocks(); void earlyAvoids(); void earlyRegroup(); + void markInternalCompute(); + void resetExcludedRep(); // Utility std::shared_ptr getGraph() const; + size_t graphSize() const; + const detail::OVNodeSet& getNodeProducers(const detail::OVNodePtr& node) const; + const detail::OVNodeSet& getNodeConsumers(const detail::OVNodePtr& node) const; const detail::OVPortsMap& getPortsMap() const; const detail::OVNodeToGroupMapPtr& getNodeToGroupMap() const; const std::map>>& getMatches() const; + detail::GPtrSet getRepGroups(const std::shared_ptr& group) const; void repeat(detail::Pass&& pass); void setCtx(const PassContext& ctx); - size_t graphSize() const; private: - detail::GPtrSet getRepGroups(const std::shared_ptr& group) const; - const detail::OVNodeSet& getNodeProducers(const detail::OVNodePtr& node) const; - const detail::OVNodeSet& getNodeConsumers(const detail::OVNodePtr& node) const; void identifyUniques(); void mergeUniques(); void mergeTriangles(); void cleanUpUniques(); void afterUniques(); - void markInternalCompute(); - void resetExcludedRep(); bool cleanUpUniquesImpl(const detail::GPtrSet& gset); std::shared_ptr tryGrowRepeatingGroups(const detail::GPtrSet& repeating_groups); std::shared_ptr tryMergeTriangles(const detail::GPtrSet& repeating_groups); diff --git a/src/plugins/intel_npu/tests/CMakeLists.txt b/src/plugins/intel_npu/tests/CMakeLists.txt index 0f5bd7a6b093b2..4c41f008eb7f81 100644 --- a/src/plugins/intel_npu/tests/CMakeLists.txt +++ b/src/plugins/intel_npu/tests/CMakeLists.txt @@ -8,4 +8,3 @@ if (MSVC) ov_add_compiler_flags(/wd5105) endif() add_subdirectory(functional) -add_subdirectory(unit) diff --git a/src/plugins/intel_npu/tests/unit/CMakeLists.txt b/src/plugins/intel_npu/tests/unit/CMakeLists.txt deleted file mode 100644 index 5741a1e43c2a5b..00000000000000 --- a/src/plugins/intel_npu/tests/unit/CMakeLists.txt +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set(TARGET_NAME "ov_npu_unit_tests") - -set(MANDATORY_UNIT_TESTS_LIBS - "openvino::commonTestUtils" - "openvino::gmock" - "openvino::gtest" - "openvino::gtest_main" - "openvino::runtime" - "openvino::npu_al" - "openvino::npu_logger_utils" -) - -ov_add_test_target( - NAME ${TARGET_NAME} - ROOT ${CMAKE_CURRENT_SOURCE_DIR} - ADDITIONAL_SOURCE_DIRS - ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/npuw/ - DEPENDENCIES - openvino::runtime - INCLUDES - ${CMAKE_CURRENT_SOURCE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/npuw - ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/npuw - ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/utils/include - ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/plugin/include - ${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/al/include - LINK_LIBRARIES - ${MANDATORY_UNIT_TESTS_LIBS} - LABELS - NPUW -) - -if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU") - target_compile_options(${TARGET_NAME} PRIVATE -mavx2 -mf16c) -elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - target_compile_options(${TARGET_NAME} PRIVATE /arch:AVX2) -else() - message(AUTHOR_WARNING "Unknown compiler, may miss the AVX2 baseline setting") -endif() - -install(TARGETS ${TARGET_NAME} - RUNTIME DESTINATION tests - COMPONENT tests - EXCLUDE_FROM_ALL -) diff --git a/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp b/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp deleted file mode 100644 index af1fc5de8e92c7..00000000000000 --- a/src/plugins/intel_npu/tests/unit/npuw/online_partitioning.cpp +++ /dev/null @@ -1,692 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include "partitioning/online/compiler.hpp" -#include "partitioning/online/snapshot.hpp" -#include "partitioning/online/group.hpp" - -#include "intel_npu/al/config/config.hpp" -#include "intel_npu/al/config/npuw.hpp" - -#include "openvino/openvino.hpp" -#include "openvino/op/ops.hpp" -#include "openvino/op/util/op_types.hpp" - -bool isEqualEns(ov::npuw::Ensemble& ens1, ov::npuw::Ensemble& ens2); -bool isEqualEns(ov::npuw::Ensemble& ens1, ov::npuw::Ensemble& ens2) { - if (ens1.groups.size() != ens2.groups.size()) { - return false; - } - - for (auto& g : ens1.groups) { - std::sort(g.input_layers.begin(), g.input_layers.end()); - std::sort(g.output_layers.begin(), g.output_layers.end()); - std::sort(g.all_layers.begin(), g.all_layers.end()); - } - - for (auto& g : ens2.groups) { - std::sort(g.input_layers.begin(), g.input_layers.end()); - std::sort(g.output_layers.begin(), g.output_layers.end()); - std::sort(g.all_layers.begin(), g.all_layers.end()); - } - - std::sort(ens1.groups.begin(), ens1.groups.end(), [](const ov::npuw::Group& g1, - const ov::npuw::Group& g2){ - return g1.all_layers.front() < g2.all_layers.front(); - }); - - std::sort(ens2.groups.begin(), ens2.groups.end(), [](const ov::npuw::Group& g1, - const ov::npuw::Group& g2){ - return g1.all_layers.front() < g2.all_layers.front(); - }); - - for (size_t i = 0; i < ens1.groups.size(); ++i) { - const auto& g1 = ens1.groups.at(i); - const auto& g2 = ens2.groups.at(i); - - if (g1.avoid_list != g2.avoid_list || - g1.input_layers != g2.input_layers || - g1.output_layers != g2.output_layers || - g1.all_layers != g2.all_layers) { - return false; - } - - // Can't compare them directly since they are random, but dont't affect the structure - if ((g1.repeated_id.empty() && !g2.repeated_id.empty()) || - (!g1.repeated_id.empty() && g2.repeated_id.empty())) { - return false; - } - } - - if (ens1.repeated.size() != ens2.repeated.size()) { - return false; - } - - auto get_sorted_rep = [](const std::map& rep) { - std::vector>> sorted_rep; - - std::transform(rep.begin(), rep.end(), std::back_inserter(sorted_rep), [](const auto& v) { - return v.second.matches; - }); - - for (auto& g : sorted_rep) { - std::sort(g.begin(), g.end(), - [](const auto& a, const auto& b) {return *a.begin() < *b.begin();}); - } - - std::sort(sorted_rep.begin(), sorted_rep.end(), - [](const auto& a, const auto& b) {return *a.front().begin() < *b.front().begin();}); - - return sorted_rep; - }; - - - if (get_sorted_rep(ens1.repeated) != get_sorted_rep(ens2.repeated)) { - return false; - } - - return true; -} - -class ModelGenerator { -public: - ModelGenerator() = default; - - std::shared_ptr get_model_without_repeated_blocks() { - std::shared_ptr input = std::make_shared(ov::element::i32, ov::Shape{1, 1, 40}); - m_nodes.push_back(input); - set_name(input); - - std::shared_ptr res = get_block(input); - - auto result = std::make_shared(res); - m_nodes.push_back(result); - set_name(result); - - ov::ParameterVector params = {input}; - ov::ResultVector results = {result}; - - return std::make_shared(results, params); - } - - std::shared_ptr get_model_with_repeated_blocks() { - // Generate head - std::shared_ptr input = std::make_shared(ov::element::i32, ov::Shape{1, 1, 40}); - m_nodes.push_back(input); - set_name(input); - - std::vector> head(7, nullptr); - head[0] = std::make_shared(input, input); - head[1] = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{2}); - head[2] = std::make_shared(head[0], head[1], true); - head[3] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 4, 10}); - head[4] = std::make_shared(ov::element::i64, ov::Shape{3}, std::vector{1, 1, 40}); - head[5] = std::make_shared(head[2], head[3], false); - head[6] = std::make_shared(head[5], head[4], false); - - for (const auto& h : head) { - m_nodes.push_back(h); - set_name(h); - } - - // Generate repeated blocks - std::shared_ptr output = get_block(head[6]); - std::vector> outputs; - outputs.push_back(output); - - for (size_t i = 0; i < 9; ++i) { - output = get_block(output); - outputs.push_back(output); - } - - // Generate tail - std::vector> tail(6, nullptr); - tail[0] = std::make_shared(outputs, -1); - tail[1] = std::make_shared(ov::element::i32, ov::Shape{3}, std::vector{1, 20, 20}); - tail[2] = std::make_shared(tail[0], tail[1], false); - tail[3] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1}); - tail[4] = std::make_shared(tail[2], tail[3]); - tail[5] = std::make_shared(tail[4], tail[4]); - - for (const auto& t : tail) { - m_nodes.push_back(t); - set_name(t); - } - - // Create model - auto result = std::make_shared(tail[5]); - m_nodes.push_back(result); - set_name(result); - - ov::ParameterVector params = {input}; - ov::ResultVector results = {result}; - - return std::make_shared(results, params); - } - - std::shared_ptr get_block(const std::shared_ptr& input) { - // Parameters - // input - - // Constants - std::vector> model_c(18, nullptr); - model_c[0] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{0, 2, 1, 3}); - model_c[1] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{1}); - model_c[2] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{0}); - model_c[3] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{2}); - model_c[4] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{0}); - model_c[5] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 1, 1}); - model_c[6] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{1}); - model_c[7] = std::make_shared(ov::element::i64, ov::Shape{1}, std::vector{0}); - model_c[8] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 1, 1}); - model_c[9] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{1, 1, 1, 2}); - model_c[10] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{1, 1, 1, 1}); - model_c[11] = std::make_shared(ov::element::i32, ov::Shape{4}, std::vector{1, 1, 1, 2}); - model_c[12] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1, 1}); - model_c[13] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1, 1}); - model_c[14] = std::make_shared(ov::element::i32, ov::Shape{1, 1, 1, 1}); - model_c[15] = std::make_shared(ov::element::f32, ov::Shape{40, 40}); - model_c[16] = std::make_shared(ov::element::i64, ov::Shape{4}, std::vector{1, 1, 4, 10}); - model_c[17] = std::make_shared(ov::element::i32, ov::Shape{3}, std::vector{1, 1, 40}); - - for (const auto& c : model_c) { - m_nodes.push_back(c); - set_name(c); - } - - // Converts - std::vector> convert(3, nullptr); - convert[0] = std::make_shared(model_c[15], ov::element::f16); - convert[1] = std::make_shared(convert[0], ov::element::i32); - convert[2] = std::make_shared(model_c[12], ov::element::i32); - - for (const auto& c : convert) { - m_nodes.push_back(c); - set_name(c); - } - - // Ops - std::vector> op(16, nullptr); - op[0] = std::make_shared(input, convert[1], false, true); - op[1] = std::make_shared(op[0], model_c[16], false); - op[2] = std::make_shared(op[1], model_c[0]); - op[3] = std::make_shared(op[2]); - op[4] = std::make_shared(op[3], model_c[1], model_c[2]); - op[5] = std::make_shared(op[4], model_c[3], true); - op[6] = std::make_shared(op[5]); - op[7] = std::make_shared(model_c[5], model_c[6], op[6], model_c[7]); - op[8] = std::make_shared(op[2], - model_c[8], - op[7], - model_c[9], - std::vector{1, 1, 1, 1}, - std::vector{1, 1, 1, 1}); - op[9] = std::make_shared(op[2], - op[7], - model_c[10], - model_c[11], - std::vector{1, 1, 1, 1}, - std::vector{1, 1, 1, 1}); - op[10] = std::make_shared(op[9], convert[2]); - op[11] = std::make_shared(std::vector>{op[10], op[8]}, -1); - op[12] = std::make_shared(model_c[13], op[11]); - op[13] = std::make_shared(model_c[14], op[2]); - op[14] = std::make_shared(op[13], op[12]); - op[15] = std::make_shared(op[14], model_c[17], false); - - for (const auto& o : op) { - m_nodes.push_back(o); - set_name(o); - } - - return op[15]; - } - -private: - void set_name(const std::shared_ptr& node) { - node->set_friendly_name("node_" + std::to_string(m_name_idx++)); - } - - std::vector> m_nodes; - size_t m_name_idx; -}; - -TEST(OnlinePartitioningTest, Partitioning_IsTheSame_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto opt_desc = std::make_shared<::intel_npu::OptionsDesc>(); - auto cfg = ::intel_npu::Config(opt_desc); - ::intel_npu::registerNPUWOptions(*opt_desc); - std::map cfg_map = {{ "NPUW_ONLINE_KEEP_BLOCK_SIZE", "9" }}; - cfg.update(cfg_map); - - auto ens = ov::npuw::online::buildPartitioning(model, cfg); - - for (size_t i = 0; i < 100; ++i) { - auto ens_again = ov::npuw::online::buildPartitioning(model, cfg); - EXPECT_TRUE(isEqualEns(ens, ens_again)); - } -} - -TEST(OnlinePartitioningTest, Partitioning_IsTheSame_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto opt_desc = std::make_shared<::intel_npu::OptionsDesc>(); - auto cfg = ::intel_npu::Config(opt_desc); - ::intel_npu::registerNPUWOptions(*opt_desc); - std::map cfg_map = {{ "NPUW_ONLINE_KEEP_BLOCK_SIZE", "9" }}; - cfg.update(cfg_map); - - auto ens = ov::npuw::online::buildPartitioning(model, cfg); - - for (size_t i = 0; i < 100; ++i) { - auto ens_again = ov::npuw::online::buildPartitioning(model, cfg); - EXPECT_TRUE(isEqualEns(ens, ens_again)); - } -} - -TEST(OnlinePartitioningTest, Partitioning_SingleGroup_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->singleGroup(); - EXPECT_EQ(snap->graphSize(), 1); -} - -TEST(OnlinePartitioningTest, Partitioning_SingleGroup_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->singleGroup(); - EXPECT_EQ(snap->graphSize(), 1); -} - -TEST(OnlinePartitioningTest, Partitioning_buildGraph_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - auto g = snap->getGraph(); - for (const auto& nh : g->sorted()) { - ov::npuw::online::Group::GPtr group = g->meta(nh).get(); - EXPECT_EQ(group->size(), 1); - } - EXPECT_EQ(snap->getNodeToGroupMap()->size(), snap->graphSize()); -} - -TEST(OnlinePartitioningTest, Partitioning_buildGraph_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - auto g = snap->getGraph(); - for (const auto& nh : g->sorted()) { - ov::npuw::online::Group::GPtr group = g->meta(nh).get(); - EXPECT_EQ(group->size(), 1); - } - EXPECT_EQ(snap->getNodeToGroupMap()->size(), snap->graphSize()); -} - -TEST(OnlinePartitioningTest, Partitioning_earlyAvoids_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - ov::npuw::online::PassContext ctx; - ctx.avoids = {{ov::npuw::online::PatternType::OP, "Gather", "mydevice"}, {ov::npuw::online::PatternType::OP, "MatMul", "mydevice"}}; - snap->setCtx(ctx); - snap->buildGraph(); - snap->earlyAvoids(); - auto g = snap->getGraph(); - size_t count = 0; - for (const auto& nh : g->sorted()) { - ov::npuw::online::Group::GPtr group = g->meta(nh).get(); - EXPECT_EQ(group->size(), 1); - if (group->avoidedTargets().size() == 1 && *(group->avoidedTargets().begin()) == "mydevice") { - ++count; - } - } - EXPECT_EQ(count, 2); -} - -TEST(OnlinePartitioningTest, Partitioning_earlyAvoids_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - ov::npuw::online::PassContext ctx; - ctx.avoids = {{ov::npuw::online::PatternType::OP, "Gather", "mydevice"}, {ov::npuw::online::PatternType::OP, "MatMul", "mydevice"}}; - snap->setCtx(ctx); - snap->buildGraph(); - snap->earlyAvoids(); - auto g = snap->getGraph(); - size_t count = 0; - for (const auto& nh : g->sorted()) { - ov::npuw::online::Group::GPtr group = g->meta(nh).get(); - EXPECT_EQ(group->size(), 1); - if (group->avoidedTargets().size() == 1 && *(group->avoidedTargets().begin()) == "mydevice") { - ++count; - } - } - EXPECT_EQ(count, 20); -} - -TEST(OnlinePartitioningTest, Partitioning_collectLHF_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes = {10, 10}; - size_t iter = 0; - - snap->repeat([&]{ - snap->collectLHF(); - EXPECT_LT(iter, sizes.size()); - EXPECT_EQ(snap->graphSize(), sizes[iter++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_collectLHF_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes = {82, 82}; - size_t iter = 0; - - snap->repeat([&]{ - snap->collectLHF(); - EXPECT_LT(iter, sizes.size()); - EXPECT_EQ(snap->graphSize(), sizes[iter++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_fuseRemnants_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes = {10, 10}; - size_t iter = 0; - - snap->repeat([&]{ - snap->fuseRemnants(); - EXPECT_LT(iter, sizes.size()); - EXPECT_EQ(snap->graphSize(), sizes[iter++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_fuseRemnants_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes = {75, 38, 19, 10}; - size_t iter = 0; - - snap->repeat([&]{ - snap->fuseRemnants(); - EXPECT_LT(iter, sizes.size()); - EXPECT_EQ(snap->graphSize(), sizes[iter++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_fuseRemnantsExtended_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes = {10, 10}; - size_t iter = 0; - - snap->repeat([&]{ - snap->fuseRemnantsExtended(); - EXPECT_LT(iter, sizes.size()); - EXPECT_EQ(snap->graphSize(), sizes[iter++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_fuseRemnantsExtended_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes = {10, 10}; - size_t iter = 0; - - snap->repeat([&]{ - snap->fuseRemnantsExtended(); - EXPECT_LT(iter, sizes.size()); - EXPECT_EQ(snap->graphSize(), sizes[iter++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_fuseInputs_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes = {15, 14, 14}; - size_t iter = 0; - - snap->repeat([&]{ - snap->fuseInputs(); - EXPECT_LT(iter, sizes.size()); - EXPECT_EQ(snap->graphSize(), sizes[iter++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_fuseInputs_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes = {148, 138, 138}; - size_t iter = 0; - - snap->repeat([&]{ - snap->fuseInputs(); - EXPECT_LT(iter, sizes.size()); - EXPECT_EQ(snap->graphSize(), sizes[iter++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_Compiler_Just_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes_lhf = {10, 10}; - size_t iter_lhf = 0; - - std::vector sizes_fr = {10, 10}; - size_t iter_fr = 0; - - snap->repeat([&] { - snap->collectLHF(); - EXPECT_LT(iter_lhf, sizes_lhf.size()); - EXPECT_EQ(snap->graphSize(), sizes_lhf[iter_lhf++]); - }); - snap->repeat([&] { - snap->fuseRemnants(); - EXPECT_LT(iter_fr, sizes_fr.size()); - EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_Compiler_Just_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - std::vector sizes_lhf = {82, 82}; - size_t iter_lhf = 0; - - std::vector sizes_fr = {41, 21, 11, 10, 10}; - size_t iter_fr = 0; - - snap->repeat([&] { - snap->collectLHF(); - EXPECT_LT(iter_lhf, sizes_lhf.size()); - EXPECT_EQ(snap->graphSize(), sizes_lhf[iter_lhf++]); - }); - snap->repeat([&] { - snap->fuseRemnants(); - EXPECT_LT(iter_fr, sizes_fr.size()); - EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_Compiler_RepeatedBlocks_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - - std::vector sizes_fr = {10, 10}; - size_t iter_fr = 0; - - snap->earlyAvoids(); - snap->earlyRegroup(); - snap->repeatedBlocks(); - EXPECT_EQ(snap->graphSize(), 17); - - auto matches = snap->getMatches(); - EXPECT_EQ(matches.size(), 0); - - snap->repeat([&] { - snap->fuseRemnantsExtended(); - EXPECT_LT(iter_fr, sizes_fr.size()); - EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_Compiler_RepeatedBlocks_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - snap->buildGraph(); - - - std::vector sizes_fr = {12, 12}; - size_t iter_fr = 0; - - snap->earlyAvoids(); - snap->earlyRegroup(); - snap->repeatedBlocks(); - EXPECT_EQ(snap->graphSize(), 18); - - auto matches = snap->getMatches(); - EXPECT_EQ(matches.size(), 1); - - for (const auto& m : matches) { - EXPECT_EQ(m.second.size(), 17); - for (const auto& layers : m.second) { - EXPECT_EQ(layers.size(), 10); - } - } - - snap->repeat([&] { - snap->fuseRemnantsExtended(); - EXPECT_LT(iter_fr, sizes_fr.size()); - EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_Compiler_Compute_SmallModel) { - ModelGenerator mg; - auto model = mg.get_model_without_repeated_blocks(); - - auto snap = std::make_shared(model); - - std::vector sizes_fr = {10, 10}; - size_t iter_fr = 0; - - ov::npuw::online::PassContext ctx; - ctx.isolates = {{ov::npuw::online::PatternType::OP, "Transpose", "test_compute"}, {ov::npuw::online::PatternType::OP, "ScatterUpdate", "test_compute"}}; - ctx.nofolds = {"test_compute"}; - snap->setCtx(ctx); - - snap->buildGraph(); - snap->earlyAvoids(); - snap->earlyRegroup(); - snap->repeatedBlocks(); - EXPECT_EQ(snap->graphSize(), 17); - - auto matches = snap->getMatches(); - EXPECT_EQ(matches.size(), 0); - - snap->repeat([&] { - snap->fuseRemnantsExtended(); - EXPECT_LT(iter_fr, sizes_fr.size()); - EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); - }); -} - -TEST(OnlinePartitioningTest, Partitioning_Compiler_Compute_RepeatedModel) { - ModelGenerator mg; - auto model = mg.get_model_with_repeated_blocks(); - - auto snap = std::make_shared(model); - - std::vector sizes_fr = {10, 10}; - size_t iter_fr = 0; - - ov::npuw::online::PassContext ctx; - ctx.isolates = {{ov::npuw::online::PatternType::OP, "Gather", "test_compute"}, - {ov::npuw::online::PatternType::OP, "ScatterUpdate", "test_compute"}, - {ov::npuw::online::PatternType::OP, "ShapeOf", "test_compute"}, - {ov::npuw::online::PatternType::OP, "Divide", "test_compute"}, - {ov::npuw::online::PatternType::OP, "Floor", "test_compute"}}; - ctx.nofolds = {"test_compute"}; - snap->setCtx(ctx); - - snap->buildGraph(); - snap->earlyAvoids(); - snap->earlyRegroup(); - snap->repeatedBlocks(); - EXPECT_EQ(snap->graphSize(), 29); - - // FIXME: create a config in which there will be repeated blocks - auto matches = snap->getMatches(); - EXPECT_EQ(matches.size(), 0); - - snap->repeat([&] { - snap->fuseRemnantsExtended(); - EXPECT_LT(iter_fr, sizes_fr.size()); - EXPECT_EQ(snap->graphSize(), sizes_fr[iter_fr++]); - }); -} diff --git a/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp b/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp deleted file mode 100644 index 51285c8145ceb6..00000000000000 --- a/src/plugins/intel_npu/tests/unit/npuw/unpack.cpp +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "unpack.hpp" - -namespace { - -const auto TestCases = ::testing::Combine( - ::testing::ValuesIn({ov::element::Type_t::i4}), - ::testing::ValuesIn({ov::element::Type_t::i8, ov::element::Type_t::f16}), - ::testing::ValuesIn({ov::element::Type_t::undefined}), // no used in this test - ::testing::ValuesIn({ov::element::Type_t::undefined}), // no used in this test - ::testing::ValuesIn({3lu, 0lu}), - ::details::ShapesIn({Tensors{input={1, 1, 1, 32};}, - Tensors{input={1,1,1, 128};}, - Tensors{input={1,1,1, 390};}, - Tensors{input={1,1,1, 82};}}), - ::testing::ValuesIn({true, false}), - ::testing::ValuesIn({true, false}) -); - -INSTANTIATE_TEST_SUITE_P(UnpackTests, UnpackTests, - TestCases, - UnpackTests::getTestCaseName); - -const auto TestCasesScale = ::testing::Combine( - ::testing::ValuesIn({ov::element::Type_t::i4}), // TODO: add i8 as input for test - ::testing::ValuesIn({ov::element::Type_t::f16, ov::element::Type_t::f32}), - ::testing::ValuesIn({ov::element::Type_t::f16, ov::element::Type_t::f32}), - ::testing::ValuesIn({ov::element::Type_t::undefined}), // no used in this test - ::testing::ValuesIn({3lu, 0lu}), - ::details::ShapesIn({Tensors{input={1,32, 128}; scale = {1, 32, 1};}, - Tensors{input={32, 128}; scale = {32, 1};}, - Tensors{input={64, 160}; scale = {64, 1};}, - Tensors{input={1024, 4}; scale = {64, 1};}, - Tensors{input={1, 1, 1024, 4}; scale = {1, 1, 64, 1};}}), - ::testing::ValuesIn({true, false}), - ::testing::ValuesIn({true, false}) -); - -INSTANTIATE_TEST_SUITE_P(UnpackWithScaleTests, UnpackWithScaleTests, - TestCasesScale, - UnpackWithScaleTests::getTestCaseName); - - -const auto TestCasesScaleAndZeroPoints = ::testing::Combine( - ::testing::ValuesIn({ov::element::Type_t::u4}), - ::testing::ValuesIn({ov::element::Type_t::f16}), - ::testing::ValuesIn({ov::element::Type_t::f16}), - ::testing::ValuesIn({ov::element::Type_t::u4}), - ::testing::ValuesIn({3lu, 0lu}), - ::details::ShapesIn({Tensors{input={1,32, 128}; scale = {1, 32, 1};}, - Tensors{input={1,64, 160}; scale = {1, 64, 1};}, - Tensors{input={1,1024, 4}; scale = {1, 64, 1};}, - Tensors{input={1,1, 1024, 4}; scale = {1, 1, 64, 1};}, - Tensors{input={64, 1}; scale = {64, 1};}}), - ::testing::ValuesIn({true, false}), - ::testing::ValuesIn({true, false}) -); - -INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPoint, UnpackTestsWithScaleAndZeroPoint, - TestCasesScaleAndZeroPoints, - UnpackTestsWithScaleAndZeroPoint::getTestCaseName); - -const auto TestCasesScaleAndZeroPoints2 = ::testing::Combine( - ::testing::ValuesIn({ov::element::Type_t::u4}), - ::testing::ValuesIn({ov::element::Type_t::f16}), - ::testing::ValuesIn({ov::element::Type_t::f32}), - ::testing::ValuesIn({ov::element::Type_t::f32}), - ::testing::ValuesIn({3lu, 0lu}), - ::details::ShapesIn({Tensors{input={32, 32, 64}; scale = {32, 1, 64};}, - Tensors{input={64, 64, 128}; scale = {64, 1, 128};}, - Tensors{input={64, 32, 32}; scale = {64, 1, 32};}}), - ::testing::ValuesIn({true, false}), - ::testing::ValuesIn({true, false}) -); - -INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPointTest2, UnpackTestsWithScaleAndZeroPointTest2, - TestCasesScaleAndZeroPoints2, - UnpackTestsWithScaleAndZeroPointTest2::getTestCaseName); - -const auto TestCasesScaleAndZeroPoints3 = ::testing::Combine( - ::testing::ValuesIn({ov::element::Type_t::u4}), - ::testing::ValuesIn({ov::element::Type_t::f16}), - ::testing::ValuesIn({ov::element::Type_t::f16}), - ::testing::ValuesIn({ov::element::Type_t::u4}), - ::testing::ValuesIn({3lu, 0lu}), - ::details::ShapesIn({Tensors{input={1, 32, 128}; scale = {1, 32, 1}; zerop = {1, 32, 1};}, - Tensors{input={16, 64, 64}; scale = {16, 64, 1}; zerop = {16, 64, 1};}, - Tensors{input={1, 1024, 4}; scale = {1, 64, 1}; zerop = {1, 32, 1};}}), - ::testing::ValuesIn({true, false}), - ::testing::ValuesIn({true, false}) -); - -INSTANTIATE_TEST_SUITE_P(UnpackTestsWithScaleAndZeroPointTest3, UnpackTestsWithScaleAndZeroPointTest3, - TestCasesScaleAndZeroPoints3, - UnpackTestsWithScaleAndZeroPointTest3::getTestCaseName); - -} // anonymous namespace diff --git a/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp b/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp deleted file mode 100644 index da5bb4e4720f3e..00000000000000 --- a/src/plugins/intel_npu/tests/unit/npuw/unpack.hpp +++ /dev/null @@ -1,628 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include -#include - -#include "openvino/runtime/make_tensor.hpp" - -#include "util.hpp" - -namespace { - -#define ASSERT_NO_THROW_WITH_MESSAGE(code) do{ \ - try {\ - code;\ - }catch (const std::exception &ex ) {\ - FAIL()<> 4) | ((x & (1 << 6)) >> 4) | ((x & (1 << 5)) >> 4) | ((x & (1 << 4)) >> 4); -} - -inline int8_t lo4(int8_t x) { - return (x & (1 << 3)) | (x & (1 << 2)) | (x & (1 << 1)) | (x & (1 << 0)); -} - -inline uint8_t hi4(uint8_t x) { - return x >> 4; -} - -inline uint8_t lo4(uint8_t x) { - return x & 0x0F; -} - -inline int8_t upc(int8_t h) { - return h | (-((h & (1 << 3)) >> 3) & (-8)); -} - -typedef unsigned short ushort; -typedef unsigned int uint; - -float half_to_float(const ushort x) { - - __m128i halfVector = _mm_cvtsi32_si128(x); - __m128 floatVector = _mm_cvtph_ps(halfVector); - return _mm_cvtss_f32(floatVector); -} - -ushort float_to_half(const float x) { - __m128 floatVector = _mm_set_ss(x); - __m128i halfVector = _mm_cvtps_ph(floatVector, _MM_FROUND_TO_NEAREST_INT); - return _mm_extract_epi16(halfVector, 0); -} - -inline uint16_t int2hfloat(int8_t x) -{ - float inputFl32 = static_cast(x); - float* inputFl32_ptr = &inputFl32; - unsigned int* fltInt32Ptr = reinterpret_cast(inputFl32_ptr); - unsigned int fltInt32 = *fltInt32Ptr; - unsigned short fltInt16; - - fltInt16 = (fltInt32 >> 31) << 5; - unsigned short tmp = (fltInt32 >> 23) & 0xff; - tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27); - fltInt16 = (fltInt16 | tmp) << 10; - fltInt16 |= (fltInt32 >> 13) & 0x3ff; - - return fltInt16; -} - - -void unpack(const int8_t* in, int8_t* out, int size) { - for (int i = 0; i < size / 2; i++) { - *(out++) = upc(lo4(*in)); - *(out++) = upc(hi4(*in)); - in++; - } -} - -void unpack_i4f16(const int8_t* in, int8_t* out, int size) { - uint16_t *hFloatOut = reinterpret_cast(out); - - for (int i = 0; i < size / 2; i++) { - *(hFloatOut++) = int2hfloat(upc(lo4(*in))); - *(hFloatOut++) = int2hfloat(upc(hi4(*in))); - in++; - } -} - -/*u4 order*/ -void unpack_u4f32(const int8_t* in, float* out, int size) { - for (int i = 0; i < size / 2; i++) { - *(out++) = static_cast(lo4(*in)); - *(out++) = static_cast(hi4(*in)); - in++; - } -} - -template -::testing::AssertionResult fp16ArraysMatch(const T &actual, - const T &expected, - const T &i4Input, - bool int4 = 1 /*i4 or u4*/){ - for (size_t i = 0; i < expected.size() / 2; ++i) { - - int int8Input[] ={ - details::lo4(i4Input[i / 2]), - details::hi4(i4Input[i / 2]) - }; - - if (int4) { - int8Input[0] = details::upc(int8Input[1]); - int8Input[1] = details::upc(int8Input[0]); - }; - - auto fp16ref = int{*((uint16_t*)expected.data() + i)}; - auto fp16out = int{*((uint16_t*)actual.data() + i)}; - -#define _P(x) std::dec << std::setw(5) << (x) << '(' << std::setw(4) << std::hex << (x) << ')' - if (fp16ref != fp16out) { - return ::testing::AssertionFailure() << std::dec << std::setw(4) << i << ", i4:" - << std::setw(2) << int8Input[i % 2] - << " | ref " << _P(fp16ref) - << ", test " << _P(fp16out) << "\n"; - } -#undef _P - - } - - return ::testing::AssertionSuccess(); -} - -} // namespace details - -using ShapesInitializer = std::function&, std::vector&, std::vector&)>; - - -using UnpackTestsParams = std::tuple< - ov::element::Type_t, // fromPrecision - ov::element::Type_t, // toPrecision - ov::element::Type_t, // scalePrecision - ov::element::Type_t, // zeroPointPrecision - unsigned long, // nPartitions - ShapesInitializer, // input_shape , scale_shape, zerop initializer - bool, // use parallel_for - bool // strict partitioning - >; - -class UnpackTestsBase { -protected: - ov::element::Type fromType; - ov::element::Type toType; - ov::element::Type scaleType; - ov::element::Type zeropType; - std::shared_ptr from, to, scale, zerop; - - std::vector input; - std::vector output; - std::vector ref_output; - std::vector scalesStorage; - std::vector zeropStorage; - float zeropValue; - ov::Shape input_shape; - ov::Shape scale_shape; - ov::Shape zerop_shape; - - size_t nPartitions; - bool useParallelFor = false; - bool strictPartitions = false; - - void make_zeropoints() { - if (zeropType == ov::element::undefined) { - return; - } - - const std::vector zeropValues = {15.0f, 12.0f, 0.0f, 31.0f}; - const size_t nElements = shape_size(zerop_shape); - - // Set zeropValue if there's only one element - if (nElements == 1) { - zeropValue = zeropValues.front(); - } - - // Determine the size of the storage based on the type and resize the storage vector - if (zeropType == ov::element::Type_t::u4) { - zeropStorage.resize((nElements + 1) / 2, 0); // Each u4 zeropoint is 4 bits, so two zeropoints fit in one byte - } else if (zeropType == ov::element::Type_t::f32) { - zeropStorage.resize(nElements * sizeof(float), 0); - } else { - ASSERT_TRUE(zeropType == ov::element::u4 || zeropType == ov::element::f32); - } - - // Fill the storage with the appropriate values - if (zeropType == ov::element::Type_t::u4) { - for (size_t i = 0; i < nElements; ++i) { - uint8_t zeropValueU4 = static_cast(zeropValues[i % zeropValues.size()]) & 0x0F; - size_t byteIndex = i / 2; - if (i % 2 == 0) { - zeropStorage[byteIndex] = zeropValueU4; - } else { - zeropStorage[byteIndex] = (zeropValueU4 << 4); - } - } - } else if (zeropType == ov::element::Type_t::f32) { - float* ptrWork = reinterpret_cast(zeropStorage.data()); - for (size_t i = 0; i < nElements; ++i) { - ptrWork[i] = zeropValues[i % zeropValues.size()]; - } - } - - // Create the tensor - zerop = ov::make_tensor(zeropType, zerop_shape, zeropStorage.data()); - } - - void make_scales() { - if (scaleType == ov::element::undefined) { - return; - } - ASSERT_TRUE(scaleType == ov::element::f16 || scaleType == ov::element::f32); - size_t nElements = shape_size(scale_shape); - - // creating custom scale factors - const size_t nScaleBytes = scaleType.bitwidth() * nElements / 8; - - std::vector sc(nElements); - float coeffTable[] = { - 0.1f, - 0.5f, - 1.f, - 2.f - }; - for (size_t i = 0; i != nElements; i++) { - sc[i] = coeffTable[i % (sizeof (coeffTable) / sizeof(*coeffTable))]; - } - scalesStorage.resize(nScaleBytes); - - if (scaleType == ov::element::f16) { - uint16_t * ptrWork = reinterpret_cast(scalesStorage.data()); - for (size_t i = 0; i != nElements; i++) { - ptrWork[i] = details::float_to_half(sc[i]); - } - } - if (scaleType == ov::element::f32) { - float* ptrWork = reinterpret_cast(scalesStorage.data()); - for (size_t i = 0; i != nElements; i++) { - ptrWork[i] = sc[i]; - } - } - scale = ov::make_tensor(scaleType, scale_shape, scalesStorage.data()); - } - - void make_input() { - - size_t nElements = shape_size(input_shape); - - ASSERT_EQ((fromType.bitwidth() * nElements) % 8, 0) << "Input len has to be byte boundary aligned, but was " - << fromType.bitwidth() * nElements << " bits"; - ASSERT_EQ((toType.bitwidth() * nElements) % 8, 0) << "Output len has to be byte boundary aligned"; - - const size_t nInputBytes = fromType.bitwidth() * nElements / 8; - const size_t nOutputBytes = toType.bitwidth() * nElements / 8; - - input.resize(nInputBytes); - ref_output.resize(nOutputBytes); - output.resize(nOutputBytes); - std::fill(ref_output.begin(), ref_output.end(), 0); - std::fill(output.begin(), output.end(), 0); - - std::array input_local = { - 0x0A, 0x0B, 0x1C, 0x1D, 0x2E, 0x2F, 0x35, 0x36, - 0x4A, 0x4B, 0x5A, 0x5B, 0x6A, 0x6B, 0x7A, 0x7B, - 0x0C, 0x0D, 0x1C, 0x1D, 0x2C, 0x2D, 0x3C, 0x3D, - 0x4C, 0x4D, 0x5C, 0x5D, 0x6C, 0x6D, 0x7C, 0x7D, - }; - - for (size_t idx = 0, k = 0; k < nInputBytes; k++, idx = (idx + 1) % input_local.size()) { - input[k] = input_local[idx]; - } - - from = ov::make_tensor(fromType, input_shape, input.data()); - to = ov::make_tensor(toType, input_shape, output.data()); - } -public: - void SetUp(const UnpackTestsParams & getParam) { - ShapesInitializer shapeInit; - - std::tie(fromType, toType, scaleType, zeropType, nPartitions, shapeInit, useParallelFor, strictPartitions) = getParam; - - std::vector input, scale, zerop; - shapeInit(input, scale, zerop); - - input_shape = ov::Shape{input.begin(), input.end()}; - scale_shape = ov::Shape{scale.begin(), scale.end()}; - if (zerop.empty()) { - zerop_shape = ov::Shape({1}); - } else { - zerop_shape = ov::Shape{zerop.begin(), zerop.end()}; - } - - make_input(); - make_scales(); - make_zeropoints(); - - make_ref_output(); - } - std::string ToString() const { - std::ostringstream result; - result << (isNegative() ? "NEGATIVE_" : "") - <<"["; - - for (size_t i = 0; i != input_shape.size(); i++) { - result << input_shape[i] << ((i + 1 == input_shape.size()) ? "" : "x"); - } - result <<"]" - << "_p" << nPartitions - << (strictPartitions ? "_SP" : "") - << (useParallelFor ? "_parallel" : "_serial") - << "_from_" << fromType - << "_to_" << toType; - if (scaleType != ov::element::Type_t::undefined) - result << "_scale_" << scaleType; - if (zeropType != ov::element::Type_t::undefined) - result << "_zerop_" << zeropType; - - return result.str(); - } - - /** - * Negative test cases has to be carefully reviewed, to still remain positive runs at some points - * @return - */ - virtual bool isNegative() const { - return false; - } - - virtual void make_ref_output() { - size_t nElements = 1; - for (size_t dim : input_shape) { - nElements *= dim; - } - if (toType == ov::element::i8) { - details::unpack(input.data(), ref_output.data(), static_cast(nElements)); - } else if (toType == ov::element::f16) { - details::unpack_i4f16(input.data(), ref_output.data(), static_cast(nElements)); - } - } -}; - -template -class UnpackTestsTmpl : - public ::testing::Test, - public T, - public ::testing::WithParamInterface { -protected: - - void SetUp() override { - T::SetUp(GetParam()); - } -public: - static std::string getTestCaseName(const testing::TestParamInfo& obj) { - T _bt; - _bt.SetUp(obj.param); - return _bt.ToString(); - } -}; - -using UnpackTests = UnpackTestsTmpl; -class UnpackTestsRef : public UnpackTests {}; - -TEST_P(UnpackTests, i4) { - ASSERT_NO_THROW_WITH_MESSAGE(ov::npuw::util::unpack(from, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); - ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input)); -} - -class UnpackWithScaleTestsBase : public UnpackTestsBase { -protected: - bool isNegative() const override { - if (scale_shape.size() != 3 && scale_shape.size() != 2) return true; - if (input_shape.back() % 64) return true; - if ((from->get_size() / scale->get_size()) % 64) return true; - if (toType != ov::element::f16) return true; - - return false; - } - - void make_ref_output() override { - if (isNegative()) return; - - size_t nElements = from->get_size(); - - const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size(); - - details::unpack_i4f16(input.data(), ref_output.data(), static_cast(nElements)); - - // lets apply per channel scale - uint16_t * pRef = reinterpret_cast(ref_output.data()); - uint16_t * pScale_f16 = reinterpret_cast(scale->data()); - float * pScale_f32 = reinterpret_cast(scale->data()); - - for (size_t i = 0; i < scale->get_size(); i++) { - for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) { - float ref_scaled = details::half_to_float(pRef[0]); - if (scaleType == ov::element::f32) { - ref_scaled *= pScale_f32[0]; - } else if (scaleType == ov::element::f16) { - ref_scaled *= details::half_to_float(pScale_f16[0]); - } - *pRef = details::float_to_half(ref_scaled); - pRef++; - } - pScale_f32++; - pScale_f16++; - } - } - -}; - -using UnpackWithScaleTests = UnpackTestsTmpl; - - -TEST_P(UnpackWithScaleTests, i4_scale) { - ASSERT_NO_THROW_IF(!isNegative(), - ov::npuw::util::unpack(from, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); - if (!isNegative()) { - ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input)); - } -} - - -class UnpackTestsWithScaleAndZeroPointBase : public UnpackTestsBase { -protected: - bool isNegative() const override { - if (scale_shape.size() != 3 && scale_shape.size() != 2) return true; - if (input_shape.back() % 64) return true; - - return false; - } - - void make_ref_output() override { - if (isNegative()) return; - - size_t nElements = from->get_size(); - - const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size(); - - std::vector floatRef(nElements); - details::unpack_u4f32(input.data(), floatRef.data(), static_cast(nElements)); - - - // lets apply per channel scale - uint16_t * pRef = reinterpret_cast(ref_output.data()); - float * pFloatRef = reinterpret_cast(floatRef.data()); - const uint16_t * pScale_f16 = reinterpret_cast(scale->data()); - const float * pScale_f32 = reinterpret_cast(scale->data()); - - for (size_t i = 0; i < scale->get_size(); i++) { - for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) { - // applying zeropoint - float ref_scaled = *pFloatRef - zeropValue; - - if (scaleType == ov::element::f32) { - ref_scaled *= pScale_f32[0]; - } else if (scaleType == ov::element::f16) { - ref_scaled *= details::half_to_float(pScale_f16[0]); - } - *pRef = details::float_to_half(ref_scaled); - - pFloatRef++; - pRef++; - } - pScale_f32++; - pScale_f16++; - } - } -}; - -using UnpackTestsWithScaleAndZeroPoint = UnpackTestsTmpl; - -TEST_P(UnpackTestsWithScaleAndZeroPoint, u4) { - ASSERT_NO_THROW_IF(!isNegative(), - ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); - if (!isNegative()) { - ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false)); - } -} - -class UnpackTestsWithScaleAndZeroPoint2 : public UnpackTestsWithScaleAndZeroPointBase { -protected: - bool isNegative() const override { - if (input_shape.back() % 64 || input_shape.size() != 3) return true; - if (scale_shape.back() % 64 || scale_shape.size() != 3) return true; - - return false; - } - - void make_ref_output() override { - if (isNegative()) return; - - size_t nElements = from->get_size(); - const auto from_shape = from->get_shape(); - - const size_t C = from_shape[from_shape.size() - 3]; - const size_t H = from_shape[from_shape.size() - 2]; - const size_t W = from_shape[from_shape.size() - 1]; - - std::vector floatRef(nElements); - details::unpack_u4f32(input.data(), floatRef.data(), static_cast(nElements)); - - uint16_t * pRef = reinterpret_cast(ref_output.data()); - float * pFloatRef = reinterpret_cast(floatRef.data()); - const uint16_t * pScale_f16 = reinterpret_cast(scale->data()); - const float * pScale_f32 = reinterpret_cast(scale->data()); - - for (size_t c = 0; c < C; ++c) { - for (size_t h = 0; h < H; ++h) { - for (size_t w = 0; w < W; ++w) { - size_t input_index = w + W * h + W * H * c; - size_t scale_index = w + W * c; - float ref_scaled = pFloatRef[input_index] - zeropValue; - if (scaleType == ov::element::f32) { - ref_scaled *= pScale_f32[scale_index]; - } else if (scaleType == ov::element::f16) { - ref_scaled *= details::half_to_float(pScale_f16[scale_index]); - } - pRef[w + W * h + c * W * H] = details::float_to_half(ref_scaled); - } - } - } - } -}; - -using UnpackTestsWithScaleAndZeroPointTest2 = UnpackTestsTmpl; - -TEST_P(UnpackTestsWithScaleAndZeroPointTest2, u4) { - ASSERT_NO_THROW_IF(!isNegative(), - ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); - if (!isNegative()) { - ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false)); - } -} - -class UnpackTestsWithScaleAndZeroPoint3 : public UnpackTestsWithScaleAndZeroPointBase { -protected: - bool isNegative() const override { - if (scale_shape.size() != 3 || zerop_shape.size() != 3) return true; - if (input_shape[2] % 64 || input_shape.size() != 3) return true; - - return false; - } - - void make_ref_output() override { - if (isNegative()) return; - - size_t nElements = from->get_size(); - - const size_t nOutputElementsPerScale = ref_output.size() / (toType.bitwidth() / 8) / scale->get_size(); - - std::vector floatRef(nElements); - details::unpack_u4f32(input.data(), floatRef.data(), static_cast(nElements)); - - - // lets apply per channel scale - uint16_t * pRef = reinterpret_cast(ref_output.data()); - const uint8_t* pZer = static_cast(zerop->data()); - float * pFloatRef = reinterpret_cast(floatRef.data()); - const uint16_t * pScale_f16 = reinterpret_cast(scale->data()); - const float * pScale_f32 = reinterpret_cast(scale->data()); - - for (size_t i = 0; i < scale->get_size(); i++) { - float zeroPointValue = static_cast((i % 2 == 0) ? details::lo4(pZer[i / 2]) : details::hi4(pZer[i / 2])); - for (size_t sc = 0; sc != nOutputElementsPerScale; sc++) { - // applying zeropoint - float ref_scaled = *pFloatRef - zeroPointValue; - - if (scaleType == ov::element::f32) { - ref_scaled *= pScale_f32[0]; - } else if (scaleType == ov::element::f16) { - ref_scaled *= details::half_to_float(pScale_f16[0]); - } - *pRef = details::float_to_half(ref_scaled); - - pFloatRef++; - pRef++; - } - pScale_f32++; - pScale_f16++; - } - } -}; - -using UnpackTestsWithScaleAndZeroPointTest3 = UnpackTestsTmpl; - -TEST_P(UnpackTestsWithScaleAndZeroPointTest3, u4) { - ASSERT_NO_THROW_IF(!isNegative(), - ov::npuw::util::unpack(from, zerop, scale, to, ov::npuw::util::UnpackOptions{useParallelFor, nPartitions, strictPartitions})); - if (!isNegative()) { - ASSERT_TRUE(details::fp16ArraysMatch(output, ref_output, input, false)); - } -} - -#define Tensors [](std::vector& input, std::vector&scale, std::vector&zerop) - - -namespace details { -::testing::internal::ParamGenerator::value_type> ShapesIn( - const std::vector& container) { - return ::testing::ValuesIn(container.begin(), container.end()); -} - -} // namespace details -} // anonymous namespace