From ef4a3e2214e3097420fdb4aa873fbd3800125689 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 30 Oct 2024 02:57:59 +0800 Subject: [PATCH 1/8] identify specific cpu and apply latency with Pcore and Ecore on Windows --- .../dev_api/openvino/runtime/system_conf.hpp | 8 + src/inference/src/os/cpu_map_info.hpp | 4 + src/inference/src/os/win/win_system_conf.cpp | 6 + src/inference/src/system_conf.cpp | 10 + .../unit/cpu_map_parser/parser_windows.cpp | 185 ++++++++++++++++++ .../intel_cpu/src/cpu_streams_calculation.cpp | 9 +- 6 files changed, 219 insertions(+), 3 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/system_conf.hpp b/src/inference/dev_api/openvino/runtime/system_conf.hpp index 59d56dfdd49d73..348341ac7614cc 100644 --- a/src/inference/dev_api/openvino/runtime/system_conf.hpp +++ b/src/inference/dev_api/openvino/runtime/system_conf.hpp @@ -69,6 +69,14 @@ OPENVINO_RUNTIME_API int get_number_of_logical_cpu_cores(bool big_cores_only = f */ OPENVINO_RUNTIME_API int get_number_of_blocked_cores(); +/** + * @brief Returns number of SOC E cores. Please note that this is a temporary interface for performance + * optimization on a specific platform. May be removed in future release. + * @ingroup ov_dev_api_system_conf + * @return Number of SOC E cores. + */ +OPENVINO_RUNTIME_API int get_number_of_soc_ecores(); + /** * @brief Checks whether CPU supports SSE 4.2 capability * @ingroup ov_dev_api_system_conf diff --git a/src/inference/src/os/cpu_map_info.hpp b/src/inference/src/os/cpu_map_info.hpp index 497b25c3b68153..c6e1ebde800ba8 100644 --- a/src/inference/src/os/cpu_map_info.hpp +++ b/src/inference/src/os/cpu_map_info.hpp @@ -46,6 +46,8 @@ class CPU { int _sockets = 0; int _cores = 0; int _blocked_cores = 0; + int _org_processors = 0; + int _processors_with_l3 = 0; std::vector> _org_proc_type_table; std::vector> _proc_type_table; std::vector> _cpu_mapping_table; @@ -157,6 +159,7 @@ void get_cpu_mapping_from_cores(const int _processors, * @param[out] _sockets total number for sockets in system * @param[out] _cores total number for physical CPU cores in system * @param[out] _blocked_cores total number for blocked processors in system + * @param[out] _processors_with_l3 total number for processors with L3 cache * @param[out] _proc_type_table summary table of number of processors per type * @param[out] _cpu_mapping_table CPU mapping table for each processor * @return @@ -168,6 +171,7 @@ void parse_processor_info_win(const char* base_ptr, int& _sockets, int& _cores, int& _blocked_cores, + int& _processors_with_l3, std::vector>& _proc_type_table, std::vector>& _cpu_mapping_table); #endif diff --git a/src/inference/src/os/win/win_system_conf.cpp b/src/inference/src/os/win/win_system_conf.cpp index f0ea4f181896ac..d12085dc40becb 100644 --- a/src/inference/src/os/win/win_system_conf.cpp +++ b/src/inference/src/os/win/win_system_conf.cpp @@ -36,9 +36,11 @@ CPU::CPU() { _sockets, _cores, _blocked_cores, + _processors_with_l3, _proc_type_table, _cpu_mapping_table); _org_proc_type_table = _proc_type_table; + _org_processors = _processors; // ensure that get_org_numa_id and get_org_socket_id can return the correct value for (size_t i = 0; i < _cpu_mapping_table.size(); i++) { @@ -62,6 +64,7 @@ void parse_processor_info_win(const char* base_ptr, int& _sockets, int& _cores, int& _blocked_cores, + int& _processors_with_l3, std::vector>& _proc_type_table, std::vector>& _cpu_mapping_table) { std::vector list; @@ -218,6 +221,9 @@ void parse_processor_info_win(const char* base_ptr, _proc_type_table[0][MAIN_CORE_PROC]++; } } + } else if ((info->Relationship == RelationCache) && (info->Cache.Level == 3)) { + MaskToList(info->Cache.GroupMask.Mask); + _processors_with_l3 = list_len; } } _sockets++; diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp index 27c671d07ad5c9..1e41cd6b0429a1 100644 --- a/src/inference/src/system_conf.cpp +++ b/src/inference/src/system_conf.cpp @@ -294,6 +294,11 @@ int get_number_of_blocked_cores() { return cpu._blocked_cores; } +int get_number_of_soc_ecores() { + CPU& cpu = cpu_info(); + return cpu._org_processors - cpu._processors_with_l3; +} + bool is_cpu_map_available() { CPU& cpu = cpu_info(); return cpu._proc_type_table.size() > 0; @@ -487,6 +492,11 @@ int get_number_of_blocked_cores() { return cpu._blocked_cores; } +int get_number_of_soc_ecores() { + CPU& cpu = cpu_info(); + return cpu._org_processors - cpu._processors_with_l3; +} + int get_org_socket_id(int socket_id) { CPU& cpu = cpu_info(); auto iter = cpu._socketid_mapping_table.find(socket_id); diff --git a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp index 2c87b97562e975..edcf865cc3f124 100644 --- a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp +++ b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp @@ -34,6 +34,7 @@ struct WinCpuMapTestCase { int _sockets; int _cores; int _blocked_cores; + int _processors_with_l3; std::vector> _proc_type_table; std::vector> _cpu_mapping_table; std::string system_info; @@ -58,6 +59,7 @@ class WinCpuMapParserTests : public ov::test::TestsCommon, int test_sockets = 0; int test_cores = 0; int test_blocked_cores = 0; + int test_processors_with_l3 = 0; unsigned long len = (unsigned long)(test_len / 2); std::vector> test_proc_type_table; std::vector> test_cpu_mapping_table; @@ -69,6 +71,7 @@ class WinCpuMapParserTests : public ov::test::TestsCommon, test_sockets, test_cores, test_blocked_cores, + test_processors_with_l3 test_proc_type_table, test_cpu_mapping_table); @@ -77,6 +80,7 @@ class WinCpuMapParserTests : public ov::test::TestsCommon, ASSERT_EQ(test_data._sockets, test_sockets); ASSERT_EQ(test_data._cores, test_cores); ASSERT_EQ(test_data._blocked_cores, test_blocked_cores); + ASSERT_EQ(test_data._processors_with_l3, test_processors_with_l3); ASSERT_EQ(test_data._proc_type_table, test_proc_type_table); ASSERT_EQ(test_data._cpu_mapping_table, test_cpu_mapping_table); } @@ -88,6 +92,7 @@ WinCpuMapTestCase _2sockets_104cores_hyperthreading = { 2, // param[expected out]: total 2 sockets on this simulated platform 104, // param[expected out]: total 104 CPU cores on this simulated platform 0, // param[expected out]: total 0 processors on this simulated platform are blocked + 64, // param[expected out]: total 64 processors have L3 cache in one package on this simulated platform {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, // param[expected out]: The proc_type_table of this simulated platform @@ -703,6 +708,7 @@ WinCpuMapTestCase _2sockets_48cores_hyperthreading = { 2, 48, 0, + 48, {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -994,6 +1000,7 @@ WinCpuMapTestCase _2sockets_36cores_hyperthreading = { 2, 36, 0, + 36, {{72, 36, 0, 36, -1, -1}, {36, 18, 0, 18, 0, 0}, {36, 18, 0, 18, 1, 1}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1187,6 +1194,7 @@ WinCpuMapTestCase _2sockets_48cores = { 2, 48, 0, + 24, {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, { {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, @@ -1454,6 +1462,7 @@ WinCpuMapTestCase _1sockets_24cores_hyperthreading_set1 = { 1, 24, 0, + 32, {{32, 8, 16, 8, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1565,6 +1574,7 @@ WinCpuMapTestCase _1sockets_24cores_hyperthreading_set2 = { 1, 24, 0, + 32, {{32, 8, 16, 8, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1681,12 +1691,119 @@ WinCpuMapTestCase _1sockets_24cores_hyperthreading_set2 = { "00ffffffff00000000"}, }; + +WinCpuMapTestCase _1sockets_24cores = { + 24, + 1, + 1, + 24, + 0, + 24, + {{24, 8, 16, 0, 0, 0}}, + {"0300000030000000000000000000000000000000000000000000000000000100ffffff0000000000000000000000000000000000300000000" + "00100000000000000000000000000000000000000000100010000000000000000000000000000000700000030000000000000000000000000" + "000000000000000000000000000100010000000000000000000000000000000200000038000000010c400000c000000200000000000000000" + "00000000000000000000000000100010000000000000000000000000000000200000038000000011040000000010001000000000000000000" + "0000000000000000000000000100010000000000000000000000000000000200000038000000020c400000003000000000000000000000000" + "000000000000000000000000100010000000000000000000000000000000200000038000000030c4000000040020000000000000000000000" + "00000000000000000000000100ffffff000000000000000000000000000000000030000000000100000000000000000000000000000000000" + "00000010002000000000000000000000000000000070000003000000000000000000000000000000000000000000000000000010002000000" + "0000000000000000000000000200000038000000010c400000c00000020000000000000000000000000000000000000000000100020000000" + "00000000000000000000000020000003800000001104000000001000100000000000000000000000000000000000000000001000200000000" + "00000000000000000000000200000038000000020c40000000300000000000000000000000000000000000000000000000010002000000000" + "00000000000000000000000000000300000000000000000000000000000000000000000000000000001000400000000000000000000000000" + "000007000000300000000000000000000000000000000000000000000000000001003c0000000000000000000000000000000200000038000" + "00001084000008000000200000000000000000000000000000000000000000001000400000000000000000000000000000002000000380000" + "00010840000000010001000000000000000000000000000000000000000000010004000000000000000000000000000000020000003800000" + "002104000000040000000000000000000000000000000000000000000000001003c0000000000000000000000000000000000000030000000" + "00000000000000000000000000000000000000000000010008000000000000000000000000000000020000003800000001084000008000000" + "20000000000000000000000000000000000000000000100080000000000000000000000000000000200000038000000010840000000010001" + "00000000000000000000000000000000000000000001000800000000000000000000000000000000000000300000000000000000000000000" + "00000000000000000000000000100100000000000000000000000000000000200000038000000010840000080000002000000000000000000" + "00000000000000000000000001001000000000000000000000000000000002000000380000000108400000000100010000000000000000000" + "00000000000000000000000010010000000000000000000000000000000000000003000000000000000000000000000000000000000000000" + "00000001002000000000000000000000000000000002000000380000000108400000800000020000000000000000000000000000000000000" + "00000010020000000000000000000000000000000020000003800000001084000000001000100000000000000000000000000000000000000" + "00000100200000000000000000000000000000000000000030000000000000000000000000000000000000000000000000000100400000000" + "000000000000000000000000700000030000000000000000000000000000000000000000000000000000100c0030000000000000000000000" + "00000002000000380000000108400000800000020000000000000000000000000000000000000000000100400000000000000000000000000" + "00000020000003800000001084000000001000100000000000000000000000000000000000000000001004000000000000000000000000000" + "000002000000380000000210400000004000000000000000000000000000000000000000000000000100c0030000000000000000000000000" + "00000000000300000000000000000000000000000000000000000000000000001008000000000000000000000000000000002000000380000" + "00010840000080000002000000000000000000000000000000000000000000010080000000000000000000000000000000020000003800000" + "00108400000000100010000000000000000000000000000000000000000000100800000000000000000000000000000000000000030000000" + "00000000000000000000000000000000000000000000010000010000000000000000000000000000020000003800000001084000008000000" + "20000000000000000000000000000000000000000000100000100000000000000000000000000000200000038000000010840000000010001" + "00000000000000000000000000000000000000000001000001000000000000000000000000000000000000300000000000000000000000000" + "00000000000000000000000000100000200000000000000000000000000000200000038000000010840000080000002000000000000000000" + "00000000000000000000000001000002000000000000000000000000000002000000380000000108400000000100010000000000000000000" + "00000000000000000000000010000020000000000000000000000000000000000003000000000010000000000000000000000000000000000" + "00000001000004000000000000000000000000000007000000300000000000000000000000000000000000000000000000000001000004000" + "00000000000000000000000000200000038000000010c400000c0000002000000000000000000000000000000000000000000010000040000" + "00000000000000000000000002000000380000000110400000000100010000000000000000000000000000000000000000000100000400000" + "000000000000000000000000200000038000000020c4000000030000000000000000000000000000000000000000000000001000004000000" + "00000000000000000000000000000030000000000100000000000000000000000000000000000000000100000800000000000000000000000" + "00000070000003000000000000000000000000000000000000000000000000000010000080000000000000000000000000000020000003800" + "0000010c400000c00000020000000000000000000000000000000000000000000100000800000000000000000000000000000200000038000" + "00001104000000001000100000000000000000000000000000000000000000001000008000000000000000000000000000002000000380000" + "00020c40000000300000000000000000000000000000000000000000000000010000080000000000000000000000000000000000003000000" + "00001000000000000000000000000000000000000000001000010000000000000000000000000000007000000300000000000000000000000" + "00000000000000000000000000000100001000000000000000000000000000000200000038000000010c400000c0000002000000000000000" + "00000000000000000000000000001000010000000000000000000000000000002000000380000000110400000000100010000000000000000" + "000000000000000000000000000100001000000000000000000000000000000200000038000000020c4000000030000000000000000000000" + "00000000000000000000000000100001000000000000000000000000000000000000030000000000100000000000000000000000000000000" + "00000000010000200000000000000000000000000000070000003000000000000000000000000000000000000000000000000000010000200" + "0000000000000000000000000000200000038000000010c400000c00000020000000000000000000000000000000000000000000100002000" + "00000000000000000000000000020000003800000001104000000001000100000000000000000000000000000000000000000001000020000" + "00000000000000000000000000200000038000000020c40000000300000000000000000000000000000000000000000000000010000200000" + "00000000000000000000000000000000300000000000000000000000000000000000000000000000000001000040000000000000000000000" + "0000000070000003000000000000000000000000000000000000000000000000000010000c003000000000000000000000000000200000038" + "00000001084000008000000200000000000000000000000000000000000000000001000040000000000000000000000000000002000000380" + "00000010840000000010001000000000000000000000000000000000000000000010000400000000000000000000000000000020000003800" + "0000021040000000400000000000000000000000000000000000000000000000010000c003000000000000000000000000000000000030000" + "00000000000000000000000000000000000000000000000010000800000000000000000000000000000020000003800000001084000008000" + "00020000000000000000000000000000000000000000000100008000000000000000000000000000000200000038000000010840000000010" + "00100000000000000000000000000000000000000000001000080000000000000000000000000000000000000300000000000000000000000" + "00000000000000000000000000000100000001000000000000000000000000000200000038000000010840000080000002000000000000000" + "00000000000000000000000000001000000010000000000000000000000000002000000380000000108400000000100010000000000000000" + "00000000000000000000000000010000000100000000000000000000000000000000003000000000000000000000000000000000000000000" + "00000000001000000020000000000000000000000000002000000380000000108400000800000020000000000000000000000000000000000" + "00000000010000000200000000000000000000000000020000003800000001084000000001000100000000000000000000000000000000000" + "00000000100000002000000000000000000000000000000000030000000000000000000000000000000000000000000000000000100000004" + "00000000000000000000000000070000003000000000000000000000000000000000000000000000000000010000003c00000000000000000" + "00000000002000000380000000108400000800000020000000000000000000000000000000000000000000100000004000000000000000000" + "00000000020000003800000001084000000001000100000000000000000000000000000000000000000001000000040000000000000000000" + "00000000200000038000000021040000000400000000000000000000000000000000000000000000000010000003c00000000000000000000" + "00000000000000300000000000000000000000000000000000000000000000000001000000080000000000000000000000000002000000380" + "00000010840000080000002000000000000000000000000000000000000000000010000000800000000000000000000000000020000003800" + "00000108400000000100010000000000000000000000000000000000000000000100000008000000000000000000000000000000000030000" + "00000000000000000000000000000000000000000000000010000001000000000000000000000000000020000003800000001084000008000" + "00020000000000000000000000000000000000000000000100000010000000000000000000000000000200000038000000010840000000010" + "00100000000000000000000000000000000000000000001000000100000000000000000000000000000000000300000000000000000000000" + "00000000000000000000000000000100000020000000000000000000000000000200000038000000010840000080000002000000000000000" + "00000000000000000000000000001000000200000000000000000000000000002000000380000000108400000000100010000000000000000" + "00000000000000000000000000010000002000000000000000000000000000000000003000000000010000000000000000000000000000000" + "00000000001000000400000000000000000000000000007000000300000000000000000000000000000000000000000000000000001000000" + "40000000000000000000000000000200000038000000010c400000c0000002000000000000000000000000000000000000000000010000004" + "00000000000000000000000000002000000380000000110400000000100010000000000000000000000000000000000000000000100000040" + "000000000000000000000000000200000038000000020c4000000030000000000000000000000000000000000000000000000001000000400" + "00000000000000000000000000000000030000000000100000000000000000000000000000000000000000100000080000000000000000000" + "00000000070000003000000000000000000000000000000000000000000000000000010000008000000000000000000000000000020000003" + "8000000010c400000c00000020000000000000000000000000000000000000000000100000080000000000000000000000000000200000038" + "00000001104000000001000100000000000000000000000000000000000000000001000000800000000000000000000000000002000000380" + "00000020c40000000300000000000000000000000000000000000000000000000010000008000000000000000000000000000010000003000" + "0000000000000000000000000000000000000000000000000100ffffff0000000000000000000000000004000000500000000100010000000" + "00000000000000000000000000000000000181800000000000000000000000000000000000000000000000000000000000000000000000000" + "00ffffff0000000000"}, +}; + WinCpuMapTestCase _1sockets_22cores_hyperthreading = { 20, 1, 1, 14, 2, + 20, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1775,6 +1892,7 @@ WinCpuMapTestCase _1sockets_14cores_hyperthreading_set1 = { 1, 14, 0, + 20, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1848,6 +1966,7 @@ WinCpuMapTestCase _1sockets_14cores_hyperthreading_set2 = { 1, 14, 0, + 20, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1928,6 +2047,7 @@ WinCpuMapTestCase _1sockets_14cores_hyperthreading_set3 = { 1, 14, 0, + 20, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -2008,6 +2128,7 @@ WinCpuMapTestCase _1sockets_10cores_hyperthreading = { 1, 10, 0, + 12, {{12, 2, 8, 2, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2065,12 +2186,69 @@ WinCpuMapTestCase _1sockets_10cores_hyperthreading = { "0000000000000000000000000000000000000000000000000000000000000000000000000ff0f000000000000"}, }; +WinCpuMapTestCase _1sockets_8cores = { + 8, + 1, + 1, + 8, + 0, + 4, + {{8, 4, 4, 0, 0, 0}}, + { + {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, + {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {2, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, + {3, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {4, 0, 0, 4, EFFICIENT_CORE_PROC, 4, -1}, + {5, 0, 0, 5, EFFICIENT_CORE_PROC, 4, -1}, + {6, 0, 0, 6, EFFICIENT_CORE_PROC, 4, -1}, + {7, 0, 0, 7, EFFICIENT_CORE_PROC, 4, -1}, + }, + {"0300000030000000000000000000000000000000000000000000000000000100ff00000000000000000000000000000000000000300000000" + "00100000000000000000000000000000000000000000100010000000000000000000000000000000700000030000000000000000000000000" + "000000000000000000000000000100010000000000000000000000000000000200000038000000010c400000c000000200000000000000000" + "00000000000000000000000000100010000000000000000000000000000000200000038000000011040000000010001000000000000000000" + "0000000000000000000000000100010000000000000000000000000000000200000038000000020a400000002800000000000000000000000" + "000000000000000000000000100010000000000000000000000000000000200000038000000030c40000000c0000000000000000000000000" + "000000000000000000000001000f0000000000000000000000000000000000000030000000000100000000000000000000000000000000000" + "00000010002000000000000000000000000000000070000003000000000000000000000000000000000000000000000000000010002000000" + "0000000000000000000000000200000038000000010c400000c00000020000000000000000000000000000000000000000000100020000000" + "00000000000000000000000020000003800000001104000000001000100000000000000000000000000000000000000000001000200000000" + "00000000000000000000000200000038000000020a40000000280000000000000000000000000000000000000000000000010002000000000" + "00000000000000000000000000000300000000001000000000000000000000000000000000000000001000400000000000000000000000000" + "00000700000030000000000000000000000000000000000000000000000000000100040000000000000000000000000000000200000038000" + "000010c400000c000000200000000000000000000000000000000000000000001000400000000000000000000000000000002000000380000" + "00011040000000010001000000000000000000000000000000000000000000010004000000000000000000000000000000020000003800000" + "0020a400000002800000000000000000000000000000000000000000000000100040000000000000000000000000000000000000030000000" + "00010000000000000000000000000000000000000000010008000000000000000000000000000000070000003000000000000000000000000" + "0000000000000000000000000000100080000000000000000000000000000000200000038000000010c400000c00000020000000000000000" + "00000000000000000000000000010008000000000000000000000000000000020000003800000001104000000001000100000000000000000" + "00000000000000000000000000100080000000000000000000000000000000200000038000000020a40000000280000000000000000000000" + "00000000000000000000000001000800000000000000000000000000000000000000300000000000000000000000000000000000000000000" + "00000000100100000000000000000000000000000000700000030000000000000000000000000000000000000000000000000000100f00000" + "00000000000000000000000000020000003800000001084000008000000200000000000000000000000000000000000000000001001000000" + "00000000000000000000000000200000038000000010840000000010001000000000000000000000000000000000000000000010010000000" + "00000000000000000000000002000000380000000210400000004000000000000000000000000000000000000000000000000100f00000000" + "00000000000000000000000000000003000000000000000000000000000000000000000000000000000010020000000000000000000000000" + "00000002000000380000000108400000800000020000000000000000000000000000000000000000000100200000000000000000000000000" + "00000020000003800000001084000000001000100000000000000000000000000000000000000000001002000000000000000000000000000" + "00000000000030000000000000000000000000000000000000000000000000000100400000000000000000000000000000000200000038000" + "00001084000008000000200000000000000000000000000000000000000000001004000000000000000000000000000000002000000380000" + "00010840000000010001000000000000000000000000000000000000000000010040000000000000000000000000000000000000003000000" + "00000000000000000000000000000000000000000000001008000000000000000000000000000000002000000380000000108400000800000" + "02000000000000000000000000000000000000000000010080000000000000000000000000000000020000003800000001084000000001000" + "10000000000000000000000000000000000000000000100800000000000000000000000000000000100000030000000000000000000000000" + "000000000000000000000000000100ff000000000000000000000000000000040000005000000001000100000000000000000000000000000" + "000000000000008080000000000000000000000000000000000000000000000000000000000000000000000000000ff00000000000000"}, +}; + WinCpuMapTestCase _1sockets_6cores_hyperthreading_FMT7 = { 12, 1, 1, 6, 0, + 12, {{12, 6, 0, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2150,6 +2328,7 @@ WinCpuMapTestCase _1sockets_4cores = { 1, 4, 0, + 4, {{4, 4, 0, 0, 0, 0}}, { {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -2182,6 +2361,7 @@ WinCpuMapTestCase _1sockets_4cores_hyperthreading = { 1, 4, 0, + 8, {{8, 4, 0, 4, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2221,6 +2401,7 @@ WinCpuMapTestCase _1sockets_4cores_hyperthreading_1_FMT7 = { 1, 4, 0, + 8, {{8, 4, 0, 4, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2260,6 +2441,7 @@ WinCpuMapTestCase _1sockets_4cores_hyperthreading_2_FMT7 = { 1, 4, 0, + 8, {{8, 4, 0, 4, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2299,6 +2481,7 @@ WinCpuMapTestCase _1sockets_2cores_hyperthreading_FMT7 = { 1, 2, 0, + 4, {{4, 2, 0, 2, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2330,11 +2513,13 @@ INSTANTIATE_TEST_SUITE_P(CPUMap, _2sockets_48cores, _1sockets_24cores_hyperthreading_set1, _1sockets_24cores_hyperthreading_set2, + _1sockets_24cores, _1sockets_22cores_hyperthreading, _1sockets_14cores_hyperthreading_set1, _1sockets_14cores_hyperthreading_set2, _1sockets_14cores_hyperthreading_set3, _1sockets_10cores_hyperthreading, + _1sockets_8cores, _1sockets_6cores_hyperthreading_FMT7, _1sockets_4cores, _1sockets_4cores_hyperthreading, diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index 7f5f968b10c3fe..259a0201a8deef 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -535,9 +535,9 @@ int get_model_prefer_threads(const int num_streams, const std::shared_ptr& model, Config& config) { const int sockets = get_num_sockets(); + const auto isa = dnnl::get_effective_cpu_isa(); auto model_prefer = 0; if (-1 == config.modelPreferThreads) { - const auto isa = dnnl::get_effective_cpu_isa(); float isaSpecificThreshold = 1.0f; switch (isa) { case dnnl::cpu_isa::sse41: @@ -621,7 +621,8 @@ int get_model_prefer_threads(const int num_streams, // latency if (num_streams <= sockets && num_streams > 0) { - if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 && proc_type_table[0][MAIN_CORE_PROC] > 0) { + if (proc_type_table.size() == 1 && proc_type_table[0][EFFICIENT_CORE_PROC] > 0 && + proc_type_table[0][MAIN_CORE_PROC] > 0) { #ifdef __APPLE__ if ((proc_type_table.size() == 1) && (proc_type_table[0][EFFICIENT_CORE_PROC] > 0)) { model_prefer = proc_type_table[0][MAIN_CORE_PROC] > proc_type_table[0][EFFICIENT_CORE_PROC] @@ -638,7 +639,9 @@ int get_model_prefer_threads(const int num_streams, // cores only cases except LLM. model_prefer = proc_type_table[0][MAIN_CORE_PROC] > (proc_type_table[0][EFFICIENT_CORE_PROC] / (int8_intensive ? int8_threshold : fp32_threshold)) - ? ((!llm_related && ov::get_number_of_blocked_cores()) + ? ((!llm_related && + ((ov::get_number_of_blocked_cores() != 0) || + ((ov::get_number_of_soc_ecores() == 0) && (isa == dnnl::cpu_isa::avx2_vnni_2)))) ? proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC] : proc_type_table[0][MAIN_CORE_PROC]) : proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC]; From 0fde218efe86f19d00012d630354281bb76a17f6 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 30 Oct 2024 03:05:53 +0800 Subject: [PATCH 2/8] fix typo --- src/inference/tests/unit/cpu_map_parser/parser_windows.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp index edcf865cc3f124..97cf729ffade43 100644 --- a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp +++ b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp @@ -71,7 +71,7 @@ class WinCpuMapParserTests : public ov::test::TestsCommon, test_sockets, test_cores, test_blocked_cores, - test_processors_with_l3 + test_processors_with_l3, test_proc_type_table, test_cpu_mapping_table); From cba8089edf969dbfdf4b4540aff5de45ddd0b32e Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 30 Oct 2024 03:09:28 +0800 Subject: [PATCH 3/8] fix typo --- .../tests/unit/cpu_map_parser/parser_windows.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp index 97cf729ffade43..e6b5822fb04d1a 100644 --- a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp +++ b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp @@ -1691,7 +1691,6 @@ WinCpuMapTestCase _1sockets_24cores_hyperthreading_set2 = { "00ffffffff00000000"}, }; - WinCpuMapTestCase _1sockets_24cores = { 24, 1, @@ -1700,6 +1699,20 @@ WinCpuMapTestCase _1sockets_24cores = { 0, 24, {{24, 8, 16, 0, 0, 0}}, + { + {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, + {2, 0, 0, 2, EFFICIENT_CORE_PROC, 2, -1}, {3, 0, 0, 3, EFFICIENT_CORE_PROC, 2, -1}, + {4, 0, 0, 4, EFFICIENT_CORE_PROC, 2, -1}, {5, 0, 0, 5, EFFICIENT_CORE_PROC, 2, -1}, + {6, 0, 0, 6, EFFICIENT_CORE_PROC, 3, -1}, {7, 0, 0, 7, EFFICIENT_CORE_PROC, 3, -1}, + {8, 0, 0, 8, EFFICIENT_CORE_PROC, 3, -1}, {9, 0, 0, 9, EFFICIENT_CORE_PROC, 3, -1}, + {10, 0, 0, 10, MAIN_CORE_PROC, 4, -1}, {11, 0, 0, 11, MAIN_CORE_PROC, 5, -1}, + {12, 0, 0, 12, MAIN_CORE_PROC, 6, -1}, {13, 0, 0, 13, MAIN_CORE_PROC, 7, -1}, + {14, 0, 0, 14, EFFICIENT_CORE_PROC, 8, -1}, {15, 0, 0, 15, EFFICIENT_CORE_PROC, 8, -1}, + {16, 0, 0, 16, EFFICIENT_CORE_PROC, 8, -1}, {17, 0, 0, 17, EFFICIENT_CORE_PROC, 8, -1}, + {18, 0, 0, 18, EFFICIENT_CORE_PROC, 9, -1}, {19, 0, 0, 19, EFFICIENT_CORE_PROC, 9, -1}, + {20, 0, 0, 20, EFFICIENT_CORE_PROC, 9, -1}, {21, 0, 0, 21, EFFICIENT_CORE_PROC, 9, -1}, + {22, 0, 0, 22, MAIN_CORE_PROC, 10, -1}, {23, 0, 0, 23, MAIN_CORE_PROC, 11, -1}, + }, {"0300000030000000000000000000000000000000000000000000000000000100ffffff0000000000000000000000000000000000300000000" "00100000000000000000000000000000000000000000100010000000000000000000000000000000700000030000000000000000000000000" "000000000000000000000000000100010000000000000000000000000000000200000038000000010c400000c000000200000000000000000" From a41b78f1d12947c93f98b55239220eebd6168bd1 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 30 Oct 2024 03:15:06 +0800 Subject: [PATCH 4/8] update test data --- src/inference/tests/unit/cpu_map_parser/parser_windows.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp index e6b5822fb04d1a..ff71a5e25df224 100644 --- a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp +++ b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp @@ -2261,7 +2261,7 @@ WinCpuMapTestCase _1sockets_6cores_hyperthreading_FMT7 = { 1, 6, 0, - 12, + 1, {{12, 6, 0, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2341,7 +2341,7 @@ WinCpuMapTestCase _1sockets_4cores = { 1, 4, 0, - 4, + 0, {{4, 4, 0, 0, 0, 0}}, { {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, From b1d0b5f0e010ad354b44417570ca3cffbd82311f Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Wed, 30 Oct 2024 17:22:13 +0800 Subject: [PATCH 5/8] update get_number_of_blocked_cores() --- src/inference/src/system_conf.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp index 1e41cd6b0429a1..e308e5f5857da4 100644 --- a/src/inference/src/system_conf.cpp +++ b/src/inference/src/system_conf.cpp @@ -238,6 +238,10 @@ int get_number_of_blocked_cores() { return 0; } +int get_number_of_soc_ecores() { + return 0; +} + int get_current_socket_id() { return 0; } From 8bb2e17215f09391fdac3cbe588c49a0c3d1ad04 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Thu, 31 Oct 2024 23:24:27 +0800 Subject: [PATCH 6/8] refactoring thread cpu type parameter --- .../dev_api/openvino/runtime/system_conf.hpp | 26 +++---- src/inference/src/os/cpu_map_info.hpp | 10 +-- src/inference/src/os/win/win_system_conf.cpp | 25 ++++--- src/inference/src/system_conf.cpp | 23 ++---- .../unit/cpu_map_parser/parser_windows.cpp | 70 +++++++------------ .../intel_cpu/src/cpu_streams_calculation.cpp | 9 +-- 6 files changed, 65 insertions(+), 98 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/system_conf.hpp b/src/inference/dev_api/openvino/runtime/system_conf.hpp index 348341ac7614cc..ae34ea1c60b723 100644 --- a/src/inference/dev_api/openvino/runtime/system_conf.hpp +++ b/src/inference/dev_api/openvino/runtime/system_conf.hpp @@ -62,20 +62,12 @@ OPENVINO_RUNTIME_API int get_number_of_cpu_cores(bool big_cores_only = false); OPENVINO_RUNTIME_API int get_number_of_logical_cpu_cores(bool big_cores_only = false); /** - * @brief Returns number of blocked CPU cores. Please note that this is a temporary interface for performance - * optimization on a specific platform. May be removed in future release. + * @brief Returns cpu type for threading scheduling. Please note that this is a temporary interface for + * performance optimization on a specific platform. May be removed in future release. * @ingroup ov_dev_api_system_conf - * @return Number of blocked CPU cores. + * @return CPU type for threading scheduling. */ -OPENVINO_RUNTIME_API int get_number_of_blocked_cores(); - -/** - * @brief Returns number of SOC E cores. Please note that this is a temporary interface for performance - * optimization on a specific platform. May be removed in future release. - * @ingroup ov_dev_api_system_conf - * @return Number of SOC E cores. - */ -OPENVINO_RUNTIME_API int get_number_of_soc_ecores(); +OPENVINO_RUNTIME_API int get_thread_cpu_type(); /** * @brief Checks whether CPU supports SSE 4.2 capability @@ -349,4 +341,14 @@ enum ColumnOfCPUMappingTable { CPU_MAP_TABLE_SIZE = 7 //!< Size of CPU mapping table }; +/** + * @enum ThreadCPUType + * @brief This enum contains definition for specific CPU types in threading scheduling. + * + * THREAD_CPU_NORMAL is normal CPU for threading scheduling. + * THEAD_CPU_BLOCK_CORE is the CPU with blocked cores. + * THEAD_CPU_ONE_L3_CACHE is the CPU with one L3 cache for all cores. + */ +enum ThreadCPUType { THREAD_CPU_NORMAL = 0, THEAD_CPU_BLOCK_CORE = 1, THEAD_CPU_ONE_L3_CACHE = 2}; + } // namespace ov diff --git a/src/inference/src/os/cpu_map_info.hpp b/src/inference/src/os/cpu_map_info.hpp index c6e1ebde800ba8..e7402b4d1024b5 100644 --- a/src/inference/src/os/cpu_map_info.hpp +++ b/src/inference/src/os/cpu_map_info.hpp @@ -45,9 +45,7 @@ class CPU { int _numa_nodes = 0; int _sockets = 0; int _cores = 0; - int _blocked_cores = 0; - int _org_processors = 0; - int _processors_with_l3 = 0; + int _thread_cpu_type = ThreadCPUType::THREAD_CPU_NORMAL; std::vector> _org_proc_type_table; std::vector> _proc_type_table; std::vector> _cpu_mapping_table; @@ -158,8 +156,7 @@ void get_cpu_mapping_from_cores(const int _processors, * @param[out] _numa_nodes total number for nodes in system * @param[out] _sockets total number for sockets in system * @param[out] _cores total number for physical CPU cores in system - * @param[out] _blocked_cores total number for blocked processors in system - * @param[out] _processors_with_l3 total number for processors with L3 cache + * @param[out] _thread_cpu_type CPU type for threading scheduling * @param[out] _proc_type_table summary table of number of processors per type * @param[out] _cpu_mapping_table CPU mapping table for each processor * @return @@ -170,8 +167,7 @@ void parse_processor_info_win(const char* base_ptr, int& _numa_nodes, int& _sockets, int& _cores, - int& _blocked_cores, - int& _processors_with_l3, + int& _thread_cpu_type, std::vector>& _proc_type_table, std::vector>& _cpu_mapping_table); #endif diff --git a/src/inference/src/os/win/win_system_conf.cpp b/src/inference/src/os/win/win_system_conf.cpp index d12085dc40becb..f70d6008611d8c 100644 --- a/src/inference/src/os/win/win_system_conf.cpp +++ b/src/inference/src/os/win/win_system_conf.cpp @@ -35,8 +35,7 @@ CPU::CPU() { _numa_nodes, _sockets, _cores, - _blocked_cores, - _processors_with_l3, + _thread_cpu_type, _proc_type_table, _cpu_mapping_table); _org_proc_type_table = _proc_type_table; @@ -63,8 +62,7 @@ void parse_processor_info_win(const char* base_ptr, int& _numa_nodes, int& _sockets, int& _cores, - int& _blocked_cores, - int& _processors_with_l3, + int& _thread_cpu_type, std::vector>& _proc_type_table, std::vector>& _cpu_mapping_table) { std::vector list; @@ -84,11 +82,13 @@ void parse_processor_info_win(const char* base_ptr, int group_type = 0; int num_package = 0; + int num_proc_l3_cache = 0; + int num_blocked_cores = 0 _processors = 0; _sockets = 0; _cores = 0; - _blocked_cores = 0; + _thread_cpu_type = ThreadCPUType::THREAD_CPU_NORMAL; PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = NULL; @@ -163,7 +163,7 @@ void parse_processor_info_win(const char* base_ptr, proc_info[CPU_MAP_GROUP_ID] = group_id; if (group_id == CPU_BLOCKED) { proc_info[CPU_MAP_USED_FLAG] = CPU_BLOCKED; - _blocked_cores++; + num_blocked_cores++; } else { _proc_type_table[0][group_type]++; } @@ -195,7 +195,7 @@ void parse_processor_info_win(const char* base_ptr, if (_proc_type_table[0][EFFICIENT_CORE_PROC] > 0) { group_id = CPU_BLOCKED; group_type = EFFICIENT_CORE_PROC; - _blocked_cores++; + num_blocked_cores++; } else { group_id = group++; group_type = MAIN_CORE_PROC; @@ -223,13 +223,16 @@ void parse_processor_info_win(const char* base_ptr, } } else if ((info->Relationship == RelationCache) && (info->Cache.Level == 3)) { MaskToList(info->Cache.GroupMask.Mask); - _processors_with_l3 = list_len; + num_proc_l3_cache = list_len; } } _sockets++; - _processors -= _blocked_cores; - _cores -= _blocked_cores; - _proc_type_table[0][ALL_PROC] -= _blocked_cores; + _processors -= num_blocked_cores; + _cores -= num_blocked_cores; + _thread_cpu_type = num_blocked_cores > 0 ? ThreadCPUType::THEAD_CPU_BLOCK_CORE + : (_processors == num_proc_l3_cache && _sockets == 1) ? ThreadCPUType::THEAD_CPU_ONE_L3_CACHE + : ThreadCPUType::THREAD_CPU_NORMAL; + _proc_type_table[0][ALL_PROC] -= num_blocked_cores; if (_sockets > 1) { _proc_type_table.push_back(_proc_type_table[0]); _proc_type_table[0] = proc_init_line; diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp index e308e5f5857da4..2f5ef85cd0ccaf 100644 --- a/src/inference/src/system_conf.cpp +++ b/src/inference/src/system_conf.cpp @@ -234,11 +234,8 @@ int get_number_of_logical_cpu_cores(bool) { return parallel_get_max_threads(); } -int get_number_of_blocked_cores() { - return 0; -} -int get_number_of_soc_ecores() { +int get_thread_cpu_type() { return 0; } @@ -293,14 +290,9 @@ int get_number_of_logical_cpu_cores(bool) { return parallel_get_max_threads(); } -int get_number_of_blocked_cores() { - CPU& cpu = cpu_info(); - return cpu._blocked_cores; -} - -int get_number_of_soc_ecores() { +int get_thread_cpu_type() { CPU& cpu = cpu_info(); - return cpu._org_processors - cpu._processors_with_l3; + return cpu._thread_cpu_type; } bool is_cpu_map_available() { @@ -491,14 +483,9 @@ int get_number_of_logical_cpu_cores(bool bigCoresOnly) { return logical_cores; } -int get_number_of_blocked_cores() { - CPU& cpu = cpu_info(); - return cpu._blocked_cores; -} - -int get_number_of_soc_ecores() { +int get_thread_cpu_type() { CPU& cpu = cpu_info(); - return cpu._org_processors - cpu._processors_with_l3; + return cpu._thread_cpu_type; } int get_org_socket_id(int socket_id) { diff --git a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp index ff71a5e25df224..9103f54e8a7a92 100644 --- a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp +++ b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp @@ -33,8 +33,7 @@ struct WinCpuMapTestCase { int _numa_nodes; int _sockets; int _cores; - int _blocked_cores; - int _processors_with_l3; + int _thread_cpu_type; std::vector> _proc_type_table; std::vector> _cpu_mapping_table; std::string system_info; @@ -58,8 +57,7 @@ class WinCpuMapParserTests : public ov::test::TestsCommon, int test_numa_nodes = 0; int test_sockets = 0; int test_cores = 0; - int test_blocked_cores = 0; - int test_processors_with_l3 = 0; + int test_thread_cpu_type = 0; unsigned long len = (unsigned long)(test_len / 2); std::vector> test_proc_type_table; std::vector> test_cpu_mapping_table; @@ -70,8 +68,7 @@ class WinCpuMapParserTests : public ov::test::TestsCommon, test_numa_nodes, test_sockets, test_cores, - test_blocked_cores, - test_processors_with_l3, + test_thread_cpu_type, test_proc_type_table, test_cpu_mapping_table); @@ -79,8 +76,7 @@ class WinCpuMapParserTests : public ov::test::TestsCommon, ASSERT_EQ(test_data._numa_nodes, test_numa_nodes); ASSERT_EQ(test_data._sockets, test_sockets); ASSERT_EQ(test_data._cores, test_cores); - ASSERT_EQ(test_data._blocked_cores, test_blocked_cores); - ASSERT_EQ(test_data._processors_with_l3, test_processors_with_l3); + ASSERT_EQ(test_data._thread_cpu_type, test_thread_cpu_type); ASSERT_EQ(test_data._proc_type_table, test_proc_type_table); ASSERT_EQ(test_data._cpu_mapping_table, test_cpu_mapping_table); } @@ -91,8 +87,8 @@ WinCpuMapTestCase _2sockets_104cores_hyperthreading = { 2, // param[expected out]: total 2 numa nodes on this simulated platform 2, // param[expected out]: total 2 sockets on this simulated platform 104, // param[expected out]: total 104 CPU cores on this simulated platform - 0, // param[expected out]: total 0 processors on this simulated platform are blocked - 64, // param[expected out]: total 64 processors have L3 cache in one package on this simulated platform + ov::ThreadCPUType::THREAD_CPU_NORMAL, // param[expected out]: normal cpu type for threading scheduling + 64, // param[expected out]: total 64 processors have L3 cache in one package on this simulated platform {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, // param[expected out]: The proc_type_table of this simulated platform @@ -707,8 +703,7 @@ WinCpuMapTestCase _2sockets_48cores_hyperthreading = { 2, 2, 48, - 0, - 48, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{96, 48, 0, 48, -1, -1}, {48, 24, 0, 24, 0, 0}, {48, 24, 0, 24, 1, 1}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -999,8 +994,7 @@ WinCpuMapTestCase _2sockets_36cores_hyperthreading = { 2, 2, 36, - 0, - 36, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{72, 36, 0, 36, -1, -1}, {36, 18, 0, 18, 0, 0}, {36, 18, 0, 18, 1, 1}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1193,8 +1187,7 @@ WinCpuMapTestCase _2sockets_48cores = { 2, 2, 48, - 0, - 24, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{48, 48, 0, 0, -1, -1}, {24, 24, 0, 0, 0, 0}, {24, 24, 0, 0, 1, 1}}, { {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, @@ -1461,8 +1454,7 @@ WinCpuMapTestCase _1sockets_24cores_hyperthreading_set1 = { 1, 1, 24, - 0, - 32, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{32, 8, 16, 8, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1573,8 +1565,7 @@ WinCpuMapTestCase _1sockets_24cores_hyperthreading_set2 = { 1, 1, 24, - 0, - 32, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{32, 8, 16, 8, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1696,8 +1687,7 @@ WinCpuMapTestCase _1sockets_24cores = { 1, 1, 24, - 0, - 24, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{24, 8, 16, 0, 0, 0}}, { {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, {1, 0, 0, 1, MAIN_CORE_PROC, 1, -1}, @@ -1815,8 +1805,7 @@ WinCpuMapTestCase _1sockets_22cores_hyperthreading = { 1, 1, 14, - 2, - 20, + ov::ThreadCPUType::THEAD_CPU_BLOCK_CORE, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1904,8 +1893,7 @@ WinCpuMapTestCase _1sockets_14cores_hyperthreading_set1 = { 1, 1, 14, - 0, - 20, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1978,8 +1966,7 @@ WinCpuMapTestCase _1sockets_14cores_hyperthreading_set2 = { 1, 1, 14, - 0, - 20, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -2059,8 +2046,7 @@ WinCpuMapTestCase _1sockets_14cores_hyperthreading_set3 = { 1, 1, 14, - 0, - 20, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -2140,8 +2126,7 @@ WinCpuMapTestCase _1sockets_10cores_hyperthreading = { 1, 1, 10, - 0, - 12, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{12, 2, 8, 2, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2204,8 +2189,7 @@ WinCpuMapTestCase _1sockets_8cores = { 1, 1, 8, - 0, - 4, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{8, 4, 4, 0, 0, 0}}, { {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -2260,8 +2244,7 @@ WinCpuMapTestCase _1sockets_6cores_hyperthreading_FMT7 = { 1, 1, 6, - 0, - 1, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{12, 6, 0, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2340,8 +2323,7 @@ WinCpuMapTestCase _1sockets_4cores = { 1, 1, 4, - 0, - 0, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{4, 4, 0, 0, 0, 0}}, { {0, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -2373,8 +2355,7 @@ WinCpuMapTestCase _1sockets_4cores_hyperthreading = { 1, 1, 4, - 0, - 8, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{8, 4, 0, 4, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2413,8 +2394,7 @@ WinCpuMapTestCase _1sockets_4cores_hyperthreading_1_FMT7 = { 1, 1, 4, - 0, - 8, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{8, 4, 0, 4, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2453,8 +2433,7 @@ WinCpuMapTestCase _1sockets_4cores_hyperthreading_2_FMT7 = { 1, 1, 4, - 0, - 8, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{8, 4, 0, 4, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2493,8 +2472,7 @@ WinCpuMapTestCase _1sockets_2cores_hyperthreading_FMT7 = { 1, 1, 2, - 0, - 4, + ov::ThreadCPUType::THREAD_CPU_NORMAL, {{4, 2, 0, 2, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index 259a0201a8deef..54e6d271e0a5ef 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -635,13 +635,14 @@ int get_model_prefer_threads(const int num_streams, const int int8_threshold = 4; // ~relative efficiency of the VNNI-intensive code for Big vs Little cores; const int fp32_threshold = 2; // ~relative efficiency of the AVX2 fp32 code for Big vs Little cores; // By default the latency case uses (faster) Big cores only, depending on the compute ratio - // But on MTL detected by ov::get_number_of_blocked_cores(), use Big and Little cores together in Big + // But on MTL or ARL detected by ov::get_thread_cpu_type(), use Big and Little cores together in Big // cores only cases except LLM. + auto thread_cpu_type = ov::get_thread_cpu_type(); model_prefer = proc_type_table[0][MAIN_CORE_PROC] > (proc_type_table[0][EFFICIENT_CORE_PROC] / (int8_intensive ? int8_threshold : fp32_threshold)) - ? ((!llm_related && - ((ov::get_number_of_blocked_cores() != 0) || - ((ov::get_number_of_soc_ecores() == 0) && (isa == dnnl::cpu_isa::avx2_vnni_2)))) + ? ((!llm_related && ((thread_cpu_type == ThreadCPUType::THEAD_CPU_BLOCK_CORE) || + ((thread_cpu_type == ThreadCPUType::THEAD_CPU_ONE_L3_CACHE) && + (isa == dnnl::cpu_isa::avx2_vnni_2)))) ? proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC] : proc_type_table[0][MAIN_CORE_PROC]) : proc_type_table[0][MAIN_CORE_PROC] + proc_type_table[0][EFFICIENT_CORE_PROC]; From c7541622def6ae403af86580be5d6f00d38377cb Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Thu, 31 Oct 2024 23:40:59 +0800 Subject: [PATCH 7/8] update test data --- src/inference/src/os/win/win_system_conf.cpp | 3 +-- .../unit/cpu_map_parser/parser_windows.cpp | 21 +++++++++---------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/inference/src/os/win/win_system_conf.cpp b/src/inference/src/os/win/win_system_conf.cpp index f70d6008611d8c..a719c7d4a2c451 100644 --- a/src/inference/src/os/win/win_system_conf.cpp +++ b/src/inference/src/os/win/win_system_conf.cpp @@ -39,7 +39,6 @@ CPU::CPU() { _proc_type_table, _cpu_mapping_table); _org_proc_type_table = _proc_type_table; - _org_processors = _processors; // ensure that get_org_numa_id and get_org_socket_id can return the correct value for (size_t i = 0; i < _cpu_mapping_table.size(); i++) { @@ -83,7 +82,7 @@ void parse_processor_info_win(const char* base_ptr, int num_package = 0; int num_proc_l3_cache = 0; - int num_blocked_cores = 0 + int num_blocked_cores = 0; _processors = 0; _sockets = 0; diff --git a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp index 9103f54e8a7a92..dc985aad6f325e 100644 --- a/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp +++ b/src/inference/tests/unit/cpu_map_parser/parser_windows.cpp @@ -88,7 +88,6 @@ WinCpuMapTestCase _2sockets_104cores_hyperthreading = { 2, // param[expected out]: total 2 sockets on this simulated platform 104, // param[expected out]: total 104 CPU cores on this simulated platform ov::ThreadCPUType::THREAD_CPU_NORMAL, // param[expected out]: normal cpu type for threading scheduling - 64, // param[expected out]: total 64 processors have L3 cache in one package on this simulated platform {{208, 104, 0, 104, -1, -1}, {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 1}}, // param[expected out]: The proc_type_table of this simulated platform @@ -1454,7 +1453,7 @@ WinCpuMapTestCase _1sockets_24cores_hyperthreading_set1 = { 1, 1, 24, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{32, 8, 16, 8, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1565,7 +1564,7 @@ WinCpuMapTestCase _1sockets_24cores_hyperthreading_set2 = { 1, 1, 24, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{32, 8, 16, 8, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1893,7 +1892,7 @@ WinCpuMapTestCase _1sockets_14cores_hyperthreading_set1 = { 1, 1, 14, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -1966,7 +1965,7 @@ WinCpuMapTestCase _1sockets_14cores_hyperthreading_set2 = { 1, 1, 14, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -2046,7 +2045,7 @@ WinCpuMapTestCase _1sockets_14cores_hyperthreading_set3 = { 1, 1, 14, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{20, 6, 8, 6, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, MAIN_CORE_PROC, 0, -1}, @@ -2126,7 +2125,7 @@ WinCpuMapTestCase _1sockets_10cores_hyperthreading = { 1, 1, 10, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{12, 2, 8, 2, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2355,7 +2354,7 @@ WinCpuMapTestCase _1sockets_4cores_hyperthreading = { 1, 1, 4, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{8, 4, 0, 4, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2394,7 +2393,7 @@ WinCpuMapTestCase _1sockets_4cores_hyperthreading_1_FMT7 = { 1, 1, 4, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{8, 4, 0, 4, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2433,7 +2432,7 @@ WinCpuMapTestCase _1sockets_4cores_hyperthreading_2_FMT7 = { 1, 1, 4, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{8, 4, 0, 4, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, @@ -2472,7 +2471,7 @@ WinCpuMapTestCase _1sockets_2cores_hyperthreading_FMT7 = { 1, 1, 2, - ov::ThreadCPUType::THREAD_CPU_NORMAL, + ov::ThreadCPUType::THEAD_CPU_ONE_L3_CACHE, {{4, 2, 0, 2, 0, 0}}, { {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, From 658d198a9ece98458492551a8a187c3d36d869a8 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Fri, 1 Nov 2024 00:11:38 +0800 Subject: [PATCH 8/8] fix code style issue --- src/inference/src/system_conf.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp index 2f5ef85cd0ccaf..bcc3bf6dd4afb8 100644 --- a/src/inference/src/system_conf.cpp +++ b/src/inference/src/system_conf.cpp @@ -234,7 +234,6 @@ int get_number_of_logical_cpu_cores(bool) { return parallel_get_max_threads(); } - int get_thread_cpu_type() { return 0; }