From 74c85528d4362a3034f32148a2f167efbf54ab3f Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 23 Oct 2022 21:18:02 +0800 Subject: [PATCH 01/11] get_physical_cpu_count api family --- src/cpu.cpp | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/cpu.h | 4 ++ 2 files changed, 114 insertions(+) diff --git a/src/cpu.cpp b/src/cpu.cpp index c84c6491157..ba3bf94dd4c 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -42,6 +42,11 @@ #include #endif +#if defined _WIN32 && !(defined __MINGW32__) +#define WIN32_LEAN_AND_MEAN +#include +#endif + #if defined __ANDROID__ || defined __linux__ #if defined __ANDROID__ #if __ANDROID_API__ >= 18 @@ -1164,6 +1169,111 @@ int get_big_cpu_count() return big_cpu_count ? big_cpu_count : g_cpucount; } +#if defined __ANDROID__ || defined __linux__ +static int get_thread_siblings(int cpuid) +{ + char path[256]; + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpuid); + + FILE* fp = fopen(path, "rb"); + if (!fp) + return -1; + + int thread_siblings = -1; + int nscan = fscanf(fp, "%x", &thread_siblings); + if (nscan != 1) + { + // ignore + } + + fclose(fp); + + return thread_siblings; +} +#endif // defined __ANDROID__ || defined __linux__ + +static int get_physical_cpucount() +{ + int count = 0; +#if (defined _WIN32 && !(defined __MINGW32__)) + typedef BOOL (WINAPI *LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + LPFN_GLPI glpi = (LPFN_GLPI) GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); + if (glpi == NULL) + { + NCNN_LOGE("GetLogicalProcessorInformation is not supported"); + return g_cpucount; + } + + DWORD return_length = 0; + glpi(NULL, &return_length); + + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(return_length); + glpi(buffer, &return_length); + + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer; + DWORD byte_offset = 0; + while (byte_offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= return_length) + { + if (ptr->Relationship == RelationProcessorCore) + { + count++; + } + + byte_offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + ptr++; + } + + free(buffer); +#elif defined __ANDROID__ || defined __linux__ + std::vector thread_set; + for (int i = 0; i < g_cpucount; i++) + { + int thread_siblings = get_thread_siblings(i); + if (thread_siblings == -1) + { + // ignore malformed one + continue; + } + + for (size_t j = 0; j < thread_set.size(); j++) + { + if (thread_set[j] == thread_siblings) + continue; + + thread_set.push_back(thread_siblings); + count++; + } + } +#elif __APPLE__ + size_t len = sizeof(count); + sysctlbyname("hw.physicalcpu_max", &count, &len, NULL, 0); +#else + count = g_cpucount; +#endif + + if (count > g_cpucount) + count = g_cpucount; + + return count; +} + +static int g_physical_cpucount = get_physical_cpucount(); + +int get_physical_cpu_count() +{ + return g_physical_cpucount; +} + +int get_physical_little_cpu_count() +{ + return g_physical_cpucount * 2 - g_cpucount; +} + +int get_physical_big_cpu_count() +{ + return g_cpucount - g_physical_cpucount; +} + #if defined __ANDROID__ || defined __linux__ static int get_max_freq_khz(int cpuid) { diff --git a/src/cpu.h b/src/cpu.h index 5a94106ef47..cdc8b229b72 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -110,6 +110,10 @@ NCNN_EXPORT int get_cpu_count(); NCNN_EXPORT int get_little_cpu_count(); NCNN_EXPORT int get_big_cpu_count(); +NCNN_EXPORT int get_physical_cpu_count(); +NCNN_EXPORT int get_physical_little_cpu_count(); +NCNN_EXPORT int get_physical_big_cpu_count(); + // bind all threads on little clusters if powersave enabled // affects HMP arch cpu like ARM big.LITTLE // only implemented on android at the moment From a73032e7f86f5c1c63ff0b4aa56d398984946bb1 Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 23 Oct 2022 13:19:29 +0000 Subject: [PATCH 02/11] apply code-format changes --- src/cpu.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index ba3bf94dd4c..7971bd3a3d3 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -1196,8 +1196,8 @@ static int get_physical_cpucount() { int count = 0; #if (defined _WIN32 && !(defined __MINGW32__)) - typedef BOOL (WINAPI *LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); - LPFN_GLPI glpi = (LPFN_GLPI) GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); + typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); if (glpi == NULL) { NCNN_LOGE("GetLogicalProcessorInformation is not supported"); From 32a65758c1704eda53db8563ace35ab7e155a079 Mon Sep 17 00:00:00 2001 From: nihuini Date: Mon, 24 Oct 2022 11:09:13 +0800 Subject: [PATCH 03/11] fix --- src/cpu.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 7971bd3a3d3..6c3fc3c070d 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -1235,11 +1235,18 @@ static int get_physical_cpucount() continue; } + bool thread_siblings_exists = false; for (size_t j = 0; j < thread_set.size(); j++) { if (thread_set[j] == thread_siblings) - continue; + { + thread_siblings_exists = true; + break; + } + } + if (!thread_siblings_exists) + { thread_set.push_back(thread_siblings); count++; } From 8cfdbf059bf8573f089e8d4566eb298cda2be2e8 Mon Sep 17 00:00:00 2001 From: nihuini Date: Mon, 24 Oct 2022 14:08:02 +0800 Subject: [PATCH 04/11] fix up system --- src/cpu.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/cpu.cpp b/src/cpu.cpp index 6c3fc3c070d..b9ba9812aa1 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -1273,11 +1273,17 @@ int get_physical_cpu_count() int get_physical_little_cpu_count() { + if (g_physical_cpucount == g_cpucount) + return get_little_cpu_count(); + return g_physical_cpucount * 2 - g_cpucount; } int get_physical_big_cpu_count() { + if (g_physical_cpucount == g_cpucount) + return get_big_cpu_count(); + return g_cpucount - g_physical_cpucount; } From bd868e7aab96e3bdecbb062c97565a6ea0c6da17 Mon Sep 17 00:00:00 2001 From: nihuini Date: Mon, 24 Oct 2022 14:18:01 +0800 Subject: [PATCH 05/11] set default to physical big cpu --- benchmark/benchncnn.cpp | 4 ++-- src/option.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/benchncnn.cpp b/benchmark/benchncnn.cpp index 1d4ae1d755e..714dca3180f 100644 --- a/benchmark/benchncnn.cpp +++ b/benchmark/benchncnn.cpp @@ -161,8 +161,8 @@ void benchmark(const char* comment, const ncnn::Mat& _in, const ncnn::Option& op int main(int argc, char** argv) { int loop_count = 4; - int num_threads = ncnn::get_cpu_count(); - int powersave = 0; + int num_threads = ncnn::get_physical_big_cpu_count(); + int powersave = 2; int gpu_device = -1; int cooling_down = 1; diff --git a/src/option.cpp b/src/option.cpp index 4aabfdde5ed..80d4455307e 100644 --- a/src/option.cpp +++ b/src/option.cpp @@ -21,7 +21,7 @@ namespace ncnn { Option::Option() { lightmode = true; - num_threads = get_big_cpu_count(); + num_threads = get_physical_big_cpu_count(); blob_allocator = 0; workspace_allocator = 0; From d9b76919e113a77e0b5591c09df08958195848e3 Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 27 Oct 2022 17:22:12 +0800 Subject: [PATCH 06/11] always treat smt core as big core --- src/cpu.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/cpu.cpp b/src/cpu.cpp index b9ba9812aa1..75221822216 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -1362,6 +1362,27 @@ static int get_max_freq_khz(int cpuid) return max_freq_khz; } +static int get_thread_siblings_list_count(int cpuid) +{ + char path[256]; + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpuid); + + FILE* fp = fopen(path, "rb"); + if (!fp) + return 1; + + int count = 1; + while (!feof(fp)) + { + if (fgetc(fp) == ',') + count++; + } + + fclose(fp); + + return count; +} + static int set_sched_affinity(const CpuSet& thread_affinity_mask) { // set affinity for thread @@ -1480,6 +1501,13 @@ static int setup_thread_affinity_masks() for (int i = 0; i < g_cpucount; i++) { + if (get_thread_siblings_list_count(i) > 1) + { + // always treat smt core as big core + g_thread_affinity_mask_big.enable(i); + continue; + } + if (cpu_max_freq_khz[i] < max_freq_khz_medium) g_thread_affinity_mask_little.enable(i); else From 59baf2239dea107f140286030672c8799ca43c8f Mon Sep 17 00:00:00 2001 From: nihuini Date: Fri, 28 Oct 2022 11:09:45 +0800 Subject: [PATCH 07/11] is_smt_cpu --- src/cpu.cpp | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 75221822216..4ce9edcd532 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -1362,25 +1362,37 @@ static int get_max_freq_khz(int cpuid) return max_freq_khz; } -static int get_thread_siblings_list_count(int cpuid) +static bool is_smt_cpu(int cpuid) { + // https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/stable/sysfs-devices-system-cpu#L68-72 char path[256]; - sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpuid); + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_cpus_list", cpuid); FILE* fp = fopen(path, "rb"); + if (!fp) - return 1; + { + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpuid); + fp = fopen(path, "rb"); + + if (!fp) + return false; + } - int count = 1; + bool is_smt = false; while (!feof(fp)) { - if (fgetc(fp) == ',') - count++; + char ch = fgetc(fp); + if (ch == ',' || ch == '-') + { + is_smt = true; + break; + } } fclose(fp); - return count; + return is_smt; } static int set_sched_affinity(const CpuSet& thread_affinity_mask) @@ -1501,7 +1513,7 @@ static int setup_thread_affinity_masks() for (int i = 0; i < g_cpucount; i++) { - if (get_thread_siblings_list_count(i) > 1) + if (is_smt_cpu(i)) { // always treat smt core as big core g_thread_affinity_mask_big.enable(i); From aa1f4fc387eaacfee4d4c13e835584ed2da8cea8 Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 30 Oct 2022 21:36:08 +0800 Subject: [PATCH 08/11] get max freq mhz on windows --- src/cpu.cpp | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 1 deletion(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 4ce9edcd532..8533a4d0bfd 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -45,6 +45,7 @@ #if defined _WIN32 && !(defined __MINGW32__) #define WIN32_LEAN_AND_MEAN #include +#include #endif #if defined __ANDROID__ || defined __linux__ @@ -1114,6 +1115,10 @@ static int get_cpucount() count = emscripten_num_logical_cores(); else count = 1; +#elif (defined _WIN32 && !(defined __MINGW32__)) + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + count = system_info.dwNumberOfProcessors; #elif defined __ANDROID__ || defined __linux__ // get cpu count from /proc/cpuinfo FILE* fp = fopen("/proc/cpuinfo", "rb"); @@ -1287,6 +1292,100 @@ int get_physical_big_cpu_count() return g_cpucount - g_physical_cpucount; } +#if (defined _WIN32 && !(defined __MINGW32__)) +static int count_set_bits(ULONG_PTR bitMask) +{ + DWORD LSHIFT = sizeof(ULONG_PTR) * 8 - 1; + int bitSetCount = 0; + ULONG_PTR bitTest = (ULONG_PTR)1 << LSHIFT; + DWORD i; + + for (i = 0; i <= LSHIFT; ++i) + { + bitSetCount += ((bitMask & bitTest) ? 1 : 0); + bitTest /= 2; + } + + return bitSetCount; +} + +static ULONG_PTR get_smt_cpu_mask() +{ + ULONG_PTR smt_cpu_mask = 0; + + typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); + if (glpi == NULL) + { + NCNN_LOGE("GetLogicalProcessorInformation is not supported"); + return 0; + } + + DWORD return_length = 0; + glpi(NULL, &return_length); + + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(return_length); + glpi(buffer, &return_length); + + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer; + DWORD byte_offset = 0; + while (byte_offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= return_length) + { + if (ptr->Relationship == RelationProcessorCore) + { + int smt_count = count_set_bits(ptr->ProcessorMask); + if (smt_count > 1) + { + // this core is smt + smt_cpu_mask |= ptr->ProcessorMask; + } + } + + byte_offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + ptr++; + } + + free(buffer); + + return smt_cpu_mask; +} + +static std::vector get_max_freq_mhz() +{ + typedef struct _PROCESSOR_POWER_INFORMATION { + ULONG Number; + ULONG MaxMhz; + ULONG CurrentMhz; + ULONG MhzLimit; + ULONG MaxIdleState; + ULONG CurrentIdleState; + } PROCESSOR_POWER_INFORMATION, * PPROCESSOR_POWER_INFORMATION; + + typedef LONG(WINAPI * LPFN_CNPI)(POWER_INFORMATION_LEVEL, PVOID, ULONG, PVOID, ULONG); + LPFN_CNPI cnpi = (LPFN_CNPI)GetProcAddress(GetModuleHandle(TEXT("powrprof")), "CallNtPowerInformation"); + if (cnpi == NULL) + { + NCNN_LOGE("CallNtPowerInformation is not supported"); + return std::vector(g_cpucount, 0); + } + + DWORD return_length = sizeof(PROCESSOR_POWER_INFORMATION) * g_cpucount; + PPROCESSOR_POWER_INFORMATION buffer = (PPROCESSOR_POWER_INFORMATION)malloc(return_length); + + cnpi(ProcessorInformation, NULL, 0, buffer, return_length); + + std::vector ret; + for (int i = 0; i < g_cpucount; i++) + { + ULONG max_mhz = buffer[i].MaxMhz; + ret.push_back(max_mhz); + } + + free(buffer); + return ret; +} +#endif // (defined _WIN32 && !(defined __MINGW32__)) + #if defined __ANDROID__ || defined __linux__ static int get_max_freq_khz(int cpuid) { @@ -1485,7 +1584,48 @@ static int setup_thread_affinity_masks() { g_thread_affinity_mask_all.disable_all(); -#if defined __ANDROID__ || defined __linux__ +#if (defined _WIN32 && !(defined __MINGW32__)) + // get max freq mhz for all cores + int max_freq_mhz_min = INT_MAX; + int max_freq_mhz_max = 0; + std::vector all_max_freq_mhz = get_max_freq_mhz(); + for (int i = 0; i < g_cpucount; i++) + { + int max_freq_mhz = all_max_freq_mhz[i]; + + // NCNN_LOGE("%d max freq = %d khz", i, max_freq_mhz); + + if (max_freq_mhz > max_freq_mhz_max) + max_freq_mhz_max = max_freq_mhz; + if (max_freq_mhz < max_freq_mhz_min) + max_freq_mhz_min = max_freq_mhz; + } + + int max_freq_mhz_medium = (max_freq_mhz_min + max_freq_mhz_max) / 2; + if (max_freq_mhz_medium == max_freq_mhz_max) + { + g_thread_affinity_mask_little.disable_all(); + g_thread_affinity_mask_big = g_thread_affinity_mask_all; + return 0; + } + + ULONG_PTR smt_cpu_mask = get_smt_cpu_mask(); + + for (int i = 0; i < g_cpucount; i++) + { + if (smt_cpu_mask & (1 << i)) + { + // always treat smt core as big core + g_thread_affinity_mask_big.enable(i); + continue; + } + + if (cpu_max_freq_mhz[i] < max_freq_mhz_medium) + g_thread_affinity_mask_little.enable(i); + else + g_thread_affinity_mask_big.enable(i); + } +#elif defined __ANDROID__ || defined __linux__ int max_freq_khz_min = INT_MAX; int max_freq_khz_max = 0; std::vector cpu_max_freq_khz(g_cpucount); From 0830f763d9b500716065861ee6ee8969c18e2662 Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 30 Oct 2022 13:37:46 +0000 Subject: [PATCH 09/11] apply code-format changes --- src/cpu.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 8533a4d0bfd..45e0a06cc0c 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -1352,14 +1352,15 @@ static ULONG_PTR get_smt_cpu_mask() static std::vector get_max_freq_mhz() { - typedef struct _PROCESSOR_POWER_INFORMATION { + typedef struct _PROCESSOR_POWER_INFORMATION + { ULONG Number; ULONG MaxMhz; ULONG CurrentMhz; ULONG MhzLimit; ULONG MaxIdleState; ULONG CurrentIdleState; - } PROCESSOR_POWER_INFORMATION, * PPROCESSOR_POWER_INFORMATION; + } PROCESSOR_POWER_INFORMATION, *PPROCESSOR_POWER_INFORMATION; typedef LONG(WINAPI * LPFN_CNPI)(POWER_INFORMATION_LEVEL, PVOID, ULONG, PVOID, ULONG); LPFN_CNPI cnpi = (LPFN_CNPI)GetProcAddress(GetModuleHandle(TEXT("powrprof")), "CallNtPowerInformation"); From b9815096749f13c6af09914a119023ca7826cd15 Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 30 Oct 2022 22:02:56 +0800 Subject: [PATCH 10/11] windows thread affinity --- src/cpu.cpp | 92 +++++++++++++++++++++++++++++++++++++---------------- src/cpu.h | 7 ++++ 2 files changed, 72 insertions(+), 27 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 45e0a06cc0c..96a02ebe957 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -302,7 +302,44 @@ static int g_hw_optional_arm_FEAT_BF16 = get_hw_capability("hw.optional.arm.FEAT static int g_hw_optional_arm_FEAT_I8MM = get_hw_capability("hw.optional.arm.FEAT_I8MM"); #endif // __APPLE__ -#if defined __ANDROID__ || defined __linux__ +#if (defined _WIN32 && !(defined __MINGW32__)) +CpuSet::CpuSet() +{ + disable_all(); +} + +void CpuSet::enable(int cpu) +{ + mask |= (1 << cpu); +} + +void CpuSet::disable(int cpu) +{ + mask &= ~(1 << cpu); +} + +void CpuSet::disable_all() +{ + mask = 0; +} + +bool CpuSet::is_enabled(int cpu) const +{ + return mask & (1 << cpu); +} + +int CpuSet::num_enabled() const +{ + int num_enabled = 0; + for (int i = 0; i < (int)sizeof(mask) * 8; i++) + { + if (is_enabled(i)) + num_enabled++; + } + + return num_enabled; +} +#elif defined __ANDROID__ || defined __linux__ CpuSet::CpuSet() { disable_all(); @@ -1293,32 +1330,16 @@ int get_physical_big_cpu_count() } #if (defined _WIN32 && !(defined __MINGW32__)) -static int count_set_bits(ULONG_PTR bitMask) -{ - DWORD LSHIFT = sizeof(ULONG_PTR) * 8 - 1; - int bitSetCount = 0; - ULONG_PTR bitTest = (ULONG_PTR)1 << LSHIFT; - DWORD i; - - for (i = 0; i <= LSHIFT; ++i) - { - bitSetCount += ((bitMask & bitTest) ? 1 : 0); - bitTest /= 2; - } - - return bitSetCount; -} - -static ULONG_PTR get_smt_cpu_mask() +static CpuSet get_smt_cpu_mask() { - ULONG_PTR smt_cpu_mask = 0; + CpuSet smt_cpu_mask; typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); if (glpi == NULL) { NCNN_LOGE("GetLogicalProcessorInformation is not supported"); - return 0; + return smt_cpu_mask; } DWORD return_length = 0; @@ -1333,11 +1354,12 @@ static ULONG_PTR get_smt_cpu_mask() { if (ptr->Relationship == RelationProcessorCore) { - int smt_count = count_set_bits(ptr->ProcessorMask); - if (smt_count > 1) + CpuSet smt_set; + smt_set.mask = ptr->ProcessorMask; + if (smt_set.num_enabled() > 1) { // this core is smt - smt_cpu_mask |= ptr->ProcessorMask; + smt_cpu_mask.mask |= smt_set.mask; } } @@ -1362,11 +1384,14 @@ static std::vector get_max_freq_mhz() ULONG CurrentIdleState; } PROCESSOR_POWER_INFORMATION, *PPROCESSOR_POWER_INFORMATION; + HMODULE powrprof = LoadLibrary(TEXT("powrprof.dll")); + typedef LONG(WINAPI * LPFN_CNPI)(POWER_INFORMATION_LEVEL, PVOID, ULONG, PVOID, ULONG); - LPFN_CNPI cnpi = (LPFN_CNPI)GetProcAddress(GetModuleHandle(TEXT("powrprof")), "CallNtPowerInformation"); + LPFN_CNPI cnpi = (LPFN_CNPI)GetProcAddress(powrprof, "CallNtPowerInformation"); if (cnpi == NULL) { NCNN_LOGE("CallNtPowerInformation is not supported"); + FreeLibrary(powrprof); return std::vector(g_cpucount, 0); } @@ -1383,8 +1408,21 @@ static std::vector get_max_freq_mhz() } free(buffer); + FreeLibrary(powrprof); return ret; } + +static int set_sched_affinity(const CpuSet& thread_affinity_mask) +{ + DWORD_PTR prev_mask = SetThreadAffinityMask(GetCurrentThread(), thread_affinity_mask.mask); + if (prev_mask == 0) + { + NCNN_LOGE("SetThreadAffinityMask failed %d", GetLastError()); + return -1; + } + + return 0; +} #endif // (defined _WIN32 && !(defined __MINGW32__)) #if defined __ANDROID__ || defined __linux__ @@ -1610,11 +1648,11 @@ static int setup_thread_affinity_masks() return 0; } - ULONG_PTR smt_cpu_mask = get_smt_cpu_mask(); + CpuSet smt_cpu_mask = get_smt_cpu_mask(); for (int i = 0; i < g_cpucount; i++) { - if (smt_cpu_mask & (1 << i)) + if (smt_cpu_mask.is_enabled(i)) { // always treat smt core as big core g_thread_affinity_mask_big.enable(i); @@ -1781,7 +1819,7 @@ const CpuSet& get_cpu_thread_affinity_mask(int powersave) int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask) { -#if defined __ANDROID__ || defined __linux__ +#if defined __ANDROID__ || defined __linux__ || (defined _WIN32 && !(defined __MINGW32__)) int num_threads = thread_affinity_mask.num_enabled(); #ifdef _OPENMP diff --git a/src/cpu.h b/src/cpu.h index cdc8b229b72..c38061df0b1 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -17,6 +17,10 @@ #include +#if (defined _WIN32 && !(defined __MINGW32__)) +#define WIN32_LEAN_AND_MEAN +#include +#endif #if defined __ANDROID__ || defined __linux__ #include // cpu_set_t #endif @@ -36,6 +40,9 @@ class NCNN_EXPORT CpuSet int num_enabled() const; public: +#if (defined _WIN32 && !(defined __MINGW32__)) + ULONG_PTR mask; +#endif #if defined __ANDROID__ || defined __linux__ cpu_set_t cpu_set; #endif From 73a8140de390b19169eaa2083d8e740279007c1c Mon Sep 17 00:00:00 2001 From: nihui Date: Sun, 30 Oct 2022 22:10:30 +0800 Subject: [PATCH 11/11] fix build --- src/cpu.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 96a02ebe957..eb012c31587 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -1627,10 +1627,10 @@ static int setup_thread_affinity_masks() // get max freq mhz for all cores int max_freq_mhz_min = INT_MAX; int max_freq_mhz_max = 0; - std::vector all_max_freq_mhz = get_max_freq_mhz(); + std::vector cpu_max_freq_mhz = get_max_freq_mhz(); for (int i = 0; i < g_cpucount; i++) { - int max_freq_mhz = all_max_freq_mhz[i]; + int max_freq_mhz = cpu_max_freq_mhz[i]; // NCNN_LOGE("%d max freq = %d khz", i, max_freq_mhz);