Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

get_physical_cpu_count api family #4302

Merged
merged 12 commits into from
Oct 31, 2022
4 changes: 2 additions & 2 deletions benchmark/benchncnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ void benchmark(const char* comment, const ncnn::Mat& _in, const ncnn::Option& op
int main(int argc, char** argv)
{
int loop_count = 4;
int num_threads = ncnn::get_cpu_count();
int powersave = 0;
int num_threads = ncnn::get_physical_big_cpu_count();
int powersave = 2;
int gpu_device = -1;
int cooling_down = 1;

Expand Down
151 changes: 151 additions & 0 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@
#include <emscripten/threading.h>
#endif

#if defined _WIN32 && !(defined __MINGW32__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

#if defined __ANDROID__ || defined __linux__
#if defined __ANDROID__
#if __ANDROID_API__ >= 18
Expand Down Expand Up @@ -1164,6 +1169,124 @@ int get_big_cpu_count()
return big_cpu_count ? big_cpu_count : g_cpucount;
}

#if defined __ANDROID__ || defined __linux__
// Read the SMT sibling bitmask of one cpu from sysfs.
//
// cpuid   cpu index used to build the path
//         /sys/devices/system/cpu/cpu<cpuid>/topology/thread_siblings
// return  the leading hexadecimal number of that file reinterpreted as int,
//         or -1 when the file cannot be opened or does not start with a hex number
//
// NOTE(review): only the first hex group is parsed. If the kernel prints the mask
// as several comma-separated groups (presumably on machines with more than 32 cpus),
// the remaining groups are not looked at - confirm whether that matters for callers.
static int get_thread_siblings(int cpuid)
{
    char path[256];
    // bounded formatting, the path can never overrun the buffer
    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpuid);

    FILE* fp = fopen(path, "rb");
    if (!fp)
        return -1;

    // %x requires an unsigned int destination
    unsigned int mask = 0;
    const int nscan = fscanf(fp, "%x", &mask);

    fclose(fp);

    // treat an empty or malformed file the same as a missing one
    if (nscan != 1)
        return -1;

    return static_cast<int>(mask);
}
#endif // defined __ANDROID__ || defined __linux__

// Count physical cpu cores.
//
// - windows (non-mingw): number of RelationProcessorCore entries reported by
//   GetLogicalProcessorInformation
// - android / linux: number of distinct values returned by get_thread_siblings()
//   over cpu indices [0, g_cpucount)
// - apple: sysctl hw.physicalcpu_max
// - anything else: g_cpucount
//
// return  a value in [1, g_cpucount] as long as g_cpucount is positive; whenever the
//         platform query fails or yields nothing usable, g_cpucount is returned
static int get_physical_cpucount()
{
    int count = 0;
#if (defined _WIN32 && !(defined __MINGW32__))
    typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
    LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation");
    if (glpi == NULL)
    {
        NCNN_LOGE("GetLogicalProcessorInformation is not supported");
        return g_cpucount;
    }

    // the first call has no buffer and is only used to learn the required size
    DWORD return_length = 0;
    glpi(NULL, &return_length);
    if (return_length == 0)
        return g_cpucount;

    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(return_length);
    if (buffer == NULL)
        return g_cpucount;

    // do not walk the buffer unless the query really filled it
    if (!glpi(buffer, &return_length))
    {
        free(buffer);
        return g_cpucount;
    }

    // only whole entries are inspected
    const DWORD entry_count = return_length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    for (DWORD i = 0; i < entry_count; i++)
    {
        if (buffer[i].Relationship == RelationProcessorCore)
            count++;
    }

    free(buffer);
#elif defined __ANDROID__ || defined __linux__
    // every distinct sibling value seen so far stands for one physical core
    std::vector<int> seen_siblings;
    for (int i = 0; i < g_cpucount; i++)
    {
        const int thread_siblings = get_thread_siblings(i);
        if (thread_siblings == -1)
        {
            // ignore malformed one
            continue;
        }

        bool already_seen = false;
        for (size_t j = 0; j < seen_siblings.size(); j++)
        {
            if (seen_siblings[j] == thread_siblings)
            {
                already_seen = true;
                break;
            }
        }

        if (!already_seen)
        {
            seen_siblings.push_back(thread_siblings);
            count++;
        }
    }
#elif __APPLE__
    size_t len = sizeof(count);
    if (sysctlbyname("hw.physicalcpu_max", &count, &len, NULL, 0) != 0)
    {
        // query failed, use the fallback below
        count = 0;
    }
#else
    count = g_cpucount;
#endif

    // no usable topology information (count stayed at zero), or more cores
    // reported than cpus: fall back to the plain cpu count
    if (count <= 0 || count > g_cpucount)
        count = g_cpucount;

    return count;
}

// physical core count, computed once by get_physical_cpucount() when this file's static objects are initialized
// NOTE(review): get_physical_cpucount() reads g_cpucount, so g_cpucount is presumably initialized earlier in this file - confirm
static int g_physical_cpucount = get_physical_cpucount();

// Return the physical cpu core count cached in g_physical_cpucount.
// The value is not re-detected on each call.
int get_physical_cpu_count()
{
return g_physical_cpucount;
}

// Number of physical cores counted as "little".
//
// When the physical core count equals g_cpucount (no cpu shares a core), this
// defers to get_little_cpu_count().
//
// Otherwise, assuming every SMT core carries exactly two hardware threads:
//   g_cpucount          = 2 * smt_cores + plain_cores
//   g_physical_cpucount =     smt_cores + plain_cores
// so plain_cores = 2 * g_physical_cpucount - g_cpucount, which is what is returned.
//
// return  never negative; the formula drops below zero when a core has more than
//         two hardware threads or when core detection produced nothing, and a
//         negative count is meaningless to callers, so it is clamped to 0
int get_physical_little_cpu_count()
{
    if (g_physical_cpucount == g_cpucount)
        return get_little_cpu_count();

    const int plain_cores = g_physical_cpucount * 2 - g_cpucount;

    return plain_cores > 0 ? plain_cores : 0;
}

// Number of physical cores counted as "big".
//
// When the physical core count equals g_cpucount (no cpu shares a core), this
// defers to get_big_cpu_count().
//
// Otherwise, assuming every SMT core carries exactly two hardware threads:
//   g_cpucount          = 2 * smt_cores + plain_cores
//   g_physical_cpucount =     smt_cores + plain_cores
// so smt_cores = g_cpucount - g_physical_cpucount, which is what is returned.
//
// return  at most g_physical_cpucount when that count is positive; with more than
//         two hardware threads per core the difference would exceed the number of
//         physical cores that exist, so it is capped there
int get_physical_big_cpu_count()
{
    if (g_physical_cpucount == g_cpucount)
        return get_big_cpu_count();

    int smt_cores = g_cpucount - g_physical_cpucount;

    // a non-positive physical count means detection failed; keep the plain
    // difference in that case instead of capping the result down to zero
    if (g_physical_cpucount > 0 && smt_cores > g_physical_cpucount)
        smt_cores = g_physical_cpucount;

    return smt_cores;
}

#if defined __ANDROID__ || defined __linux__
static int get_max_freq_khz(int cpuid)
{
Expand Down Expand Up @@ -1239,6 +1362,27 @@ static int get_max_freq_khz(int cpuid)
return max_freq_khz;
}

// Count the hardware threads listed in
// /sys/devices/system/cpu/cpu<cpuid>/topology/thread_siblings_list
//
// The file is parsed as a comma-separated list whose items are either a single
// cpu index ("4") or an inclusive range ("0-1"), so "0,4" and "0-1" both give 2.
// Counting commas alone would report "0-1" as a single thread.
//
// cpuid   cpu index used to build the sysfs path
// return  number of listed cpus, or 1 when the file cannot be opened or holds
//         no parsable entry
static int get_thread_siblings_list_count(int cpuid)
{
    char path[256];
    // bounded formatting, the path can never overrun the buffer
    snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpuid);

    FILE* fp = fopen(path, "rb");
    if (!fp)
        return 1;

    int count = 0;
    for (;;)
    {
        int first = 0;
        if (fscanf(fp, "%d", &first) != 1)
            break;

        int last = first;
        int separator = fgetc(fp);
        if (separator == '-')
        {
            // range item, read its inclusive upper bound
            if (fscanf(fp, "%d", &last) != 1)
                break;

            separator = fgetc(fp);
        }

        if (last >= first)
            count += last - first + 1;

        // anything other than a comma (newline, EOF, garbage) ends the list
        if (separator != ',')
            break;
    }

    fclose(fp);

    return count > 0 ? count : 1;
}

static int set_sched_affinity(const CpuSet& thread_affinity_mask)
{
// set affinity for thread
Expand Down Expand Up @@ -1357,6 +1501,13 @@ static int setup_thread_affinity_masks()

for (int i = 0; i < g_cpucount; i++)
{
if (get_thread_siblings_list_count(i) > 1)
{
// always treat smt core as big core
g_thread_affinity_mask_big.enable(i);
continue;
}

if (cpu_max_freq_khz[i] < max_freq_khz_medium)
g_thread_affinity_mask_little.enable(i);
else
Expand Down
4 changes: 4 additions & 0 deletions src/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ NCNN_EXPORT int get_cpu_count();
NCNN_EXPORT int get_little_cpu_count();
NCNN_EXPORT int get_big_cpu_count();

// physical cpu core count, detected once and cached by cpu.cpp
NCNN_EXPORT int get_physical_cpu_count();
// same as get_little_cpu_count() when the physical core count equals the overall cpu count,
// otherwise derived from the physical and overall cpu counts (see cpu.cpp)
NCNN_EXPORT int get_physical_little_cpu_count();
// same as get_big_cpu_count() when the physical core count equals the overall cpu count,
// otherwise derived from the physical and overall cpu counts (see cpu.cpp)
NCNN_EXPORT int get_physical_big_cpu_count();

// bind all threads on little clusters if powersave enabled
// affects HMP arch cpu like ARM big.LITTLE
// only implemented on android at the moment
Expand Down
2 changes: 1 addition & 1 deletion src/option.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace ncnn {
Option::Option()
{
lightmode = true;
num_threads = get_big_cpu_count();
num_threads = get_physical_big_cpu_count();
blob_allocator = 0;
workspace_allocator = 0;

Expand Down