From abedcbd9ade60f1eb767c508e47e8d747cdfa1e4 Mon Sep 17 00:00:00 2001 From: "Jeff R. Hammond" Date: Wed, 12 Aug 2020 18:37:06 +0000 Subject: [PATCH 1/5] support non-x86 platforms tested on AArch64: - Cavium ThunderX2 with Linux 4.15 - Raspberry Pi 4 with Linux 5.5 Signed-off-by: Jeff R. Hammond --- sycl/source/detail/platform_util.cpp | 53 +++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/platform_util.cpp b/sycl/source/detail/platform_util.cpp index 068650692c8db..c2649f3e26669 100644 --- a/sycl/source/detail/platform_util.cpp +++ b/sycl/source/detail/platform_util.cpp @@ -10,16 +10,24 @@ #include #include +#if defined(__x86_64__) || defined(__i386__) #if defined(SYCL_RT_OS_LINUX) #include #elif defined(SYCL_RT_OS_WINDOWS) #include #endif +#endif + +#if defined(SYCL_RT_OS_LINUX) +#include +#include +#endif __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { +#if defined(__x86_64__) || defined(__i386__) // Used by methods that duplicate OpenCL behaviour in order to get CPU info static void cpuid(uint32_t *CPUInfo, uint32_t Type, uint32_t SubType = 0) { #if defined(SYCL_RT_OS_LINUX) @@ -28,11 +36,13 @@ static void cpuid(uint32_t *CPUInfo, uint32_t Type, uint32_t SubType = 0) { __cpuidex(reinterpret_cast(CPUInfo), Type, SubType); #endif } +#endif uint32_t PlatformUtil::getMaxClockFrequency() { throw runtime_error( "max_clock_frequency parameter is not supported for host device", PI_INVALID_DEVICE); +#if defined(__x86_64__) || defined(__i386__) uint32_t CPUInfo[4]; string_class Buff(sizeof(CPUInfo) * 3 + 1, 0); size_t Offset = 0; @@ -62,21 +72,49 @@ uint32_t PlatformUtil::getMaxClockFrequency() { Buff = Buff.substr(Buff.rfind(' '), Buff.length()); Freq *= std::stod(Buff); return Freq; +#else +#warning Your platform is not supported! 
+#endif + return 0; } uint32_t PlatformUtil::getMemCacheLineSize() { +#if defined(__x86_64__) || defined(__i386__) uint32_t CPUInfo[4]; cpuid(CPUInfo, 0x80000006); return CPUInfo[2] & 0xff; +#elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_LINESIZE) + long linesize = sysconf(_SC_LEVEL2_DCACHE_LINESIZE); + if (linesize > 0) { + return linesize; + } +#else +#warning Your platform is not supported. +#endif + return 8; } uint64_t PlatformUtil::getMemCacheSize() { +#if defined(__x86_64__) || defined(__i386__) uint32_t CPUInfo[4]; cpuid(CPUInfo, 0x80000006); return static_cast(CPUInfo[2] >> 16) * 1024; +#elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_SIZE) + long cachesize = sysconf(_SC_LEVEL2_DCACHE_SIZE); + if (cachesize > 0) { + return cachesize; + } +#else +#warning Your platform is not supported. +#endif + return static_cast(16 * 1024); } uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) { + + uint32_t Index = static_cast(TIndex); + +#if defined(__x86_64__) || defined(__i386__) // SSE4.2 has 16 byte (XMM) registers static constexpr uint32_t VECTOR_WIDTH_SSE42[] = {16, 8, 4, 2, 4, 2, 0}; // AVX supports 32 byte (YMM) registers only for floats and doubles @@ -86,8 +124,6 @@ uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) { // AVX512 has 64 byte (ZMM) registers static constexpr uint32_t VECTOR_WIDTH_AVX512[] = {64, 32, 16, 8, 16, 8, 0}; - uint32_t Index = static_cast(TIndex); - #if defined(SYCL_RT_OS_LINUX) if (__builtin_cpu_supports("avx512f")) return VECTOR_WIDTH_AVX512[Index]; @@ -119,14 +155,23 @@ uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) { #endif return VECTOR_WIDTH_SSE42[Index]; + +#elif defined(__ARM_NEON) + // NEON has 16 byte registers + static constexpr uint32_t VECTOR_WIDTH_NEON[] = {16, 8, 4, 2, 4, 2, 0}; + return VECTOR_WIDTH_NEON[Index]; + +#else +#warning Your platform is not supported! 
+#endif + return 0; } void PlatformUtil::prefetch(const char *Ptr, size_t NumBytes) { if (!Ptr) return; - // The current implementation assumes 64-byte x86 cache lines. - const size_t CacheLineSize = 64; + const size_t CacheLineSize = PlatformUtil::getMemCacheLineSize(); const size_t CacheLineMask = ~(CacheLineSize - 1); const char *PtrEnd = Ptr + NumBytes; From e44b026099afc98e86c82304ccaeb5b73bf7fa81 Mon Sep 17 00:00:00 2001 From: "Hammond, Jeff R" Date: Mon, 17 Aug 2020 17:35:33 -0700 Subject: [PATCH 2/5] clang-format fixes Signed-off-by: Hammond, Jeff R --- sycl/source/detail/platform_util.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/platform_util.cpp b/sycl/source/detail/platform_util.cpp index c2649f3e26669..b62529bf01aae 100644 --- a/sycl/source/detail/platform_util.cpp +++ b/sycl/source/detail/platform_util.cpp @@ -19,8 +19,8 @@ #endif #if defined(SYCL_RT_OS_LINUX) -#include #include +#include #endif __SYCL_INLINE_NAMESPACE(cl) { @@ -86,7 +86,7 @@ uint32_t PlatformUtil::getMemCacheLineSize() { #elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_LINESIZE) long linesize = sysconf(_SC_LEVEL2_DCACHE_LINESIZE); if (linesize > 0) { - return linesize; + return linesize; } #else #warning Your platform is not supported. @@ -102,7 +102,7 @@ uint64_t PlatformUtil::getMemCacheSize() { #elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_SIZE) long cachesize = sysconf(_SC_LEVEL2_DCACHE_SIZE); if (cachesize > 0) { - return cachesize; + return cachesize; } #else #warning Your platform is not supported. 
From 1c3bb5c30170db81aa6068cced88b166ad6d9ebc Mon Sep 17 00:00:00 2001 From: "Hammond, Jeff R" Date: Sun, 23 Aug 2020 09:46:33 -0700 Subject: [PATCH 3/5] NIT camel case fix Signed-off-by: Hammond, Jeff R --- sycl/source/detail/platform_util.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/platform_util.cpp b/sycl/source/detail/platform_util.cpp index b62529bf01aae..631961e6b08f5 100644 --- a/sycl/source/detail/platform_util.cpp +++ b/sycl/source/detail/platform_util.cpp @@ -84,9 +84,9 @@ uint32_t PlatformUtil::getMemCacheLineSize() { cpuid(CPUInfo, 0x80000006); return CPUInfo[2] & 0xff; #elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_LINESIZE) - long linesize = sysconf(_SC_LEVEL2_DCACHE_LINESIZE); - if (linesize > 0) { - return linesize; + long lineSize = sysconf(_SC_LEVEL2_DCACHE_LINESIZE); + if (lineSize > 0) { + return lineSize; } #else #warning Your platform is not supported. @@ -100,9 +100,9 @@ uint64_t PlatformUtil::getMemCacheSize() { cpuid(CPUInfo, 0x80000006); return static_cast(CPUInfo[2] >> 16) * 1024; #elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_SIZE) - long cachesize = sysconf(_SC_LEVEL2_DCACHE_SIZE); - if (cachesize > 0) { - return cachesize; + long cacheSize = sysconf(_SC_LEVEL2_DCACHE_SIZE); + if (cacheSize > 0) { + return cacheSize; } #else #warning Your platform is not supported. From 95c3fb515aa334fa4f0046ebc5113c46df7c6d76 Mon Sep 17 00:00:00 2001 From: "Hammond, Jeff R" Date: Wed, 2 Sep 2020 14:03:37 -0700 Subject: [PATCH 4/5] remove #warning in #else case MSVC does not support #warning compile-time warnings about non-support are not that useful. the code path that generated the warning should not break anything, it just might not be optimal and accurately reflect the underlying platform. 
Signed-off-by: Hammond, Jeff R --- sycl/source/detail/platform_util.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sycl/source/detail/platform_util.cpp b/sycl/source/detail/platform_util.cpp index 631961e6b08f5..ef4f448415fdd 100644 --- a/sycl/source/detail/platform_util.cpp +++ b/sycl/source/detail/platform_util.cpp @@ -72,8 +72,6 @@ uint32_t PlatformUtil::getMaxClockFrequency() { Buff = Buff.substr(Buff.rfind(' '), Buff.length()); Freq *= std::stod(Buff); return Freq; -#else -#warning Your platform is not supported! #endif return 0; } @@ -88,8 +86,6 @@ uint32_t PlatformUtil::getMemCacheLineSize() { if (lineSize > 0) { return lineSize; } -#else -#warning Your platform is not supported. #endif return 8; } @@ -104,8 +100,6 @@ uint64_t PlatformUtil::getMemCacheSize() { if (cacheSize > 0) { return cacheSize; } -#else -#warning Your platform is not supported. #endif return static_cast(16 * 1024); } @@ -161,8 +155,6 @@ uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) { static constexpr uint32_t VECTOR_WIDTH_NEON[] = {16, 8, 4, 2, 4, 2, 0}; return VECTOR_WIDTH_NEON[Index]; -#else -#warning Your platform is not supported! 
#endif return 0; } From 76b6e28b9f158516a5cc995f185604cc3b21dc69 Mon Sep 17 00:00:00 2001 From: "Hammond, Jeff R" Date: Thu, 3 Sep 2020 06:42:37 -0700 Subject: [PATCH 5/5] silence compiler issues on Windows Signed-off-by: Hammond, Jeff R --- sycl/source/detail/platform_util.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/platform_util.cpp b/sycl/source/detail/platform_util.cpp index ef4f448415fdd..38b64076bc987 100644 --- a/sycl/source/detail/platform_util.cpp +++ b/sycl/source/detail/platform_util.cpp @@ -10,18 +10,15 @@ #include #include -#if defined(__x86_64__) || defined(__i386__) #if defined(SYCL_RT_OS_LINUX) +#include +#include +#if defined(__x86_64__) || defined(__i386__) #include +#endif #elif defined(SYCL_RT_OS_WINDOWS) #include #endif -#endif - -#if defined(SYCL_RT_OS_LINUX) -#include -#include -#endif __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -106,9 +103,9 @@ uint64_t PlatformUtil::getMemCacheSize() { uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) { +#if defined(__x86_64__) || defined(__i386__) uint32_t Index = static_cast(TIndex); -#if defined(__x86_64__) || defined(__i386__) // SSE4.2 has 16 byte (XMM) registers static constexpr uint32_t VECTOR_WIDTH_SSE42[] = {16, 8, 4, 2, 4, 2, 0}; // AVX supports 32 byte (YMM) registers only for floats and doubles @@ -151,6 +148,8 @@ uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) { return VECTOR_WIDTH_SSE42[Index]; #elif defined(__ARM_NEON) + uint32_t Index = static_cast(TIndex); + // NEON has 16 byte registers static constexpr uint32_t VECTOR_WIDTH_NEON[] = {16, 8, 4, 2, 4, 2, 0}; return VECTOR_WIDTH_NEON[Index];