Skip to content

Commit

Permalink
[SYCL] Support non-x86 platforms (#2333)
Browse files Browse the repository at this point in the history
Tested on AArch64:
- Cavium ThunderX2 with Linux 4.15
- Raspberry Pi 4 with Linux 5.5

Signed-off-by: Jeff R. Hammond <jeff.r.hammond@intel.com>
  • Loading branch information
Jeff Hammond authored Sep 3, 2020
1 parent a21d7ef commit 2f632f8
Showing 1 changed file with 40 additions and 4 deletions.
44 changes: 40 additions & 4 deletions sycl/source/detail/platform_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
#include <detail/platform_util.hpp>

#if defined(SYCL_RT_OS_LINUX)
#include <errno.h>
#include <unistd.h>
#if defined(__x86_64__) || defined(__i386__)
#include <cpuid.h>
#endif
#elif defined(SYCL_RT_OS_WINDOWS)
#include <intrin.h>
#endif
Expand All @@ -20,6 +24,7 @@ __SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {

#if defined(__x86_64__) || defined(__i386__)
// Used by methods that duplicate OpenCL behaviour in order to get CPU info
static void cpuid(uint32_t *CPUInfo, uint32_t Type, uint32_t SubType = 0) {
#if defined(SYCL_RT_OS_LINUX)
Expand All @@ -28,11 +33,13 @@ static void cpuid(uint32_t *CPUInfo, uint32_t Type, uint32_t SubType = 0) {
__cpuidex(reinterpret_cast<int *>(CPUInfo), Type, SubType);
#endif
}
#endif

uint32_t PlatformUtil::getMaxClockFrequency() {
throw runtime_error(
"max_clock_frequency parameter is not supported for host device",
PI_INVALID_DEVICE);
#if defined(__x86_64__) || defined(__i386__)
uint32_t CPUInfo[4];
string_class Buff(sizeof(CPUInfo) * 3 + 1, 0);
size_t Offset = 0;
Expand Down Expand Up @@ -62,21 +69,43 @@ uint32_t PlatformUtil::getMaxClockFrequency() {
Buff = Buff.substr(Buff.rfind(' '), Buff.length());
Freq *= std::stod(Buff);
return Freq;
#endif
return 0;
}

// Returns the cache line size, in bytes, detected for the host CPU.
//
// x86: the low 8 bits of CPUInfo[2] from CPUID leaf 0x80000006.
// Other Linux targets: sysconf(_SC_LEVEL2_DCACHE_LINESIZE), when the C library
// defines that name.
//
// Returns the detected value when it is non-zero, otherwise the default of 8.
// The result is never zero, so callers may safely derive an alignment mask
// from it (e.g. ~(size - 1)).
uint32_t PlatformUtil::getMemCacheLineSize() {
#if defined(__x86_64__) || defined(__i386__)
  uint32_t CPUInfo[4];
  cpuid(CPUInfo, 0x80000006);
  const uint32_t LineSize = CPUInfo[2] & 0xff;
  // A zero field carries no usable line size; fall through to the default
  // rather than handing callers a zero.
  if (LineSize != 0)
    return LineSize;
#elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_LINESIZE)
  // sysconf() returns -1 on failure; any non-positive result is treated as
  // "unknown" and falls through to the default.
  const long LineSize = sysconf(_SC_LEVEL2_DCACHE_LINESIZE);
  if (LineSize > 0)
    return static_cast<uint32_t>(LineSize);
#endif
  // Default used when no platform query produced a usable value.
  return 8;
}

// Returns the memory cache size, in bytes, detected for the host CPU.
//
// x86: the upper 16 bits of CPUInfo word 2 from CPUID leaf 0x80000006,
// multiplied by 1024 (the field is presumably expressed in KiB).
// Other Linux targets: sysconf(_SC_LEVEL2_DCACHE_SIZE), when the C library
// defines that name and reports a positive value.
//
// Falls back to 16 * 1024 when neither source applies.
uint64_t PlatformUtil::getMemCacheSize() {
#if defined(__x86_64__) || defined(__i386__)
  uint32_t Regs[4];
  cpuid(Regs, 0x80000006);
  // Widen before scaling so the multiplication is carried out in 64 bits.
  const uint64_t SizeField = Regs[2] >> 16;
  return SizeField * 1024;
#elif defined(SYCL_RT_OS_LINUX) && defined(_SC_LEVEL2_DCACHE_SIZE)
  const long Reported = sysconf(_SC_LEVEL2_DCACHE_SIZE);
  // Only a strictly positive answer is trusted; anything else uses the
  // default below.
  if (Reported > 0)
    return static_cast<uint64_t>(Reported);
#endif
  constexpr uint64_t DefaultCacheBytes = 16 * 1024;
  return DefaultCacheBytes;
}

uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) {

#if defined(__x86_64__) || defined(__i386__)
uint32_t Index = static_cast<uint32_t>(TIndex);

// SSE4.2 has 16 byte (XMM) registers
static constexpr uint32_t VECTOR_WIDTH_SSE42[] = {16, 8, 4, 2, 4, 2, 0};
// AVX supports 32 byte (YMM) registers only for floats and doubles
Expand All @@ -86,8 +115,6 @@ uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) {
// AVX512 has 64 byte (ZMM) registers
static constexpr uint32_t VECTOR_WIDTH_AVX512[] = {64, 32, 16, 8, 16, 8, 0};

uint32_t Index = static_cast<uint32_t>(TIndex);

#if defined(SYCL_RT_OS_LINUX)
if (__builtin_cpu_supports("avx512f"))
return VECTOR_WIDTH_AVX512[Index];
Expand Down Expand Up @@ -119,14 +146,23 @@ uint32_t PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex TIndex) {
#endif

return VECTOR_WIDTH_SSE42[Index];

#elif defined(__ARM_NEON)
uint32_t Index = static_cast<uint32_t>(TIndex);

// NEON has 16 byte registers
static constexpr uint32_t VECTOR_WIDTH_NEON[] = {16, 8, 4, 2, 4, 2, 0};
return VECTOR_WIDTH_NEON[Index];

#endif
return 0;
}

void PlatformUtil::prefetch(const char *Ptr, size_t NumBytes) {
if (!Ptr)
return;

// The current implementation assumes 64-byte x86 cache lines.
const size_t CacheLineSize = 64;
const size_t CacheLineSize = PlatformUtil::getMemCacheLineSize();
const size_t CacheLineMask = ~(CacheLineSize - 1);
const char *PtrEnd = Ptr + NumBytes;

Expand Down

0 comments on commit 2f632f8

Please sign in to comment.