Skip to content

Commit

Permalink
SVE detection fixes, submodule fixes, other cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
NishantPrabhuFujitsu committed Oct 29, 2024
1 parent 77b2980 commit 2ea2be3
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 9 deletions.
3 changes: 2 additions & 1 deletion src/inference/src/system_conf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# include <sys/auxv.h>
// Bit masks tested against the value returned by getauxval(AT_HWCAP) on Linux/AArch64.
// The positions follow the kernel's arm64 uapi <asm/hwcap.h>:
//   FPHP    -> bit 9  (scalar half-precision floating point)
//   ASIMDHP -> bit 10 (Advanced SIMD half-precision)
//   SVE     -> bit 22 (Scalable Vector Extension)
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9)
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10)
// NOTE: bit 24 of AT_HWCAP is HWCAP_DIT, not SVE; using it here would make SVE detection
// report the presence of the DIT feature instead.
# define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 22)
#elif defined(__APPLE__) && defined(__aarch64__)
# include <sys/sysctl.h>
# include <sys/types.h>
Expand Down Expand Up @@ -181,7 +182,7 @@ bool with_cpu_sve() {
# if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
!defined(__arm__) && defined(__aarch64__)
const uint32_t hwcaps = getauxval(AT_HWCAP);
return hwcaps & HWCAP_SVE;
return hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE;
# elif !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \
!defined(__aarch64__) && defined(__arm__)
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ static constexpr size_t vec_len_f32_avx2 = vec_len_avx2 / sizeof(float);
static constexpr size_t vec_len_f32_neon = vec_len_neon / sizeof(float);
static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16);

#if defined(HAVE_SVE)
// Number of 32-bit lanes in one SVE vector. svcntw() is a runtime intrinsic (the SVE vector
// length is a property of the executing CPU), so its result is not a constant expression and
// the variable cannot be declared constexpr; it is initialised once per translation unit.
// NOTE(review): this initialiser runs during static initialisation of every TU that includes
// this header - confirm such TUs are only loaded/executed on SVE-capable hardware.
static const size_t vec_len_f32_sve = svcntw();
#endif

#ifdef HAVE_AVX512F
inline __m512 cvt_bf16_to_fp32(const __m256i src) {
__m512i y = _mm512_cvtepu16_epi32(src);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ void cvt_copy(TA* dst, TB* src, size_t n) {
}
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE)
size_t vec_len_f32_sve = svcntw();
auto _dst = reinterpret_cast<float32_t*>(dst);
size_t inc = vec_len_f32_sve;
svbool_t pg = svptrue_b32();
Expand All @@ -86,7 +85,6 @@ void cvt_copy(TA* dst, TB* src, size_t n) {
}
}
#else
int vec_len_f32_neon = 4;
auto _dst = reinterpret_cast<float32_t*>(dst);
if (std::is_same<TA, float>::value && std::is_same<TB, float>::value) {
for (; i + vec_len_f32_neon <= n; i += vec_len_f32_neon) {
Expand Down Expand Up @@ -123,7 +121,6 @@ static void attn_acc_value(float* out, float weight, T* v, size_t S, float* scal
}
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE)
size_t vec_len_f32_sve = svcntw();
auto _v = reinterpret_cast<float32_t*>(v);
svfloat32_t attn_w_vec_fp32 = svdup_n_f32(weight);
size_t inc = vec_len_f32_sve;
Expand Down Expand Up @@ -404,7 +401,6 @@ static float sum_q_head(T* a, size_t n) {
sum = _mm256_cvtss_f32(vsum0);
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE)
size_t vec_len_f32_sve = svcntw();
svfloat32_t sum0 = svdup_n_f32(0.0f);
svfloat32_t sum1 = svdup_n_f32(0.0f);
svfloat32_t sum2 = svdup_n_f32(0.0f);
Expand Down Expand Up @@ -448,7 +444,6 @@ static float sum_q_head(T* a, size_t n) {
float32_t sum_3 = svaddv_f32(pg, sum3);
sum = static_cast<float>(sum_0 + sum_1 + sum_2 + sum_3);
#else
size_t vec_len_f32_neon = 4;
float32x4_t vsum0 = vdupq_n_f32(0.0f);
float32x4_t vsum1 = vdupq_n_f32(0.0f);
float32x4_t vsum2 = vdupq_n_f32(0.0f);
Expand Down Expand Up @@ -589,7 +584,6 @@ static float dot_product(TA* a, TB* b, size_t n, float* scale, float* zp, float*

#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE)
size_t vec_len_f32_sve = svcntw();
svbool_t pg = svptrue_b32();
svfloat32_t sum0 = svdup_n_f32(0.0f);
svfloat32_t sum1 = svdup_n_f32(0.0f);
Expand Down Expand Up @@ -940,7 +934,6 @@ static void attn_reduce(T* dst, float* temp, size_t M, size_t S, size_t temp_str
}
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE)
size_t vec_len_f32_sve = svcntw();
auto _dst = reinterpret_cast<float32_t*>(dst);
size_t inc = vec_len_f32_sve;
svbool_t pg = svptrue_b32();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,6 @@ inline void multiply_scalar(float* a, float* a_dst, const float val, const size_
}
#elif defined(OPENVINO_ARCH_ARM64)
#if defined(HAVE_SVE)
size_t vec_len_f32_sve = svcntw();
svfloat32_t v_scale = svdup_n_f32(val);
size_t inc = vec_len_f32_sve;
svbool_t pg = svptrue_b32();
Expand Down

0 comments on commit 2ea2be3

Please sign in to comment.