diff --git a/src/inference/src/system_conf.cpp b/src/inference/src/system_conf.cpp index e85f79c79d0c3a..0e256204549566 100644 --- a/src/inference/src/system_conf.cpp +++ b/src/inference/src/system_conf.cpp @@ -22,6 +22,7 @@ # include # define ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP (1 << 9) # define ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP (1 << 10) +# define ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE (1 << 24) #elif defined(__APPLE__) && defined(__aarch64__) # include # include @@ -181,7 +182,7 @@ bool with_cpu_sve() { # if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ !defined(__arm__) && defined(__aarch64__) const uint32_t hwcaps = getauxval(AT_HWCAP); - return hwcaps & HWCAP_SVE; + return hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE; # elif !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && \ !defined(__aarch64__) && defined(__arm__) return false; diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp index ce51eed0c75c6c..157cc5333a1cf0 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp @@ -34,6 +34,10 @@ static constexpr size_t vec_len_f32_avx2 = vec_len_avx2 / sizeof(float); static constexpr size_t vec_len_f32_neon = vec_len_neon / sizeof(float); static constexpr size_t vec_len_f16_neon = vec_len_neon / sizeof(ov::float16); +#if defined(HAVE_SVE) +static constexpr size_t vec_len_f32_sve = svcntw(); +#endif + #ifdef HAVE_AVX512F inline __m512 cvt_bf16_to_fp32(const __m256i src) { __m512i y = _mm512_cvtepu16_epi32(src); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index fc58fab938eac5..e95d08391940d8 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -63,7 +63,6 @@ void cvt_copy(TA* dst, TB* src, size_t n) { } #elif defined(OPENVINO_ARCH_ARM64) #if defined(HAVE_SVE) - size_t vec_len_f32_sve = svcntw(); auto _dst = reinterpret_cast(dst); size_t inc = vec_len_f32_sve; svbool_t pg = svptrue_b32(); @@ -86,7 +85,6 @@ void cvt_copy(TA* dst, TB* src, size_t n) { } } #else - int vec_len_f32_neon = 4; auto _dst = reinterpret_cast(dst); if (std::is_same::value && std::is_same::value) { for (; i + vec_len_f32_neon <= n; i += vec_len_f32_neon) { @@ -123,7 +121,6 @@ static void attn_acc_value(float* out, float weight, T* v, size_t S, float* scal } #elif defined(OPENVINO_ARCH_ARM64) #if defined(HAVE_SVE) - size_t vec_len_f32_sve = svcntw(); auto _v = reinterpret_cast(v); svfloat32_t attn_w_vec_fp32 = svdup_n_f32(weight); size_t inc = vec_len_f32_sve; @@ -404,7 +401,6 @@ static float sum_q_head(T* a, size_t n) { sum = _mm256_cvtss_f32(vsum0); #elif defined(OPENVINO_ARCH_ARM64) #if defined(HAVE_SVE) - size_t vec_len_f32_sve = svcntw(); svfloat32_t sum0 = svdup_n_f32(0.0f); svfloat32_t sum1 = svdup_n_f32(0.0f); svfloat32_t sum2 = svdup_n_f32(0.0f); @@ -448,7 +444,6 @@ static float sum_q_head(T* a, size_t n) { float32_t sum_3 = svaddv_f32(pg, sum3); sum = static_cast(sum_0 + sum_1 + sum_2 + sum_3); #else - size_t vec_len_f32_neon = 4; float32x4_t vsum0 = vdupq_n_f32(0.0f); float32x4_t vsum1 = vdupq_n_f32(0.0f); float32x4_t vsum2 = vdupq_n_f32(0.0f); @@ -589,7 +584,6 @@ static float dot_product(TA* a, TB* b, size_t n, float* scale, float* zp, float* #elif defined(OPENVINO_ARCH_ARM64) #if defined(HAVE_SVE) - size_t vec_len_f32_sve = svcntw(); svbool_t pg = svptrue_b32(); svfloat32_t sum0 = svdup_n_f32(0.0f); svfloat32_t sum1 = svdup_n_f32(0.0f); @@ -940,7 +934,6 @@ static void attn_reduce(T* dst, float* temp, size_t M, size_t S, size_t temp_str } #elif defined(OPENVINO_ARCH_ARM64) #if defined(HAVE_SVE) - size_t vec_len_f32_sve = svcntw(); auto _dst = reinterpret_cast(dst); size_t inc = vec_len_f32_sve; svbool_t pg = svptrue_b32(); diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp index 705cec9da2e47b..284a71e1450051 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/softmax_kernel.hpp @@ -806,7 +806,6 @@ inline void multiply_scalar(float* a, float* a_dst, const float val, const size_ } #elif defined(OPENVINO_ARCH_ARM64) #if defined(HAVE_SVE) - size_t vec_len_f32_sve = svcntw(); svfloat32_t v_scale = svdup_n_f32(val); size_t inc = vec_len_f32_sve; svbool_t pg = svptrue_b32();