From c95eb77398fa131e4ef60be65841ca09a284115d Mon Sep 17 00:00:00 2001 From: liangan1 Date: Wed, 17 Jan 2024 08:51:20 +0800 Subject: [PATCH] Reduce write cache pollution to improve IAKV performance (#2457) (#2476) Co-authored-by: Chunyuan WU --- csrc/cpu/aten/kernels/MaskedMultiHeadAttentionKrnl.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/csrc/cpu/aten/kernels/MaskedMultiHeadAttentionKrnl.cpp b/csrc/cpu/aten/kernels/MaskedMultiHeadAttentionKrnl.cpp index a7753d2c0..6762b7cc8 100644 --- a/csrc/cpu/aten/kernels/MaskedMultiHeadAttentionKrnl.cpp +++ b/csrc/cpu/aten/kernels/MaskedMultiHeadAttentionKrnl.cpp @@ -798,7 +798,8 @@ scale_dot_product_for_indirect_access_kv_cache( } } } - flag_access[thread_id][bi][hi] = 1; + if (flag_access[thread_id][bi][hi] == 0) + flag_access[thread_id][bi][hi] = 1; } } } @@ -1102,7 +1103,8 @@ scale_dot_product_for_indirect_access_kv_cache_half( flag_access[thread_id][bi][hi]); } } - flag_access[thread_id][bi][hi] = 1; + if (flag_access[thread_id][bi][hi] == 0) + flag_access[thread_id][bi][hi] = 1; } } }