From 016a6f46a03e1e8948d1b355e09869d03e8bef6b Mon Sep 17 00:00:00 2001 From: Evan Harvey Date: Thu, 13 Jan 2022 11:32:40 -0700 Subject: [PATCH] src/batched: Use SerialOpt2 for 33 to 39 square matrices --- src/batched/dense/KokkosBatched_Gemm_Decl.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/batched/dense/KokkosBatched_Gemm_Decl.hpp b/src/batched/dense/KokkosBatched_Gemm_Decl.hpp index 9b1eb18e1f..9e147c1253 100644 --- a/src/batched/dense/KokkosBatched_Gemm_Decl.hpp +++ b/src/batched/dense/KokkosBatched_Gemm_Decl.hpp @@ -468,8 +468,7 @@ int BatchedGemm(BatchedGemmHandleType *const handle, const ScalarType alpha, // } else if (on_gpu && ((std::is_same::value) ? (c_m >= 16) - : (c_m >= 24))) { // Vinh's note: use this condition - // for now, might need to revisit + : (c_m >= 24 && c_m <= 32) || c_m >= 40)) { handle->teamSz = handle->vecLen = 8; constexpr int tile_m = 32, tile_n = 32, tile_k = 8; #ifdef __CUDACC_RDC__