diff --git a/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp b/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp index 45f7aa5819..b6f614a252 100644 --- a/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp +++ b/batched/sparse/impl/KokkosBatched_Spmv_TeamVector_Impl.hpp @@ -219,7 +219,7 @@ KOKKOS_INLINE_FUNCTION int TeamVectorSpmvInternal::invoke( const OrdinalType ys1) { #if !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) if (member.team_size() == 1) { - if (N_team > 1 && valuess0 == 1) { + if (N_team > 1 && valuess0 == 1 && valuess1 % N_team == 0) { /* Left layout as valuess0 = 1 and non-zero vector length given at compilation time Here we use the SIMD data type which is using Intel