Fix major bug in TeamBitonicSort2. #544
Merged Dec 18, 2019 (6 commits)
24 changes: 12 additions & 12 deletions src/blas/impl/KokkosBlas3_gemm_impl.hpp
@@ -99,11 +99,11 @@ struct impl_deep_copy_matrix_block<TeamHandle,ViewTypeScratch,ViewType,Layout,bl
} else {
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,blockDim_j), [&] (const int j) {
#ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_j = offset_j+j;
+ int idx_j = offset_j+j;
Contributor: FYI for other readers: See #543 (comment).
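For readers unfamiliar with KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND: under the workaround the index is declared inside the innermost lambda instead of the outer one, and this PR also drops const from it. A minimal standalone sketch of the pattern, with plain loops standing in for the Kokkos ranges and a shortened macro name (both are assumptions for illustration, not the library's code):

#include <cstdio>

int main() {
  const int offset_j = 4, blockDim_j = 3, blockDim_i = 2;
  auto outer = [&](const int j) {
#ifndef GCC_CXX14_WORKAROUND        // stands in for the KokkosBlas macro
    int idx_j = offset_j + j;       // normal path: computed once per j
#endif
    auto inner = [&](const int i) {
#ifdef GCC_CXX14_WORKAROUND
      int idx_j = offset_j + j;     // workaround: declared in the inner lambda
#endif
      std::printf("i=%d idx_j=%d\n", i, idx_j);
    };
    for (int i = 0; i < blockDim_i; ++i) inner(i);
  };
  for (int j = 0; j < blockDim_j; ++j) outer(j);
  return 0;
}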

#endif
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,blockDim_i), [&] (const int i) {
#ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_j = offset_j+j;
+ int idx_j = offset_j+j;
#endif
const int idx_i = offset_i+i;
A_scr(i,j) = idx_i<A.extent_int(0) && idx_j<A.extent_int(1) ? A(idx_i,idx_j) : ATV::zero();
@@ -131,11 +131,11 @@ struct impl_deep_copy_matrix_block<TeamHandle,ViewTypeScratch,ViewType,Kokkos::L
} else {
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,blockDim_i), [&] (const int i) {
#ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_i = offset_i+i;
+ int idx_i = offset_i+i;
#endif
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,blockDim_j), [&] (const int j) {
#ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_i = offset_i+i;
+ int idx_i = offset_i+i;
#endif
const int idx_j = offset_j+j;
A_scr(i,j) = idx_i<A.extent_int(0) && idx_j<A.extent_int(1) ? A(idx_i,idx_j) : ATV::zero();
@@ -168,11 +168,11 @@ struct impl_deep_copy_matrix_block<TeamHandle,ViewTypeScratch,ViewType,Layout,bl
} else {
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,blockDim_j), [&] (const int j) {
#ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_j = offset_j+j;
+ int idx_j = offset_j+j;
#endif
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,blockDim_i), [&] (const int i) {
#ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_j = offset_j+j;
+ int idx_j = offset_j+j;
#endif
const int idx_i = offset_i+i;
A_scr(i,j) = idx_i<A.extent_int(1) && idx_j<A.extent_int(0) ? A(idx_j,idx_i) : ATV::zero();
@@ -205,11 +205,11 @@ struct impl_deep_copy_matrix_block<TeamHandle,ViewTypeScratch,ViewType,Kokkos::L
} else {
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,blockDim_i), [&] (const int i) {
#ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_i = offset_i+i;
+ int idx_i = offset_i+i;
#endif
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,blockDim_j), [&] (const int j) {
#ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_i = offset_i+i;
+ int idx_i = offset_i+i;
#endif
const int idx_j = offset_j+j;
A_scr(i,j) = idx_i<A.extent_int(1) && idx_j<A.extent_int(0) ? A(idx_j,idx_i) : ATV::zero();
@@ -242,11 +242,11 @@ struct impl_deep_copy_matrix_block<TeamHandle,ViewTypeScratch,ViewType,Layout,bl
} else {
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,blockDim_j), [&] (const int j) {
#ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_j = offset_j+j;
+ int idx_j = offset_j+j;
#endif
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,blockDim_i), [&] (const int i) {
#ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_j = offset_j+j;
+ int idx_j = offset_j+j;
#endif
const int idx_i = offset_i+i;
A_scr(i,j) = idx_i<A.extent_int(1) && idx_j<A.extent_int(0) ? ATV::conj(A(idx_j,idx_i)) : ATV::zero();
@@ -279,11 +279,11 @@ struct impl_deep_copy_matrix_block<TeamHandle,ViewTypeScratch,ViewType,Kokkos::L
} else {
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,blockDim_i), [&] (const int i) {
#ifndef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_i = offset_i+i;
+ int idx_i = offset_i+i;
#endif
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,blockDim_j), [&] (const int j) {
#ifdef KOKKOS_IMPL_BATCHED_GEMM_GCC_CXX14_WORKAROUND
- const int idx_i = offset_i+i;
+ int idx_i = offset_i+i;
#endif
const int idx_j = offset_j+j;
A_scr(i,j) = idx_i<A.extent_int(1) && idx_j<A.extent_int(0) ? ATV::conj(A(idx_j,idx_i)) : ATV::zero();
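All six hunks touch the same bounds-checked tile copy: a fixed-size block of A is staged into team scratch, and elements past the edge of A are written as zero so the GEMM inner kernel never needs edge cases. A minimal sketch of that idea, with plain arrays standing in for Kokkos Views and scratch (an illustration, not the library's code):

#include <vector>

// Copy a blockDim_i x blockDim_j tile of row-major A (rows x cols),
// starting at (offset_i, offset_j), into scr; zero-pad out-of-bounds.
void copy_tile(const std::vector<double>& A, int rows, int cols,
               int offset_i, int offset_j,
               std::vector<double>& scr, int blockDim_i, int blockDim_j) {
  for (int j = 0; j < blockDim_j; ++j) {
    for (int i = 0; i < blockDim_i; ++i) {
      const int idx_i = offset_i + i;
      const int idx_j = offset_j + j;
      scr[i * blockDim_j + j] =
          (idx_i < rows && idx_j < cols) ? A[idx_i * cols + idx_j] : 0.0;
    }
  }
}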
14 changes: 7 additions & 7 deletions src/common/KokkosKernels_Sorting.hpp
@@ -52,7 +52,7 @@ namespace KokkosKernels {
namespace Impl {

//Radix sort for integers, on a single thread within a team.
- //Pros: few diverging branches, so OK for sorting on a single GPU thread/warp. Better on CPU cores.
+ //Pros: few diverging branches, so OK for sorting on a single GPU vector lane. Better on CPU cores.
//Con: requires auxiliary storage, and this version only works for integers
template<typename Ordinal, typename ValueType>
KOKKOS_INLINE_FUNCTION void
@@ -166,7 +166,7 @@ SerialRadixSort(ValueType* values, ValueType* valuesAux, Ordinal n)

//Radix sort for integers (no internal parallelism).
//While sorting, also permute "perm" array along with the values.
- //Pros: few diverging branches, so good for sorting on a single GPU thread/warp.
+ //Pros: few diverging branches, so good for sorting on a single GPU vector lane.
//Con: requires auxiliary storage, this version only works for integers (although float/double is possible)
template<typename Ordinal, typename ValueType, typename PermType>
KOKKOS_INLINE_FUNCTION void
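A hedged usage sketch for the serial radix sort, based only on the signature visible in the hunk header above (SerialRadixSort(ValueType* values, ValueType* valuesAux, Ordinal n)). The "Con" in the comment shows up in the call: the caller must supply auxiliary storage of the same length for the out-of-place passes; this sketch assumes the sorted result is left in values:

#include "KokkosKernels_Sorting.hpp"

// Sort n integer keys; scratch must hold at least n elements and its
// contents are clobbered. Device-callable, matching the
// KOKKOS_INLINE_FUNCTION marker on the sort itself.
KOKKOS_INLINE_FUNCTION void sort_keys(int* keys, int* scratch, int n) {
  KokkosKernels::Impl::SerialRadixSort(keys, scratch, n);
}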
@@ -395,11 +395,11 @@ TeamBitonicSort2(ValueType* values, PermType* perm, Ordinal n, const TeamMember
if(elem2 < n)
{
//both elements in bounds, so compare them and swap if out of order
- if(comp(values[elem2], values[elem2]))
+ if(comp(values[elem2], values[elem1]))
{
- ValueType temp = values[elem1];
+ ValueType temp1 = values[elem1];
Contributor: This would have been a good place for a Kokkos::swap, as a temporary replacement for std::swap. It could even be that CUDA supports std::swap on device now.
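Following up on that suggestion, a sketch of the kind of helper meant (hypothetical name, not an existing Kokkos API): a swap that is safe to call on device, where std::swap historically was not:

#include <Kokkos_Core.hpp>

// Device-callable swap; ValueType/PermType only need to be copyable.
template <typename T>
KOKKOS_INLINE_FUNCTION void kk_swap(T& a, T& b) {
  T tmp = a;
  a = b;
  b = tmp;
}

// The two swap blocks in this hunk would then collapse to:
//   if(comp(values[elem2], values[elem1])) {
//     kk_swap(values[elem1], values[elem2]);
//     kk_swap(perm[elem1], perm[elem2]);
//   }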

values[elem1] = values[elem2];
- values[elem2] = temp;
+ values[elem2] = temp1;
PermType temp2 = perm[elem1];
perm[elem1] = perm[elem2];
perm[elem2] = temp2;
@@ -414,9 +414,9 @@
{
if(comp(values[elem2], values[elem1]))
{
- ValueType temp = values[elem1];
+ ValueType temp1 = values[elem1];
values[elem1] = values[elem2];
- values[elem2] = temp;
+ values[elem2] = temp1;
PermType temp2 = perm[elem1];
perm[elem1] = perm[elem2];
perm[elem2] = temp2;
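Why the original line was a "major bug": for any strict comparator, comparing an element with itself is always false, so the first (both-in-bounds) branch never swapped anything and left those pairs unsorted. A tiny standalone illustration, with std::less standing in for the sort's comparator:

#include <cassert>
#include <functional>

int main() {
  std::less<int> comp;                  // irreflexive: comp(x, x) is false
  int values[2] = {7, 3};               // elem1 = 0, elem2 = 1
  assert(!comp(values[1], values[1]));  // buggy test: always false, no swap
  assert(comp(values[1], values[0]));   // fixed test: true here, so swap
  return 0;
}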