Make sure to call ExecutionSpace::concurrency from an object

In anticipation of Kokkos turning ExecutionSpace::concurrency() into a non-static member function.
kokkos · Dec 1, 2022 · b322df9 · b322df9
1 parent 3dff8ed
commit b322df9
Show file tree

Hide file tree

Showing 17 changed files with 30 additions and 30 deletions.
diff --git a/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp b/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp
@@ -80,7 +80,7 @@ enum PoolType { OneThread2OneChunk, ManyThread2OneChunk };
  not
  *   enough memory. This case still would work for #chunks = #threads, with an
  extra atomic
- *   operation. On GPUs, even when #chunks = Kokkos::Cuda::concurrency(), this
+ *   operation. On GPUs, even when #chunks = Kokkos::Cuda().concurrency(), this
  option is safe
  *   to use.
  *
@@ -116,7 +116,7 @@ enum PoolType { OneThread2OneChunk, ManyThread2OneChunk };
  *      --- either requires the initializations to go into loop body, resulting
  in N work in each loop.
  *      --- Or, we can use preinitialized 2d views where the first dimension is
- ExecutionSpace::concurrency()
+ ExecutionSpace().concurrency()
  *          However, this case becomes a problem in CUDA, as concurrency is
  pretty high and we might not have
  *          enough memory for that.

diff --git a/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp b/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp
@@ -473,7 +473,7 @@ void run_example(CrsGraph_type crsGraph, DataType num_cols, Parameters params) {
             << "    Num Edges      : " << crsGraph.entries.extent(0)
             << std::endl
             << "    Concurrency    : "
-            << Kokkos::DefaultExecutionSpace::concurrency() << std::endl
+            << Kokkos::DefaultExecutionSpace().concurrency() << std::endl
             << "    Algorithm      : " << label_algorithm << std::endl
             << "Coloring Stats" << std::endl
             << "    Num colors     : " << num_colors << std::endl

diff --git a/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp b/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp
@@ -220,7 +220,7 @@ void experiment(const parameters_t& params) {
   size_t max_value = 100;
 
   // Get the concurrency
-  size_t concurrency = execution_space::concurrency();
+  size_t concurrency = execution_space().concurrency();
 
   // Set up random number generator
   std::random_device rd;

diff --git a/graph/src/KokkosGraph_CoarsenConstruct.hpp b/graph/src/KokkosGraph_CoarsenConstruct.hpp
@@ -1050,7 +1050,7 @@ class coarse_builder {
         3 * max_entries;   // for hash nexts, keys, and values (unless scalar_t
                            // != ordinal_t, in which case memory is unused)
     mem_chunk_size += 10;  // for metadata
-    mem_chunk_count = exec_space::concurrency();
+    mem_chunk_count = exec_space().concurrency();
     if (mem_chunk_count > remaining_count) {
       mem_chunk_count = remaining_count + 1;
     }
@@ -1385,7 +1385,7 @@ class coarse_builder {
                                   wgt_by_source);
     if (is_host_space) {
       bool use_dyn =
-          should_use_dyn(n, g.graph.row_map, exec_space::concurrency());
+          should_use_dyn(n, g.graph.row_map, exec_space().concurrency());
       if (use_dyn) {
         Kokkos::parallel_for("move edges to coarse matrix", dyn_policy_t(0, n),
                              translateF);
@@ -1422,7 +1422,7 @@ class coarse_builder {
 
     if (is_host_space) {
       bool use_dyn =
-          should_use_dyn(nc, source_offsets, exec_space::concurrency());
+          should_use_dyn(nc, source_offsets, exec_space().concurrency());
       if (use_dyn) {
         Kokkos::parallel_for(
             "move deduped edges to new coarse matrix", dyn_policy_t(0, nc),

diff --git a/graph/src/KokkosGraph_Triangle.hpp b/graph/src/KokkosGraph_Triangle.hpp
@@ -266,7 +266,7 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m,
         KokkosKernels::Impl::kk_sort_by_row_size<size_type, nnz_lno_t,
                                                  ExecutionSpace>(
             m, row_mapA.data(), new_indices.data(), sort_decreasing_order,
-            ExecutionSpace::concurrency());
+            ExecutionSpace().concurrency());
       }
       sh->set_lower_triangular_permutation(new_indices);
     }
@@ -299,7 +299,7 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m,
           m, row_mapA, entriesA, null_values, lower_triangular_matrix_rowmap,
           lower_triangular_matrix_entries, null_values, new_indices,
           handle->is_dynamic_scheduling(),
-          handle->get_team_work_size(1, ExecutionSpace::concurrency(), m));
+          handle->get_team_work_size(1, ExecutionSpace().concurrency(), m));
 
       sh->set_lower_triangular_matrix(lower_triangular_matrix_rowmap,
                                       lower_triangular_matrix_entries);

diff --git a/perf_test/graph/KokkosGraph_color_d2.cpp b/perf_test/graph/KokkosGraph_color_d2.cpp
@@ -539,7 +539,7 @@ void run_experiment(crsGraph_t crsGraph, int num_cols,
               << "    Num Edges      : " << crsGraph.entries.extent(0)
               << std::endl
               << "    Concurrency    : "
-              << Kokkos::DefaultExecutionSpace::concurrency() << std::endl
+              << Kokkos::DefaultExecutionSpace().concurrency() << std::endl
               << "    Algorithm      : " << label_algorithm << std::endl
               << "Overall Time/Stats" << std::endl
               << "    Total Time     : " << total_time << std::endl
@@ -602,7 +602,7 @@ void run_experiment(crsGraph_t crsGraph, int num_cols,
               << currentDateTimeStr << "," << crsGraph.numRows() << ","
               << crsGraph.entries.extent(0) << ","
               << Kokkos::DefaultExecutionSpace::name() << "," << label_algorithm
-              << "," << Kokkos::DefaultExecutionSpace::concurrency() << ","
+              << "," << Kokkos::DefaultExecutionSpace().concurrency() << ","
               << repeat << "," << total_time << "," << total_time_matrix_squared
               << "," << total_time_matrix_squared_d1 << ","
               << total_time_color_greedy << "," << total_time_find_conflicts
@@ -636,7 +636,7 @@ void run_experiment(crsGraph_t crsGraph, int num_cols,
               << currentDateTimeStr << "," << crsGraph.numRows() << ","
               << crsGraph.entries.extent(0) << ","
               << Kokkos::DefaultExecutionSpace::name() << "," << label_algorithm
-              << "," << Kokkos::DefaultExecutionSpace::concurrency() << ",";
+              << "," << Kokkos::DefaultExecutionSpace().concurrency() << ",";
     KokkosGraph::Impl::graph_print_distance2_color_histogram(&kh, true);
     std::cout << std::endl;
   }

diff --git a/perf_test/sparse/KokkosSparse_spmv_test.hpp b/perf_test/sparse/KokkosSparse_spmv_test.hpp
@@ -192,7 +192,7 @@ void matvec(AType& A, XType x, YType y, Ordinal rows_per_thread, int team_size,
         printf("PTR: %p\n",
                static_cast<const void*>(A.graph.row_block_offsets.data()));
         A.graph.create_block_partitioning(
-            AType::execution_space::concurrency());
+            typename AType::execution_space().concurrency());
         printf("PTR2: %p\n",
                static_cast<const void*>(A.graph.row_block_offsets.data()));
       }

diff --git a/perf_test/sparse/spmv/Kokkos_SPMV.hpp b/perf_test/sparse/spmv/Kokkos_SPMV.hpp
@@ -151,7 +151,7 @@ int launch_parameters(int numRows, int nnz, int rows_per_thread, int& team_size,
 
   if (rows_per_team < 0) {
     int nnz_per_team = 4096;
-    int conc         = execution_space::concurrency();
+    int conc         = execution_space().concurrency();
     while ((conc * nnz_per_team * 4 > nnz) && (nnz_per_team > 256))
       nnz_per_team /= 2;
     int tmp_nnz_per_row = nnz / numRows;

diff --git a/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp b/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp
@@ -128,13 +128,13 @@ void kk_inspector_matvec(AType A, XType x, YType y, int team_size,
   typedef typename AType::non_const_ordinal_type lno_t;
   typedef typename AType::non_const_value_type scalar_t;
 
+  int const concurrency = execution_space().concurrency();
   static int worksets =
       std::is_same<Schedule, Kokkos::Static>::value
-          ? team_size > 0 ? execution_space::concurrency() / team_size
-                          : execution_space::concurrency()
+          ? team_size > 0 ? concurrency / team_size : concurrency
           :  // static
-          team_size > 0 ? execution_space::concurrency() * 32 / team_size
-                        : execution_space::concurrency() * 32;  // dynamic
+          team_size > 0 ? concurrency * 32 / team_size
+                        : concurrency * 32;  // dynamic
   static Kokkos::View<lno_t*, memory_space> workset_offsets;
   if (workset_offsets.extent(0) == 0) {
     workset_offsets     = Kokkos::View<lno_t*>("WorksetOffsets", worksets + 1);

diff --git a/sparse/impl/KokkosSparse_bspgemm_impl_kkmem.hpp b/sparse/impl/KokkosSparse_bspgemm_impl_kkmem.hpp
@@ -1501,7 +1501,7 @@ void KokkosBSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
     std::cout << "\t\t max_nnz: " << max_nnz
               << " min_hash_size:" << min_hash_size
               << " concurrency:" << this->concurrency
-              << " MyExecSpace::concurrency():" << MyExecSpace::concurrency()
+              << " MyExecSpace().concurrency():" << MyExecSpace().concurrency()
               << " numchunks:" << num_chunks << std::endl;
   }
 

diff --git a/sparse/impl/KokkosSparse_cluster_gauss_seidel_impl.hpp b/sparse/impl/KokkosSparse_cluster_gauss_seidel_impl.hpp
@@ -781,7 +781,7 @@ class ClusterGaussSeidel {
     scalar_persistent_work_view_t inverse_diagonal(
         Kokkos::view_alloc(Kokkos::WithoutInitializing, "Aii^-1"), num_rows);
     nnz_lno_t rows_per_team = this->handle->get_team_work_size(
-        suggested_team_size, MyExecSpace::concurrency(), num_rows);
+        suggested_team_size, MyExecSpace().concurrency(), num_rows);
 
     if (have_diagonal_given) {
       Kokkos::deep_copy(inverse_diagonal, this->given_inverse_diagonal);
@@ -839,7 +839,7 @@ class ClusterGaussSeidel {
           this->handle->get_suggested_team_size(suggested_vector_size);
 
       nnz_lno_t rows_per_team = this->handle->get_team_work_size(
-          suggested_team_size, MyExecSpace::concurrency(), num_rows);
+          suggested_team_size, MyExecSpace().concurrency(), num_rows);
       // Get clusters per team. Round down to favor finer granularity, since
       // this is sensitive to load imbalance
       nnz_lno_t clusters_per_team =

diff --git a/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp b/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp
@@ -1111,7 +1111,7 @@ class PointGaussSeidel {
               brows, permuted_xadj, num_values_in_l1, num_large_rows);
           num_big_rows = KOKKOSKERNELS_MACRO_MIN(
               num_large_rows,
-              (size_type)(MyExecSpace::concurrency() / suggested_vector_size));
+              (size_type)(MyExecSpace().concurrency() / suggested_vector_size));
           // std::cout << "num_big_rows:" << num_big_rows << std::endl;
 
           if (KokkosKernels::Impl::kk_is_gpu_exec_space<MyExecSpace>()) {
@@ -1389,7 +1389,7 @@ class PointGaussSeidel {
       int suggested_team_size =
           this->handle->get_suggested_team_size(suggested_vector_size);
       nnz_lno_t rows_per_team = this->handle->get_team_work_size(
-          suggested_team_size, MyExecSpace::concurrency(), num_rows);
+          suggested_team_size, MyExecSpace().concurrency(), num_rows);
 
       nnz_lno_t block_size        = gsHandle->get_block_size();
       nnz_lno_t block_matrix_size = block_size * block_size;
@@ -1590,7 +1590,7 @@ class PointGaussSeidel {
     int suggested_team_size =
         this->handle->get_suggested_team_size(suggested_vector_size);
     nnz_lno_t team_row_chunk_size = this->handle->get_team_work_size(
-        suggested_team_size, MyExecSpace::concurrency(), brows);
+        suggested_team_size, MyExecSpace().concurrency(), brows);
 
     // size_t shmem_size_to_use = this->handle->get_shmem_size();
     size_t l1_shmem_size       = gsHandle->get_level_1_mem();

diff --git a/sparse/impl/KokkosSparse_spgemm_impl.hpp b/sparse/impl/KokkosSparse_spgemm_impl.hpp
@@ -625,7 +625,7 @@ class KokkosSPGEMM {
         valsB(),
         transposeB(transposeB_),
         shmem_size(handle_->get_shmem_size()),
-        concurrency(MyExecSpace::concurrency()),
+        concurrency(MyExecSpace().concurrency()),
         use_dynamic_schedule(handle_->is_dynamic_scheduling()),
         KOKKOSKERNELS_VERBOSE(handle_->get_verbose()),
         MyEnumExecSpace(this->handle->get_handle_exec_space()),
@@ -656,7 +656,7 @@ class KokkosSPGEMM {
         valsB(valsB_),
         transposeB(transposeB_),
         shmem_size(handle_->get_shmem_size()),
-        concurrency(MyExecSpace::concurrency()),
+        concurrency(MyExecSpace().concurrency()),
         use_dynamic_schedule(handle_->is_dynamic_scheduling()),
         KOKKOSKERNELS_VERBOSE(handle_->get_verbose()),
         MyEnumExecSpace(this->handle->get_handle_exec_space()),

diff --git a/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp b/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp
@@ -1589,7 +1589,7 @@ void KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
     std::cout << "\t\t max_nnz: " << max_nnz << " chunk_size:" << chunksize
               << " min_hash_size:" << min_hash_size
               << " concurrency:" << concurrency
-              << " MyExecSpace::concurrency():" << MyExecSpace::concurrency()
+              << " MyExecSpace().concurrency():" << MyExecSpace().concurrency()
               << " numchunks:" << num_chunks << std::endl;
   }
 

diff --git a/sparse/impl/KokkosSparse_spgemm_jacobi_sparseacc_impl.hpp b/sparse/impl/KokkosSparse_spgemm_jacobi_sparseacc_impl.hpp
@@ -1562,7 +1562,7 @@ void KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
     std::cout << "\t\t max_nnz: " << max_nnz << " chunk_size:" << chunksize
               << " min_hash_size:" << min_hash_size
               << " concurrency:" << concurrency
-              << " MyExecSpace::concurrency():" << MyExecSpace::concurrency()
+              << " MyExecSpace().concurrency():" << MyExecSpace().concurrency()
               << " numchunks:" << num_chunks << std::endl;
   }
 

diff --git a/sparse/impl/KokkosSparse_spmv_impl.hpp b/sparse/impl/KokkosSparse_spmv_impl.hpp
@@ -285,7 +285,7 @@ int64_t spmv_launch_parameters(int64_t numRows, int64_t nnz,
 
   if (rows_per_team < 0) {
     int64_t nnz_per_team = 4096;
-    int64_t conc         = execution_space::concurrency();
+    int64_t conc         = execution_space().concurrency();
     while ((conc * nnz_per_team * 4 > nnz) && (nnz_per_team > 256))
       nnz_per_team /= 2;
     rows_per_team = (nnz_per_team + nnz_per_row - 1) / nnz_per_row;

diff --git a/sparse/impl/KokkosSparse_spmv_struct_impl.hpp b/sparse/impl/KokkosSparse_spmv_struct_impl.hpp
@@ -810,7 +810,7 @@ int64_t spmv_struct_launch_parameters(int64_t numInterior, int64_t nnz,
 
   if (rows_per_team < 0) {
     int64_t nnz_per_team = 4096;
-    int64_t conc         = execution_space::concurrency();
+    int64_t conc         = execution_space().concurrency();
     while ((conc * nnz_per_team * 4 > nnz) && (nnz_per_team > 256))
       nnz_per_team /= 2;
     rows_per_team = (nnz_per_team + nnz_per_row - 1) / nnz_per_row;