diff --git a/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp b/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp index 93dc94f07e..1008a39ea3 100644 --- a/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp +++ b/common/src/KokkosKernels_Uniform_Initialized_MemoryPool.hpp @@ -80,7 +80,7 @@ enum PoolType { OneThread2OneChunk, ManyThread2OneChunk }; not * enough memory. This case still would work for #chunks = #threads, with an extra atomic - * operation. On GPUs, even when #chunks = Kokkos::Cuda::concurrency(), this + * operation. On GPUs, even when #chunks = Kokkos::Cuda().concurrency(), this option is safe * to use. * @@ -116,7 +116,7 @@ enum PoolType { OneThread2OneChunk, ManyThread2OneChunk }; * --- either requires the initializations to go into loop body, resulting in N work in each loop. * --- Or, we can use preinitialized 2d views where the first dimension is - ExecutionSpace::concurrency() + ExecutionSpace().concurrency() * However, this case becomes a problem in CUDA, as concurrency is pretty high and we might not have * enough memory for that. diff --git a/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp b/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp index e921ed06cd..2e855566d5 100644 --- a/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp +++ b/example/graph/KokkosKernels_Example_Distance2GraphColor.cpp @@ -473,7 +473,7 @@ void run_example(CrsGraph_type crsGraph, DataType num_cols, Parameters params) { << " Num Edges : " << crsGraph.entries.extent(0) << std::endl << " Concurrency : " - << Kokkos::DefaultExecutionSpace::concurrency() << std::endl + << Kokkos::DefaultExecutionSpace().concurrency() << std::endl << " Algorithm : " << label_algorithm << std::endl << "Coloring Stats" << std::endl << " Num colors : " << num_colors << std::endl diff --git a/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp b/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp index aec112b584..be3adcf6c2 100644 --- a/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp +++ b/example/hashmap_accumulator/KokkosKernels_Example_HashmapAccumulator.cpp @@ -220,7 +220,7 @@ void experiment(const parameters_t& params) { size_t max_value = 100; // Get the concurrecny - size_t concurrency = execution_space::concurrency(); + size_t concurrency = execution_space().concurrency(); // Set up random number generator std::random_device rd; diff --git a/graph/src/KokkosGraph_CoarsenConstruct.hpp b/graph/src/KokkosGraph_CoarsenConstruct.hpp index ae40aec410..9e9b4049e6 100644 --- a/graph/src/KokkosGraph_CoarsenConstruct.hpp +++ b/graph/src/KokkosGraph_CoarsenConstruct.hpp @@ -1050,7 +1050,7 @@ class coarse_builder { 3 * max_entries; // for hash nexts, keys, and values (unless scalar_t // != ordinal_t, in which case memory is unused) mem_chunk_size += 10; // for metadata - mem_chunk_count = exec_space::concurrency(); + mem_chunk_count = exec_space().concurrency(); if (mem_chunk_count > remaining_count) { mem_chunk_count = remaining_count + 1; } @@ -1385,7 +1385,7 @@ class coarse_builder { wgt_by_source); if (is_host_space) { bool use_dyn = - should_use_dyn(n, g.graph.row_map, exec_space::concurrency()); + should_use_dyn(n, g.graph.row_map, exec_space().concurrency()); if (use_dyn) { Kokkos::parallel_for("move edges to coarse matrix", dyn_policy_t(0, n), translateF); @@ -1422,7 +1422,7 @@ class coarse_builder { if (is_host_space) { bool use_dyn = - should_use_dyn(nc, source_offsets, exec_space::concurrency()); + should_use_dyn(nc, source_offsets, exec_space().concurrency()); if (use_dyn) { Kokkos::parallel_for( "move deduped edges to new coarse matrix", dyn_policy_t(0, nc), diff --git a/graph/src/KokkosGraph_Triangle.hpp b/graph/src/KokkosGraph_Triangle.hpp index b9780a23c6..3432ad0711 100644 --- a/graph/src/KokkosGraph_Triangle.hpp +++ b/graph/src/KokkosGraph_Triangle.hpp @@ -266,7 +266,7 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, KokkosKernels::Impl::kk_sort_by_row_size( m, row_mapA.data(), new_indices.data(), sort_decreasing_order, - ExecutionSpace::concurrency()); + ExecutionSpace().concurrency()); } sh->set_lower_triangular_permutation(new_indices); } @@ -299,7 +299,7 @@ void triangle_generic(KernelHandle *handle, typename KernelHandle::nnz_lno_t m, m, row_mapA, entriesA, null_values, lower_triangular_matrix_rowmap, lower_triangular_matrix_entries, null_values, new_indices, handle->is_dynamic_scheduling(), - handle->get_team_work_size(1, ExecutionSpace::concurrency(), m)); + handle->get_team_work_size(1, ExecutionSpace().concurrency(), m)); sh->set_lower_triangular_matrix(lower_triangular_matrix_rowmap, lower_triangular_matrix_entries); diff --git a/perf_test/graph/KokkosGraph_color_d2.cpp b/perf_test/graph/KokkosGraph_color_d2.cpp index bc4add8b34..ea143ee609 100644 --- a/perf_test/graph/KokkosGraph_color_d2.cpp +++ b/perf_test/graph/KokkosGraph_color_d2.cpp @@ -539,7 +539,7 @@ void run_experiment(crsGraph_t crsGraph, int num_cols, << " Num Edges : " << crsGraph.entries.extent(0) << std::endl << " Concurrency : " - << Kokkos::DefaultExecutionSpace::concurrency() << std::endl + << Kokkos::DefaultExecutionSpace().concurrency() << std::endl << " Algorithm : " << label_algorithm << std::endl << "Overall Time/Stats" << std::endl << " Total Time : " << total_time << std::endl @@ -602,7 +602,7 @@ void run_experiment(crsGraph_t crsGraph, int num_cols, << currentDateTimeStr << "," << crsGraph.numRows() << "," << crsGraph.entries.extent(0) << "," << Kokkos::DefaultExecutionSpace::name() << "," << label_algorithm - << "," << Kokkos::DefaultExecutionSpace::concurrency() << "," + << "," << Kokkos::DefaultExecutionSpace().concurrency() << "," << repeat << "," << total_time << "," << total_time_matrix_squared << "," << total_time_matrix_squared_d1 << "," << total_time_color_greedy << "," << total_time_find_conflicts @@ -636,7 +636,7 @@ void run_experiment(crsGraph_t crsGraph, int num_cols, << currentDateTimeStr << "," << crsGraph.numRows() << "," << crsGraph.entries.extent(0) << "," << Kokkos::DefaultExecutionSpace::name() << "," << label_algorithm - << "," << Kokkos::DefaultExecutionSpace::concurrency() << ","; + << "," << Kokkos::DefaultExecutionSpace().concurrency() << ","; KokkosGraph::Impl::graph_print_distance2_color_histogram(&kh, true); std::cout << std::endl; } diff --git a/perf_test/sparse/KokkosSparse_spmv_test.hpp b/perf_test/sparse/KokkosSparse_spmv_test.hpp index b6ff552faf..31acb797d3 100644 --- a/perf_test/sparse/KokkosSparse_spmv_test.hpp +++ b/perf_test/sparse/KokkosSparse_spmv_test.hpp @@ -192,7 +192,7 @@ void matvec(AType& A, XType x, YType y, Ordinal rows_per_thread, int team_size, printf("PTR: %p\n", static_cast(A.graph.row_block_offsets.data())); A.graph.create_block_partitioning( - AType::execution_space::concurrency()); + typename AType::execution_space().concurrency()); printf("PTR2: %p\n", static_cast(A.graph.row_block_offsets.data())); } diff --git a/perf_test/sparse/spmv/Kokkos_SPMV.hpp b/perf_test/sparse/spmv/Kokkos_SPMV.hpp index cc98865c81..8eefd363c9 100644 --- a/perf_test/sparse/spmv/Kokkos_SPMV.hpp +++ b/perf_test/sparse/spmv/Kokkos_SPMV.hpp @@ -151,7 +151,7 @@ int launch_parameters(int numRows, int nnz, int rows_per_thread, int& team_size, if (rows_per_team < 0) { int nnz_per_team = 4096; - int conc = execution_space::concurrency(); + int conc = execution_space().concurrency(); while ((conc * nnz_per_team * 4 > nnz) && (nnz_per_team > 256)) nnz_per_team /= 2; int tmp_nnz_per_row = nnz / numRows; diff --git a/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp b/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp index 3a631fc743..8ff2e0c7e5 100644 --- a/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp +++ b/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp @@ -128,13 +128,13 @@ void kk_inspector_matvec(AType A, XType x, YType y, int team_size, typedef typename AType::non_const_ordinal_type lno_t; typedef typename AType::non_const_value_type scalar_t; + int const concurrency = execution_space().concurrency(); static int worksets = std::is_same::value - ? team_size > 0 ? execution_space::concurrency() / team_size - : execution_space::concurrency() + ? team_size > 0 ? concurrency / team_size : concurrency : // static - team_size > 0 ? execution_space::concurrency() * 32 / team_size - : execution_space::concurrency() * 32; // dynamic + team_size > 0 ? concurrency * 32 / team_size + : concurrency * 32; // dynamic static Kokkos::View workset_offsets; if (workset_offsets.extent(0) == 0) { workset_offsets = Kokkos::View("WorksetOffsets", worksets + 1); diff --git a/sparse/impl/KokkosSparse_bspgemm_impl_kkmem.hpp b/sparse/impl/KokkosSparse_bspgemm_impl_kkmem.hpp index aae9d83b5f..2e857d2dbe 100644 --- a/sparse/impl/KokkosSparse_bspgemm_impl_kkmem.hpp +++ b/sparse/impl/KokkosSparse_bspgemm_impl_kkmem.hpp @@ -1501,7 +1501,7 @@ void KokkosBSPGEMMconcurrency - << " MyExecSpace::concurrency():" << MyExecSpace::concurrency() + << " MyExecSpace().concurrency():" << MyExecSpace().concurrency() << " numchunks:" << num_chunks << std::endl; } diff --git a/sparse/impl/KokkosSparse_cluster_gauss_seidel_impl.hpp b/sparse/impl/KokkosSparse_cluster_gauss_seidel_impl.hpp index bb95eea101..3938b67ec6 100644 --- a/sparse/impl/KokkosSparse_cluster_gauss_seidel_impl.hpp +++ b/sparse/impl/KokkosSparse_cluster_gauss_seidel_impl.hpp @@ -781,7 +781,7 @@ class ClusterGaussSeidel { scalar_persistent_work_view_t inverse_diagonal( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Aii^-1"), num_rows); nnz_lno_t rows_per_team = this->handle->get_team_work_size( - suggested_team_size, MyExecSpace::concurrency(), num_rows); + suggested_team_size, MyExecSpace().concurrency(), num_rows); if (have_diagonal_given) { Kokkos::deep_copy(inverse_diagonal, this->given_inverse_diagonal); @@ -839,7 +839,7 @@ class ClusterGaussSeidel { this->handle->get_suggested_team_size(suggested_vector_size); nnz_lno_t rows_per_team = this->handle->get_team_work_size( - suggested_team_size, MyExecSpace::concurrency(), num_rows); + suggested_team_size, MyExecSpace().concurrency(), num_rows); // Get clusters per team. Round down to favor finer granularity, since // this is sensitive to load imbalance nnz_lno_t clusters_per_team = diff --git a/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp b/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp index 313e398620..c67757a65f 100644 --- a/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp +++ b/sparse/impl/KokkosSparse_gauss_seidel_impl.hpp @@ -1111,7 +1111,7 @@ class PointGaussSeidel { brows, permuted_xadj, num_values_in_l1, num_large_rows); num_big_rows = KOKKOSKERNELS_MACRO_MIN( num_large_rows, - (size_type)(MyExecSpace::concurrency() / suggested_vector_size)); + (size_type)(MyExecSpace().concurrency() / suggested_vector_size)); // std::cout << "num_big_rows:" << num_big_rows << std::endl; if (KokkosKernels::Impl::kk_is_gpu_exec_space()) { @@ -1389,7 +1389,7 @@ class PointGaussSeidel { int suggested_team_size = this->handle->get_suggested_team_size(suggested_vector_size); nnz_lno_t rows_per_team = this->handle->get_team_work_size( - suggested_team_size, MyExecSpace::concurrency(), num_rows); + suggested_team_size, MyExecSpace().concurrency(), num_rows); nnz_lno_t block_size = gsHandle->get_block_size(); nnz_lno_t block_matrix_size = block_size * block_size; @@ -1590,7 +1590,7 @@ class PointGaussSeidel { int suggested_team_size = this->handle->get_suggested_team_size(suggested_vector_size); nnz_lno_t team_row_chunk_size = this->handle->get_team_work_size( - suggested_team_size, MyExecSpace::concurrency(), brows); + suggested_team_size, MyExecSpace().concurrency(), brows); // size_t shmem_size_to_use = this->handle->get_shmem_size(); size_t l1_shmem_size = gsHandle->get_level_1_mem(); diff --git a/sparse/impl/KokkosSparse_spgemm_impl.hpp b/sparse/impl/KokkosSparse_spgemm_impl.hpp index dadc944b09..776579ded2 100644 --- a/sparse/impl/KokkosSparse_spgemm_impl.hpp +++ b/sparse/impl/KokkosSparse_spgemm_impl.hpp @@ -625,7 +625,7 @@ class KokkosSPGEMM { valsB(), transposeB(transposeB_), shmem_size(handle_->get_shmem_size()), - concurrency(MyExecSpace::concurrency()), + concurrency(MyExecSpace().concurrency()), use_dynamic_schedule(handle_->is_dynamic_scheduling()), KOKKOSKERNELS_VERBOSE(handle_->get_verbose()), MyEnumExecSpace(this->handle->get_handle_exec_space()), @@ -656,7 +656,7 @@ class KokkosSPGEMM { valsB(valsB_), transposeB(transposeB_), shmem_size(handle_->get_shmem_size()), - concurrency(MyExecSpace::concurrency()), + concurrency(MyExecSpace().concurrency()), use_dynamic_schedule(handle_->is_dynamic_scheduling()), KOKKOSKERNELS_VERBOSE(handle_->get_verbose()), MyEnumExecSpace(this->handle->get_handle_exec_space()), diff --git a/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp b/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp index 94cec7af04..50d3681eed 100644 --- a/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp +++ b/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp @@ -1589,7 +1589,7 @@ void KokkosSPGEMM nnz) && (nnz_per_team > 256)) nnz_per_team /= 2; rows_per_team = (nnz_per_team + nnz_per_row - 1) / nnz_per_row; diff --git a/sparse/impl/KokkosSparse_spmv_struct_impl.hpp b/sparse/impl/KokkosSparse_spmv_struct_impl.hpp index 71387b5d06..6949a86866 100644 --- a/sparse/impl/KokkosSparse_spmv_struct_impl.hpp +++ b/sparse/impl/KokkosSparse_spmv_struct_impl.hpp @@ -810,7 +810,7 @@ int64_t spmv_struct_launch_parameters(int64_t numInterior, int64_t nnz, if (rows_per_team < 0) { int64_t nnz_per_team = 4096; - int64_t conc = execution_space::concurrency(); + int64_t conc = execution_space().concurrency(); while ((conc * nnz_per_team * 4 > nnz) && (nnz_per_team > 256)) nnz_per_team /= 2; rows_per_team = (nnz_per_team + nnz_per_row - 1) / nnz_per_row;