From f8d5d42904e310d7a8cd8829749976634ae80620 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 18 Dec 2019 10:20:06 -0700 Subject: [PATCH 01/14] WIP: sparse perf-test cleanup and ETI fixes - Cleaning up duplicated MatrixMarket code from perf_test/spmv that exists in IOUtils (#493) - Changing the scalar/lno_t/size_type/layout to tolerate any ETI combination (previously, only double/int/int/left was really supported) --- perf_test/sparse/KokkosSparse_gs.cpp | 31 +-- ...MV.hpp => KokkosSparse_perftest_types.hpp} | 67 +++-- perf_test/sparse/KokkosSparse_spgemm.cpp | 44 +-- perf_test/sparse/KokkosSparse_spiluk.cpp | 27 +- perf_test/sparse/KokkosSparse_spmv.cpp | 70 +++-- perf_test/sparse/KokkosSparse_spmv_struct.cpp | 21 +- perf_test/sparse/KokkosSparse_sptrsv.cpp | 25 +- perf_test/sparse/spmv/CuSparse_SPMV.hpp | 67 ++++- perf_test/sparse/spmv/Kokkos_SPMV.hpp | 16 +- .../sparse/spmv/Kokkos_SPMV_Inspector.hpp | 36 +-- perf_test/sparse/spmv/MKL_SPMV.hpp | 65 +++-- perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp | 57 ++-- .../sparse/spmv/OpenMPSmartStatic_SPMV.hpp | 176 ++++++------ perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp | 57 ++-- perf_test/sparse/spmv/matrix_market.hpp | 252 ++++-------------- 15 files changed, 418 insertions(+), 593 deletions(-) rename perf_test/sparse/{spmv/KokkosKernels_SPMV.hpp => KokkosSparse_perftest_types.hpp} (54%) diff --git a/perf_test/sparse/KokkosSparse_gs.cpp b/perf_test/sparse/KokkosSparse_gs.cpp index 07e46e2de8..0e0887efcf 100644 --- a/perf_test/sparse/KokkosSparse_gs.cpp +++ b/perf_test/sparse/KokkosSparse_gs.cpp @@ -50,6 +50,7 @@ #include #include #include +#include "KokkosSparse_perftest_types.hpp" #include #include #include @@ -57,26 +58,12 @@ using std::cout; using std::string; -#if defined(KOKKOSKERNELS_INST_ORDINAL_INT) - typedef int default_lno_t; -#elif defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) - typedef int64_t default_lno_t; -#else - #error "Expect int and/or int64_t to be enabled as ORDINAL (lno_t) types" -#endif - //Prefer int as the default offset type, because cuSPARSE doesn't support size_t for rowptrs. -#if defined(KOKKOSKERNELS_INST_OFFSET_INT) - typedef int default_size_type; -#elif defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T) - typedef size_t default_size_type; -#else - #error "Expect size_t and/or int to be enabled as OFFSET (size_type) types" -#endif - -template +template void runGS(string matrixPath, string devName, bool symmetric) { - typedef double scalar_t; + typedef default_scalar scalar_t; + typedef default_lno_t lno_t; + typedef default_size_type size_type; typedef typename device_t::execution_space exec_space; typedef typename device_t::memory_space mem_space; typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; @@ -239,28 +226,28 @@ int main(int argc, char** argv) #ifdef KOKKOS_ENABLE_SERIAL if(device == "serial") { - runGS(matrixPath, device, sym); + runGS(matrixPath, device, sym); run = true; } #endif #ifdef KOKKOS_ENABLE_OPENMP if(device == "openmp") { - runGS(matrixPath, device, sym); + runGS(matrixPath, device, sym); run = true; } #endif #ifdef KOKKOS_ENABLE_THREADS if(device == "threads") { - runGS(matrixPath, device, sym); + runGS(matrixPath, device, sym); run = true; } #endif #ifdef KOKKOS_ENABLE_CUDA if(device == "cuda") { - runGS(matrixPath, device, sym); + runGS(matrixPath, device, sym); run = true; } #endif diff --git a/perf_test/sparse/spmv/KokkosKernels_SPMV.hpp b/perf_test/sparse/KokkosSparse_perftest_types.hpp similarity index 54% rename from perf_test/sparse/spmv/KokkosKernels_SPMV.hpp rename to perf_test/sparse/KokkosSparse_perftest_types.hpp index d2e9cbb407..e5c59c47e0 100644 --- a/perf_test/sparse/spmv/KokkosKernels_SPMV.hpp +++ b/perf_test/sparse/KokkosSparse_perftest_types.hpp @@ -41,48 +41,43 @@ //@HEADER */ -#ifndef KOKKOSKERNELS_SPMV_HPP_ -#define KOKKOSKERNELS_SPMV_HPP_ +#ifndef KOKKOSSPARSE_PERFTEST_TYPES_H +#define KOKKOSSPARSE_PERFTEST_TYPES_H -#ifdef MAKE_BUILD -#ifdef KOKKOS_ENABLE_CUDA - #define KOKKOSKERNELS_ETI_MANGLING_TYPEDEFS() \ - typedef Kokkos::Device Kokkos_Device0Kokkos_Cuda_Kokkos_CudaSpace0; \ - typedef Kokkos::complex Kokkos_complex0double0; \ - typedef long long longlong; +#include +#include //for all the ETI #cmakedefine macros + +#if defined(KOKKOSKERNELS_INST_ORDINAL_INT) + typedef int default_lno_t; +#elif defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) + typedef int64_t default_lno_t; #else - #ifdef KOKKOS_ENABLE_OPENMP - #define KOKKOSKERNELS_ETI_MANGLING_TYPEDEFS() \ - typedef Kokkos::Device Kokkos_Device0Kokkos_OpenMP_Kokkos_HostSpace0; \ - typedef Kokkos::complex Kokkos_complex0double0; \ - typedef long long longlong; - #else - #ifdef KOKKOS_ENABLE_THREADS - #define KOKKOSKERNELS_ETI_MANGLING_TYPEDEFS() \ - typedef Kokkos::Device Kokkos_Device0Kokkos_Threads_Kokkos_HostSpace0; \ - typedef Kokkos::complex Kokkos_complex0double0; \ - typedef long long longlong; - #else - #define KOKKOSKERNELS_ETI_MANGLING_TYPEDEFS() \ - typedef Kokkos::Device Kokkos_Device0Kokkos_OpenMP_Kokkos_HostSpace0; \ - typedef Kokkos::complex Kokkos_complex0double0; \ - typedef long long longlong; - #endif - #endif + #error "Expect INT and/or INT64_T to be enabled as ORDINAL (lno_t) types" #endif - + //Prefer int as the default offset type, because cuSPARSE doesn't support size_t for rowptrs. +#if defined(KOKKOSKERNELS_INST_OFFSET_INT) + typedef int default_size_type; +#elif defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T) + typedef size_t default_size_type; +#else + #error "Expect SIZE_T and/or INT to be enabled as OFFSET (size_type) types" #endif -#include -#include - -#ifdef HAVE_KK_KERNELS +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) + typedef Kokkos::LayoutLeft default_layout; +#elif defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) + typedef Kokkos::LayoutRight default_layout; +#else + #error "Expect LAYOUTLEFT and/or LAYOUTRIGHT to be enabled as layout types" +#endif +#if defined(KOKKOSKERNELS_INST_DOUBLE) + typedef double default_scalar; +#elif defined(KOKKOSKERNELS_INST_FLOAT) + typedef float default_scalar; +#else + #error "Expect at least one real-valued scalar type (double or float) to be enabled" +#endif -template -void kokkoskernels_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { - KokkosSparse::spmv (KokkosSparse::NoTranspose,1.0,A,x,0.0,y); -} #endif -#endif /* KOKKOSKERNELS_SPMV_HPP_ */ diff --git a/perf_test/sparse/KokkosSparse_spgemm.cpp b/perf_test/sparse/KokkosSparse_spgemm.cpp index d51674fa3d..fc5f6f499b 100644 --- a/perf_test/sparse/KokkosSparse_spgemm.cpp +++ b/perf_test/sparse/KokkosSparse_spgemm.cpp @@ -42,17 +42,13 @@ */ #include #include "KokkosKernels_config.h" -#if defined(KOKKOSKERNELS_INST_DOUBLE) && \ - defined(KOKKOSKERNELS_INST_OFFSET_INT) && \ - defined(KOKKOSKERNELS_INST_ORDINAL_INT) +#include "KokkosSparse_perftest_types.hpp" #include "KokkosKernels_IOUtils.hpp" #include "KokkosSparse_multimem_spgemm.hpp" - -#define SIZE_TYPE int -#define INDEX_TYPE int -#define SCALAR_TYPE double -//double +typedef default_size_type size_type; +typedef default_lno_t lno_t; +typedef default_scalar scalar_t; void print_options(){ std::cerr << "Options\n" << std::endl; @@ -303,12 +299,12 @@ int main (int argc, char ** argv){ if (params.use_openmp) { #ifdef KOKKOSKERNELS_INST_MEMSPACE_HBWSPACE KokkosKernels::Experiment::run_multi_mem_spgemm - ( + ( params ); #else KokkosKernels::Experiment::run_multi_mem_spgemm - ( + ( params ); #endif @@ -319,18 +315,17 @@ int main (int argc, char ** argv){ if (params.use_cuda) { #ifdef KOKKOSKERNELS_INST_MEMSPACE_CUDAHOSTPINNEDSPACE KokkosKernels::Experiment::run_multi_mem_spgemm - ( + ( params ); #else KokkosKernels::Experiment::run_multi_mem_spgemm - ( + ( params ); #endif } -#else #endif Kokkos::finalize(); @@ -338,26 +333,3 @@ int main (int argc, char ** argv){ return 0; } - -#else -int main() { -#if !defined(KOKKOSKERNELS_INST_DOUBLE) -std::cout << " not defined KOKKOSKERNELS_INST_DOUBLE" << std::endl; -#endif - -#if !defined(KOKKOSKERNELS_INST_OFFSET_INT) -std::cout << " not defined KOKKOSKERNELS_INST_OFFSET_INT" << std::endl; - -#endif - -#if !defined(KOKKOSKERNELS_INST_ORDINAL_INT) -std::cout << " not defined KOKKOSKERNELS_INST_ORDINAL_INT" << std::endl; - -#endif -} -#endif - - - - - diff --git a/perf_test/sparse/KokkosSparse_spiluk.cpp b/perf_test/sparse/KokkosSparse_spiluk.cpp index 5573d10cfb..c94394b444 100644 --- a/perf_test/sparse/KokkosSparse_spiluk.cpp +++ b/perf_test/sparse/KokkosSparse_spiluk.cpp @@ -63,24 +63,9 @@ #include "KokkosSparse_spmv.hpp" #include "KokkosBlas1_nrm2.hpp" #include "KokkosSparse_CrsMatrix.hpp" +#include "KokkosSparse_perftest_types.hpp" #include -#if defined(KOKKOSKERNELS_INST_ORDINAL_INT) - typedef int default_lno_t; -#elif defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) - typedef int64_t default_lno_t; -#else - #error "Expect int and/or int64_t to be enabled as ORDINAL (lno_t) types" -#endif - //Prefer int as the default offset type, because cuSPARSE doesn't support size_t for rowptrs. -#if defined(KOKKOSKERNELS_INST_OFFSET_INT) - typedef int default_size_type; -#elif defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T) - typedef size_t default_size_type; -#else - #error "Expect size_t and/or int to be enabled as OFFSET (size_type) types" -#endif - #if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) && (!defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )) using namespace KokkosSparse; using namespace KokkosSparse::Experimental; @@ -89,10 +74,8 @@ using namespace KokkosKernels::Experimental; enum {DEFAULT, CUSPARSE, LVLSCHED_RP, LVLSCHED_TP1/*, LVLSCHED_TP2*/}; -template -int test_spiluk_perf(std::vector tests, std::string afilename, int K, int team_size, int vector_length, /*int idx_offset,*/ int loop) { - - typedef Scalar scalar_t; +int test_spiluk_perf(std::vector tests, std::string afilename, int k, int team_size, int vector_length, /*int idx_offset,*/ int loop) { + typedef default_scalar scalar_t; typedef default_lno_t lno_t; typedef default_size_type size_type; typedef Kokkos::DefaultExecutionSpace execution_space; @@ -314,7 +297,7 @@ int test_spiluk_perf(std::vector tests, std::string afilename, int K, int t return 1; } else { - Kokkos::View h_tmp_entries ( "h_tmp_entries", a_row_end-a_row_start); + Kokkos::View h_tmp_entries ( "h_tmp_entries", a_row_end-a_row_start); Kokkos::View h_tmp_values ( "h_tmp_values", a_row_end-a_row_start); Kokkos::deep_copy(subview(h_tmp_entries, Kokkos::make_pair(0,l_row_end-l_row_start)), @@ -505,7 +488,7 @@ int main(int argc, char **argv) Kokkos::initialize(argc,argv); { - int total_errors = test_spiluk_perf(tests, afilename, k, team_size, vector_length, /*idx_offset,*/ loop); + int total_errors = test_spiluk_perf(tests, afilename, k, team_size, vector_length, /*idx_offset,*/ loop); if(total_errors == 0) printf("Kokkos::SPILUK Test: Passed\n"); diff --git a/perf_test/sparse/KokkosSparse_spmv.cpp b/perf_test/sparse/KokkosSparse_spmv.cpp index 81dc0acd6e..b16e9ea307 100644 --- a/perf_test/sparse/KokkosSparse_spmv.cpp +++ b/perf_test/sparse/KokkosSparse_spmv.cpp @@ -52,17 +52,15 @@ #include #ifdef HAVE_CUSPARSE -#include +#include #endif #include #include -#include -#include -#include -#include +#ifdef HAVE_MKL #include +#endif #ifdef _OPENMP #include @@ -70,22 +68,22 @@ #include #endif +#include "KokkosSparse_perftest_types.hpp" + enum {KOKKOS, MKL, CUSPARSE, KK_KERNELS, KK_KERNELS_INSP, KK_INSP, OMP_STATIC, OMP_DYNAMIC, OMP_INSP}; enum {AUTO, DYNAMIC, STATIC}; -#ifdef INT64 -typedef long long int LocalOrdinalType; -#else -typedef int LocalOrdinalType; -#endif - +typedef default_scalar scalar_t; +typedef default_lno_t lno_t; +typedef default_size_type size_type; +typedef default_layout layout_t; -template< typename ScalarType , typename OrdinalType> -int SparseMatrix_generate(OrdinalType nrows, OrdinalType ncols, OrdinalType &nnz, OrdinalType varianz_nel_row, OrdinalType width_row, ScalarType* &values, OrdinalType* &rowPtr, OrdinalType* &colInd) +template< typename Scalar, typename Offset, typename Ordinal> +int SparseMatrix_generate(Ordinal nrows, Ordinal ncols, Ordinal &nnz, Ordinal varianz_nel_row, Ordinal width_row, Scalar* &values, Offset* &rowPtr, Ordinal* &colInd) { - rowPtr = new OrdinalType[nrows+1]; + rowPtr = new Offset[nrows+1]; - OrdinalType elements_per_row = nnz/nrows; + Ordinal elements_per_row = nnz/nrows; srand(13721); rowPtr[0] = 0; for(int row=0;row(A, x, y, rows_per_thread, team_size, vector_length); break; -#ifdef _OPENMP +#ifdef KOKKOS_ENABLE_OPENMP case OMP_STATIC: - openmp_static_matvec(A, x, y, rows_per_thread, team_size, vector_length); + openmp_static_matvec(A, x, y); break; case OMP_DYNAMIC: - openmp_dynamic_matvec(A, x, y, rows_per_thread, team_size, vector_length); + openmp_dynamic_matvec(A, x, y); break; case OMP_INSP: - openmp_smart_static_matvec(A, x, y, rows_per_thread, team_size, vector_length); + openmp_smart_static_matvec(A, x, y); break; #endif #ifdef HAVE_MKL case MKL: - mkl_matvec(A, x, y, rows_per_thread, team_size, vector_length); + mkl_matvec(A, x, y); break; #endif #ifdef HAVE_CUSPARSE case CUSPARSE: - cusparse_matvec(A, x, y, rows_per_thread, team_size, vector_length); + cusparse_matvec(A, x, y); break; #endif -#ifdef HAVE_KK_KERNELS case KK_KERNELS: - kokkoskernels_matvec(A, x, y, rows_per_thread, team_size, vector_length); + KokkosSparse::spmv (KokkosSparse::NoTranspose,1.0,A,x,0.0,y); break; - case KK_KERNELS_INSP: - if(A.graph.row_block_offsets.data()==NULL) { - printf("PTR: %p\n",A.graph.row_block_offsets.data()); - A.graph.create_block_partitioning(AType::execution_space::concurrency()); - printf("PTR2: %p\n",A.graph.row_block_offsets.data()); - } - kokkoskernels_matvec(A, x, y, rows_per_thread, team_size, vector_length); - break; -#endif + case KK_KERNELS_INSP: + if(A.graph.row_block_offsets.data()==NULL) { + printf("PTR: %p\n",A.graph.row_block_offsets.data()); + A.graph.create_block_partitioning(AType::execution_space::concurrency()); + printf("PTR2: %p\n",A.graph.row_block_offsets.data()); + } + kokkoskernels_matvec(A, x, y, rows_per_thread, team_size, vector_length); + break; default: - fprintf(stderr, "Selected test is not available.\n"); - - } + fprintf(stderr, "Selected test is not available.\n"); + } } template @@ -407,3 +402,4 @@ int main(int argc, char **argv) Kokkos::finalize(); } + diff --git a/perf_test/sparse/KokkosSparse_spmv_struct.cpp b/perf_test/sparse/KokkosSparse_spmv_struct.cpp index 4fe5df2389..29aced0be2 100644 --- a/perf_test/sparse/KokkosSparse_spmv_struct.cpp +++ b/perf_test/sparse/KokkosSparse_spmv_struct.cpp @@ -56,25 +56,14 @@ #include #include #include +#include "KokkosSparse_perftest_types.hpp" enum {STRUCT, UNSTR}; enum {AUTO, DYNAMIC, STATIC}; -#if defined(KOKKOSKERNELS_INST_ORDINAL_INT) - typedef int default_lno_t; -#elif defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) - typedef int64_t default_lno_t; -#else - #error "Expect int and/or int64_t to be enabled as ORDINAL (lno_t) types" -#endif - //Prefer int as the default offset type, because cuSPARSE doesn't support size_t for rowptrs. -#if defined(KOKKOSKERNELS_INST_OFFSET_INT) - typedef int default_size_type; -#elif defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T) - typedef size_t default_size_type; -#else - #error "Expect size_t and/or int to be enabled as OFFSET (size_type) types" -#endif +typedef default_scalar Scalar; +typedef default_lno_t lno_t; +typedef default_size_type size_type; void print_help() { printf("SPMV_struct benchmark code written by Luc Berger-Vergiat.\n"); @@ -94,8 +83,6 @@ void print_help() { int main(int argc, char **argv) { - typedef double Scalar; - int nx = 100; int ny = 100; int nz = 100; diff --git a/perf_test/sparse/KokkosSparse_sptrsv.cpp b/perf_test/sparse/KokkosSparse_sptrsv.cpp index bae058d175..b7051711d6 100644 --- a/perf_test/sparse/KokkosSparse_sptrsv.cpp +++ b/perf_test/sparse/KokkosSparse_sptrsv.cpp @@ -62,26 +62,11 @@ #include "KokkosSparse_sptrsv.hpp" #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_CrsMatrix.hpp" +#include "KokkosSparse_perftest_types.hpp" #include //#define INTERNAL_CUSPARSE -#if defined(KOKKOSKERNELS_INST_ORDINAL_INT) - typedef int default_lno_t; -#elif defined(KOKKOSKERNELS_INST_ORDINAL_INT64_T) - typedef int64_t default_lno_t; -#else - #error "Expect int and/or int64_t to be enabled as ORDINAL (lno_t) types" -#endif - //Prefer int as the default offset type, because cuSPARSE doesn't support size_t for rowptrs. -#if defined(KOKKOSKERNELS_INST_OFFSET_INT) - typedef int default_size_type; -#elif defined(KOKKOSKERNELS_INST_OFFSET_SIZE_T) - typedef size_t default_size_type; -#else - #error "Expect size_t and/or int to be enabled as OFFSET (size_type) types" -#endif - #if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) && (!defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )) using namespace KokkosSparse; using namespace KokkosSparse::Experimental; @@ -131,10 +116,8 @@ void check_entries_sorted(const RowMapType drow_map, const EntriesType dentries) } -template int test_sptrsv_perf(std::vector tests, const std::string& lfilename, const std::string& ufilename, const int team_size, const int vector_length, const int idx_offset, const int loop, const int chain_threshold = 0, const float dense_row_percent = -1.0) { - - typedef Scalar scalar_t; + typedef default_scalar scalar_t; typedef default_lno_t lno_t; typedef default_size_type size_type; typedef Kokkos::DefaultExecutionSpace execution_space; @@ -1013,7 +996,11 @@ int main(int argc, char **argv) Kokkos::initialize(argc,argv); { +<<<<<<< 6d0becda14a6248e88026e8eedaa4a18e7396ee4 int total_errors = test_sptrsv_perf(tests, lfilename, ufilename, team_size, vector_length, idx_offset, loop, chain_threshold, dense_row_percent); +======= + int total_errors = test_sptrsv_perf(tests,lfilename,ufilename,team_size,vector_length,idx_offset,loop); +>>>>>>> WIP: sparse perf-test cleanup and ETI fixes if(total_errors == 0) printf("Kokkos::SPTRSV Test: Passed\n"); diff --git a/perf_test/sparse/spmv/CuSparse_SPMV.hpp b/perf_test/sparse/spmv/CuSparse_SPMV.hpp index 42e00295d0..d5d01065d9 100644 --- a/perf_test/sparse/spmv/CuSparse_SPMV.hpp +++ b/perf_test/sparse/spmv/CuSparse_SPMV.hpp @@ -47,24 +47,65 @@ #ifdef HAVE_CUSPARSE #include -template -void cusparse_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { +template +void cusparse_matvec_wrapper( + cusparseHandle_t& handle, cusparseMatDescr_t& desc, + int numRows, int numCols, int nnz, + Scalar* values, int* rowptrs, int* entries, + Scalar* x, Scalar* y) +{ + throw std::runtime_error("Can't use cuSPARSE mat-vec for scalar types other than double and float."); +} +template<> +void cusparse_matvec_wrapper( + cusparseHandle_t& handle, cusparseMatDescr_t& descr, + int numRows, int numCols, int nnz, + double* values, int* rowptrs, int* entries, + double* x, double* y) +{ double s_a = 1.0; double s_b = 0.0; + cusparseDcsrmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + numRows, numCols, nnz, + &s_a, + descr, + values, rowptrs, entries, + x, + &s_b, + y); +} - cusparseDcsrmv (A.cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - A.numRows(), A.numCols(), A.nnz(), - &s_a, - A.cusparse_descr, - A.values.data(), - (const int*) A.graph.row_map.data(), - A.graph.entries.data(), - x.data(), - &s_b, - y.data()); +template<> +void cusparse_matvec_wrapper( + cusparseHandle_t& handle, cusparseMatDescr_t& descr, + int numRows, int numCols, int nnz, + float* values, int* rowptrs, int* entries, + float* x, double* y) +{ + float s_a = 1.0f; + float s_b = 0.0f; + cusparseScsrmv(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + numRows, numCols, nnz, + &s_a, + descr, + values, rowptrs, entries, + x, + &s_b, + y); } -#endif +template +void cusparse_matvec(AType A, XType x, YType y) { + typedef AType::non_const_value_type Scalar; + //Run cuSPARSE spmv corresponding to scalar type + cusparse_matvec_wrapper( + A.cusparse_handle, A.cusparse_descr, + A.numRows(), A.numCols(), A.nnz(), + A.values.data(), A.graph.row_map.data(), A.graph.entries.data(), + x.data(), y.data()); +} + +#endif #endif /* CUSPARSE_SPMV_HPP_ */ diff --git a/perf_test/sparse/spmv/Kokkos_SPMV.hpp b/perf_test/sparse/spmv/Kokkos_SPMV.hpp index aea860b1c2..a3d050ea31 100644 --- a/perf_test/sparse/spmv/Kokkos_SPMV.hpp +++ b/perf_test/sparse/spmv/Kokkos_SPMV.hpp @@ -44,8 +44,6 @@ #ifndef KOKKOS_SPMV_HPP_ #define KOKKOS_SPMV_HPP_ - - template -void kk_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { - - typedef typename XType::non_const_value_type Scalar; +void kokkos_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { typedef typename AType::execution_space execution_space; - typedef KokkosSparse::CrsMatrix matrix_type ; - typedef typename Kokkos::View y_type; - typedef typename Kokkos::View x_type; + typedef typename AType::non_const_size_type size_type; + typedef typename AType::non_const_ordinal_type lno_t; + typedef typename AType::non_const_scalar_type scalar_t; + typedef KokkosSparse::CrsMatrix matrix_type ; int rows_per_team = launch_parameters(A.numRows(),A.nnz(),rows_per_thread,team_size,vector_length); double s_a = 1.0; double s_b = 0.0; - SPMV_Functor func (s_a,A,x,s_b,y,rows_per_team); + SPMV_Functor func (s_a,A,x,s_b,y,rows_per_team); int worksets = (y.extent(0)+rows_per_team-1)/rows_per_team; @@ -191,3 +188,4 @@ void kk_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, in #endif /* KOKKOS_SPMV_HPP_ */ + diff --git a/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp b/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp index f3131825a7..bd9f5c8818 100644 --- a/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp +++ b/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp @@ -51,13 +51,12 @@ template + bool conjugate> struct SPMV_Inspector_Functor { typedef typename AMatrix::execution_space execution_space; typedef typename AMatrix::non_const_ordinal_type ordinal_type; typedef typename AMatrix::non_const_value_type value_type; - typedef SizeType size_type; + typedef typename AMatrix::non_const_size_type size_type; typedef typename Kokkos::TeamPolicy team_policy; typedef typename team_policy::member_type team_member; typedef Kokkos::Details::ArithTraits ATV; @@ -65,7 +64,7 @@ struct SPMV_Inspector_Functor { const value_type alpha; AMatrix m_A; XVector m_x; - Kokkos::View m_workset_offsets; + Kokkos::View m_workset_offsets; const value_type beta; YVector m_y; @@ -73,7 +72,7 @@ struct SPMV_Inspector_Functor { SPMV_Inspector_Functor (const value_type alpha_, const AMatrix m_A_, const XVector m_x_, - const Kokkos::View m_workset_offsets_, + const Kokkos::View m_workset_offsets_, const value_type beta_, const YVector m_y_) : alpha (alpha_), m_A (m_A_), m_x (m_x_), @@ -123,25 +122,25 @@ struct SPMV_Inspector_Functor { template void kk_inspector_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { - typedef typename XType::non_const_value_type Scalar; typedef typename AType::execution_space execution_space; - typedef KokkosSparse::CrsMatrix matrix_type ; - typedef typename Kokkos::View y_type; - typedef typename Kokkos::View x_type; + typedef typename AType::device_type::memory_space memory_space; + typedef typename AType::non_const_size_type size_type; + typedef typename AType::non_const_ordinal_type lno_t; + typedef typename AType::non_const_scalar_type scalar_t; //int rows_per_team = launch_parameters(A.numRows(),A.nnz(),rows_per_thread,team_size,vector_length); //static int worksets = (y.extent(0)+rows_per_team-1)/rows_per_team; static int worksets = std::is_same::value ? team_size>0?execution_space::concurrency()/team_size:execution_space::concurrency() : //static team_size>0?execution_space::concurrency()*32/team_size:execution_space::concurrency()*32 ; //dynamic - static Kokkos::View workset_offsets; + static Kokkos::View workset_offsets; if(workset_offsets.extent(0) == 0) { - workset_offsets = Kokkos::View ("WorksetOffsets",worksets+1); - const size_t nnz = A.nnz(); - int nnz_per_workset = (nnz+worksets-1)/worksets; + workset_offsets = Kokkos::View("WorksetOffsets", worksets + 1); + const size_type nnz = A.nnz(); + lno_t nnz_per_workset = (nnz+worksets-1)/worksets; workset_offsets(0) = 0; - int ws = 1; - for(int row = 0; row ws*nnz_per_workset) { workset_offsets(ws) = row; ws++; @@ -153,9 +152,9 @@ void kk_inspector_matvec(AType A, XType x, YType y, int rows_per_thread, int tea printf("Worksets: %i %i\n",worksets,ws); worksets = ws; } - double s_a = 1.0; - double s_b = 0.0; - SPMV_Inspector_Functor func (s_a,A,x,workset_offsets,s_b,y); + Scalar s_a(1.0); + Scalar s_b(0.0); + SPMV_Inspector_Functor func(s_a, A, x, workset_offsets, s_b, y); Kokkos::TeamPolicy > policy(1,1); @@ -169,3 +168,4 @@ void kk_inspector_matvec(AType A, XType x, YType y, int rows_per_thread, int tea #endif /* KOKKOS_SPMV_HPP_ */ + diff --git a/perf_test/sparse/spmv/MKL_SPMV.hpp b/perf_test/sparse/spmv/MKL_SPMV.hpp index f18266d726..31592a6956 100644 --- a/perf_test/sparse/spmv/MKL_SPMV.hpp +++ b/perf_test/sparse/spmv/MKL_SPMV.hpp @@ -47,32 +47,65 @@ #ifdef HAVE_MKL #include -template -void mkl_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { +template +void mkl_matvec_wrapper( + int numRows, int numCols, int nnz, + Scalar* values, int* rowptrs, int* entries, + Scalar* x, Scalar* y) +{ + throw std::runtime_error("Can't use cuSPARSE mat-vec for scalar types other than double and float."); +} +template<> +void mkl_matvec_wrapper( + int numRows, int numCols, int nnz, + double* values, int* rowptrs, int* entries, + double* x, double* y) +{ double s_a = 1.0; double s_b = 0.0; - char matdescra[6] = "GLNC0"; char transa = 'N'; - int m = A.numRows(); - int n = x.extent(1); - int k = A.numCols(); - double* x_ptr = (double*)x.data(); - double* y_ptr = (double*)y.data(); - mkl_dcsrmv(&transa, - &m, &k, + &numRows, &numCols, &s_a, matdescra, - A.values.data(), - A.graph.entries.data(), - (int*) &A.graph.row_map(0), - (int*) &A.graph.row_map(1), - x_ptr, + values, entries, rowptrs, rowptrs + 1, + x, &s_b, - y_ptr); + y); +} + +template<> +void mkl_matvec_wrapper( + int numRows, int numCols, int nnz, + float* values, int* rowptrs, int* entries, + float* x, float* y) +{ + float s_a = 1.0; + float s_b = 0.0; + char matdescra[6] = "GLNC0"; + char transa = 'N'; + mkl_scsrmv(&transa, + &numRows, &numCols, + &s_a, + matdescra, + values, entries, rowptrs, rowptrs + 1, + x, + &s_b, + y); +} + +template +void mkl_matvec(AType A, XType x, YType y) { + typedef AType::non_const_value_type Scalar; + mkl_matvec_wrapper( + A.cusparse_handle, A.cusparse_descr, + A.numRows(), A.numCols(), A.nnz(), + A.values.data(), A.graph.row_map.data(), A.graph.entries.data(), + x.data(), y.data()); } #endif #endif /* MKL_SPMV_HPP_ */ + diff --git a/perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp b/perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp index 7d1c2ace08..3a1639a61a 100644 --- a/perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp +++ b/perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp @@ -44,41 +44,40 @@ #ifndef OPENMP_DYNAMIC_SPMV_HPP_ #define OPENMP_DYNAMIC_SPMV_HPP_ -template +template void openmp_dynamic_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { #define OMP_BENCH_RESTRICT __restrict__ - const double s_a = 1.0; - const double s_b = 0.0; + const Scalar s_a(1.0); + const Scalar s_b(0.0); + + const Ordinal rowCount = A.numRows(); + const Scalar* OMP_BENCH_RESTRICT x_ptr = x.data(); + Scalar* OMP_BENCH_RESTRICT y_ptr = y.data(); + const Scalar* OMP_BENCH_RESTRICT matrixCoeffs = A.values.data(); + const Ordinal* OMP_BENCH_RESTRICT matrixCols = A.graph.entries.data(); + const Offset* OMP_BENCH_RESTRICT matrixRowOffsets = A.graph.row_map.data(); - const int rowCount = A.numRows(); - const double* OMP_BENCH_RESTRICT x_ptr = (double*) x.data(); - double* OMP_BENCH_RESTRICT y_ptr = (double*) y.data(); - const double* OMP_BENCH_RESTRICT matrixCoeffs = A.values.data(); - const int* OMP_BENCH_RESTRICT matrixCols = A.graph.entries.data(); - const int* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); - #pragma omp parallel for schedule(dynamic) - for(int row = 0; row < rowCount; ++row) { - - const int rowStart = matrixRowOffsets[row]; - const int rowEnd = matrixRowOffsets[row + 1]; - - double sum = 0.0; - - for(int i = rowStart; i < rowEnd; ++i) { - const int x_entry = matrixCols[i]; - const double alpha_MC = s_a * matrixCoeffs[i]; - sum += alpha_MC * x_ptr[x_entry]; - } - - if(0.0 == s_b) { - y_ptr[row] = sum; - } else { - y_ptr[row] = s_b * y_ptr[row] + sum; - } - + for(Ordinal lrow = 0; row < rowCount; ++row) { + const Offset rowStart = matrixRowOffsets[row]; + const Offset rowEnd = matrixRowOffsets[row + 1]; + + Scalar sum(0.0); + + for(Offset i = rowStart; i < rowEnd; ++i) { + const Ordinal x_entry = matrixCols[i]; + const Scalar alpha_MC = s_a * matrixCoeffs[i]; + sum += alpha_MC * x_ptr[x_entry]; + } + + if(0.0 == s_b) { + y_ptr[row] = sum; + } else { + y_ptr[row] = s_b * y_ptr[row] + sum; + } + } #undef OMP_BENCH_RESTRICT diff --git a/perf_test/sparse/spmv/OpenMPSmartStatic_SPMV.hpp b/perf_test/sparse/spmv/OpenMPSmartStatic_SPMV.hpp index bf75dee4f8..7cbd3fb616 100644 --- a/perf_test/sparse/spmv/OpenMPSmartStatic_SPMV.hpp +++ b/perf_test/sparse/spmv/OpenMPSmartStatic_SPMV.hpp @@ -44,7 +44,7 @@ #ifndef OPENMP_SMART_STATIC_SPMV_HPP_ #define OPENMP_SMART_STATIC_SPMV_HPP_ -#ifdef _OPENMP +#ifdef KOKKOS_ENABLE_OEPNMP #include @@ -52,71 +52,71 @@ int* OMP_BENCH_RESTRICT threadStarts; -template +template void establishSmartSchedule(AType A) { - const LocalOrdinal rowCount = A.numRows(); - const LocalOrdinal* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); - - // Generate a schedule - LocalOrdinal* rowSizes = NULL; - posix_memalign((void**) &rowSizes, 64, sizeof(int) * A.numRows()); - posix_memalign((void**) &threadStarts, 128, sizeof(int) * (omp_get_max_threads() + 1)); - - for(int i = 0; i < omp_get_max_threads(); ++i) { - threadStarts[i] = A.numRows(); - } - - unsigned long long int nnz = 0; - - #pragma omp parallel for reduction(+:nnz) - for(LocalOrdinal row = 0; row < rowCount; ++row) { - const LocalOrdinal rowElements = matrixRowOffsets[row + 1] - matrixRowOffsets[row]; - rowSizes[row] = rowElements; - nnz += rowElements; - } - - LocalOrdinal nzPerThreadTarget = (int)(nnz / (unsigned long long int) omp_get_max_threads()); - - if(nzPerThreadTarget > 128) { - nzPerThreadTarget &= 0xFFFFFFFC; - } - - LocalOrdinal nextRow = 0; - - printf("Target NZ Per Thread: %20d\n", nzPerThreadTarget); - threadStarts[0] = 0; - - for(int thread = 1; thread < omp_get_max_threads(); ++thread) { - LocalOrdinal nzAccum = 0; - - while(nzAccum < nzPerThreadTarget) { - if(nextRow >= rowCount) - break; - - nzAccum += rowSizes[nextRow]; - nextRow++; - } - - threadStarts[thread] = nextRow; - } - - threadStarts[omp_get_max_threads()] = A.numRows(); - - //printf("Schedule: Target-per-Row=%20d\n", rowsPerThreadTarget); - //for(int i = 0; i < omp_get_max_threads(); ++i) { - // printf("thread [%5d] start=%20d end=%20d\n", i, threadStarts[i], threadStarts[i+1]); - //} - - free(rowSizes); + const LocalOrdinal rowCount = A.numRows(); + const LocalOrdinal* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); + + // Generate a schedule + LocalOrdinal* rowSizes = NULL; + posix_memalign((void**) &rowSizes, 64, sizeof(int) * A.numRows()); + posix_memalign((void**) &threadStarts, 128, sizeof(int) * (omp_get_max_threads() + 1)); + + for(int i = 0; i < omp_get_max_threads(); ++i) { + threadStarts[i] = A.numRows(); + } + + unsigned long long int nnz = 0; + + #pragma omp parallel for reduction(+:nnz) + for(LocalOrdinal row = 0; row < rowCount; ++row) { + const LocalOrdinal rowElements = matrixRowOffsets[row + 1] - matrixRowOffsets[row]; + rowSizes[row] = rowElements; + nnz += rowElements; + } + + LocalOrdinal nzPerThreadTarget = (int)(nnz / (unsigned long long int) omp_get_max_threads()); + + if(nzPerThreadTarget > 128) { + nzPerThreadTarget &= 0xFFFFFFFC; + } + + LocalOrdinal nextRow = 0; + + printf("Target NZ Per Thread: %20d\n", nzPerThreadTarget); + threadStarts[0] = 0; + + for(int thread = 1; thread < omp_get_max_threads(); ++thread) { + LocalOrdinal nzAccum = 0; + + while(nzAccum < nzPerThreadTarget) { + if(nextRow >= rowCount) + break; + + nzAccum += rowSizes[nextRow]; + nextRow++; + } + + threadStarts[thread] = nextRow; + } + + threadStarts[omp_get_max_threads()] = A.numRows(); + + //printf("Schedule: Target-per-Row=%20d\n", rowsPerThreadTarget); + //for(int i = 0; i < omp_get_max_threads(); ++i) { + // printf("thread [%5d] start=%20d end=%20d\n", i, threadStarts[i], threadStarts[i+1]); + //} + + free(rowSizes); } template void openmp_smart_static_matvec(AType A, XType x, YType y, int rows_per_thread, - int team_size, int vector_length) { - + int team_size, int vector_length) { + if( NULL == threadStarts ) { - //printf("Generating Schedule...\n"); - establishSmartSchedule(A); + //printf("Generating Schedule...\n"); + establishSmartSchedule(A); } const Scalar s_a = 1.0; @@ -128,12 +128,12 @@ void openmp_smart_static_matvec(AType A, XType x, YType y, int rows_per_thread, const Scalar* OMP_BENCH_RESTRICT matrixCoeffs = A.values.data(); const LocalOrdinal* OMP_BENCH_RESTRICT matrixCols = A.graph.entries.data(); const LocalOrdinal* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); - + #ifdef KOKKOS_ENABLE_PROFILING - uint64_t kpID = 0; - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::beginParallelFor("KokkosSparse::Test_SPMV_raw_openmp", 0, &kpID); - } + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelFor("KokkosSparse::Test_SPMV_raw_openmp", 0, &kpID); + } #endif #pragma omp parallel @@ -146,34 +146,34 @@ void openmp_smart_static_matvec(AType A, XType x, YType y, int rows_per_thread, const int myID = omp_get_thread_num(); const LocalOrdinal myStart = threadStarts[myID]; const LocalOrdinal myEnd = threadStarts[myID + 1]; - - for(int row = myStart; row < myEnd; ++row) { - const LocalOrdinal rowStart = matrixRowOffsets[row]; - const LocalOrdinal rowEnd = matrixRowOffsets[row + 1]; - - Scalar sum = 0.0; - - for(LocalOrdinal i = rowStart; i < rowEnd; ++i) { - const LocalOrdinal x_entry = matrixCols[i]; - const Scalar alpha_MC = s_a * matrixCoeffs[i]; - sum += alpha_MC * x_ptr[x_entry]; - } - - if(0.0 == s_b) { - y_ptr[row] = sum; - } else { - y_ptr[row] = s_b * y_ptr[row] + sum; - } - } + + for(int row = myStart; row < myEnd; ++row) { + const LocalOrdinal rowStart = matrixRowOffsets[row]; + const LocalOrdinal rowEnd = matrixRowOffsets[row + 1]; + + Scalar sum = 0.0; + + for(LocalOrdinal i = rowStart; i < rowEnd; ++i) { + const LocalOrdinal x_entry = matrixCols[i]; + const Scalar alpha_MC = s_a * matrixCoeffs[i]; + sum += alpha_MC * x_ptr[x_entry]; + } + + if(0.0 == s_b) { + y_ptr[row] = sum; + } else { + y_ptr[row] = s_b * y_ptr[row] + sum; + } + } } #ifdef KOKKOS_ENABLE_PROFILING - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } #endif } #undef OMP_BENCH_RESTRICT -#endif /* _OPENMP */ +#endif /* KOKKOS_ENABLE_OPENMP */ #endif /* OPENMP_SMART_STATIC_SPMV_HPP_ */ diff --git a/perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp b/perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp index b59ff8a4ac..7064d2c352 100644 --- a/perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp +++ b/perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp @@ -44,45 +44,42 @@ #ifndef OPENMP_STATIC_SPMV_HPP_ #define OPENMP_STATIC_SPMV_HPP_ -template +template void openmp_static_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { #define OMP_BENCH_RESTRICT __restrict__ - const double s_a = 1.0; - const double s_b = 0.0; + const Scalar s_a(1.0); + const Scalar s_b(0.0); + + const Ordinal rowCount = A.numRows(); + const Scalar* OMP_BENCH_RESTRICT x_ptr = x.data(); + Scalar* OMP_BENCH_RESTRICT y_ptr = y.data(); + const Scalar* OMP_BENCH_RESTRICT matrixCoeffs = A.values.data(); + const Ordinal* OMP_BENCH_RESTRICT matrixCols = A.graph.entries.data(); + const Offset* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); - const int rowCount = A.numRows(); - const double* OMP_BENCH_RESTRICT x_ptr = (double*) x.data(); - double* OMP_BENCH_RESTRICT y_ptr = (double*) y.data(); - const double* OMP_BENCH_RESTRICT matrixCoeffs = A.values.data(); - const int* OMP_BENCH_RESTRICT matrixCols = A.graph.entries.data(); - const int* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); - #pragma omp parallel for - for(int row = 0; row < rowCount; ++row) { - - const int rowStart = matrixRowOffsets[row]; - const int rowEnd = matrixRowOffsets[row + 1]; - - double sum = 0.0; - - for(int i = rowStart; i < rowEnd; ++i) { - const int x_entry = matrixCols[i]; - const double alpha_MC = s_a * matrixCoeffs[i]; - sum += alpha_MC * x_ptr[x_entry]; - } - - if(0.0 == s_b) { - y_ptr[row] = sum; - } else { - y_ptr[row] = s_b * y_ptr[row] + sum; - } - + for(Ordinal row = 0; row < rowCount; ++row) { + + const Offset rowStart = matrixRowOffsets[row]; + const Offset rowEnd = matrixRowOffsets[row + 1]; + + Scalar sum = 0.0; + + for(Offset i = rowStart; i < rowEnd; ++i) { + const Oridnal x_entry = matrixCols[i]; + const Scalar alpha_MC = s_a * matrixCoeffs[i]; + sum += alpha_MC * x_ptr[x_entry]; + } + + if(0.0 == s_b) + y_ptr[row] = sum; + else + y_ptr[row] = s_b * y_ptr[row] + sum; } #undef OMP_BENCH_RESTRICT - } #endif /* OPENMP_STATIC_SPMV_HPP_ */ diff --git a/perf_test/sparse/spmv/matrix_market.hpp b/perf_test/sparse/spmv/matrix_market.hpp index 164a213c23..0bc1874227 100644 --- a/perf_test/sparse/spmv/matrix_market.hpp +++ b/perf_test/sparse/spmv/matrix_market.hpp @@ -48,181 +48,22 @@ #include #include #include +#include #include -namespace Impl { -template< typename OrdinalType> -void SparseGraph_SortRows(OrdinalType nrows, OrdinalType* rowPtr, OrdinalType* colInd) { - #ifdef _OPENMP - #pragma omp parallel for - #endif - for(int row = 0; row < nrows; row++) { - OrdinalType row_start = rowPtr[row]; - OrdinalType row_end = rowPtr[row+1]; - for(OrdinalType i = row_start; i < row_end-1; i++) { - for(OrdinalType j = row_end-1; j > i; j--) { - if(colInd[j] < colInd[j-1]) { - int idx = colInd[j]; - colInd[j] = colInd[j-1]; - colInd[j-1] = idx; - } - } - } - } - - #ifdef _OPENMP - #pragma omp parallel for - #endif - for(int row = 0; row < nrows; row++) { - OrdinalType row_start = rowPtr[row]; - OrdinalType row_end = rowPtr[row+1]; - for(OrdinalType i = row_start; i < row_end-1; i++) { - if(colInd[i+1] < colInd[i]) printf("Error Not sorted %i %i | %i %i\n",row,i,colInd[i],colInd[i+1]); - } - } -} -} - -template< typename ScalarType , typename OrdinalType> -int SparseMatrix_MatrixMarket_read(const char* filename, OrdinalType &nrows, OrdinalType &ncols, OrdinalType &nnz, - ScalarType* &values, OrdinalType* &rowPtr, OrdinalType* &colInd, bool sort, OrdinalType idx_offset = 0) +template< typename ScalarType, typename Offset, typename OrdinalType> +Offset SparseMatrix_WriteBinaryFormat(const char* filename, OrdinalType& nrows, OrdinalType& ncols, Offset& nnz, + ScalarType*& values, Offset*& rowPtr, OrdinalType*& colInd, bool sort, OrdinalType idx_offset = 0) { - FILE* file = fopen(filename,"r"); - char line[512]; - line[0]='%'; - int count=-1; - char* symmetric = NULL; - char* pattern = NULL; - int nlines; - - while(line[0]=='%') - { - fgets(line,511,file); - count++; - if(count==0) { - symmetric=strstr(line,"symmetric"); - pattern=strstr(line,"pattern"); - } - } - rewind(file); - for(int i=0;inlines-10) - printf("Read: %i %i %i %le\n",nnz,rowIndtmp[nnz],colIndtmp[nnz],valuestmp[nnz]); - rowIndtmp[nnz]-= idx_offset; - colIndtmp[nnz]-= idx_offset; - priorEntrySameRowInd[nnz] = lastEntryWithRowInd[rowIndtmp[nnz]-1]; - lastEntryWithRowInd[rowIndtmp[nnz]-1]=nnz; - if((symmetric) && (rowIndtmp[nnz]!=colIndtmp[nnz])) - { - nnz++; - rowIndtmp[nnz]=colIndtmp[nnz-1]; - colIndtmp[nnz]=rowIndtmp[nnz-1]; - valuestmp[nnz]=valuestmp[nnz-1]; - priorEntrySameRowInd[nnz] = lastEntryWithRowInd[rowIndtmp[nnz]-1]; - lastEntryWithRowInd[rowIndtmp[nnz]-1]=nnz; - } - - nnz++; - } - - values = new ScalarType[nnz]; - colInd = new OrdinalType[nnz]; - rowPtr = new OrdinalType[nrows+1]; - - int pos = 0; - for(int row=0;row-1) - { - values[pos] = valuestmp[j]; - colInd[pos] = colIndtmp[j]-1; - j = priorEntrySameRowInd[j]; - pos++; - } - } - rowPtr[nrows]=pos; - - printf("Number of Non-Zeros: %i\n",pos); - delete [] valuestmp; - delete [] colIndtmp; - delete [] rowIndtmp; - delete [] priorEntrySameRowInd; - delete [] lastEntryWithRowInd; - - size_t min_span = nrows+1; - size_t max_span = 0; - size_t ave_span = 0; - for(int row=0; rowmax) max = colInd[i]; - } - if(rowPtr[row+1]>rowPtr[row]) { - size_t span = max-min; - if(spanmax_span) max_span = span; - ave_span += span; - } else min_span = 0; - } - - printf("%lu Spans: %lu %lu %lu\n",(size_t) nnz,min_span,max_span,ave_span/nrows); - if(sort) - Impl::SparseGraph_SortRows(nrows,rowPtr,colInd); - return nnz; -} - -template< typename ScalarType , typename OrdinalType> -int SparseMatrix_WriteBinaryFormat(const char* filename, OrdinalType &nrows, OrdinalType &ncols, OrdinalType &nnz, - ScalarType* &values, OrdinalType* &rowPtr, OrdinalType* &colInd, bool sort, OrdinalType idx_offset = 0) -{ - nnz = SparseMatrix_MatrixMarket_read(filename,nrows,ncols,nnz,values,rowPtr,colInd,sort, idx_offset); - - - char * filename_row = new char[strlen(filename)+5]; - char * filename_col = new char[strlen(filename)+5]; - char * filename_vals = new char[strlen(filename)+6]; - char * filename_descr = new char[strlen(filename)+7]; - strcpy(filename_row,filename); - strcpy(filename_col,filename); - strcpy(filename_vals,filename); - strcpy(filename_descr,filename); - strcat(filename_row,"_row"); - strcat(filename_col,"_col"); - strcat(filename_vals,"_vals"); - strcat(filename_descr,"_descr"); - FILE* RowFile = fopen(filename_row,"w"); - FILE* ColFile = fopen(filename_col,"w"); - FILE* ValsFile = fopen(filename_vals,"w"); - FILE* DescrFile = fopen(filename_descr,"w"); - + std::string base_filename(filename); + std::string filename_row = base_filename + "_row"; + std::string filename_col = base_filename + "_col"; + std::string filename_vals = base_filename + "_vals"; + std::string filename_descr = base_filename + "_descr"; + FILE* RowFile = fopen(filename_row.c_str(),"w"); + FILE* ColFile = fopen(filename_col.c_str(),"w"); + FILE* ValsFile = fopen(filename_vals.c_str(),"w"); + FILE* DescrFile = fopen(filename_descr.c_str(),"w"); FILE* file = fopen(filename,"r"); char line[512]; @@ -241,25 +82,42 @@ int SparseMatrix_WriteBinaryFormat(const char* filename, OrdinalType &nrows, Ord if(line[0]=='%') fprintf ( DescrFile , "%s",line); else - fprintf ( DescrFile , "%i %i %i\n",nrows,ncols,nnz); + fprintf ( DescrFile , "%i %i %i\n",(int) nrows, (int) ncols, (int) nnz); } fprintf ( DescrFile , "\n"); - fwrite ( rowPtr, sizeof(OrdinalType), nrows+1, RowFile); - fwrite ( colInd, sizeof(OrdinalType), nnz, ColFile); - fwrite ( values, sizeof(ScalarType), nnz, ValsFile); + //Always read/write binary format using double for scalars and int for rowptrs/colinds + //This means the same binary files will still work even if the template parameters change. + for(Ordinal i = 0; i < nrows + 1; i++) + { + int r = rowPtr[i]; + fwrite(&r, sizeof(int), 1, RowFile); + } + for(Offset i = 0; i < nnz; i++) + { + int c = colInd[i]; + fwrite(&c, sizeof(int), 1, ColFile); + double v = values[i]; + fwrite(&v, sizeof(double), 1, ValsFile); + } + for(Offset i = 0; i < nnz; i++) + { + int c = colInd[i]; + fwrite(&c, sizeof(int), 1, ColFile); + } fclose(RowFile); fclose(ColFile); fclose(ValsFile); fclose(DescrFile); - size_t min_span = nrows+1; - size_t max_span = 0; - size_t ave_span = 0; - for(int row=0; rowmax) max = colInd[i]; } @@ -270,18 +128,22 @@ int SparseMatrix_WriteBinaryFormat(const char* filename, OrdinalType &nrows, Ord ave_span += span; } else min_span = 0; } - printf("%lu Spans: %lu %lu %lu\n",(size_t) nnz,min_span,max_span,ave_span/nrows); + printf("%zu Spans: %i %i %i\n", (size_t) nnz, (int) min_span, (int) max_span, (int) (ave_span/nrows)); return nnz; } -template< typename ScalarType , typename OrdinalType> +template< typename ScalarType, typename OrdinalType> int SparseMatrix_ReadBinaryFormat(const char* filename, OrdinalType &nrows, OrdinalType &ncols, OrdinalType &nnz, ScalarType* &values, OrdinalType* &rowPtr, OrdinalType* &colInd) { - char * filename_descr = new char[strlen(filename)+7]; - strcpy(filename_descr,filename); - strcat(filename_descr,"_descr"); - FILE* file = fopen(filename_descr,"r"); + std::string base_filename(filename); + std::string filename_row = base_filename + "_row"; + std::string filename_col = base_filename + "_col"; + std::string filename_vals = base_filename + "_vals"; + std::string filename_descr = base_filename + "_descr"; + FILE* RowFile = fopen(filename_row.c_str(),"rb"); + FILE* ColFile = fopen(filename_col.c_str(),"rb"); + FILE* ValsFile = fopen(filename_vals.c_str(),"rb"); char line[512]; line[0]='%'; int count=-1; @@ -304,19 +166,6 @@ int SparseMatrix_ReadBinaryFormat(const char* filename, OrdinalType &nrows, Ordi fclose(file); - char * filename_row = new char[strlen(filename)+5]; - char * filename_col = new char[strlen(filename)+5]; - char * filename_vals = new char[strlen(filename)+6]; - strcpy(filename_row,filename); - strcpy(filename_col,filename); - strcpy(filename_vals,filename); - strcat(filename_row,"_row"); - strcat(filename_col,"_col"); - strcat(filename_vals,"_vals"); - FILE* RowFile = fopen(filename_row,"r"); - FILE* ColFile = fopen(filename_col,"r"); - FILE* ValsFile = fopen(filename_vals,"r"); - bool read_values = false; if(ValsFile == NULL) read_values = false; @@ -363,3 +212,4 @@ int SparseMatrix_ReadBinaryFormat(const char* filename, OrdinalType &nrows, Ordi } #endif /* MATRIX_MARKET_HPP_ */ + From f6c0f60f4c6ecddd9787949862e6d99320bc31e6 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 10 Feb 2020 13:04:32 -0700 Subject: [PATCH 02/14] Misc fixes and cleanup in perftests --- perf_test/sparse/KokkosSparse_spiluk.cpp | 1 - perf_test/sparse/KokkosSparse_spmv.cpp | 101 +++-------- perf_test/sparse/KokkosSparse_sptrsv.cpp | 1 - perf_test/sparse/spmv/matrix_market.hpp | 215 ----------------------- src/common/KokkosKernels_IOUtils.hpp | 13 +- 5 files changed, 32 insertions(+), 299 deletions(-) delete mode 100644 perf_test/sparse/spmv/matrix_market.hpp diff --git a/perf_test/sparse/KokkosSparse_spiluk.cpp b/perf_test/sparse/KokkosSparse_spiluk.cpp index c94394b444..8a3a8b1fe2 100644 --- a/perf_test/sparse/KokkosSparse_spiluk.cpp +++ b/perf_test/sparse/KokkosSparse_spiluk.cpp @@ -56,7 +56,6 @@ #endif #include -#include #include "KokkosKernels_SparseUtils.hpp" #include "KokkosSparse_spiluk.hpp" diff --git a/perf_test/sparse/KokkosSparse_spmv.cpp b/perf_test/sparse/KokkosSparse_spmv.cpp index b16e9ea307..c67f02adba 100644 --- a/perf_test/sparse/KokkosSparse_spmv.cpp +++ b/perf_test/sparse/KokkosSparse_spmv.cpp @@ -51,13 +51,13 @@ #include #include +#include +#include + #ifdef HAVE_CUSPARSE #include #endif -#include -#include - #ifdef HAVE_MKL #include #endif @@ -73,43 +73,13 @@ enum {KOKKOS, MKL, CUSPARSE, KK_KERNELS, KK_KERNELS_INSP, KK_INSP, OMP_STATIC, OMP_DYNAMIC, OMP_INSP}; enum {AUTO, DYNAMIC, STATIC}; -typedef default_scalar scalar_t; -typedef default_lno_t lno_t; -typedef default_size_type size_type; -typedef default_layout layout_t; - -template< typename Scalar, typename Offset, typename Ordinal> -int SparseMatrix_generate(Ordinal nrows, Ordinal ncols, Ordinal &nnz, Ordinal varianz_nel_row, Ordinal width_row, Scalar* &values, Offset* &rowPtr, Ordinal* &colInd) -{ - rowPtr = new Offset[nrows+1]; - - Ordinal elements_per_row = nnz/nrows; - srand(13721); - rowPtr[0] = 0; - for(int row=0;row=ncols) pos-=ncols; - colInd[k]= pos; - values[k] = 100.0*rand()/INT_MAX-50.0; - } - } - return nnz; -} +typedef default_scalar Scalar; +typedef default_lno_t Ordinal; +typedef default_size_type Offset; +typedef default_layout Layout; template -void matvec(AType& A, XType x, YType y, int rows_per_thread, int team_size, int vector_length, int test, int schedule) { +void matvec(AType& A, XType x, YType y, Ordinal rows_per_thread, int team_size, int vector_length, int test, int schedule) { switch(test) { @@ -168,31 +138,22 @@ void matvec(AType& A, XType x, YType y, int rows_per_thread, int team_size, int } } -template -int test_crs_matrix_singlevec(int numRows, int numCols, int nnz, int test, const char* filename, const bool binaryfile, int rows_per_thread, int team_size, int vector_length,int idx_offset, int schedule, int loop) { - typedef KokkosSparse::CrsMatrix matrix_type ; - typedef typename Kokkos::View mv_type; - typedef typename Kokkos::View mv_random_read_type; +int test_crs_matrix_singlevec(Ordinal numRows, Ordinal numCols, int test, const char* filename, Ordinal rows_per_thread, int team_size, int vector_length, int schedule, int loop) { + typedef KokkosSparse::CrsMatrix matrix_type; + typedef typename Kokkos::View mv_type; typedef typename mv_type::HostMirror h_mv_type; - Scalar* val = NULL; - int* row = NULL; - int* col = NULL; - srand(17312837); - if(filename==NULL) - nnz = SparseMatrix_generate(numRows,numCols,nnz,nnz/numRows*0.2,numRows*0.01,val,row,col); + matrix_type A; + if(filename) + A = KokkosKernels::Impl::read_kokkos_crst_matrix(filename); else - if(!binaryfile) - nnz = SparseMatrix_MatrixMarket_read(filename,numRows,numCols,nnz,val,row,col,false,idx_offset); - else - nnz = SparseMatrix_ReadBinaryFormat(filename,numRows,numCols,nnz,val,row,col); - - matrix_type A("CRS::A",numRows,numCols,nnz,val,row,col,false); - - mv_type x("X",numCols); - mv_random_read_type t_x(x); - mv_type y("Y",numRows); + A = KokkosKernels::Impl::kk_generate_sparse_matrix(numRows, numCols, 10 * numRows, 0, numCols); + numRows = A.numRows(); + numCols = A.numCols(); + Offset nnz = A.nnz(); + mv_type x("X", numCols); + mv_type y("Y", numRows); h_mv_type h_x = Kokkos::create_mirror_view(x); h_mv_type h_y = Kokkos::create_mirror_view(y); h_mv_type h_y_compare = Kokkos::create_mirror(y); @@ -227,9 +188,9 @@ int test_crs_matrix_singlevec(int numRows, int numCols, int nnz, int test, const Kokkos::deep_copy(y,h_y); Kokkos::deep_copy(A.graph.entries,h_graph.entries); Kokkos::deep_copy(A.values,h_values); - typename KokkosSparse::CrsMatrix::values_type x1("X1",numCols); + mv_type x1("X1",numCols); Kokkos::deep_copy(x1,h_x); - typename KokkosSparse::CrsMatrix::values_type y1("Y1",numRows); + mv_type y1("Y1",numRows); //int nnz_per_row = A.nnz()/A.numRows(); matvec(A,x1,y1,rows_per_thread,team_size,vector_length,test,schedule); @@ -239,6 +200,7 @@ int test_crs_matrix_singlevec(int numRows, int numCols, int nnz, int test, const Scalar error = 0; Scalar sum = 0; for(int i=0;i(filename,numRows,numCols,nnz,val,row,col,true,idx_offset); - return 0; - } - Kokkos::initialize(argc,argv); - int total_errors = test_crs_matrix_singlevec(size,size,size*10,test,filename,binaryfile,rows_per_thread,team_size,vector_length,idx_offset,schedule,loop); + int total_errors = test_crs_matrix_singlevec(size,size,test,filename,rows_per_thread,team_size,vector_length,schedule,loop); if(total_errors == 0) printf("Kokkos::MultiVector Test: Passed\n"); diff --git a/perf_test/sparse/KokkosSparse_sptrsv.cpp b/perf_test/sparse/KokkosSparse_sptrsv.cpp index b7051711d6..3e52b67385 100644 --- a/perf_test/sparse/KokkosSparse_sptrsv.cpp +++ b/perf_test/sparse/KokkosSparse_sptrsv.cpp @@ -56,7 +56,6 @@ #endif #include -#include #include "KokkosKernels_SparseUtils.hpp" #include "KokkosSparse_sptrsv.hpp" diff --git a/perf_test/sparse/spmv/matrix_market.hpp b/perf_test/sparse/spmv/matrix_market.hpp deleted file mode 100644 index 0bc1874227..0000000000 --- a/perf_test/sparse/spmv/matrix_market.hpp +++ /dev/null @@ -1,215 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// KokkosKernels 0.9: Linear Algebra and Graph Kernels -// Copyright 2017 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef MATRIX_MARKET_HPP_ -#define MATRIX_MARKET_HPP_ - -#include -#include -#include -#include -#include -#include - -template< typename ScalarType, typename Offset, typename OrdinalType> -Offset SparseMatrix_WriteBinaryFormat(const char* filename, OrdinalType& nrows, OrdinalType& ncols, Offset& nnz, - ScalarType*& values, Offset*& rowPtr, OrdinalType*& colInd, bool sort, OrdinalType idx_offset = 0) -{ - std::string base_filename(filename); - std::string filename_row = base_filename + "_row"; - std::string filename_col = base_filename + "_col"; - std::string filename_vals = base_filename + "_vals"; - std::string filename_descr = base_filename + "_descr"; - FILE* RowFile = fopen(filename_row.c_str(),"w"); - FILE* ColFile = fopen(filename_col.c_str(),"w"); - FILE* ValsFile = fopen(filename_vals.c_str(),"w"); - FILE* DescrFile = fopen(filename_descr.c_str(),"w"); - - FILE* file = fopen(filename,"r"); - char line[512]; - line[0]='%'; - int count=-1; - //char* symmetric = NULL; - //int nlines; - - while(line[0]=='%') - { - fgets(line,511,file); - line[511] = 0; - count++; - //if(count==0) symmetric=strstr(line,"symmetric"); - - if(line[0]=='%') - fprintf ( DescrFile , "%s",line); - else - fprintf ( DescrFile , "%i %i %i\n",(int) nrows, (int) ncols, (int) nnz); - } - fprintf ( DescrFile , "\n"); - - //Always read/write binary format using double for scalars and int for rowptrs/colinds - //This means the same binary files will still work even if the template parameters change. - for(Ordinal i = 0; i < nrows + 1; i++) - { - int r = rowPtr[i]; - fwrite(&r, sizeof(int), 1, RowFile); - } - for(Offset i = 0; i < nnz; i++) - { - int c = colInd[i]; - fwrite(&c, sizeof(int), 1, ColFile); - double v = values[i]; - fwrite(&v, sizeof(double), 1, ValsFile); - } - for(Offset i = 0; i < nnz; i++) - { - int c = colInd[i]; - fwrite(&c, sizeof(int), 1, ColFile); - } - - fclose(RowFile); - fclose(ColFile); - fclose(ValsFile); - fclose(DescrFile); - - Ordinal min_span = nrows+1; - Ordinal max_span = 0; - Ordinal ave_span = 0; - for(Ordinal row=0; rowmax) max = colInd[i]; - } - if(rowPtr[row+1]>rowPtr[row]) { - size_t span = max-min; - if(spanmax_span) max_span = span; - ave_span += span; - } else min_span = 0; - } - printf("%zu Spans: %i %i %i\n", (size_t) nnz, (int) min_span, (int) max_span, (int) (ave_span/nrows)); - - return nnz; -} - -template< typename ScalarType, typename OrdinalType> -int SparseMatrix_ReadBinaryFormat(const char* filename, OrdinalType &nrows, OrdinalType &ncols, OrdinalType &nnz, ScalarType* &values, OrdinalType* &rowPtr, OrdinalType* &colInd) -{ - std::string base_filename(filename); - std::string filename_row = base_filename + "_row"; - std::string filename_col = base_filename + "_col"; - std::string filename_vals = base_filename + "_vals"; - std::string filename_descr = base_filename + "_descr"; - FILE* RowFile = fopen(filename_row.c_str(),"rb"); - FILE* ColFile = fopen(filename_col.c_str(),"rb"); - FILE* ValsFile = fopen(filename_vals.c_str(),"rb"); - char line[512]; - line[0]='%'; - int count=-1; - char* symmetric = NULL; - //int nlines; - - while(line[0]=='%') - { - fgets(line,511,file); - count++; - if(count==0) symmetric=strstr(line,"symmetric"); - } - rewind(file); - for(int i=0;imax) max = colInd[i]; - } - if(rowPtr[row+1]>rowPtr[row]) { - size_t span = max-min; - if(spanmax_span) max_span = span; - ave_span += span; - } else min_span = 0; - } - printf("%lu Spans: %lu %lu %lu\n",(size_t) nnz,min_span,max_span,ave_span/nrows); - - - return nnz; -} - -#endif /* MATRIX_MARKET_HPP_ */ - diff --git a/src/common/KokkosKernels_IOUtils.hpp b/src/common/KokkosKernels_IOUtils.hpp index b649686eb4..e5d505b119 100644 --- a/src/common/KokkosKernels_IOUtils.hpp +++ b/src/common/KokkosKernels_IOUtils.hpp @@ -83,7 +83,7 @@ void kk_sparseMatrix_generate( rowPtr[0] = 0; for(int row=0;row=ncols) pos-=ncols; @@ -110,7 +110,7 @@ void kk_sparseMatrix_generate( if (!is_already_in_the_row) { colInd[k]= pos; - values[k] = 100.0*rand()/INT_MAX-50.0; + values[k] = 100.0*rand()/RAND_MAX-50.0; break; } } @@ -139,7 +139,6 @@ void kk_sparseMatrix_generate_lower_upper_triangle( rowPtr[0] = 0; for(int row=0;row=ncols) pos-=ncols; colInd[k]= pos; - values[k] = 100.0*rand()/INT_MAX-50.0; + values[k] = 100.0*rand()/RAND_MAX-50.0; total_values += Kokkos::Details::ArithTraits::abs(values[k]); } From eae9f14f753190bffd506e6a5e841c27dab2b195 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 17 Feb 2020 15:29:20 -0700 Subject: [PATCH 03/14] Finish cleanup of spmv perf tests --- perf_test/sparse/KokkosSparse_spmv.cpp | 54 +++++++++++-------- perf_test/sparse/spmv/Kokkos_SPMV.hpp | 2 +- .../sparse/spmv/Kokkos_SPMV_Inspector.hpp | 10 ++-- perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp | 4 +- .../sparse/spmv/OpenMPSmartStatic_SPMV.hpp | 43 ++++++++------- perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp | 4 +- 6 files changed, 62 insertions(+), 55 deletions(-) diff --git a/perf_test/sparse/KokkosSparse_spmv.cpp b/perf_test/sparse/KokkosSparse_spmv.cpp index c67f02adba..dcbcabd4be 100644 --- a/perf_test/sparse/KokkosSparse_spmv.cpp +++ b/perf_test/sparse/KokkosSparse_spmv.cpp @@ -53,6 +53,10 @@ #include #include +#include +#include +#include +#include #ifdef HAVE_CUSPARSE #include @@ -62,7 +66,7 @@ #include #endif -#ifdef _OPENMP +#ifdef KOKKOS_ENABLE_OPENMP #include #include #include @@ -87,28 +91,28 @@ void matvec(AType& A, XType x, YType y, Ordinal rows_per_thread, int team_size, if(schedule == AUTO) schedule = A.nnz()>10000000?DYNAMIC:STATIC; if(schedule == STATIC) - kk_matvec(A, x, y, rows_per_thread, team_size, vector_length); + kokkos_matvec(A, x, y, rows_per_thread, team_size, vector_length); if(schedule == DYNAMIC) - kk_matvec(A, x, y, rows_per_thread, team_size, vector_length); + kokkos_matvec(A, x, y, rows_per_thread, team_size, vector_length); break; case KK_INSP: if(schedule == AUTO) schedule = A.nnz()>10000000?DYNAMIC:STATIC; if(schedule == STATIC) - kk_inspector_matvec(A, x, y, rows_per_thread, team_size, vector_length); + kk_inspector_matvec(A, x, y, team_size, vector_length); if(schedule == DYNAMIC) - kk_inspector_matvec(A, x, y, rows_per_thread, team_size, vector_length); + kk_inspector_matvec(A, x, y, team_size, vector_length); break; #ifdef KOKKOS_ENABLE_OPENMP case OMP_STATIC: - openmp_static_matvec(A, x, y); + openmp_static_matvec(A, x, y); break; case OMP_DYNAMIC: - openmp_dynamic_matvec(A, x, y); + openmp_dynamic_matvec(A, x, y); break; case OMP_INSP: - openmp_smart_static_matvec(A, x, y); + openmp_smart_static_matvec(A, x, y); break; #endif @@ -126,13 +130,13 @@ void matvec(AType& A, XType x, YType y, Ordinal rows_per_thread, int team_size, KokkosSparse::spmv (KokkosSparse::NoTranspose,1.0,A,x,0.0,y); break; case KK_KERNELS_INSP: - if(A.graph.row_block_offsets.data()==NULL) { - printf("PTR: %p\n",A.graph.row_block_offsets.data()); - A.graph.create_block_partitioning(AType::execution_space::concurrency()); - printf("PTR2: %p\n",A.graph.row_block_offsets.data()); - } - kokkoskernels_matvec(A, x, y, rows_per_thread, team_size, vector_length); - break; + if(A.graph.row_block_offsets.data()==NULL) { + printf("PTR: %p\n",A.graph.row_block_offsets.data()); + A.graph.create_block_partitioning(AType::execution_space::concurrency()); + printf("PTR2: %p\n",A.graph.row_block_offsets.data()); + } + KokkosSparse::spmv (KokkosSparse::NoTranspose,1.0,A,x,0.0,y); + break; default: fprintf(stderr, "Selected test is not available.\n"); } @@ -146,9 +150,13 @@ int test_crs_matrix_singlevec(Ordinal numRows, Ordinal numCols, int test, const srand(17312837); matrix_type A; if(filename) - A = KokkosKernels::Impl::read_kokkos_crst_matrix(filename); + A = KokkosKernels::Impl::read_kokkos_crst_matrix(filename); else - A = KokkosKernels::Impl::kk_generate_sparse_matrix(numRows, numCols, 10 * numRows, 0, numCols); + { + Offset nnz = 10 * numRows; + //note: the help text says the bandwidth is fixed at 0.01 * numRows + A = KokkosKernels::Impl::kk_generate_sparse_matrix(numRows, numCols, nnz, 0, 0.01 * numRows); + } numRows = A.numRows(); numCols = A.numCols(); Offset nnz = A.nnz(); @@ -251,7 +259,7 @@ void print_help() { printf(" Options:\n"); printf(" kk,kk-kernels (Kokkos/Trilinos)\n"); printf(" kk-insp (Kokkos Structure Inspection)\n"); -#ifdef _OPENMP +#ifdef KOKKOS_ENABLE_OPENMP printf(" omp-dynamic,omp-static (Standard OpenMP)\n"); printf(" omp-insp (OpenMP Structure Inspection)\n"); #endif @@ -275,8 +283,6 @@ int main(int argc, char **argv) int test=KOKKOS; //int type=-1; char* filename = NULL; - bool binaryfile = false; - bool write_binary = false; int rows_per_thread = -1; int vector_length = -1; @@ -295,6 +301,11 @@ int main(int argc, char **argv) //if((strcmp(argv[i],"-v")==0)) {numVecs=atoi(argv[++i]); continue;} if((strcmp(argv[i],"--test")==0)) { i++; + if(i == argc) + { + std::cerr << "Must pass algorithm name after '--test'"; + exit(1); + } if((strcmp(argv[i],"mkl")==0)) test = MKL; if((strcmp(argv[i],"kk")==0)) @@ -307,7 +318,7 @@ int main(int argc, char **argv) test = KK_KERNELS_INSP; if((strcmp(argv[i],"kk-insp")==0)) test = KK_INSP; -#ifdef _OPENMP +#ifdef KOKKOS_ENABLE_OPENMP if((strcmp(argv[i],"omp-static") == 0)) test = OMP_STATIC; if((strcmp(argv[i], "omp-dynamic") == 0)) @@ -323,7 +334,6 @@ int main(int argc, char **argv) if((strcmp(argv[i],"-rpt")==0)) {rows_per_thread=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-ts")==0)) {team_size=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-vl")==0)) {vector_length=atoi(argv[++i]); continue;} - if((strcmp(argv[i],"--write-binary")==0)) {write_binary=true;} if((strcmp(argv[i],"-l")==0)) {loop=atoi(argv[++i]); continue;} if((strcmp(argv[i],"--schedule")==0)) { i++; diff --git a/perf_test/sparse/spmv/Kokkos_SPMV.hpp b/perf_test/sparse/spmv/Kokkos_SPMV.hpp index a3d050ea31..0eed31e998 100644 --- a/perf_test/sparse/spmv/Kokkos_SPMV.hpp +++ b/perf_test/sparse/spmv/Kokkos_SPMV.hpp @@ -165,7 +165,7 @@ void kokkos_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size typedef typename AType::execution_space execution_space; typedef typename AType::non_const_size_type size_type; typedef typename AType::non_const_ordinal_type lno_t; - typedef typename AType::non_const_scalar_type scalar_t; + typedef typename AType::non_const_value_type scalar_t; typedef KokkosSparse::CrsMatrix matrix_type ; int rows_per_team = launch_parameters(A.numRows(),A.nnz(),rows_per_thread,team_size,vector_length); diff --git a/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp b/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp index bd9f5c8818..b55f2a9928 100644 --- a/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp +++ b/perf_test/sparse/spmv/Kokkos_SPMV_Inspector.hpp @@ -120,16 +120,14 @@ struct SPMV_Inspector_Functor { }; template -void kk_inspector_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { +void kk_inspector_matvec(AType A, XType x, YType y, int team_size, int vector_length) { typedef typename AType::execution_space execution_space; typedef typename AType::device_type::memory_space memory_space; typedef typename AType::non_const_size_type size_type; typedef typename AType::non_const_ordinal_type lno_t; - typedef typename AType::non_const_scalar_type scalar_t; + typedef typename AType::non_const_value_type scalar_t; - //int rows_per_team = launch_parameters(A.numRows(),A.nnz(),rows_per_thread,team_size,vector_length); - //static int worksets = (y.extent(0)+rows_per_team-1)/rows_per_team; static int worksets = std::is_same::value ? team_size>0?execution_space::concurrency()/team_size:execution_space::concurrency() : //static team_size>0?execution_space::concurrency()*32/team_size:execution_space::concurrency()*32 ; //dynamic @@ -152,8 +150,8 @@ void kk_inspector_matvec(AType A, XType x, YType y, int rows_per_thread, int tea printf("Worksets: %i %i\n",worksets,ws); worksets = ws; } - Scalar s_a(1.0); - Scalar s_b(0.0); + scalar_t s_a(1.0); + scalar_t s_b(0.0); SPMV_Inspector_Functor func(s_a, A, x, workset_offsets, s_b, y); Kokkos::TeamPolicy > policy(1,1); diff --git a/perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp b/perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp index 3a1639a61a..bebc14344e 100644 --- a/perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp +++ b/perf_test/sparse/spmv/OpenMPDynamic_SPMV.hpp @@ -45,7 +45,7 @@ #define OPENMP_DYNAMIC_SPMV_HPP_ template -void openmp_dynamic_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { +void openmp_dynamic_matvec(AType A, XType x, YType y) { #define OMP_BENCH_RESTRICT __restrict__ @@ -60,7 +60,7 @@ void openmp_dynamic_matvec(AType A, XType x, YType y, int rows_per_thread, int t const Offset* OMP_BENCH_RESTRICT matrixRowOffsets = A.graph.row_map.data(); #pragma omp parallel for schedule(dynamic) - for(Ordinal lrow = 0; row < rowCount; ++row) { + for(Ordinal row = 0; row < rowCount; ++row) { const Offset rowStart = matrixRowOffsets[row]; const Offset rowEnd = matrixRowOffsets[row + 1]; diff --git a/perf_test/sparse/spmv/OpenMPSmartStatic_SPMV.hpp b/perf_test/sparse/spmv/OpenMPSmartStatic_SPMV.hpp index 7cbd3fb616..1bd5b30575 100644 --- a/perf_test/sparse/spmv/OpenMPSmartStatic_SPMV.hpp +++ b/perf_test/sparse/spmv/OpenMPSmartStatic_SPMV.hpp @@ -44,7 +44,7 @@ #ifndef OPENMP_SMART_STATIC_SPMV_HPP_ #define OPENMP_SMART_STATIC_SPMV_HPP_ -#ifdef KOKKOS_ENABLE_OEPNMP +#ifdef KOKKOS_ENABLE_OPENMP #include @@ -54,11 +54,11 @@ int* OMP_BENCH_RESTRICT threadStarts; template void establishSmartSchedule(AType A) { - const LocalOrdinal rowCount = A.numRows(); - const LocalOrdinal* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); + const Ordinal rowCount = A.numRows(); + const Offset* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); // Generate a schedule - LocalOrdinal* rowSizes = NULL; + Ordinal* rowSizes = NULL; posix_memalign((void**) &rowSizes, 64, sizeof(int) * A.numRows()); posix_memalign((void**) &threadStarts, 128, sizeof(int) * (omp_get_max_threads() + 1)); @@ -69,25 +69,25 @@ void establishSmartSchedule(AType A) { unsigned long long int nnz = 0; #pragma omp parallel for reduction(+:nnz) - for(LocalOrdinal row = 0; row < rowCount; ++row) { - const LocalOrdinal rowElements = matrixRowOffsets[row + 1] - matrixRowOffsets[row]; + for(Ordinal row = 0; row < rowCount; ++row) { + const Ordinal rowElements = matrixRowOffsets[row + 1] - matrixRowOffsets[row]; rowSizes[row] = rowElements; nnz += rowElements; } - LocalOrdinal nzPerThreadTarget = (int)(nnz / (unsigned long long int) omp_get_max_threads()); + Ordinal nzPerThreadTarget = (int)(nnz / (unsigned long long int) omp_get_max_threads()); if(nzPerThreadTarget > 128) { nzPerThreadTarget &= 0xFFFFFFFC; } - LocalOrdinal nextRow = 0; + Ordinal nextRow = 0; printf("Target NZ Per Thread: %20d\n", nzPerThreadTarget); threadStarts[0] = 0; for(int thread = 1; thread < omp_get_max_threads(); ++thread) { - LocalOrdinal nzAccum = 0; + Ordinal nzAccum = 0; while(nzAccum < nzPerThreadTarget) { if(nextRow >= rowCount) @@ -110,24 +110,23 @@ void establishSmartSchedule(AType A) { free(rowSizes); } -template -void openmp_smart_static_matvec(AType A, XType x, YType y, int rows_per_thread, - int team_size, int vector_length) { +template +void openmp_smart_static_matvec(AType A, XType x, YType y) { if( NULL == threadStarts ) { //printf("Generating Schedule...\n"); - establishSmartSchedule(A); + establishSmartSchedule(A); } const Scalar s_a = 1.0; const Scalar s_b = 0.0; - //const LocalOrdinal rowCount = A.numRows(); + //const Ordinal rowCount = A.numRows(); const Scalar* OMP_BENCH_RESTRICT x_ptr = (Scalar*) x.data(); Scalar* OMP_BENCH_RESTRICT y_ptr = (Scalar*) y.data(); const Scalar* OMP_BENCH_RESTRICT matrixCoeffs = A.values.data(); - const LocalOrdinal* OMP_BENCH_RESTRICT matrixCols = A.graph.entries.data(); - const LocalOrdinal* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); + const Ordinal* OMP_BENCH_RESTRICT matrixCols = A.graph.entries.data(); + const Offset* OMP_BENCH_RESTRICT matrixRowOffsets = &A.graph.row_map(0); #ifdef KOKKOS_ENABLE_PROFILING uint64_t kpID = 0; @@ -144,17 +143,17 @@ void openmp_smart_static_matvec(AType A, XType x, YType y, int rows_per_thread, #endif const int myID = omp_get_thread_num(); - const LocalOrdinal myStart = threadStarts[myID]; - const LocalOrdinal myEnd = threadStarts[myID + 1]; + const Ordinal myStart = threadStarts[myID]; + const Ordinal myEnd = threadStarts[myID + 1]; for(int row = myStart; row < myEnd; ++row) { - const LocalOrdinal rowStart = matrixRowOffsets[row]; - const LocalOrdinal rowEnd = matrixRowOffsets[row + 1]; + const Offset rowStart = matrixRowOffsets[row]; + const Offset rowEnd = matrixRowOffsets[row + 1]; Scalar sum = 0.0; - for(LocalOrdinal i = rowStart; i < rowEnd; ++i) { - const LocalOrdinal x_entry = matrixCols[i]; + for(Offset i = rowStart; i < rowEnd; ++i) { + const Ordinal x_entry = matrixCols[i]; const Scalar alpha_MC = s_a * matrixCoeffs[i]; sum += alpha_MC * x_ptr[x_entry]; } diff --git a/perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp b/perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp index 7064d2c352..d8de83cff7 100644 --- a/perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp +++ b/perf_test/sparse/spmv/OpenMPStatic_SPMV.hpp @@ -45,7 +45,7 @@ #define OPENMP_STATIC_SPMV_HPP_ template -void openmp_static_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) { +void openmp_static_matvec(AType A, XType x, YType y) { #define OMP_BENCH_RESTRICT __restrict__ @@ -68,7 +68,7 @@ void openmp_static_matvec(AType A, XType x, YType y, int rows_per_thread, int te Scalar sum = 0.0; for(Offset i = rowStart; i < rowEnd; ++i) { - const Oridnal x_entry = matrixCols[i]; + const Ordinal x_entry = matrixCols[i]; const Scalar alpha_MC = s_a * matrixCoeffs[i]; sum += alpha_MC * x_ptr[x_entry]; } From 794a777261d72f36a8fe0a6954ea237f95f6a0a3 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 19 Feb 2020 12:59:19 -0700 Subject: [PATCH 04/14] Fixed Blas1 dot ETI for special accumulators When doing dot(View, View) -> float (or with complex*), dot uses double (or complex) to actually sum the values. This wasn't getting ETI'd correctly, but now perf_test and unit_test both build correctly with float enabled as a scalar. --- src/blas/KokkosBlas1_dot.hpp | 34 ++-- src/blas/impl/KokkosBlas1_dot_spec.hpp | 205 +++++++++++++++++++++---- 2 files changed, 193 insertions(+), 46 deletions(-) diff --git a/src/blas/KokkosBlas1_dot.hpp b/src/blas/KokkosBlas1_dot.hpp index 7bfed45b28..8be0fbc4f2 100644 --- a/src/blas/KokkosBlas1_dot.hpp +++ b/src/blas/KokkosBlas1_dot.hpp @@ -93,31 +93,31 @@ dot (const XVector& x, const YVector& y) using dot_type = typename Kokkos::Details::InnerProductSpaceTraits< typename XVector::non_const_value_type>::dot_type; - // Some platforms, such as Mac Clang, seem to get poor accuracy with - // float and complex. Work around some Trilinos test - // failures by using a higher-precision type for intermediate dot - // product sums. - constexpr bool is_complex_float = - std::is_same>::value; - constexpr bool is_real_float = std::is_same::value; - using result_type = typename std::conditional, - typename std::conditional::type - >::type; - using RVector_Internal = Kokkos::View, result_type is complex + //These special cases are to maintain accuracy. + using result_type = + typename KokkosBlas::Impl::DotAccumulatingScalar::type; + using RVector_Internal = Kokkos::View>; + using RVector_Result = Kokkos::View>; result_type result {}; - RVector_Internal R = RVector_Internal(&result); + RVector_Result R = RVector_Result(&result); XVector_Internal X = x; YVector_Internal Y = y; - Impl::Dot::dot (R,X,Y); + //Even though RVector is the template parameter, Dot::dot has an overload that + //accepts RVector_Internal (with the special accumulator, if dot_type is 32-bit precision). + //Impl::Dot needs to support both cases, and it's easier to do this with overloading than + //by extending the ETI to deal with two different scalar types. + Impl::DotSpecialAccumulator::dot(R,X,Y); Kokkos::fence(); // mfh 22 Jan 2020: We need the line below because // Kokkos::complex lacks a constructor that takes a diff --git a/src/blas/impl/KokkosBlas1_dot_spec.hpp b/src/blas/impl/KokkosBlas1_dot_spec.hpp index 4d1db931c0..7daf3e8582 100644 --- a/src/blas/impl/KokkosBlas1_dot_spec.hpp +++ b/src/blas/impl/KokkosBlas1_dot_spec.hpp @@ -49,13 +49,46 @@ #include // Include the actual functors -#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY #include #include #endif namespace KokkosBlas { namespace Impl { + +// Some platforms, such as Mac Clang, seem to get poor accuracy with +// float and complex. Work around some Trilinos test +// failures by using a higher-precision type for intermediate dot +// product sums. +// +// Note that this is not the same thing as InnerProductSpaceTraits::dot_type +template +struct DotAccumulatingScalar +{ + using type = scalar_t; +}; + +template<> +struct DotAccumulatingScalar +{ + using type = double; +}; + +template<> +struct DotAccumulatingScalar> +{ + using type = Kokkos::complex; +}; + +template +struct HasSpecialAccumulator +{ + enum : bool { + value = !std::is_same::type>::value + }; +}; + // Specialization struct which defines whether a specialization exists template struct dot_eti_spec_avail { @@ -70,7 +103,6 @@ struct dot_eti_spec_avail { // the declarations of full specializations go in this header file. // We may spread out definitions (see _INST macro below) across one or // more .cpp files. -// #define KOKKOSBLAS1_DOT_ETI_SPEC_AVAIL( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template<> \ struct dot_eti_spec_avail< \ @@ -151,32 +183,61 @@ struct Dot { static void dot (const RV&, const XV& R, const YV& X); }; +//This version never has TPL support, but it does use the same ETI system +template::value> +struct DotSpecialAccumulator { + //Note: not doing the static_asserts to validate RV, XV, YV since those errors + //would have already arisen when building the library. + using size_type = typename YV::size_type; + using dot_type = typename Kokkos::Details::InnerProductSpaceTraits< + typename XV::non_const_value_type>::dot_type; + using accum_type = typename DotAccumulatingScalar::type; + //This is the same View type as RV, but using the special accumulator as the value type + using RV_Result = Kokkos::View >; + + static void dot (const RV_Result& R, const XV& X, const YV& Y); +}; + #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Dot for single vectors (1-D Views). +// The rank-1 case is currently the only one that may use a different accumulator +// type than InnerProductSpaceTraits::dot_type. template struct Dot { + //Check some things about the template parameters at compile time to get nice error messages, + //before using them under the assumption they are valid. + static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" + "Dot<1-D>: XV is not a Kokkos::View."); + static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" + "Dot<1-D>: YV is not a Kokkos::View."); + static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" + "Dot<1-D>: RV is not a Kokkos::View."); + static_assert (RV::rank == 0, "KokkosBlas::Impl::Dot<1-D>: " + "RV is not rank 0."); + static_assert (XV::rank == 1, "KokkosBlas::Impl::Dot<1-D>: " + "XV is not rank 1."); + static_assert (YV::rank == 1, "KokkosBlas::Impl::Dot<1-D>: " + "YV is not rank 1."); + static_assert (std::is_same::value, + "KokkosBlas::Dot<1D>: R is const. " + "It must be nonconst, because it is an output argument " + "(we have to be able to write to its entries)."); + typedef typename YV::size_type size_type; + typedef typename RV::non_const_value_type dot_type; + typedef typename DotAccumulatingScalar::type special_result_type; + + //This is the same View type as RV, but using the special accumulator as the value type + typedef Kokkos::View< + special_result_type, + typename RV::array_layout, + typename RV::device_type, + Kokkos::MemoryTraits > RV_Result; static void dot (const RV& R, const XV& X, const YV& Y) { - static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" - "Dot<1-D>: RV is not a Kokkos::View."); - static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" - "Dot<1-D>: XV is not a Kokkos::View."); - static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" - "Dot<1-D>: YV is not a Kokkos::View."); - static_assert (RV::rank == 0, "KokkosBlas::Impl::Dot<1-D>: " - "RV is not rank 0."); - static_assert (XV::rank == 1, "KokkosBlas::Impl::Dot<1-D>: " - "XV is not rank 1."); - static_assert (YV::rank == 1, "KokkosBlas::Impl::Dot<1-D>: " - "YV is not rank 1."); - static_assert (std::is_same::value, - "KokkosBlas::Dot<1D>: R is const. " - "It must be nonconst, because it is an output argument " - "(we have to be able to write to its entries)."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY?"KokkosBlas::dot[ETI]":"KokkosBlas::dot[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) @@ -201,19 +262,77 @@ struct Dot } }; +//Implementation that has the same template args as Dot, but which internally uses +//DotAccumulatingScalar for the result view. +// +//Is never supported by TPLs, but uses the same dot_eti_spec_avail::value. +template +struct DotSpecialAccumulator +{ + static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" + "DotSpecialAccumulator: XV is not a Kokkos::View."); + static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" + "DotSpecialAccumulator: YV is not a Kokkos::View."); + static_assert (XV::rank == YV::rank, "KokkosBlas::Impl::" + "DotSpecialAccumulator: X and Y have different ranks."); + static_assert (XV::rank == 1, "KokkosBlas::Impl::" + "DotSpecialAccumulator: X and Y are not rank-1 Views."); + static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" + "DotSpecialAccumulator: RV is not a Kokkos::View."); + static_assert (std::is_same::value, + "KokkosBlas::Impl::DotSpecialAccumulator: X and Y have different scalar types."); + static_assert (std::is_same::value, + "KokkosBlas::Dot<1D>: R is const. " + "It must be nonconst, because it is an output argument " + "(we have to be able to write to its entries)."); + + using size_type = typename YV::size_type; + using dot_type = typename Kokkos::Details::InnerProductSpaceTraits< + typename XV::non_const_value_type>::dot_type; + using accum_type = typename DotAccumulatingScalar::type; + //This is the same View type as RV, but using the special accumulator as the value type + using RV_Result = Kokkos::View >; + + static void dot (const RV_Result& R, const XV& X, const YV& Y) + { + Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY?"KokkosBlas::dot[ETI]":"KokkosBlas::dot[noETI]"); + #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION + if(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) + printf("KokkosBlas::dot<> ETI specialization for < %s , %s >\n",typeid(XV).name(),typeid(YV).name()); + else { + printf("KokkosBlas::dot<> non-ETI specialization for < %s , %s >\n",typeid(XV).name(),typeid(YV).name()); + } + #endif + const size_type numElems = X.extent(0); + + if (numElems < static_cast (INT_MAX)) { + typedef int index_type; + DotFunctor f(X,Y); + f.run("KokkosBlas::dot<1D>",R); + } + else { + typedef int64_t index_type; + DotFunctor f(X,Y); + f.run("KokkosBlas::dot<1D>",R); + } + Kokkos::Profiling::popRegion(); + } +}; + template struct Dot { + static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" + "Dot<2-D>: XV is not a Kokkos::View."); + static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" + "Dot<2-D>: YV is not a Kokkos::View."); + static_assert (RV::rank == 1, "KokkosBlas::Impl::Dot<2-D>: " + "RV is not rank 1."); + typedef typename YV::size_type size_type; static void dot (const RV& R, const XV& X, const YV& Y) { - static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" - "Dot<2-D>: XV is not a Kokkos::View."); - static_assert (Kokkos::Impl::is_view::value, "KokkosBlas::Impl::" - "Dot<2-D>: YV is not a Kokkos::View."); - static_assert (RV::rank == 1, "KokkosBlas::Impl::Dot<2-D>: " - "RV is not rank 1."); - Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY?"KokkosBlas::dot[ETI]":"KokkosBlas::dot[noETI]"); #ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION if(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY) @@ -244,7 +363,7 @@ struct Dot >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - 1,1,false,true>; + 1,1,false,true>; \ +extern template struct DotSpecialAccumulator< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, true>; \ +extern template struct DotSpecialAccumulator< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, true>; #define KOKKOSBLAS1_DOT_ETI_SPEC_INST( SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE ) \ template struct Dot< \ @@ -283,7 +416,21 @@ template struct Dot< \ Kokkos::MemoryTraits >, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ - 1,1,false,true>; + 1,1,false,true>; \ +template struct DotSpecialAccumulator< \ + Kokkos::View >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, true>; \ +template struct DotSpecialAccumulator< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, true>; // // From 00aee41307967dde2cb77b2587e9b8d31afeea69 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 19 Feb 2020 13:13:00 -0700 Subject: [PATCH 05/14] Fix -Wnarrowing warning in InnerProductSpaceTraits, when casting complex to complex --- src/Kokkos_InnerProductSpaceTraits.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Kokkos_InnerProductSpaceTraits.hpp b/src/Kokkos_InnerProductSpaceTraits.hpp index 0bcafc24d8..0fce3a9fbf 100644 --- a/src/Kokkos_InnerProductSpaceTraits.hpp +++ b/src/Kokkos_InnerProductSpaceTraits.hpp @@ -347,7 +347,7 @@ struct CastPossiblyComplex, Kokkos::complex> { static Kokkos::complex cast (const Kokkos::complex& x) { - return {x.real(), x.imag()}; + return {static_cast(x.real()), static_cast(x.imag())}; } }; From 0b9466f8ccb2ee7550831f4331c7d91d6b630fd5 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 19 Feb 2020 13:49:24 -0700 Subject: [PATCH 06/14] WIP: cleaning out MyCRSMatrix --- perf_test/graph/KokkosGraph_color.cpp | 6 +- perf_test/graph/KokkosGraph_color_d2.cpp | 4 +- .../graph/KokkosGraph_multimem_triangle.hpp | 17 +- .../sparse/KokkosSparse_multimem_spgemm.hpp | 7 +- test_common/KokkosKernels_MatrixConverter.cpp | 4 +- test_common/KokkosKernels_MyCRSMatrix.hpp | 243 ------------------ 6 files changed, 15 insertions(+), 266 deletions(-) delete mode 100644 test_common/KokkosKernels_MyCRSMatrix.hpp diff --git a/perf_test/graph/KokkosGraph_color.cpp b/perf_test/graph/KokkosGraph_color.cpp index de050da4bf..4c5b564689 100644 --- a/perf_test/graph/KokkosGraph_color.cpp +++ b/perf_test/graph/KokkosGraph_color.cpp @@ -50,7 +50,7 @@ #include #include "KokkosKernels_IOUtils.hpp" -#include "KokkosKernels_MyCRSMatrix.hpp" +#include "KokkosSparse_CrsMatrix.hpp" #include "KokkosKernels_TestParameters.hpp" #include "KokkosGraph_Distance1Color.hpp" @@ -318,7 +318,7 @@ void run_multi_mem_experiment(Parameters params){ typedef Kokkos::Device myFastDevice; typedef Kokkos::Device mySlowExecSpace; - typedef typename MyKokkosSparse::CrsMatrix fast_crstmat_t; + typedef typename KokkosSparse::CrsMatrix fast_crstmat_t; typedef typename fast_crstmat_t::StaticCrsGraphType fast_graph_t; //typedef typename fast_graph_t::row_map_type::non_const_type fast_row_map_view_t; //typedef typename fast_graph_t::entries_type::non_const_type fast_cols_view_t; @@ -326,7 +326,7 @@ void run_multi_mem_experiment(Parameters params){ //typedef typename fast_graph_t::row_map_type::const_type const_fast_row_map_view_t; //typedef typename fast_graph_t::entries_type::const_type const_fast_cols_view_t; - typedef typename MyKokkosSparse::CrsMatrix slow_crstmat_t; + typedef typename KokkosSparse::CrsMatrix slow_crstmat_t; typedef typename slow_crstmat_t::StaticCrsGraphType slow_graph_t; //typedef typename slow_graph_t::row_map_type::non_const_type slow_row_map_view_t; diff --git a/perf_test/graph/KokkosGraph_color_d2.cpp b/perf_test/graph/KokkosGraph_color_d2.cpp index ee19481d58..9b5d0ddccb 100644 --- a/perf_test/graph/KokkosGraph_color_d2.cpp +++ b/perf_test/graph/KokkosGraph_color_d2.cpp @@ -61,7 +61,7 @@ #include #include -#include +#include "KokkosSparse_CrsMatrix.hpp" #include #include @@ -602,7 +602,7 @@ void experiment_driver(Parameters params) { using myExecSpace = exec_space; using myFastDevice = Kokkos::Device; - using fast_crstmat_t = typename MyKokkosSparse::CrsMatrix; + using fast_crstmat_t = typename KokkosSparse::CrsMatrix; using fast_graph_t = typename fast_crstmat_t::StaticCrsGraphType; char *a_mat_file = params.a_mtx_bin_file; diff --git a/perf_test/graph/KokkosGraph_multimem_triangle.hpp b/perf_test/graph/KokkosGraph_multimem_triangle.hpp index a33f1897f9..7b8206e172 100644 --- a/perf_test/graph/KokkosGraph_multimem_triangle.hpp +++ b/perf_test/graph/KokkosGraph_multimem_triangle.hpp @@ -42,7 +42,8 @@ */ #include "KokkosGraph_run_triangle.hpp" -#include "KokkosKernels_MyCRSMatrix.hpp" +#include "KokkosSparse_CrsMatrix.hpp" + namespace KokkosKernels{ namespace Experiment{ @@ -55,22 +56,12 @@ namespace Experiment{ typedef Kokkos::Device myFastDevice; typedef Kokkos::Device mySlowExecSpace; - typedef typename MyKokkosSparse::CrsMatrix fast_crstmat_t; + typedef typename KokkosSparse::CrsMatrix fast_crstmat_t; typedef typename fast_crstmat_t::StaticCrsGraphType fast_graph_t; - //typedef typename fast_graph_t::row_map_type::non_const_type fast_row_map_view_t; - //typedef typename fast_graph_t::entries_type::non_const_type fast_cols_view_t; - - //typedef typename fast_graph_t::row_map_type::const_type const_fast_row_map_view_t; - //typedef typename fast_graph_t::entries_type::const_type const_fast_cols_view_t; - typedef typename MyKokkosSparse::CrsMatrix slow_crstmat_t; + typedef typename KokkosSparse::CrsMatrix slow_crstmat_t; typedef typename slow_crstmat_t::StaticCrsGraphType slow_graph_t; - //typedef typename slow_graph_t::row_map_type::non_const_type slow_row_map_view_t; - //typedef typename slow_graph_t::entries_type::non_const_type slow_cols_view_t; - //typedef typename slow_graph_t::row_map_type::const_type const_slow_row_map_view_t; - //typedef typename slow_graph_t::entries_type::const_type const_slow_cols_view_t; - char *a_mat_file = params.a_mtx_bin_file; //char *b_mat_file = params.b_mtx_bin_file; //char *c_mat_file = params.c_mtx_bin_file; diff --git a/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp b/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp index 1ed50a85bd..42568f2d67 100644 --- a/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp +++ b/perf_test/sparse/KokkosSparse_multimem_spgemm.hpp @@ -41,8 +41,9 @@ //@HEADER */ -#include "KokkosKernels_MyCRSMatrix.hpp" +#include "KokkosSparse_CrsMatrix.hpp" #include "KokkosSparse_run_spgemm.hpp" + namespace KokkosKernels{ namespace Experiment{ @@ -55,7 +56,7 @@ namespace Experiment{ typedef Kokkos::Device myFastDevice; typedef Kokkos::Device mySlowExecSpace; - typedef typename MyKokkosSparse::CrsMatrix fast_crstmat_t; + typedef typename KokkosSparse::CrsMatrix fast_crstmat_t; //typedef typename fast_crstmat_t::StaticCrsGraphType fast_graph_t; //typedef typename fast_crstmat_t::row_map_type::non_const_type fast_row_map_view_t; typedef typename fast_crstmat_t::index_type::non_const_type fast_cols_view_t; @@ -64,7 +65,7 @@ namespace Experiment{ typedef typename fast_crstmat_t::index_type::const_type const_fast_cols_view_t; typedef typename fast_crstmat_t::values_type::const_type const_fast_values_view_t; - typedef typename MyKokkosSparse::CrsMatrix slow_crstmat_t; + typedef typename KokkosSparse::CrsMatrix slow_crstmat_t; //typedef typename slow_crstmat_t::StaticCrsGraphType slow_graph_t; //typedef typename slow_crstmat_t::row_map_type::non_const_type slow_row_map_view_t; typedef typename slow_crstmat_t::index_type::non_const_type slow_cols_view_t; diff --git a/test_common/KokkosKernels_MatrixConverter.cpp b/test_common/KokkosKernels_MatrixConverter.cpp index 8768b70c13..aaf6b10f57 100644 --- a/test_common/KokkosKernels_MatrixConverter.cpp +++ b/test_common/KokkosKernels_MatrixConverter.cpp @@ -46,7 +46,7 @@ #include "KokkosKernels_Utils.hpp" #include -#include "KokkosKernels_MyCRSMatrix.hpp" +#include "KokkosSparse_CrsMatrix.hpp" int main (int argc, char* argv[]){ typedef int size_type; @@ -93,7 +93,7 @@ int main (int argc, char* argv[]){ } typedef Kokkos::DefaultHostExecutionSpace MyExecSpace; - typedef typename MyKokkosSparse::CrsMatrix crstmat_t; + typedef typename KokkosSparse::CrsMatrix crstmat_t; typedef typename crstmat_t::StaticCrsGraphType graph_t; typedef typename graph_t::row_map_type::non_const_type row_map_view_t; typedef typename graph_t::entries_type::non_const_type cols_view_t; diff --git a/test_common/KokkosKernels_MyCRSMatrix.hpp b/test_common/KokkosKernels_MyCRSMatrix.hpp deleted file mode 100644 index ab50f763b9..0000000000 --- a/test_common/KokkosKernels_MyCRSMatrix.hpp +++ /dev/null @@ -1,243 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// KokkosKernels 0.9: Linear Algebra and Graph Kernels -// Copyright 2017 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include "KokkosKernels_Utils.hpp" -namespace MyKokkosSparse{ - -template -class StaticCrsGraph { - -public: - typedef OrdinalType data_type; - typedef typename Device::execution_space execution_space; - typedef Device device_type; - typedef SizeType size_type; - - typedef Kokkos::View row_map_type; - typedef Kokkos::View entries_type; - - entries_type entries; - row_map_type row_map; - OrdinalType num_cols; - - //! Construct an empty view. - StaticCrsGraph () : entries(), row_map(), num_cols() {} - - //! Copy constructor (shallow copy). - StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map), num_cols(rhs.num_cols) - {} - - template - StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_) - {} - template - StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_, OrdinalType numCols_) : - entries (entries_), row_map (row_map_), num_cols(numCols_) {} - /** \brief Assign to a view of the rhs array. - * If the old view is the last view - * then allocated memory is deallocated. - */ - StaticCrsGraph& operator= (const StaticCrsGraph& rhs) { - entries = rhs.entries; - row_map = rhs.row_map; - return *this; - } - - KOKKOS_INLINE_FUNCTION - data_type numCols() const { - return num_cols; - } - - /** \brief Destroy this view of the array. - * If the last view then allocated memory is deallocated. - */ - ~StaticCrsGraph() {} - KOKKOS_INLINE_FUNCTION - data_type numRows() const { - return (row_map.extent(0) != 0) ? - row_map.extent(0) - static_cast (1) : - static_cast (0); - } -}; - - - - -template -class CrsMatrix{ -public: - typedef typename Kokkos::ViewTraits::host_mirror_space host_mirror_space ; - - typedef typename Device::execution_space execution_space; - typedef typename Device::memory_space memory_space; - typedef Kokkos::Device device_type; - typedef ScalarType value_type; - typedef OrdinalType ordinal_type; - typedef MemoryTraits memory_traits; - typedef SizeType size_type; - - typedef StaticCrsGraph StaticCrsGraphType; - typedef typename StaticCrsGraphType::entries_type index_type; - typedef typename index_type::non_const_value_type const_ordinal_type; - typedef typename index_type::non_const_value_type non_const_ordinal_type; - typedef typename StaticCrsGraphType::row_map_type row_map_type; - typedef Kokkos::View values_type; - typedef CrsMatrix HostMirror; - - StaticCrsGraphType graph; - values_type values; - CrsMatrix () : - numCols_ (0) - {} - CrsMatrix (const std::string& label, - const OrdinalType& ncols, - const values_type& vals, - const StaticCrsGraphType& graph_) : - graph (graph_), - values (vals), - numCols_ (ncols) - { - } - - //! The number of rows in the sparse matrix. - KOKKOS_INLINE_FUNCTION ordinal_type numRows () const { - return graph.numRows (); - } - - //! The number of columns in the sparse matrix. - KOKKOS_INLINE_FUNCTION ordinal_type numCols () const { - return numCols_; - } - - //! The number of stored entries in the sparse matrix. - KOKKOS_INLINE_FUNCTION size_type nnz () const { - return graph.entries.extent(0); - } - ordinal_type numCols_; -}; - - -template -crsMat_t get_crsmat( - typename crsMat_t::row_map_type::non_const_type::value_type *xadj, - typename crsMat_t::index_type::non_const_type::value_type *adj, - typename crsMat_t::values_type::non_const_type::value_type *ew, - typename crsMat_t::row_map_type::non_const_type::value_type ne, - typename crsMat_t::index_type::non_const_type::value_type nv, - int is_one_based){ - - typedef typename crsMat_t::StaticCrsGraphType graph_t; - typedef typename crsMat_t::row_map_type::non_const_type row_map_view_t; - typedef typename crsMat_t::index_type::non_const_type cols_view_t; - typedef typename crsMat_t::values_type::non_const_type values_view_t; - - typedef typename row_map_view_t::value_type size_type; - typedef typename cols_view_t::value_type lno_t; - typedef typename values_view_t::value_type scalar_t; - - - - row_map_view_t rowmap_view("rowmap_view", nv+1); - cols_view_t columns_view("colsmap_view", ne); - values_view_t values_view("values_view", ne); - - KokkosKernels::Impl::copy_vector(ne, ew, values_view); - KokkosKernels::Impl::copy_vector(ne, adj, columns_view); - KokkosKernels::Impl::copy_vector(nv+1, xadj, rowmap_view); - - size_type ncols = 0; - KokkosKernels::Impl::view_reduce_max(ne, columns_view, ncols); - ncols += 1; - - if (is_one_based) { - //if algorithm is mkl_csrmultcsr convert to 1 base so that we dont dublicate the memory at the experiments/ - KokkosKernels::Impl::kk_a_times_x_plus_b< row_map_view_t, row_map_view_t, int, int, myExecSpace>(nv + 1, rowmap_view, rowmap_view, 1, 1); - KokkosKernels::Impl::kk_a_times_x_plus_b< cols_view_t, cols_view_t, int, int, myExecSpace>(ne, columns_view, columns_view, 1, 1); - } - - graph_t static_graph (columns_view, rowmap_view); - crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); - return crsmat; -} - -template -out_crsMat_t copy_crsmat(in_crsMat_t inputMat){ -/* - typedef typename out_crsMat_t::StaticCrsGraphType graph_t; - typedef typename out_crsMat_t::row_map_type::non_const_type row_map_view_t; - typedef typename out_crsMat_t::index_type::non_const_type cols_view_t; - typedef typename out_crsMat_t::values_type::non_const_type values_view_t; - - - typedef typename in_crsMat_t::StaticCrsGraphType in_graph_t; - typedef typename in_crsMat_t::row_map_type::const_type in_row_map_view_t; - typedef typename in_crsMat_t::index_type::const_type in_cols_view_t; - typedef typename in_crsMat_t::values_type::const_type in_values_view_t; - - typedef typename row_map_view_t::value_type size_type; - typedef typename cols_view_t::value_type lno_t; - typedef typename values_view_t::value_type scalar_t; - - - - const size_type nv = inputMat.numRows(); - const size_type ne = inputMat.graph.entries.extent(0); - - row_map_view_t rowmap_view("rowmap_view", nv+1); - cols_view_t columns_view("colsmap_view", ne); - values_view_t values_view("values_view", ne); - - KokkosKernels::Impl::copy_vector(ne, inputMat.values, values_view); - KokkosKernels::Impl::copy_vector(ne, inputMat.graph.entries, columns_view); - KokkosKernels::Impl::copy_vector(nv+1, inputMat.graph.row_map, rowmap_view); - - size_type ncols = 0; - KokkosKernels::Impl::view_reduce_max(ne, columns_view, ncols); - ncols += 1; - - graph_t static_graph (columns_view, rowmap_view); - out_crsMat_t crsmat("CrsMatrix", ncols, values_view, static_graph); - return crsmat; - */ -} -} From cef4ceeda7012d8fcbfabcfeca2847be5567c0bb Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 19 Feb 2020 14:15:16 -0700 Subject: [PATCH 07/14] Removed unused file ...spgemm_impl_common.hpp --- .../impl/KokkosSparse_spgemm_impl_common.hpp | 50 ------------------- 1 file changed, 50 deletions(-) delete mode 100644 src/sparse/impl/KokkosSparse_spgemm_impl_common.hpp diff --git a/src/sparse/impl/KokkosSparse_spgemm_impl_common.hpp b/src/sparse/impl/KokkosSparse_spgemm_impl_common.hpp deleted file mode 100644 index 81e40743e8..0000000000 --- a/src/sparse/impl/KokkosSparse_spgemm_impl_common.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// KokkosKernels 0.9: Linear Algebra and Graph Kernels -// Copyright 2017 Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -namespace KokkosSparse{ - -namespace Impl{ - - -} -} From 73acc1a8754d9794f6e6335c5709cd1aa0edf442 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 19 Feb 2020 14:58:24 -0700 Subject: [PATCH 08/14] Make perf tests use StaticCrsGraph/CrsMatrix instead of the test_common versions of those which are now gone --- perf_test/graph/KokkosGraph_color.cpp | 44 ++-- perf_test/graph/KokkosGraph_color_d2.cpp | 73 +++--- perf_test/sparse/KokkosSparse_run_spgemm.hpp | 237 +++++++++---------- 3 files changed, 158 insertions(+), 196 deletions(-) diff --git a/perf_test/graph/KokkosGraph_color.cpp b/perf_test/graph/KokkosGraph_color.cpp index 4c5b564689..d71c75c1b4 100644 --- a/perf_test/graph/KokkosGraph_color.cpp +++ b/perf_test/graph/KokkosGraph_color.cpp @@ -210,7 +210,7 @@ namespace Experiment{ template void run_experiment( - crsGraph_t crsGraph, Parameters params){ + crsGraph_t crsGraph, int num_cols, Parameters params){ //using namespace KokkosSparse; using namespace KokkosGraph; using namespace KokkosGraph::Experimental; @@ -231,8 +231,6 @@ void run_experiment( typedef typename crsGraph_t3::row_map_type::non_const_type lno_view_t; typedef typename crsGraph_t3::entries_type::non_const_type lno_nnz_view_t; - - typedef typename lno_view_t::non_const_value_type size_type; typedef typename lno_nnz_view_t::non_const_value_type lno_t; @@ -295,7 +293,7 @@ void run_experiment( } - graph_color_symbolic(&kh,crsGraph.numRows(), crsGraph.numCols(), crsGraph.row_map, crsGraph.entries); + graph_color_symbolic(&kh,crsGraph.numRows(), num_cols, crsGraph.row_map, crsGraph.entries); std::cout << std::endl << "Time:" << kh.get_graph_coloring_handle()->get_overall_coloring_time() << " " @@ -341,21 +339,21 @@ void run_multi_mem_experiment(Parameters params){ slow_graph_t a_slow_crsgraph, /*b_slow_crsgraph,*/ c_slow_crsgraph; fast_graph_t a_fast_crsgraph, /*b_fast_crsgraph,*/ c_fast_crsgraph; - + int num_cols = 0; //read a and b matrices and store them on slow or fast memory. if (params.a_mem_space == 1){ fast_crstmat_t a_fast_crsmat; a_fast_crsmat = KokkosKernels::Impl::read_kokkos_crst_matrix(a_mat_file); a_fast_crsgraph = a_fast_crsmat.graph; - a_fast_crsgraph.num_cols = a_fast_crsmat.numCols(); + num_cols = a_fast_crsmat.numCols(); } else { slow_crstmat_t a_slow_crsmat; a_slow_crsmat = KokkosKernels::Impl::read_kokkos_crst_matrix(a_mat_file); a_slow_crsgraph = a_slow_crsmat.graph; - a_slow_crsgraph.num_cols = a_slow_crsmat.numCols(); + num_cols = a_slow_crsmat.numCols(); } @@ -366,13 +364,13 @@ void run_multi_mem_experiment(Parameters params){ /* c_fast_crsgraph = */ KokkosKernels::Experiment::run_experiment - (a_fast_crsgraph, /*b_fast_crsgraph,*/ params); + (a_fast_crsgraph, num_cols, params); } else { /* c_fast_crsgraph = */ KokkosKernels::Experiment::run_experiment - (a_fast_crsgraph, /*b_fast_crsgraph,*/ params); + (a_fast_crsgraph, num_cols, params); } } @@ -382,13 +380,13 @@ void run_multi_mem_experiment(Parameters params){ /*c_slow_crsgraph =*/ KokkosKernels::Experiment::run_experiment - (a_fast_crsgraph, /*b_fast_crsgraph,*/ params); + (a_fast_crsgraph, num_cols, params); } else { /*c_slow_crsgraph =*/ KokkosKernels::Experiment::run_experiment - (a_fast_crsgraph, /*b_fast_crsgraph,*/ params); + (a_fast_crsgraph, num_cols, params); } } } @@ -399,13 +397,13 @@ void run_multi_mem_experiment(Parameters params){ /* c_fast_crsgraph = */ KokkosKernels::Experiment::run_experiment - (a_fast_crsgraph, /*b_slow_crsgraph,*/ params); + (a_fast_crsgraph, num_cols, params); } else { /* c_fast_crsgraph = */ KokkosKernels::Experiment::run_experiment - (a_fast_crsgraph, /*b_slow_crsgraph,*/ params); + (a_fast_crsgraph, num_cols, params); } } @@ -415,13 +413,13 @@ void run_multi_mem_experiment(Parameters params){ /*c_slow_crsgraph =*/ KokkosKernels::Experiment::run_experiment - (a_fast_crsgraph, /*b_slow_crsgraph,*/ params); + (a_fast_crsgraph, num_cols, params); } else { /*c_slow_crsgraph =*/ KokkosKernels::Experiment::run_experiment - (a_fast_crsgraph, /*b_slow_crsgraph,*/ params); + (a_fast_crsgraph, num_cols, params); } } @@ -435,13 +433,13 @@ void run_multi_mem_experiment(Parameters params){ /* c_fast_crsgraph = */ KokkosKernels::Experiment::run_experiment - (a_slow_crsgraph, /*b_fast_crsgraph,*/ params); + (a_slow_crsgraph, num_cols, params); } else { /* c_fast_crsgraph = */ KokkosKernels::Experiment::run_experiment - (a_slow_crsgraph, /*b_fast_crsgraph,*/ params); + (a_slow_crsgraph, num_cols, params); } } @@ -451,13 +449,13 @@ void run_multi_mem_experiment(Parameters params){ /*c_slow_crsgraph =*/ KokkosKernels::Experiment::run_experiment - (a_slow_crsgraph, /*b_fast_crsgraph,*/ params); + (a_slow_crsgraph, num_cols, params); } else { /*c_slow_crsgraph =*/ KokkosKernels::Experiment::run_experiment - (a_slow_crsgraph, /*b_fast_crsgraph,*/ params); + (a_slow_crsgraph, num_cols, params); } } } @@ -468,13 +466,13 @@ void run_multi_mem_experiment(Parameters params){ /* c_fast_crsgraph = */ KokkosKernels::Experiment::run_experiment - (a_slow_crsgraph, /*b_slow_crsgraph,*/ params); + (a_slow_crsgraph, num_cols, params); } else { /* c_fast_crsgraph = */ KokkosKernels::Experiment::run_experiment - (a_slow_crsgraph, /*b_slow_crsgraph,*/ params); + (a_slow_crsgraph, num_cols, params); } } @@ -484,13 +482,13 @@ void run_multi_mem_experiment(Parameters params){ /*c_slow_crsgraph =*/ KokkosKernels::Experiment::run_experiment - (a_slow_crsgraph, /*b_slow_crsgraph,*/ params); + (a_slow_crsgraph, num_cols, params); } else { /*c_slow_crsgraph =*/ KokkosKernels::Experiment::run_experiment - (a_slow_crsgraph, /*b_slow_crsgraph,*/ params); + (a_slow_crsgraph, num_cols, params); } } diff --git a/perf_test/graph/KokkosGraph_color_d2.cpp b/perf_test/graph/KokkosGraph_color_d2.cpp index 9b5d0ddccb..ddfc38aea2 100644 --- a/perf_test/graph/KokkosGraph_color_d2.cpp +++ b/perf_test/graph/KokkosGraph_color_d2.cpp @@ -274,12 +274,20 @@ std::string getCurrentDateTimeStr() } -template -void run_experiment(crsGraph_t crsGraph, Parameters params) +template +void run_experiment(crsGraph_t crsGraph, int num_cols, Parameters params) { using namespace KokkosGraph; using namespace KokkosGraph::Experimental; + using device_t = typename crsGraph_t::device_type; + using exec_space = typename device_t::execution_space; + using mem_space = typename device_t::memory_space; + using lno_view_t = typename crsGraph_t::row_map_type::non_const_type; + using lno_nnz_view_t = typename crsGraph_t::entries_type::non_const_type; + using size_type = typename lno_view_t::non_const_value_type; + using lno_t = typename lno_nnz_view_t::non_const_value_type; + int algorithm = params.algorithm; int repeat = params.repeat; int chunk_size = params.chunk_size; @@ -289,16 +297,9 @@ void run_experiment(crsGraph_t crsGraph, Parameters params) int use_dynamic_scheduling = params.use_dynamic_scheduling; int verbose = params.verbose; - // char spgemm_step = params.spgemm_step; int vector_size = params.vector_size; - using lno_view_t = typename crsGraph_t3::row_map_type::non_const_type; - using lno_nnz_view_t = typename crsGraph_t3::entries_type::non_const_type; - - using size_type = typename lno_view_t::non_const_value_type; - using lno_t = typename lno_nnz_view_t::non_const_value_type; - - typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; + typedef KokkosKernels::Experimental::KokkosKernelsHandle KernelHandle; // Get Date/Time stamps of start to use later when printing out summary data. //auto t = std::time(nullptr); @@ -366,7 +367,7 @@ void run_experiment(crsGraph_t crsGraph, Parameters params) // Loop over # of experiments to run for(int i = 0; i < repeat; ++i) { - graph_compute_distance2_color(&kh, crsGraph.numRows(), crsGraph.numCols(), crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries); + graph_compute_distance2_color(&kh, crsGraph.numRows(), num_cols, crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries); total_colors += kh.get_distance2_graph_coloring_handle()->get_num_colors(); total_phases += kh.get_distance2_graph_coloring_handle()->get_num_phases(); @@ -393,7 +394,7 @@ void run_experiment(crsGraph_t crsGraph, Parameters params) bool d2_coloring_is_valid = false; bool d2_coloring_validation_flags[4] = { false }; - d2_coloring_is_valid = KokkosGraph::Impl::graph_verify_distance2_color(&kh, crsGraph.numRows(), crsGraph.numCols(), crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries, d2_coloring_validation_flags); + d2_coloring_is_valid = KokkosGraph::Impl::graph_verify_distance2_color(&kh, crsGraph.numRows(), num_cols, crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries, d2_coloring_validation_flags); // Print out messages based on coloring validation check. if(d2_coloring_is_valid) @@ -419,7 +420,7 @@ void run_experiment(crsGraph_t crsGraph, Parameters params) // ------------------------------------------ // Print out the colors histogram // ------------------------------------------ - KokkosGraph::Impl::graph_print_distance2_color_histogram(&kh, crsGraph.numRows(), crsGraph.numCols(), crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries, false); + KokkosGraph::Impl::graph_print_distance2_color_histogram(&kh, crsGraph.numRows(), num_cols, crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries, false); } // for i... @@ -438,7 +439,7 @@ void run_experiment(crsGraph_t crsGraph, Parameters params) non_const_1d_size_type_view_t degree_d2_dist = non_const_1d_size_type_view_t("degree d2", crsGraph.numRows()); size_t degree_d2_max=0; - KokkosGraph::Impl::graph_compute_distance2_degree(&kh, crsGraph.numRows(), crsGraph.numCols(), + KokkosGraph::Impl::graph_compute_distance2_degree(&kh, crsGraph.numRows(), num_cols, crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries, degree_d2_dist, degree_d2_max); @@ -590,46 +591,26 @@ void run_experiment(crsGraph_t crsGraph, Parameters params) << "," << label_algorithm << "," << Kokkos::DefaultExecutionSpace::concurrency() << ","; - KokkosGraph::Impl::graph_print_distance2_color_histogram(&kh, crsGraph.numRows(), crsGraph.numCols(), crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries, true); + KokkosGraph::Impl::graph_print_distance2_color_histogram(&kh, crsGraph.numRows(), num_cols, crsGraph.row_map, crsGraph.entries, crsGraph.row_map, crsGraph.entries, true); std::cout << std::endl; // Kokkos::print_configuration(std::cout); } -template +template void experiment_driver(Parameters params) { - using myExecSpace = exec_space; - using myFastDevice = Kokkos::Device; - using fast_crstmat_t = typename KokkosSparse::CrsMatrix; - using fast_graph_t = typename fast_crstmat_t::StaticCrsGraphType; - - char *a_mat_file = params.a_mtx_bin_file; - - fast_graph_t a_fast_crsgraph, /*b_fast_crsgraph,*/ c_fast_crsgraph; - - if(params.a_mem_space == 1) - { - fast_crstmat_t a_fast_crsmat; - a_fast_crsmat = KokkosKernels::Impl::read_kokkos_crst_matrix(a_mat_file); - a_fast_crsgraph = a_fast_crsmat.graph; - a_fast_crsgraph.num_cols = a_fast_crsmat.numCols(); - } - - if(params.a_mem_space == 1 && params.b_mem_space==1 && params.c_mem_space==1 && params.work_mem_space==1) - { - KokkosKernels::Experiment::run_experiment - (a_fast_crsgraph, /*b_fast_crsgraph,*/ params); - } - else - { - std::cout << ">>> unhandled memspace configuration flags:" << std::endl - << ">>> a_mem_space = " << params.a_mem_space << std::endl - << ">>> b_mem_space = " << params.a_mem_space << std::endl - << ">>> c_mem_space = " << params.a_mem_space << std::endl - << ">>> work_mem_space = " << params.work_mem_space << std::endl; - } + using myExecSpace = exec_space; + using device_t = Kokkos::Device; + using crsMat_t = typename KokkosSparse::CrsMatrix; + using graph_t = typename crsMat_t::StaticCrsGraphType; + + crsMat_t A = KokkosKernels::Impl::read_kokkos_crst_matrix(params.a_mtx_bin_file); + graph_t Agraph = A.graph; + int num_cols = A.numCols(); + + KokkosKernels::Experiment::run_experiment(Agraph, num_cols, params); } diff --git a/perf_test/sparse/KokkosSparse_run_spgemm.hpp b/perf_test/sparse/KokkosSparse_run_spgemm.hpp index 35dc757e68..4ba0af0478 100644 --- a/perf_test/sparse/KokkosSparse_run_spgemm.hpp +++ b/perf_test/sparse/KokkosSparse_run_spgemm.hpp @@ -177,19 +177,21 @@ crsMat_t3 run_experiment( int check_output = params.check_output; int mkl_keep_output = params.mkl_keep_output; //spgemm_step++; - typedef typename crsMat_t3::values_type::non_const_type scalar_view_t; - typedef typename crsMat_t3::StaticCrsGraphType::row_map_type::non_const_type lno_view_t; - typedef typename crsMat_t3::StaticCrsGraphType::entries_type::non_const_type lno_nnz_view_t; - - lno_view_t row_mapC; - lno_nnz_view_t entriesC; - scalar_view_t valuesC; - - + typedef typename crsMat_t3::values_type::non_const_type scalar_view_t; + typedef typename crsMat_t3::row_map_type::non_const_type lno_view_t; + typedef typename crsMat_t3::index_type::non_const_type lno_nnz_view_t; typedef typename lno_nnz_view_t::value_type lno_t; typedef typename lno_view_t::value_type size_type; typedef typename scalar_view_t::value_type scalar_t; + typedef CrsMatrix crsMatHost_t; + typedef typename crsMatHost_t::values_type::non_const_type host_scalar_view_t; + typedef typename crsMatHost_t::row_map_type::non_const_type host_lno_view_t; + typedef typename crsMatHost_t::index_type::non_const_type host_lno_nnz_view_t; + + lno_view_t row_mapC; + lno_nnz_view_t entriesC; + scalar_view_t valuesC; typedef KokkosKernels::Experimental::KokkosKernelsHandle get_c_nnz(); - if (c_nnz_size){ - entriesC_ref = typename lno_nnz_view_t::HostMirror (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size); - valuesC_ref = typename scalar_view_t::HostMirror (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size); - } + entriesC_ref = host_lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size); + valuesC_ref = host_scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size); spgemm_numeric( &sequential_kh, @@ -285,132 +284,116 @@ crsMat_t3 run_experiment( ); ExecSpace().fence(); - typename crsMat_t3::HostMirror::StaticCrsGraphType static_graph (entriesC_ref, row_mapC_ref); - typename crsMat_t3::HostMirror Ccrsmat("CrsMatrixC", k, valuesC_ref, static_graph); - Ccrsmat_ref = Ccrsmat; + Ccrsmat_ref = crsMatHost_t("CorrectC", m, k, valuesC_ref.extent(0), valuesC_ref, row_mapC_ref, entriesC_ref); } - for (int i = 0; i < repeat; ++i){ - kh.create_spgemm_handle(KokkosSparse::SPGEMMAlgorithm(algorithm)); + for (int i = 0; i < repeat; ++i) { + kh.create_spgemm_handle(KokkosSparse::SPGEMMAlgorithm(algorithm)); - kh.get_spgemm_handle()->mkl_keep_output = mkl_keep_output; - kh.get_spgemm_handle()->set_mkl_sort_option(params.mkl_sort_option); + kh.get_spgemm_handle()->mkl_keep_output = mkl_keep_output; + kh.get_spgemm_handle()->set_mkl_sort_option(params.mkl_sort_option); - //if mkl2 input needs to be converted to 1base. - kh.get_spgemm_handle()->mkl_convert_to_1base = true; + //if mkl2 input needs to be converted to 1base. + kh.get_spgemm_handle()->mkl_convert_to_1base = true; - //250000 default. if cache-mode is used on KNL can increase to 1M. - kh.get_spgemm_handle()->MaxColDenseAcc = params.MaxColDenseAcc; + //250000 default. if cache-mode is used on KNL can increase to 1M. + kh.get_spgemm_handle()->MaxColDenseAcc = params.MaxColDenseAcc; - if (i == 0){ - kh.get_spgemm_handle()->set_read_write_cost_calc (calculate_read_write_cost); - } - //do the compression whether in 2 step, or 1 step. - kh.get_spgemm_handle()->set_compression_steps(!params.compression2step); - //whether to scale the hash more. default is 1, so no scale. - kh.get_spgemm_handle()->set_min_hash_size_scale(params.minhashscale); - //max occupancy in 1-level LP hashes. LL hashes can be 100% - kh.get_spgemm_handle()->set_first_level_hash_cut_off(params.first_level_hash_cut_off); - //min reduction on FLOPs to run compression - kh.get_spgemm_handle()->set_compression_cut_off(params.compression_cut_off); - - - - row_mapC = lno_view_t - ("non_const_lnow_row", - m + 1); - entriesC = lno_nnz_view_t ("entriesC (empty)", 0); - valuesC = scalar_view_t ("valuesC (empty)", 0); - - - Kokkos::Impl::Timer timer1; - spgemm_symbolic ( - &kh, - m, - n, - k, - crsMat.graph.row_map, - crsMat.graph.entries, - TRANPOSEFIRST, - crsMat2.graph.row_map, - crsMat2.graph.entries, - TRANPOSESECOND, - row_mapC - ); - - ExecSpace().fence(); - double symbolic_time = timer1.seconds(); - - Kokkos::Impl::Timer timer3; - size_type c_nnz_size = kh.get_spgemm_handle()->get_c_nnz(); - if (verbose) std::cout << "C SIZE:" << c_nnz_size << std::endl; - if (c_nnz_size){ - entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size); - valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size); - } + if (i == 0){ + kh.get_spgemm_handle()->set_read_write_cost_calc (calculate_read_write_cost); + } + //do the compression whether in 2 step, or 1 step. + kh.get_spgemm_handle()->set_compression_steps(!params.compression2step); + //whether to scale the hash more. default is 1, so no scale. + kh.get_spgemm_handle()->set_min_hash_size_scale(params.minhashscale); + //max occupancy in 1-level LP hashes. LL hashes can be 100% + kh.get_spgemm_handle()->set_first_level_hash_cut_off(params.first_level_hash_cut_off); + //min reduction on FLOPs to run compression + kh.get_spgemm_handle()->set_compression_cut_off(params.compression_cut_off); + + row_mapC = lno_view_t + ("non_const_lnow_row", + m + 1); + entriesC = lno_nnz_view_t ("entriesC (empty)", 0); + valuesC = scalar_view_t ("valuesC (empty)", 0); + + Kokkos::Impl::Timer timer1; + spgemm_symbolic ( + &kh, + m, + n, + k, + crsMat.graph.row_map, + crsMat.graph.entries, + TRANPOSEFIRST, + crsMat2.graph.row_map, + crsMat2.graph.entries, + TRANPOSESECOND, + row_mapC + ); - spgemm_numeric( - &kh, - m, - n, - k, - crsMat.graph.row_map, - crsMat.graph.entries, - crsMat.values, - TRANPOSEFIRST, - - crsMat2.graph.row_map, - crsMat2.graph.entries, - crsMat2.values, - TRANPOSESECOND, - row_mapC, - entriesC, - valuesC - ); - ExecSpace().fence(); - double numeric_time = timer3.seconds(); - - std::cout - << "mm_time:" << symbolic_time + numeric_time - << " symbolic_time:" << symbolic_time - << " numeric_time:" << numeric_time << std::endl; - } - if (verbose) { - std::cout << "row_mapC:" << row_mapC.extent(0) << std::endl; - std::cout << "entriesC:" << entriesC.extent(0) << std::endl; - std::cout << "valuesC:" << valuesC.extent(0) << std::endl; - KokkosKernels::Impl::print_1Dview(valuesC); - KokkosKernels::Impl::print_1Dview(entriesC); - KokkosKernels::Impl::print_1Dview(row_mapC); - } + ExecSpace().fence(); + double symbolic_time = timer1.seconds(); + Kokkos::Impl::Timer timer3; + size_type c_nnz_size = kh.get_spgemm_handle()->get_c_nnz(); + if (verbose) std::cout << "C SIZE:" << c_nnz_size << std::endl; + if (c_nnz_size){ + entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size); + valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size); + } - if (check_output){ + spgemm_numeric( + &kh, + m, + n, + k, + crsMat.graph.row_map, + crsMat.graph.entries, + crsMat.values, + TRANPOSEFIRST, - typename lno_view_t::HostMirror row_mapC_host = Kokkos::create_mirror_view (row_mapC); - typename lno_nnz_view_t::HostMirror entriesC_host = Kokkos::create_mirror_view (entriesC); - typename scalar_view_t::HostMirror valuesC_host = Kokkos::create_mirror_view (valuesC); + crsMat2.graph.row_map, + crsMat2.graph.entries, + crsMat2.values, + TRANPOSESECOND, + row_mapC, + entriesC, + valuesC + ); + ExecSpace().fence(); + double numeric_time = timer3.seconds(); - Kokkos::deep_copy (row_mapC_host, row_mapC); + std::cout + << "mm_time:" << symbolic_time + numeric_time + << " symbolic_time:" << symbolic_time + << " numeric_time:" << numeric_time << std::endl; + } + if (verbose) { + std::cout << "row_mapC:" << row_mapC.extent(0) << std::endl; + std::cout << "entriesC:" << entriesC.extent(0) << std::endl; + std::cout << "valuesC:" << valuesC.extent(0) << std::endl; + KokkosKernels::Impl::print_1Dview(valuesC); + KokkosKernels::Impl::print_1Dview(entriesC); + KokkosKernels::Impl::print_1Dview(row_mapC); + } - Kokkos::deep_copy (entriesC_host, entriesC); - Kokkos::deep_copy (valuesC_host, valuesC); - typename crsMat_t3::HostMirror::StaticCrsGraphType static_graph (entriesC_host, row_mapC_host); - typename crsMat_t3::HostMirror Ccrsmathost("CrsMatrixC", k, valuesC_host, static_graph); + if (check_output){ + auto row_mapC_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), row_mapC); + auto entriesC_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entriesC); + auto valuesC_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), valuesC); - bool is_identical = is_same_matrix(Ccrsmat_ref, Ccrsmathost); + crsMatHost_t Ccrsmathost( + "CHost", m, k, valuesC_host.extent(0), valuesC_host, row_mapC_host, entriesC_host); + bool is_identical = is_same_matrix(Ccrsmat_ref, Ccrsmathost); if (!is_identical){ std::cerr << "Result differs. If values are differing, might be floating point order error." << std::endl; exit(1); } } - - typename crsMat_t3::StaticCrsGraphType static_graph (entriesC, row_mapC); - crsMat_t3 Ccrsmat("CrsMatrixC", k, valuesC, static_graph); - return Ccrsmat; - + return crsMat_t3("CrsMatrixC", m, k, valuesC.extent(0), valuesC, row_mapC, entriesC); } From 88b284d471b69925045ed78e7c6257c66d6fcaaf Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 19 Feb 2020 16:25:11 -0700 Subject: [PATCH 09/14] Fixed typo "K" -> "k" in spiluk perf test --- perf_test/sparse/KokkosSparse_spiluk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf_test/sparse/KokkosSparse_spiluk.cpp b/perf_test/sparse/KokkosSparse_spiluk.cpp index 8a3a8b1fe2..eae9e76244 100644 --- a/perf_test/sparse/KokkosSparse_spiluk.cpp +++ b/perf_test/sparse/KokkosSparse_spiluk.cpp @@ -106,7 +106,7 @@ int test_spiluk_perf(std::vector tests, std::string afilename, int k, int t graph_t graph = A.graph; // in_graph const size_type nrows = graph.numRows(); const int nnz = A.nnz(); - const typename KernelHandle::const_nnz_lno_t fill_lev = lno_t(K) ; + const typename KernelHandle::const_nnz_lno_t fill_lev = lno_t(k); #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE //cuSPARSE requires lno_t = size_type = int. For both, int is always used (if enabled) From 6d602967d9113a884f8884aba2c003b46f6bbffc Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 19 Feb 2020 16:36:36 -0700 Subject: [PATCH 10/14] Removed git conflict stuff from sptrsv perftest --- perf_test/sparse/KokkosSparse_sptrsv.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/perf_test/sparse/KokkosSparse_sptrsv.cpp b/perf_test/sparse/KokkosSparse_sptrsv.cpp index 3e52b67385..66edf2cbc6 100644 --- a/perf_test/sparse/KokkosSparse_sptrsv.cpp +++ b/perf_test/sparse/KokkosSparse_sptrsv.cpp @@ -995,11 +995,7 @@ int main(int argc, char **argv) Kokkos::initialize(argc,argv); { -<<<<<<< 6d0becda14a6248e88026e8eedaa4a18e7396ee4 - int total_errors = test_sptrsv_perf(tests, lfilename, ufilename, team_size, vector_length, idx_offset, loop, chain_threshold, dense_row_percent); -======= - int total_errors = test_sptrsv_perf(tests,lfilename,ufilename,team_size,vector_length,idx_offset,loop); ->>>>>>> WIP: sparse perf-test cleanup and ETI fixes + int total_errors = test_sptrsv_perf(tests, lfilename, ufilename, team_size, vector_length, idx_offset, loop, chain_threshold, dense_row_percent); if(total_errors == 0) printf("Kokkos::SPTRSV Test: Passed\n"); From 16208de3c225d4a73059a51340a43a8c74950c7c Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 19 Feb 2020 16:43:00 -0700 Subject: [PATCH 11/14] Made the spgemm perf test run with Serial Last resort if nothing else is enabled, or nothing else was selected at runtime with argc/argv --- perf_test/sparse/KokkosSparse_spgemm.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/perf_test/sparse/KokkosSparse_spgemm.cpp b/perf_test/sparse/KokkosSparse_spgemm.cpp index fc5f6f499b..34af2fedff 100644 --- a/perf_test/sparse/KokkosSparse_spgemm.cpp +++ b/perf_test/sparse/KokkosSparse_spgemm.cpp @@ -293,6 +293,7 @@ int main (int argc, char ** argv){ Kokkos::initialize( Kokkos::InitArguments( num_threads, -1, device_id ) ); Kokkos::print_configuration(std::cout); + bool ran = false; #if defined( KOKKOS_ENABLE_OPENMP ) @@ -308,6 +309,7 @@ int main (int argc, char ** argv){ params ); #endif + ran = true; } #endif @@ -325,6 +327,16 @@ int main (int argc, char ** argv){ ); #endif + ran = true; + } +#endif + +#if defined( KOKKOS_ENABLE_SERIAL ) + //If only serial is enabled (or no other device was specified), run with serial + if (!ran) + { + KokkosKernels::Experiment::run_multi_mem_spgemm + (params); } #endif From 9359cdea11dcc8e9596dbd78c9f803cfc87dc8b9 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Thu, 20 Feb 2020 18:58:53 -0600 Subject: [PATCH 12/14] Fixed SPGEMM ref output type, perftest warnings --- perf_test/graph/KokkosGraph_color_d2.cpp | 1 - perf_test/sparse/KokkosSparse_run_spgemm.hpp | 46 ++++++++----------- perf_test/sparse/KokkosSparse_spmv.cpp | 4 +- perf_test/sparse/KokkosSparse_spmv_struct.cpp | 2 - 4 files changed, 20 insertions(+), 33 deletions(-) diff --git a/perf_test/graph/KokkosGraph_color_d2.cpp b/perf_test/graph/KokkosGraph_color_d2.cpp index ddfc38aea2..447346f488 100644 --- a/perf_test/graph/KokkosGraph_color_d2.cpp +++ b/perf_test/graph/KokkosGraph_color_d2.cpp @@ -601,7 +601,6 @@ void run_experiment(crsGraph_t crsGraph, int num_cols, Parameters params) template void experiment_driver(Parameters params) { - using myExecSpace = exec_space; using device_t = Kokkos::Device; using crsMat_t = typename KokkosSparse::CrsMatrix; using graph_t = typename crsMat_t::StaticCrsGraphType; diff --git a/perf_test/sparse/KokkosSparse_run_spgemm.hpp b/perf_test/sparse/KokkosSparse_run_spgemm.hpp index 4ba0af0478..dabdb3a422 100644 --- a/perf_test/sparse/KokkosSparse_run_spgemm.hpp +++ b/perf_test/sparse/KokkosSparse_run_spgemm.hpp @@ -157,12 +157,11 @@ bool is_same_matrix(crsMat_t output_mat1, crsMat_t output_mat2){ template -crsMat_t3 run_experiment( - crsMat_t crsMat, crsMat_t2 crsMat2, Parameters params){ - //int algorithm, int repeat, int chunk_size ,int multi_color_scale, int shmemsize, int team_size, int use_dynamic_scheduling, int verbose){ - +crsMat_t3 run_experiment(crsMat_t crsMat, crsMat_t2 crsMat2, Parameters params) +{ using namespace KokkosSparse; using namespace KokkosSparse::Experimental; + using device_t = Kokkos::Device; int algorithm = params.algorithm; int repeat = params.repeat; int chunk_size = params.chunk_size; @@ -184,11 +183,6 @@ crsMat_t3 run_experiment( typedef typename lno_view_t::value_type size_type; typedef typename scalar_view_t::value_type scalar_t; - typedef CrsMatrix crsMatHost_t; - typedef typename crsMatHost_t::values_type::non_const_type host_scalar_view_t; - typedef typename crsMatHost_t::row_map_type::non_const_type host_lno_view_t; - typedef typename crsMatHost_t::index_type::non_const_type host_lno_nnz_view_t; - lno_view_t row_mapC; lno_nnz_view_t entriesC; scalar_view_t valuesC; @@ -223,15 +217,19 @@ crsMat_t3 run_experiment( exit(1); } - host_lno_view_t row_mapC_ref; - host_lno_nnz_view_t entriesC_ref; - host_scalar_view_t valuesC_ref; - crsMatHost_t Ccrsmat_ref; + //The reference product (for verifying correctness) + //Don't allocate them if they won't be used, but they must be declared here. + lno_view_t row_mapC_ref; + lno_nnz_view_t entriesC_ref; + scalar_view_t valuesC_ref; + //Reference output has same type as actual output + crsMat_t3 Ccrsmat_ref; + if (check_output) { if (verbose) std::cout << "Running a reference algorithm" << std::endl; - row_mapC_ref = host_lno_view_t("non_const_lnow_row", m + 1); + row_mapC_ref = lno_view_t("non_const_lnow_row", m + 1); KernelHandle sequential_kh; sequential_kh.set_team_work_size(chunk_size); sequential_kh.set_shmem_size(shmemsize); @@ -261,8 +259,8 @@ crsMat_t3 run_experiment( size_type c_nnz_size = sequential_kh.get_spgemm_handle()->get_c_nnz(); - entriesC_ref = host_lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size); - valuesC_ref = host_scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size); + entriesC_ref = lno_nnz_view_t(Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size); + valuesC_ref = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size); spgemm_numeric( &sequential_kh, @@ -284,7 +282,7 @@ crsMat_t3 run_experiment( ); ExecSpace().fence(); - Ccrsmat_ref = crsMatHost_t("CorrectC", m, k, valuesC_ref.extent(0), valuesC_ref, row_mapC_ref, entriesC_ref); + Ccrsmat_ref = crsMat_t3("CorrectC", m, k, valuesC_ref.extent(0), valuesC_ref, row_mapC_ref, entriesC_ref); } for (int i = 0; i < repeat; ++i) { @@ -377,23 +375,15 @@ crsMat_t3 run_experiment( KokkosKernels::Impl::print_1Dview(entriesC); KokkosKernels::Impl::print_1Dview(row_mapC); } - - + crsMat_t3 Ccrsmat_result("CrsMatrixC", m, k, valuesC.extent(0), valuesC, row_mapC, entriesC); if (check_output){ - auto row_mapC_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), row_mapC); - auto entriesC_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), entriesC); - auto valuesC_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), valuesC); - - crsMatHost_t Ccrsmathost( - "CHost", m, k, valuesC_host.extent(0), valuesC_host, row_mapC_host, entriesC_host); - bool is_identical = is_same_matrix(Ccrsmat_ref, Ccrsmathost); + bool is_identical = is_same_matrix(Ccrsmat_result, Ccrsmat_ref); if (!is_identical){ std::cerr << "Result differs. If values are differing, might be floating point order error." << std::endl; exit(1); } } - - return crsMat_t3("CrsMatrixC", m, k, valuesC.extent(0), valuesC, row_mapC, entriesC); + return Ccrsmat_result; } diff --git a/perf_test/sparse/KokkosSparse_spmv.cpp b/perf_test/sparse/KokkosSparse_spmv.cpp index dcbcabd4be..fd415c018a 100644 --- a/perf_test/sparse/KokkosSparse_spmv.cpp +++ b/perf_test/sparse/KokkosSparse_spmv.cpp @@ -131,9 +131,9 @@ void matvec(AType& A, XType x, YType y, Ordinal rows_per_thread, int team_size, break; case KK_KERNELS_INSP: if(A.graph.row_block_offsets.data()==NULL) { - printf("PTR: %p\n",A.graph.row_block_offsets.data()); + printf("PTR: %p\n",static_cast(A.graph.row_block_offsets.data())); A.graph.create_block_partitioning(AType::execution_space::concurrency()); - printf("PTR2: %p\n",A.graph.row_block_offsets.data()); + printf("PTR2: %p\n",static_cast(A.graph.row_block_offsets.data())); } KokkosSparse::spmv (KokkosSparse::NoTranspose,1.0,A,x,0.0,y); break; diff --git a/perf_test/sparse/KokkosSparse_spmv_struct.cpp b/perf_test/sparse/KokkosSparse_spmv_struct.cpp index 29aced0be2..41d2391486 100644 --- a/perf_test/sparse/KokkosSparse_spmv_struct.cpp +++ b/perf_test/sparse/KokkosSparse_spmv_struct.cpp @@ -138,8 +138,6 @@ int main(int argc, char **argv) Kokkos::initialize(argc,argv); { - typedef default_lno_t lno_t; - typedef default_size_type size_type; typedef KokkosSparse::CrsMatrix matrix_type; typedef typename Kokkos::View mv_type; // typedef typename Kokkos::View mv_random_read_type; From ae147467665e7f8c8878b24bcea179901ff3c6f4 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Thu, 20 Feb 2020 19:05:33 -0600 Subject: [PATCH 13/14] Fixed more warnings --- perf_test/sparse/KokkosSparse_spgemm.cpp | 6 +----- perf_test/sparse/KokkosSparse_spmv.cpp | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/perf_test/sparse/KokkosSparse_spgemm.cpp b/perf_test/sparse/KokkosSparse_spgemm.cpp index 34af2fedff..e1faecbe71 100644 --- a/perf_test/sparse/KokkosSparse_spgemm.cpp +++ b/perf_test/sparse/KokkosSparse_spgemm.cpp @@ -293,8 +293,6 @@ int main (int argc, char ** argv){ Kokkos::initialize( Kokkos::InitArguments( num_threads, -1, device_id ) ); Kokkos::print_configuration(std::cout); - bool ran = false; - #if defined( KOKKOS_ENABLE_OPENMP ) if (params.use_openmp) { @@ -309,7 +307,6 @@ int main (int argc, char ** argv){ params ); #endif - ran = true; } #endif @@ -327,13 +324,12 @@ int main (int argc, char ** argv){ ); #endif - ran = true; } #endif #if defined( KOKKOS_ENABLE_SERIAL ) //If only serial is enabled (or no other device was specified), run with serial - if (!ran) + if (!params.use_openmp && !params.use_cuda) { KokkosKernels::Experiment::run_multi_mem_spgemm (params); diff --git a/perf_test/sparse/KokkosSparse_spmv.cpp b/perf_test/sparse/KokkosSparse_spmv.cpp index fd415c018a..3737c2f75c 100644 --- a/perf_test/sparse/KokkosSparse_spmv.cpp +++ b/perf_test/sparse/KokkosSparse_spmv.cpp @@ -131,9 +131,9 @@ void matvec(AType& A, XType x, YType y, Ordinal rows_per_thread, int team_size, break; case KK_KERNELS_INSP: if(A.graph.row_block_offsets.data()==NULL) { - printf("PTR: %p\n",static_cast(A.graph.row_block_offsets.data())); + printf("PTR: %p\n",static_cast(A.graph.row_block_offsets.data())); A.graph.create_block_partitioning(AType::execution_space::concurrency()); - printf("PTR2: %p\n",static_cast(A.graph.row_block_offsets.data())); + printf("PTR2: %p\n",static_cast(A.graph.row_block_offsets.data())); } KokkosSparse::spmv (KokkosSparse::NoTranspose,1.0,A,x,0.0,y); break; From 6a32a49a1b1021a20e76953a661c3730d47f20d5 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Thu, 20 Feb 2020 19:17:18 -0600 Subject: [PATCH 14/14] Made SPGEMM driver not crash in arg parsing If "--flag" expects another argument after, check that there is actually another arg before trying to read it. --- perf_test/sparse/KokkosSparse_spgemm.cpp | 76 ++++++++++++++---------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/perf_test/sparse/KokkosSparse_spgemm.cpp b/perf_test/sparse/KokkosSparse_spgemm.cpp index e1faecbe71..ce595a3af4 100644 --- a/perf_test/sparse/KokkosSparse_spgemm.cpp +++ b/perf_test/sparse/KokkosSparse_spgemm.cpp @@ -66,42 +66,52 @@ void print_options(){ std::cerr << "\tVerbose Output: '--verbose'" << std::endl; } +static char* getNextArg(int& i, int argc, char** argv) +{ + i++; + if(i >= argc) + { + std::cerr << "Error: expected additional command-line argument!\n"; + exit(1); + } + return argv[i]; +} int parse_inputs (KokkosKernels::Experiment::Parameters ¶ms, int argc, char **argv){ for ( int i = 1 ; i < argc ; ++i ) { if ( 0 == strcasecmp( argv[i] , "--threads" ) ) { - params.use_threads = atoi( argv[++i] ); + params.use_threads = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--openmp" ) ) { - params.use_openmp = atoi( argv[++i] ); + params.use_openmp = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--cuda" ) ) { - params.use_cuda = atoi( argv[++i] ) + 1; + params.use_cuda = atoi(getNextArg(i, argc, argv)) + 1; } else if ( 0 == strcasecmp( argv[i] , "--repeat" ) ) { - params.repeat = atoi( argv[++i] ); + params.repeat = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--hashscale" ) ) { - params.minhashscale = atoi( argv[++i] ); + params.minhashscale = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--chunksize" ) ) { - params.chunk_size = atoi( argv[++i] ) ; + params.chunk_size = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--teamsize" ) ) { - params.team_size = atoi( argv[++i] ) ; + params.team_size = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--vectorsize" ) ) { - params.vector_size = atoi( argv[++i] ) ; + params.vector_size = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--compression2step" ) ) { params.compression2step = true ; } else if ( 0 == strcasecmp( argv[i] , "--shmem" ) ) { - params.shmemsize = atoi( argv[++i] ) ; + params.shmemsize = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--memspaces" ) ) { - int memspaces = atoi( argv[++i] ) ; + int memspaces = atoi(getNextArg(i, argc, argv)); int memspaceinfo = memspaces; std::cout << "memspaceinfo:" << memspaceinfo << std::endl; if (memspaceinfo & 1){ @@ -145,19 +155,19 @@ int parse_inputs (KokkosKernels::Experiment::Parameters ¶ms, int argc, char params.calculate_read_write_cost = 1; } else if ( 0 == strcasecmp( argv[i] , "--CIF" ) ) { - params.coloring_input_file = argv[++i]; + params.coloring_input_file = getNextArg(i, argc, argv); } else if ( 0 == strcasecmp( argv[i] , "--COF" ) ) { - params.coloring_output_file = argv[++i]; + params.coloring_output_file = getNextArg(i, argc, argv); } else if ( 0 == strcasecmp( argv[i] , "--CCO" ) ) { //if 0.85 set, if compression does not reduce flops by at least 15% symbolic will run on original matrix. //otherwise, it will compress the graph and run symbolic on compressed one. - params.compression_cut_off = atof( argv[++i] ) ; + params.compression_cut_off = atof(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--FLHCO" ) ) { //if linear probing is used as hash, what is the max occupancy percantage we allow in the hash. - params.first_level_hash_cut_off = atof( argv[++i] ) ; + params.first_level_hash_cut_off = atof(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--flop" ) ) { @@ -169,11 +179,11 @@ int parse_inputs (KokkosKernels::Experiment::Parameters ¶ms, int argc, char //when mkl2 is run, the sort option to use. //7:not to sort the output //8:to sort the output - params.mkl_sort_option = atoi( argv[++i] ) ; + params.mkl_sort_option = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--mklkeepout" ) ) { //mkl output is not kept. - params.mkl_keep_output = atoi( argv[++i] ) ; + params.mkl_keep_output = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--checkoutput" ) ) { //check correctness @@ -181,18 +191,18 @@ int parse_inputs (KokkosKernels::Experiment::Parameters ¶ms, int argc, char } else if ( 0 == strcasecmp( argv[i] , "--amtx" ) ) { //A at C=AxB - params.a_mtx_bin_file = argv[++i]; + params.a_mtx_bin_file = getNextArg(i, argc, argv); } else if ( 0 == strcasecmp( argv[i] , "--bmtx" ) ) { //B at C=AxB. //if not provided, C = AxA will be performed. - params.b_mtx_bin_file = argv[++i]; + params.b_mtx_bin_file = getNextArg(i, argc, argv); } else if ( 0 == strcasecmp( argv[i] , "--cmtx" ) ) { //if provided, C will be written to given file. //has to have ".bin", or ".crs" extension. - params.c_mtx_bin_file = argv[++i]; + params.c_mtx_bin_file = getNextArg(i, argc, argv); } else if ( 0 == strcasecmp( argv[i] , "--dynamic" ) ) { //dynamic scheduling will be used for loops. @@ -207,7 +217,7 @@ int parse_inputs (KokkosKernels::Experiment::Parameters ¶ms, int argc, char //this parameter overwrites this. //with cache mode, or CPUs with smaller thread count, where memory bandwidth is not an issue, //this cut-off can be increased to be more than 250,000 - params.MaxColDenseAcc= atoi( argv[++i] ) ; + params.MaxColDenseAcc = atoi(getNextArg(i, argc, argv)); } else if ( 0 == strcasecmp( argv[i] , "--verbose" ) ) { //print the timing and information about the inner steps. @@ -216,43 +226,43 @@ int parse_inputs (KokkosKernels::Experiment::Parameters ¶ms, int argc, char params.verbose = 1; } else if ( 0 == strcasecmp( argv[i] , "--algorithm" ) ) { - ++i; + char* algoStr = getNextArg(i, argc, argv); - if ( 0 == strcasecmp( argv[i] , "DEFAULT" ) ) { + if ( 0 == strcasecmp( algoStr, "DEFAULT" ) ) { params.algorithm = KokkosSparse::SPGEMM_KK; } - else if ( 0 == strcasecmp( argv[i] , "KKDEFAULT" ) ) { + else if ( 0 == strcasecmp( algoStr, "KKDEFAULT" ) ) { params.algorithm = KokkosSparse::SPGEMM_KK; } - else if ( 0 == strcasecmp( argv[i] , "KKSPGEMM" ) ) { + else if ( 0 == strcasecmp( algoStr, "KKSPGEMM" ) ) { params.algorithm = KokkosSparse::SPGEMM_KK; } - else if ( 0 == strcasecmp( argv[i] , "KKMEM" ) ) { + else if ( 0 == strcasecmp( algoStr, "KKMEM" ) ) { params.algorithm = KokkosSparse::SPGEMM_KK_MEMORY; } - else if ( 0 == strcasecmp( argv[i] , "KKDENSE" ) ) { + else if ( 0 == strcasecmp( algoStr, "KKDENSE" ) ) { params.algorithm = KokkosSparse::SPGEMM_KK_DENSE; } - else if ( 0 == strcasecmp( argv[i] , "KKLP" ) ) { + else if ( 0 == strcasecmp( algoStr, "KKLP" ) ) { params.algorithm = KokkosSparse::SPGEMM_KK_LP; } - else if ( 0 == strcasecmp( argv[i] , "MKL" ) ) { + else if ( 0 == strcasecmp( algoStr, "MKL" ) ) { params.algorithm = KokkosSparse::SPGEMM_MKL; } - else if ( 0 == strcasecmp( argv[i] , "CUSPARSE" ) ) { + else if ( 0 == strcasecmp( algoStr, "CUSPARSE" ) ) { params.algorithm = KokkosSparse::SPGEMM_CUSPARSE; } - else if ( 0 == strcasecmp( argv[i] , "CUSP" ) ) { + else if ( 0 == strcasecmp( algoStr, "CUSP" ) ) { params.algorithm = KokkosSparse::SPGEMM_CUSP; } - else if ( 0 == strcasecmp( argv[i] , "KKDEBUG" ) ) { + else if ( 0 == strcasecmp( algoStr, "KKDEBUG" ) ) { params.algorithm = KokkosSparse::SPGEMM_KK_LP; } - else if ( 0 == strcasecmp( argv[i] , "MKL2" ) ) { + else if ( 0 == strcasecmp( algoStr, "MKL2" ) ) { params.algorithm = KokkosSparse::SPGEMM_MKL2PHASE; } - else if ( 0 == strcasecmp( argv[i] , "VIENNA" ) ) { + else if ( 0 == strcasecmp( algoStr, "VIENNA" ) ) { params.algorithm = KokkosSparse::SPGEMM_VIENNA; }