Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small fixes to spgemm, and plug gaps in testing #1159

Merged
merged 3 commits into from
Nov 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/common/KokkosKernels_IOUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols,
int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance;
int numRowEntries = elements_per_row + varianz;
if (numRowEntries < 0) numRowEntries = 0;
// Clamping numRowEntries above accomplishes 2 things:
// - If ncols is 0, numRowEntries will also be 0
// - With numRowEntries at most 2/3 the number of columns, in the worst
// case
// 90% of insertions will succeed after 6 tries
if (numRowEntries > 0.66 * ncols) numRowEntries = 0.66 * ncols;
rowPtr[row + 1] = rowPtr[row] + numRowEntries;
}
nnz = rowPtr[nrows];
Expand Down
1 change: 0 additions & 1 deletion src/sparse/KokkosSparse_spgemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ void spgemm_numeric(KernelHandle& kh, const AMatrix& A, const bool Amode,
&kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map,
A.graph.entries, A.values, Amode, B.graph.row_map, B.graph.entries,
B.values, Bmode, C.graph.row_map, C.graph.entries, C.values);
kh.destroy_spgemm_handle();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the handle destroyed when kh goes out of scope or was this just a typo?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, kh is passed in (by reference, so it doesn't go out of scope) from the user and it must have had create_spgemm_handle(...) called on it. Removing this destroy for 2 reasons:

  • for symmetry, if the user is responsible for creating the subhandle they should also be responsible for destroying it
  • to enable reuse: the simplified interface should be able to do one symbolic followd by multiple numerics

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reuse issue affected a user on slack btw, that's how I found out about it

}

} // namespace KokkosSparse
Expand Down
4 changes: 4 additions & 0 deletions src/sparse/KokkosSparse_spgemm_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,11 @@ class SPGEMMHandle {

#if defined(KOKKOS_ENABLE_CUDA)
if (std::is_same<Kokkos::Cuda, ExecutionSpace>::value) {
#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
this->algorithm_type = SPGEMM_CUSPARSE;
#else
this->algorithm_type = SPGEMM_KK;
#endif
#ifdef VERBOSE
std::cout << "Cuda Execution Space, Default Algorithm: SPGEMM_CUSPARSE"
<< std::endl;
Expand Down
8 changes: 6 additions & 2 deletions src/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1364,8 +1364,12 @@ void KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
if (KokkosKernels::Impl::kk_is_gpu_exec_space<
typename HandleType::HandleExecSpace>()) {
// then chose the best method and parameters.
size_type average_row_nnz = overall_nnz / this->a_row_cnt;
size_t average_row_flops = original_overall_flops / this->a_row_cnt;
size_type average_row_nnz = 0;
size_t average_row_flops = 0;
if (this->a_row_cnt > 0) {
average_row_nnz = overall_nnz / this->a_row_cnt;
average_row_flops = original_overall_flops / this->a_row_cnt;
}
// if we have very low flops per row, or our maximum number of nnz is
// prett small, then we do row-base algorithm.
if (SPGEMM_KK_LP != this->spgemm_algorithm &&
Expand Down
8 changes: 5 additions & 3 deletions src/sparse/impl/KokkosSparse_spgemm_impl_symbolic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1804,9 +1804,11 @@ void KokkosSPGEMM<
#else
size_t original_overall_flops =
this->handle->get_spgemm_handle()->compressed_overall_flops;
size_t estimate_max_nnz =
(sqrt(maxNumRoughNonzeros) * sqrt(original_overall_flops / m)) /
estimate_compress;
size_t estimate_max_nnz = 0;
if (m > 0)
estimate_max_nnz =
(sqrt(maxNumRoughNonzeros) * sqrt(original_overall_flops / m)) /
estimate_compress;
if (KOKKOSKERNELS_VERBOSE) {
std::cout << "\t\t\testimate_max_nnz:" << estimate_max_nnz
<< " maxNumRoughNonzeros:" << maxNumRoughNonzeros
Expand Down
52 changes: 29 additions & 23 deletions unit_test/sparse/Test_Sparse_spgemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,11 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) {
}
} // namespace Test

// Generate matrices and test all supported spgemm algorithms.
// C := AB, where A is m*k, B is k*n, and C is m*n.
template <typename scalar_t, typename lno_t, typename size_type,
typename device>
void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth,
lno_t row_size_variance, bool oldInterface = false) {
using namespace Test;
// device::execution_space::initialize();
Expand All @@ -260,22 +262,21 @@ void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
// typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
// typedef typename crsMat_t::values_type::non_const_type scalar_view_t;

lno_t numCols = numRows;
// Generate random compressed sparse row matrix. Randomly generated (non-zero)
// values are stored in a 1-D (1 rank) array.
crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
numRows, numCols, nnz, row_size_variance, bandwidth);
crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
m, k, nnz, row_size_variance, bandwidth);
crsMat_t B = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
k, n, nnz, row_size_variance, bandwidth);

crsMat_t output_mat2;
if (oldInterface)
run_spgemm_old_interface<crsMat_t, device>(input_mat, input_mat,
SPGEMM_DEBUG, output_mat2);
run_spgemm_old_interface<crsMat_t, device>(A, B, SPGEMM_DEBUG, output_mat2);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can the old interface be removed? Has the old interface already been marked for deprecation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@e10harvey For now the "old" interface is not deprecated - it's just an alternative. The CrsMatrix based interface is implemented in terms of it. We test both just to be safe.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also there are some users that want to have close control on memory allocations that prefer the "old" interface.
That way they can create their own Kokkos Views and give them pre-allocated memory.

else
run_spgemm<crsMat_t, device>(input_mat, input_mat, SPGEMM_DEBUG,
output_mat2);
run_spgemm<crsMat_t, device>(A, B, SPGEMM_DEBUG, output_mat2);

std::vector<SPGEMMAlgorithm> algorithms = {SPGEMM_KK_MEMORY, SPGEMM_KK_SPEED,
SPGEMM_KK_MEMSPEED};
std::vector<SPGEMMAlgorithm> algorithms = {
SPGEMM_KK, SPGEMM_KK_MEMORY, SPGEMM_KK_SPEED, SPGEMM_KK_MEMSPEED};

#ifdef HAVE_KOKKOSKERNELS_MKL
algorithms.push_back(SPGEMM_MKL);
Expand Down Expand Up @@ -309,7 +310,7 @@ void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
}
// if size_type is larger than int, mkl casts it to int.
// it will fail if casting cause overflow.
if (input_mat.values.extent(0) > max_integer) {
if (A.values.extent(0) > max_integer) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to check all extents against max_integer here? If this is protecting a code path in the production code, this check should really be moved to the production code where an exception case raised and then we can try-catch the exception here.

Copy link
Contributor Author

@brian-kelley brian-kelley Nov 1, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed this too - luckily the MKL wrapper also has this check:

//if size_type != int...
      const int max_integer = 2147483647;
      if (entriesB.extent(0) > max_integer ||
          entriesA.extent(0) > max_integer) {
        throw std::runtime_error(
            "MKL requires integer values for size type for SPGEMM. Copying to "
            "integer will cause overflow.\n");
        return;
      }
//then convert rowmaps to int

is_expected_to_fail = true;
}

Expand All @@ -333,11 +334,10 @@ void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
int res = 0;
try {
if (oldInterface)
res = run_spgemm_old_interface<crsMat_t, device>(
input_mat, input_mat, spgemm_algorithm, output_mat);
res = run_spgemm_old_interface<crsMat_t, device>(A, B, spgemm_algorithm,
output_mat);
else
res = run_spgemm<crsMat_t, device>(input_mat, input_mat,
spgemm_algorithm, output_mat);
res = run_spgemm<crsMat_t, device>(A, B, spgemm_algorithm, output_mat);
} catch (const char *message) {
EXPECT_TRUE(is_expected_to_fail) << algo;
failed = true;
Expand Down Expand Up @@ -433,14 +433,20 @@ void test_issue402() {
<< "KKMEM still has issue 402 bug; C=AA' is incorrect!\n";
}

#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \
TEST_F(TestCategory, \
sparse##_##spgemm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000 * 20, 500, 10); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 0, 10, 10); \
test_issue402<SCALAR, ORDINAL, OFFSET, DEVICE>(); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000 * 20, 500, 10, \
true); \
#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \
TEST_F(TestCategory, \
sparse##_##spgemm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000, 10000, \
10000 * 20, 500, 10, false); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000, 10000, \
10000 * 20, 500, 10, true); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 0, 0, 0, 10, 10, false); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 0, 0, 0, 10, 10, true); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 12, 5, 0, 10, 0, false); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 12, 5, 0, 10, 0, true); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10, 10, 0, 0, 10, 10, false); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10, 10, 0, 0, 10, 10, true); \
test_issue402<SCALAR, ORDINAL, OFFSET, DEVICE>(); \
}

// test_spgemm<SCALAR,ORDINAL,OFFSET,DEVICE>(50000, 50000 * 30, 100, 10);
Expand Down