Skip to content

Commit

Permalink
rename prefix_sum -> prefix_sum_nonnegative
Browse files: browse the repository at this point in the history
  • Loading branch information
upsj committed Mar 20, 2023
1 parent 65b44a2 commit 1072cbb
Show file tree
Hide file tree
Showing 65 changed files with 188 additions and 160 deletions.
7 changes: 4 additions & 3 deletions benchmark/blas/blas_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,8 @@ class AdvancedApplyOperation : public BenchmarkOperation {
};


GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
GKO_REGISTER_OPERATION(prefix_sum_nonnegative,
components::prefix_sum_nonnegative);


template <typename IndexType>
Expand All @@ -381,8 +382,8 @@ class PrefixSumOperation : public BenchmarkOperation {

void run() override
{
array_.get_executor()->run(
make_prefix_sum(array_.get_data(), array_.get_num_elems()));
array_.get_executor()->run(make_prefix_sum_nonnegative(
array_.get_data(), array_.get_num_elems()));
}

private:
Expand Down
10 changes: 5 additions & 5 deletions common/cuda_hip/components/prefix_sum_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ struct overflowing_sum<size_type> {


template <typename IndexType>
void prefix_sum(std::shared_ptr<const DefaultExecutor> exec, IndexType* counts,
size_type num_entries)
void prefix_sum_nonnegative(std::shared_ptr<const DefaultExecutor> exec,
IndexType* counts, size_type num_entries)
{
constexpr auto max = std::numeric_limits<IndexType>::max();
thrust::exclusive_scan(thrust_policy(exec), counts, counts + num_entries,
Expand All @@ -79,8 +79,8 @@ void prefix_sum(std::shared_ptr<const DefaultExecutor> exec, IndexType* counts,
}
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL);
GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_NONNEGATIVE_KERNEL);

// instantiate for size_type as well, as this is used in the Sellp format
template void prefix_sum<size_type>(std::shared_ptr<const DefaultExecutor>,
size_type*, size_type);
template void prefix_sum_nonnegative<size_type>(
std::shared_ptr<const DefaultExecutor>, size_type*, size_type);
8 changes: 4 additions & 4 deletions common/cuda_hip/factorization/factorization_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ void add_diagonal_elements(std::shared_ptr<const DefaultExecutor> exec,
return;
}

components::prefix_sum(exec, row_ptrs_add, row_ptrs_size);
components::prefix_sum_nonnegative(exec, row_ptrs_add, row_ptrs_size);
exec->synchronize();

auto total_additions =
Expand Down Expand Up @@ -465,8 +465,8 @@ void initialize_row_ptrs_l_u(
u_row_ptrs);
}

components::prefix_sum(exec, l_row_ptrs, num_rows + 1);
components::prefix_sum(exec, u_row_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, l_row_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, u_row_ptrs, num_rows + 1);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down Expand Up @@ -520,7 +520,7 @@ void initialize_row_ptrs_l(
as_device_type(system_matrix->get_const_values()), l_row_ptrs);
}

components::prefix_sum(exec, l_row_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, l_row_ptrs, num_rows + 1);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
8 changes: 4 additions & 4 deletions common/cuda_hip/preconditioner/isai_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,8 @@ void generate_tri_inverse(std::shared_ptr<const DefaultExecutor> exec,
excess_nz_ptrs);
}
}
components::prefix_sum(exec, excess_rhs_ptrs, num_rows + 1);
components::prefix_sum(exec, excess_nz_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, excess_rhs_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, excess_nz_ptrs, num_rows + 1);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down Expand Up @@ -490,8 +490,8 @@ void generate_general_inverse(std::shared_ptr<const DefaultExecutor> exec,
as_device_type(inverse->get_values()), excess_rhs_ptrs,
excess_nz_ptrs, spd);
}
components::prefix_sum(exec, excess_rhs_ptrs, num_rows + 1);
components::prefix_sum(exec, excess_nz_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, excess_rhs_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, excess_nz_ptrs, num_rows + 1);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
7 changes: 4 additions & 3 deletions common/unified/distributed/partition_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ void build_from_mapping(std::shared_ptr<const DefaultExecutor> exec,
range_starting_index[i] = cur_part != prev_part ? 1 : 0;
},
mapping.get_num_elems(), mapping, range_starting_index);
components::prefix_sum(exec, range_starting_index.get_data(),
mapping.get_num_elems() + 1);
components::prefix_sum_nonnegative(exec, range_starting_index.get_data(),
mapping.get_num_elems() + 1);
run_kernel(
exec,
[] GKO_KERNEL(auto i, auto size, auto mapping,
Expand Down Expand Up @@ -140,7 +140,8 @@ void build_ranges_from_global_size(std::shared_ptr<const DefaultExecutor> exec,
ranges[i] = size_per_part + (i < rest ? 1 : 0);
},
ranges.get_num_elems() - 1, size_per_part, rest, ranges.get_data());
components::prefix_sum(exec, ranges.get_data(), ranges.get_num_elems());
components::prefix_sum_nonnegative(exec, ranges.get_data(),
ranges.get_num_elems());
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_PARTITION_BUILD_FROM_GLOBAL_SIZE);
Expand Down
2 changes: 1 addition & 1 deletion common/unified/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ void build_lookup_offsets(std::shared_ptr<const DefaultExecutor> exec,
}
},
num_rows, row_ptrs, col_idxs, num_rows, allowed, storage_offsets);
components::prefix_sum(exec, storage_offsets, num_rows + 1);
components::prefix_sum_nonnegative(exec, storage_offsets, num_rows + 1);
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
Expand Down
2 changes: 1 addition & 1 deletion common/unified/matrix/dense_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ void compute_slice_sets(std::shared_ptr<const DefaultExecutor> exec,
gko::dim<2>{num_slices, slice_size}, row_nnz, slice_size, stride_factor,
num_rows);
exec->copy(num_slices, slice_lengths, slice_sets);
components::prefix_sum(exec, slice_sets, num_slices + 1);
components::prefix_sum_nonnegative(exec, slice_sets, num_slices + 1);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
Expand Down
3 changes: 2 additions & 1 deletion common/unified/matrix/hybrid_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ void compute_coo_row_ptrs(std::shared_ptr<const DefaultExecutor> exec,
static_cast<int64>(ell_lim));
},
row_nnz.get_num_elems(), row_nnz, ell_lim, coo_row_ptrs);
components::prefix_sum(exec, coo_row_ptrs, row_nnz.get_num_elems() + 1);
components::prefix_sum_nonnegative(exec, coo_row_ptrs,
row_nnz.get_num_elems() + 1);
}


Expand Down
2 changes: 1 addition & 1 deletion common/unified/matrix/sellp_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void compute_slice_sets(std::shared_ptr<const DefaultExecutor> exec,
gko::dim<2>{num_slices, slice_size}, row_ptrs, slice_size,
stride_factor, num_rows);
exec->copy(num_slices, slice_lengths, slice_sets);
components::prefix_sum(exec, slice_sets, num_slices + 1);
components::prefix_sum_nonnegative(exec, slice_sets, num_slices + 1);
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
Expand Down
3 changes: 2 additions & 1 deletion common/unified/multigrid/pgm_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ void renumber(std::shared_ptr<const DefaultExecutor> exec,
},
num, agg.get_const_data(), agg_map.get_data());

components::prefix_sum(exec, agg_map.get_data(), agg_map.get_num_elems());
components::prefix_sum_nonnegative(exec, agg_map.get_data(),
agg_map.get_num_elems());

run_kernel(
exec,
Expand Down
8 changes: 4 additions & 4 deletions core/components/prefix_sum_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ namespace kernels {
* \param num_entries Size of the array, equal to one more than the number
* of entries to be summed.
*/
#define GKO_DECLARE_PREFIX_SUM_KERNEL(IndexType) \
void prefix_sum(std::shared_ptr<const DefaultExecutor> exec, \
IndexType* counts, size_type num_entries)
#define GKO_DECLARE_PREFIX_SUM_NONNEGATIVE_KERNEL(IndexType) \
void prefix_sum_nonnegative(std::shared_ptr<const DefaultExecutor> exec, \
IndexType* counts, size_type num_entries)


#define GKO_DECLARE_ALL_AS_TEMPLATES \
template <typename IndexType> \
GKO_DECLARE_PREFIX_SUM_KERNEL(IndexType)
GKO_DECLARE_PREFIX_SUM_NONNEGATIVE_KERNEL(IndexType)


GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(components,
Expand Down
4 changes: 2 additions & 2 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,10 @@ namespace components {


GKO_STUB_VALUE_CONVERSION(GKO_DECLARE_CONVERT_PRECISION_KERNEL);
GKO_STUB_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_KERNEL);
GKO_STUB_INDEX_TYPE(GKO_DECLARE_PREFIX_SUM_NONNEGATIVE_KERNEL);
// explicitly instantiate for size_type, as this is
// used in the SellP format
template GKO_DECLARE_PREFIX_SUM_KERNEL(size_type);
template GKO_DECLARE_PREFIX_SUM_NONNEGATIVE_KERNEL(size_type);

GKO_STUB_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL);
template GKO_DECLARE_FILL_ARRAY_KERNEL(bool);
Expand Down
5 changes: 3 additions & 2 deletions core/factorization/symbolic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ namespace {

GKO_REGISTER_OPERATION(symbolic_count, cholesky::symbolic_count);
GKO_REGISTER_OPERATION(symbolic, cholesky::symbolic_factorize);
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
GKO_REGISTER_OPERATION(prefix_sum_nonnegative,
components::prefix_sum_nonnegative);
GKO_REGISTER_OPERATION(initialize, lu_factorization::initialize);
GKO_REGISTER_OPERATION(factorize, lu_factorization::factorize);
GKO_REGISTER_HOST_OPERATION(compute_elim_forest, compute_elim_forest);
Expand All @@ -77,7 +78,7 @@ void symbolic_cholesky(
array<IndexType> row_ptrs{exec, num_rows + 1};
array<IndexType> tmp{exec};
exec->run(make_symbolic_count(mtx, *forest, row_ptrs.get_data(), tmp));
exec->run(make_prefix_sum(row_ptrs.get_data(), num_rows + 1));
exec->run(make_prefix_sum_nonnegative(row_ptrs.get_data(), num_rows + 1));
const auto factor_nnz = static_cast<size_type>(
exec->copy_val_to_host(row_ptrs.get_const_data() + num_rows));
factors = matrix_type::create(
Expand Down
10 changes: 6 additions & 4 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index,
csr::is_sorted_by_column_index);
GKO_REGISTER_OPERATION(extract_diagonal, csr::extract_diagonal);
GKO_REGISTER_OPERATION(fill_array, components::fill_array);
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
GKO_REGISTER_OPERATION(prefix_sum_nonnegative,
components::prefix_sum_nonnegative);
GKO_REGISTER_OPERATION(inplace_absolute_array,
components::inplace_absolute_array);
GKO_REGISTER_OPERATION(outplace_absolute_array,
Expand Down Expand Up @@ -664,7 +665,8 @@ Csr<ValueType, IndexType>::create_submatrix(const gko::span& row_span,
array<IndexType> row_ptrs(exec, row_span.length() + 1);
exec->run(csr::make_calculate_nonzeros_per_row_in_span(
this, row_span, column_span, &row_ptrs));
exec->run(csr::make_prefix_sum(row_ptrs.get_data(), row_span.length() + 1));
exec->run(csr::make_prefix_sum_nonnegative(row_ptrs.get_data(),
row_span.length() + 1));
auto num_nnz =
exec->copy_val_to_host(row_ptrs.get_data() + sub_mat_size[0]);
auto sub_mat = Mat::create(exec, sub_mat_size,
Expand Down Expand Up @@ -708,8 +710,8 @@ Csr<ValueType, IndexType>::create_submatrix(
array<IndexType> row_ptrs(exec, submat_num_rows + 1);
exec->run(csr::make_calculate_nonzeros_per_row_in_index_set(
this, row_index_set, col_index_set, row_ptrs.get_data()));
exec->run(
csr::make_prefix_sum(row_ptrs.get_data(), submat_num_rows + 1));
exec->run(csr::make_prefix_sum_nonnegative(row_ptrs.get_data(),
submat_num_rows + 1));
auto num_nnz =
exec->copy_val_to_host(row_ptrs.get_data() + sub_mat_size[0]);
auto sub_mat = Mat::create(exec, sub_mat_size,
Expand Down
15 changes: 10 additions & 5 deletions core/matrix/dense.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ GKO_REGISTER_OPERATION(compute_hybrid_coo_row_ptrs,
GKO_REGISTER_OPERATION(count_nonzeros_per_row, dense::count_nonzeros_per_row);
GKO_REGISTER_OPERATION(count_nonzero_blocks_per_row,
dense::count_nonzero_blocks_per_row);
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
GKO_REGISTER_OPERATION(prefix_sum_nonnegative,
components::prefix_sum_nonnegative);
GKO_REGISTER_OPERATION(compute_slice_sets, dense::compute_slice_sets);
GKO_REGISTER_OPERATION(transpose, dense::transpose);
GKO_REGISTER_OPERATION(conj_transpose, dense::conj_transpose);
Expand Down Expand Up @@ -557,7 +558,8 @@ void Dense<ValueType>::convert_impl(Coo<ValueType, IndexType>* result) const

array<int64> row_ptrs{exec, num_rows + 1};
exec->run(dense::make_count_nonzeros_per_row(this, row_ptrs.get_data()));
exec->run(dense::make_prefix_sum(row_ptrs.get_data(), num_rows + 1));
exec->run(
dense::make_prefix_sum_nonnegative(row_ptrs.get_data(), num_rows + 1));
const auto nnz =
exec->copy_val_to_host(row_ptrs.get_const_data() + num_rows);
result->resize(this->get_size(), nnz);
Expand Down Expand Up @@ -606,7 +608,8 @@ void Dense<ValueType>::convert_impl(Csr<ValueType, IndexType>* result) const
tmp->row_ptrs_.resize_and_reset(num_rows + 1);
exec->run(
dense::make_count_nonzeros_per_row(this, tmp->get_row_ptrs()));
exec->run(dense::make_prefix_sum(tmp->get_row_ptrs(), num_rows + 1));
exec->run(dense::make_prefix_sum_nonnegative(tmp->get_row_ptrs(),
num_rows + 1));
const auto nnz =
exec->copy_val_to_host(tmp->get_const_row_ptrs() + num_rows);
tmp->col_idxs_.resize_and_reset(nnz);
Expand Down Expand Up @@ -658,7 +661,8 @@ void Dense<ValueType>::convert_impl(Fbcsr<ValueType, IndexType>* result) const
tmp->row_ptrs_.resize_and_reset(row_blocks + 1);
exec->run(dense::make_count_nonzero_blocks_per_row(this, bs,
tmp->get_row_ptrs()));
exec->run(dense::make_prefix_sum(tmp->get_row_ptrs(), row_blocks + 1));
exec->run(dense::make_prefix_sum_nonnegative(tmp->get_row_ptrs(),
row_blocks + 1));
const auto nnz_blocks =
exec->copy_val_to_host(tmp->get_const_row_ptrs() + row_blocks);
tmp->col_idxs_.resize_and_reset(nnz_blocks);
Expand Down Expand Up @@ -859,7 +863,8 @@ void Dense<ValueType>::convert_impl(
tmp->row_ptrs_.resize_and_reset(num_rows + 1);
exec->run(
dense::make_count_nonzeros_per_row(this, tmp->row_ptrs_.get_data()));
exec->run(dense::make_prefix_sum(tmp->row_ptrs_.get_data(), num_rows + 1));
exec->run(dense::make_prefix_sum_nonnegative(tmp->row_ptrs_.get_data(),
num_rows + 1));
const auto nnz =
exec->copy_val_to_host(tmp->row_ptrs_.get_const_data() + num_rows);
tmp->col_idxs_.resize_and_reset(nnz);
Expand Down
7 changes: 4 additions & 3 deletions core/matrix/ell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ GKO_REGISTER_OPERATION(convert_to_csr, ell::convert_to_csr);
GKO_REGISTER_OPERATION(count_nonzeros_per_row, ell::count_nonzeros_per_row);
GKO_REGISTER_OPERATION(extract_diagonal, ell::extract_diagonal);
GKO_REGISTER_OPERATION(fill_array, components::fill_array);
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
GKO_REGISTER_OPERATION(prefix_sum_nonnegative,
components::prefix_sum_nonnegative);
GKO_REGISTER_OPERATION(inplace_absolute_array,
components::inplace_absolute_array);
GKO_REGISTER_OPERATION(outplace_absolute_array,
Expand Down Expand Up @@ -230,8 +231,8 @@ void Ell<ValueType, IndexType>::convert_to(
tmp->row_ptrs_.resize_and_reset(num_rows + 1);
exec->run(
ell::make_count_nonzeros_per_row(this, tmp->row_ptrs_.get_data()));
exec->run(
ell::make_prefix_sum(tmp->row_ptrs_.get_data(), num_rows + 1));
exec->run(ell::make_prefix_sum_nonnegative(tmp->row_ptrs_.get_data(),
num_rows + 1));
const auto nnz = static_cast<size_type>(
exec->copy_val_to_host(tmp->row_ptrs_.get_const_data() + num_rows));
tmp->col_idxs_.resize_and_reset(nnz);
Expand Down
7 changes: 4 additions & 3 deletions core/matrix/hybrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ GKO_REGISTER_OPERATION(compute_coo_row_ptrs, hybrid::compute_coo_row_ptrs);
GKO_REGISTER_OPERATION(convert_idxs_to_ptrs, components::convert_idxs_to_ptrs);
GKO_REGISTER_OPERATION(convert_to_csr, hybrid::convert_to_csr);
GKO_REGISTER_OPERATION(fill_array, components::fill_array);
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
GKO_REGISTER_OPERATION(prefix_sum_nonnegative,
components::prefix_sum_nonnegative);
GKO_REGISTER_OPERATION(inplace_absolute_array,
components::inplace_absolute_array);
GKO_REGISTER_OPERATION(outplace_absolute_array,
Expand Down Expand Up @@ -213,8 +214,8 @@ void Hybrid<ValueType, IndexType>::convert_to(
array<IndexType> coo_row_ptrs{exec, num_rows + 1};
exec->run(hybrid::make_ell_count_nonzeros_per_row(
this->get_ell(), ell_row_ptrs.get_data()));
exec->run(
hybrid::make_prefix_sum(ell_row_ptrs.get_data(), num_rows + 1));
exec->run(hybrid::make_prefix_sum_nonnegative(ell_row_ptrs.get_data(),
num_rows + 1));
exec->run(hybrid::make_convert_idxs_to_ptrs(
this->get_const_coo_row_idxs(), this->get_coo_num_stored_elements(),
num_rows, coo_row_ptrs.get_data()));
Expand Down
7 changes: 4 additions & 3 deletions core/matrix/sellp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ namespace {
GKO_REGISTER_OPERATION(spmv, sellp::spmv);
GKO_REGISTER_OPERATION(advanced_spmv, sellp::advanced_spmv);
GKO_REGISTER_OPERATION(convert_idxs_to_ptrs, components::convert_idxs_to_ptrs);
GKO_REGISTER_OPERATION(prefix_sum, components::prefix_sum);
GKO_REGISTER_OPERATION(prefix_sum_nonnegative,
components::prefix_sum_nonnegative);
GKO_REGISTER_OPERATION(compute_slice_sets, sellp::compute_slice_sets);
GKO_REGISTER_OPERATION(fill_in_matrix_data, sellp::fill_in_matrix_data);
GKO_REGISTER_OPERATION(fill_in_dense, sellp::fill_in_dense);
Expand Down Expand Up @@ -206,8 +207,8 @@ void Sellp<ValueType, IndexType>::convert_to(
tmp->row_ptrs_.resize_and_reset(num_rows + 1);
exec->run(sellp::make_count_nonzeros_per_row(
this, tmp->row_ptrs_.get_data()));
exec->run(
sellp::make_prefix_sum(tmp->row_ptrs_.get_data(), num_rows + 1));
exec->run(sellp::make_prefix_sum_nonnegative(tmp->row_ptrs_.get_data(),
num_rows + 1));
const auto nnz = static_cast<size_type>(
exec->copy_val_to_host(tmp->row_ptrs_.get_const_data() + num_rows));
tmp->col_idxs_.resize_and_reset(nnz);
Expand Down
2 changes: 1 addition & 1 deletion cuda/factorization/par_ict_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ void add_candidates(syn::value_list<int, subwarp_size>,
}
// build row ptrs
components::prefix_sum(exec, l_new_row_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, l_new_row_ptrs, num_rows + 1);
// resize output arrays
auto l_new_nnz = exec->copy_val_to_host(l_new_row_ptrs + num_rows);
Expand Down
2 changes: 1 addition & 1 deletion cuda/factorization/par_ilut_approx_filter_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ void threshold_filter_approx(syn::value_list<int, subwarp_size>,
}
// build row pointers
components::prefix_sum(exec, new_row_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, new_row_ptrs, num_rows + 1);
// build matrix
auto new_nnz = exec->copy_val_to_host(new_row_ptrs + num_rows);
Expand Down
2 changes: 1 addition & 1 deletion cuda/factorization/par_ilut_filter_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ void threshold_filter(syn::value_list<int, subwarp_size>,
}
// build row pointers
components::prefix_sum(exec, new_row_ptrs, num_rows + 1);
components::prefix_sum_nonnegative(exec, new_row_ptrs, num_rows + 1);
// build matrix
auto new_nnz = exec->copy_val_to_host(new_row_ptrs + num_rows);
Expand Down
2 changes: 1 addition & 1 deletion cuda/factorization/par_ilut_select_common.cu
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void sampleselect_count(std::shared_ptr<const DefaultExecutor> exec,
exec->get_stream()>>>(partial_counts,
total_counts, num_blocks);
// compute prefix sum over bucket counts
components::prefix_sum(exec, total_counts, bucket_count + 1);
components::prefix_sum_nonnegative(exec, total_counts, bucket_count + 1);
}
Expand Down
Loading

0 comments on commit 1072cbb

Please sign in to comment.