Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add single-value access functions for array access #1485

Merged
merged 12 commits into from
Dec 12, 2023
7 changes: 3 additions & 4 deletions benchmark/sparse_blas/operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@


#include "benchmark/sparse_blas/operations.hpp"
#include "core/base/array_access.hpp"
#include "core/factorization/elimination_forest.hpp"
#include "core/factorization/symbolic.hpp"
#include "core/matrix/csr_kernels.hpp"
Expand Down Expand Up @@ -393,8 +394,7 @@ class GenerateLookupOperation : public BenchmarkOperation {
exec->run(make_build_lookup_offsets(
mtx_->get_const_row_ptrs(), mtx_->get_const_col_idxs(), num_rows,
allowed_sparsity_, storage_offsets_.get_data()));
storage_.resize_and_reset(exec->copy_val_to_host(
storage_offsets_.get_const_data() + num_rows));
storage_.resize_and_reset(get_element(storage_offsets_, num_rows));
}

std::pair<bool, double> validate() const override
Expand Down Expand Up @@ -479,8 +479,7 @@ class LookupOperation : public BenchmarkOperation {
exec->run(make_build_lookup_offsets(
mtx_->get_const_row_ptrs(), mtx_->get_const_col_idxs(), num_rows,
allowed_sparsity_, storage_offsets_.get_data()));
storage_.resize_and_reset(exec->copy_val_to_host(
storage_offsets_.get_const_data() + num_rows));
storage_.resize_and_reset(get_element(storage_offsets_, num_rows));
exec->run(make_build_lookup(
mtx_->get_const_row_ptrs(), mtx_->get_const_col_idxs(), num_rows,
allowed_sparsity_, storage_offsets_.get_const_data(),
Expand Down
3 changes: 1 addition & 2 deletions common/cuda_hip/factorization/factorization_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,7 @@ void add_diagonal_elements(std::shared_ptr<const DefaultExecutor> exec,
components::prefix_sum_nonnegative(exec, row_ptrs_add, row_ptrs_size);
exec->synchronize();

auto total_additions =
exec->copy_val_to_host(row_ptrs_add + row_ptrs_size - 1);
auto total_additions = get_element(row_ptrs_addition, row_ptrs_size - 1);
size_type new_num_elems = static_cast<size_type>(total_additions) +
mtx->get_num_stored_elements();

Expand Down
2 changes: 1 addition & 1 deletion common/cuda_hip/matrix/csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1746,7 +1746,7 @@ void check_diagonal_entries_exist(
exec->get_stream()>>>(
num_diag, mtx->get_const_row_ptrs(), mtx->get_const_col_idxs(),
has_diags.get_data());
has_all_diags = exec->copy_val_to_host(has_diags.get_const_data());
has_all_diags = get_element(has_diags, 0);
} else {
has_all_diags = true;
}
Expand Down
2 changes: 1 addition & 1 deletion common/cuda_hip/matrix/fbcsr_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ void is_sorted_by_column_index(
to_check->get_const_row_ptrs(), to_check->get_const_col_idxs(),
num_brows, gpu_array.get_data());
}
*is_sorted = exec->copy_val_to_host(gpu_array.get_data());
*is_sorted = get_element(gpu_array, 0);
}


Expand Down
2 changes: 1 addition & 1 deletion common/cuda_hip/solver/multigrid_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ void kcycle_check_stop(std::shared_ptr<const DefaultExecutor> exec,
as_device_type(new_norm->get_const_values()), rel_tol,
as_device_type(dis_stop.get_data()));
}
is_stop = exec->copy_val_to_host(dis_stop.get_const_data());
is_stop = get_element(dis_stop, 0);
}

GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_TYPE(
Expand Down
4 changes: 2 additions & 2 deletions common/unified/distributed/partition_helpers_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "common/unified/base/kernel_launch.hpp"
#include "common/unified/base/kernel_launch_reduction.hpp"
#include "core/base/array_access.hpp"


namespace gko {
Expand Down Expand Up @@ -36,8 +37,7 @@ void check_consecutive_ranges(std::shared_ptr<const DefaultExecutor> exec,
[] GKO_KERNEL(auto x) { return x; }, static_cast<uint32>(true),
result_uint32.get_data(), num_ranges - 1,
range_start_ends.get_const_data() + 1);
result =
static_cast<bool>(exec->copy_val_to_host(result_uint32.get_data()));
result = static_cast<bool>(get_element(result_uint32, 0));
} else {
result = true;
}
Expand Down
6 changes: 3 additions & 3 deletions common/unified/distributed/partition_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "common/unified/base/kernel_launch.hpp"
#include "common/unified/base/kernel_launch_reduction.hpp"
#include "core/base/array_access.hpp"
#include "core/components/prefix_sum_kernels.hpp"


Expand All @@ -31,7 +32,7 @@ void count_ranges(std::shared_ptr<const DefaultExecutor> exec,
},
GKO_KERNEL_REDUCE_SUM(size_type), result.get_data(), mapping.get_size(),
mapping);
num_ranges = exec->copy_val_to_host(result.get_const_data());
num_ranges = get_element(result, 0);
}


Expand Down Expand Up @@ -144,8 +145,7 @@ void has_ordered_parts(
},
[] GKO_KERNEL(const auto a) { return a; }, uint32(1),
result_uint32.get_data(), num_ranges - 1, part_ids);
*result = static_cast<bool>(
exec->copy_val_to_host(result_uint32.get_const_data()));
*result = static_cast<bool>(get_element(result_uint32, 0));
}

GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
Expand Down
4 changes: 2 additions & 2 deletions common/unified/matrix/dense_kernels.template.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "common/unified/base/kernel_launch.hpp"
#include "common/unified/base/kernel_launch_reduction.hpp"
#include "core/base/array_access.hpp"
#include "core/base/mixed_precision_types.hpp"
#include "core/components/prefix_sum_kernels.hpp"

Expand Down Expand Up @@ -278,8 +279,7 @@ void compute_max_nnz_per_row(std::shared_ptr<const DefaultExecutor> exec,
GKO_KERNEL_REDUCE_MAX(size_type),
partial.get_data() + source->get_size()[0], source->get_size()[0],
partial);
result = exec->copy_val_to_host(partial.get_const_data() +
source->get_size()[0]);
result = get_element(partial, source->get_size()[0]);
}


Expand Down
3 changes: 2 additions & 1 deletion common/unified/matrix/ell_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "common/unified/base/kernel_launch.hpp"
#include "common/unified/base/kernel_launch_reduction.hpp"
#include "core/base/array_access.hpp"
#include "core/matrix/dense_kernels.hpp"


Expand All @@ -36,7 +37,7 @@ void compute_max_row_nnz(std::shared_ptr<const DefaultExecutor> exec,
},
GKO_KERNEL_REDUCE_MAX(size_type), result.get_data(),
row_ptrs.get_size() - 1, row_ptrs);
max_nnz = exec->copy_val_to_host(result.get_const_data());
max_nnz = get_element(result, 0);
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_ELL_COMPUTE_MAX_ROW_NNZ_KERNEL);
Expand Down
8 changes: 4 additions & 4 deletions common/unified/multigrid/pgm_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "common/unified/base/kernel_launch.hpp"
#include "common/unified/base/kernel_launch_reduction.hpp"
#include "core/base/array_access.hpp"
#include "core/components/prefix_sum_kernels.hpp"


Expand Down Expand Up @@ -59,7 +60,7 @@ void count_unagg(std::shared_ptr<const DefaultExecutor> exec,
GKO_KERNEL_REDUCE_SUM(IndexType), d_result.get_data(), agg.get_size(),
agg);

*num_unagg = exec->copy_val_to_host(d_result.get_const_data());
*num_unagg = get_element(d_result, 0);
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PGM_COUNT_UNAGG_KERNEL);
Expand Down Expand Up @@ -91,7 +92,7 @@ void renumber(std::shared_ptr<const DefaultExecutor> exec,
agg[tidx] = map[agg[tidx]];
},
num, agg_map.get_const_data(), agg.get_data());
*num_agg = exec->copy_val_to_host(agg_map.get_const_data() + num);
*num_agg = get_element(agg_map, num);
}

GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PGM_RENUMBER_KERNEL);
Expand Down Expand Up @@ -149,8 +150,7 @@ void count_unrepeated_nnz(std::shared_ptr<const DefaultExecutor> exec,
},
GKO_KERNEL_REDUCE_SUM(IndexType), d_result.get_data(), nnz - 1,
row_idxs, col_idxs);
*coarse_nnz = static_cast<size_type>(
exec->copy_val_to_host(d_result.get_const_data()) + 1);
*coarse_nnz = static_cast<size_type>(get_element(d_result, 0) + 1);
} else {
*coarse_nnz = nnz;
}
Expand Down
3 changes: 2 additions & 1 deletion core/base/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <ginkgo/core/base/math.hpp>


#include "core/base/array_access.hpp"
#include "core/components/fill_array_kernels.hpp"
#include "core/components/precision_conversion_kernels.hpp"
#include "core/components/reduce_array_kernels.hpp"
Expand Down Expand Up @@ -83,7 +84,7 @@ ValueType reduce_add(const array<ValueType>& input_arr,
auto value = array<ValueType>(exec, 1);
value.fill(ValueType{0});
exec->run(array_kernels::make_reduce_add_array(input_arr, value));
return init_value + exec->copy_val_to_host(value.get_data());
return init_value + get_element(value, 0);
}


Expand Down
66 changes: 66 additions & 0 deletions core/base/array_access.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_CORE_BASE_ARRAY_ACCESS_HPP_
#define GKO_CORE_BASE_ARRAY_ACCESS_HPP_


#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/exception_helpers.hpp>


namespace gko {


/**
 * Reads one entry of an array and returns it by value.
 *
 * The index is validated against the array size first; the entry is then
 * fetched through the array's executor via copy_val_to_host. Because of the
 * bounds check, the polymorphic calls and the possible device-to-host copy,
 * this helper is not meant for reading many elements in performance-critical
 * code.
 *
 * @param array  the array to read from.
 * @param index  the position of the requested entry.
 * @tparam ValueType  the value type of the array.
 * @return the value stored at position index.
 */
template <typename ValueType>
ValueType get_element(const array<ValueType>& array, size_type index)
{
    // TODO2.0 add bounds check for negative indices
    GKO_ENSURE_IN_BOUNDS(index, array.get_size());
    // Resolve the owning executor and the address of the requested entry,
    // then let the executor hand the value back.
    const auto exec = array.get_executor();
    const auto element_ptr = array.get_const_data() + index;
    return exec->copy_val_to_host(element_ptr);
}


/**
 * Overwrites one entry of an array with a new value.
 *
 * The value is converted to the array's value type, the index is validated
 * against the array size, and a single element is then copied from the
 * master executor into the array's storage. Because of the bounds check,
 * the polymorphic calls and the possible host-to-device copy, this helper is
 * not meant for writing many elements in performance-critical code.
 *
 * @param array  the array whose entry is overwritten.
 * @param index  the position of the entry to overwrite.
 * @param value  the new value; it is static_cast to ValueType before copying.
 * @tparam ValueType  the value type of the array.
 * @tparam ParameterType  the type of the value to be assigned.
 */
template <typename ValueType, typename ParameterType>
void set_element(array<ValueType>& array, size_type index, ParameterType value)
{
    // The conversion happens before the bounds check, as a local copy whose
    // address can be handed to copy_from.
    auto host_value = static_cast<ValueType>(value);
    // TODO2.0 add bounds check for negative indices
    GKO_ENSURE_IN_BOUNDS(index, array.get_size());
    auto exec = array.get_executor();
    auto destination = array.get_data() + index;
    // Transfer exactly one element from the master executor to the array.
    exec->copy_from(exec->get_master(), 1, &host_value, destination);
}


} // namespace gko


#endif // GKO_CORE_BASE_ARRAY_ACCESS_HPP_
11 changes: 6 additions & 5 deletions core/base/index_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <ginkgo/core/base/types.hpp>


#include "core/base/array_access.hpp"
#include "core/base/index_set_kernels.hpp"


Expand Down Expand Up @@ -60,7 +61,7 @@ IndexType index_set<IndexType>::get_global_index(const IndexType index) const
auto global_idx =
array<IndexType>(exec, this->map_local_to_global(local_idx, true));

return exec->copy_val_to_host(global_idx.get_data());
return get_element(global_idx, 0);
}


Expand All @@ -73,17 +74,17 @@ IndexType index_set<IndexType>::get_local_index(const IndexType index) const
auto local_idx =
array<IndexType>(exec, this->map_global_to_local(global_idx, true));

return exec->copy_val_to_host(local_idx.get_data());
return get_element(local_idx, 0);
}


template <typename IndexType>
array<IndexType> index_set<IndexType>::to_global_indices() const
{
auto exec = this->get_executor();
auto num_elems = exec->copy_val_to_host(
this->superset_cumulative_indices_.get_const_data() +
this->superset_cumulative_indices_.get_size() - 1);
auto num_elems =
get_element(this->superset_cumulative_indices_,
this->superset_cumulative_indices_.get_size() - 1);
auto decomp_indices = gko::array<IndexType>(exec, num_elems);
exec->run(idx_set::make_to_global_indices(
this->get_num_subsets(), this->get_subsets_begin(),
Expand Down
11 changes: 9 additions & 2 deletions core/distributed/partition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <ginkgo/core/distributed/partition.hpp>


#include "core/base/array_access.hpp"
#include "core/distributed/partition_kernels.hpp"


Expand Down Expand Up @@ -90,8 +91,14 @@ void Partition<LocalIndexType, GlobalIndexType>::finalize_construction()
offsets_.get_const_data(), part_ids_.get_const_data(), get_num_ranges(),
get_num_parts(), num_empty_parts_, starting_indices_.get_data(),
part_sizes_.get_data()));
size_ = offsets_.get_executor()->copy_val_to_host(
offsets_.get_const_data() + get_num_ranges());
size_ = get_element(offsets_, get_num_ranges());
}

/**
 * Returns the entry of part_sizes_ that belongs to the given part.
 *
 * The value is read as a single element through get_element.
 *
 * @param part  the index of the part whose size is requested.
 * @return the entry of part_sizes_ at position part.
 */
template <typename LocalIndexType, typename GlobalIndexType>
LocalIndexType Partition<LocalIndexType, GlobalIndexType>::get_part_size(
    comm_index_type part) const
{
    const auto& sizes = this->part_sizes_;
    return get_element(sizes, part);
}


Expand Down
5 changes: 3 additions & 2 deletions core/factorization/cholesky.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <ginkgo/core/base/types.hpp>


#include "core/base/array_access.hpp"
#include "core/components/fill_array_kernels.hpp"
#include "core/factorization/cholesky_kernels.hpp"
#include "core/factorization/elimination_forest.hpp"
Expand Down Expand Up @@ -96,8 +97,8 @@ std::unique_ptr<LinOp> Cholesky<ValueType, IndexType>::generate_impl(
exec->run(make_build_lookup_offsets(
factors->get_const_row_ptrs(), factors->get_const_col_idxs(), num_rows,
allowed_sparsity, storage_offsets.get_data()));
const auto storage_size = static_cast<size_type>(
exec->copy_val_to_host(storage_offsets.get_const_data() + num_rows));
const auto storage_size =
static_cast<size_type>(get_element(storage_offsets, num_rows));
array<int32> storage{exec, storage_size};
exec->run(make_build_lookup(
factors->get_const_row_ptrs(), factors->get_const_col_idxs(), num_rows,
Expand Down
13 changes: 7 additions & 6 deletions core/factorization/factorization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <ginkgo/core/matrix/csr.hpp>


#include "core/base/array_access.hpp"
#include "core/factorization/factorization_kernels.hpp"


Expand Down Expand Up @@ -49,10 +50,10 @@ Factorization<ValueType, IndexType>::unpack() const
const auto mtx = this->get_combined();
exec->run(make_initialize_row_ptrs_l_u(mtx.get(), l_row_ptrs.get_data(),
u_row_ptrs.get_data()));
const auto l_nnz = static_cast<size_type>(
exec->copy_val_to_host(l_row_ptrs.get_const_data() + size[0]));
const auto u_nnz = static_cast<size_type>(
exec->copy_val_to_host(u_row_ptrs.get_const_data() + size[0]));
const auto l_nnz =
static_cast<size_type>(get_element(l_row_ptrs, size[0]));
const auto u_nnz =
static_cast<size_type>(get_element(u_row_ptrs, size[0]));
// create matrices
auto l_mtx = matrix_type::create(
exec, size, array<value_type>{exec, l_nnz},
Expand All @@ -70,8 +71,8 @@ Factorization<ValueType, IndexType>::unpack() const
array<index_type> l_row_ptrs{exec, size[0] + 1};
const auto mtx = this->get_combined();
exec->run(make_initialize_row_ptrs_l(mtx.get(), l_row_ptrs.get_data()));
const auto l_nnz = static_cast<size_type>(
exec->copy_val_to_host(l_row_ptrs.get_const_data() + size[0]));
const auto l_nnz =
static_cast<size_type>(get_element(l_row_ptrs, size[0]));
// create matrices
auto l_mtx = matrix_type::create(
exec, size, array<value_type>{exec, l_nnz},
Expand Down
4 changes: 2 additions & 2 deletions core/factorization/ic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <ginkgo/core/base/exception_helpers.hpp>


#include "core/base/array_access.hpp"
#include "core/factorization/factorization_kernels.hpp"
#include "core/factorization/ic_kernels.hpp"

Expand Down Expand Up @@ -69,8 +70,7 @@ std::unique_ptr<Composition<ValueType>> Ic<ValueType, IndexType>::generate(
local_system_matrix.get(), l_row_ptrs.get_data()));

// Get nnz from device memory
auto l_nnz = static_cast<size_type>(
exec->copy_val_to_host(l_row_ptrs.get_data() + num_rows));
auto l_nnz = static_cast<size_type>(get_element(l_row_ptrs, num_rows));

// Init arrays
array<IndexType> l_col_idxs{exec, l_nnz};
Expand Down
Loading