Skip to content

Commit

Permalink
Format tests and fix docs
Browse files Browse the repository at this point in the history
  • Loading branch information
pratikvn committed Feb 14, 2024
1 parent e5775f2 commit 612b423
Show file tree
Hide file tree
Showing 8 changed files with 186 additions and 203 deletions.
3 changes: 2 additions & 1 deletion core/preconditioner/batch_jacobi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ void Jacobi<ValueType, IndexType>::generate_precond(

// Note: Row-major order offers advantage in terms of
// performance in both preconditioner generation and application for both
// reference and cuda backend. Note: The pattern blocks in block_pattern are
// reference and cuda backend.
// Note: The pattern blocks in block_pattern are
// also stored in a similar way.

// array for storing the common pattern of the diagonal blocks
Expand Down
2 changes: 2 additions & 0 deletions core/test/preconditioner/batch_jacobi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@ class BatchJacobiFactory : public ::testing::Test {
gko::array<index_type> block_pointers;
};


// A factory built on an executor must report that same executor.
TEST_F(BatchJacobiFactory, KnowsItsExecutor)
{
    const auto factory = batch_jacobi_prec::build().on(this->exec);
    const auto factory_exec = factory->get_executor();

    ASSERT_EQ(factory_exec, this->exec);
}

Expand Down
6 changes: 2 additions & 4 deletions hip/preconditioner/batch_jacobi_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
#include "core/preconditioner/batch_jacobi_kernels.hpp"


#include <hip/base/batch_struct.hpp>
#include <hip/matrix/batch_struct.hpp>


#include <ginkgo/core/base/exception_helpers.hpp>


#include "core/base/batch_struct.hpp"
#include "core/base/utils.hpp"
#include "core/matrix/batch_struct.hpp"
#include "hip/base/batch_struct.hip.hpp"
#include "hip/matrix/batch_struct.hip.hpp"


namespace gko {
Expand Down
69 changes: 30 additions & 39 deletions include/ginkgo/core/preconditioner/batch_jacobi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ struct batched_jacobi_blocks_storage_scheme {
/**
* Returns the offset of the batch with id "batch_id"
*
* @param batch_id the index of the batch entry in the batch
* @param num_blocks number of blocks in an individual matrix entry
* @param batch_id the index of the batch entry in the batch
* @param num_blocks number of blocks in an individual matrix item
* @param block_storage_cumulative the cumulative block storage array
*
* @return the offset of the batch item with ID `batch_id`
Expand All @@ -60,9 +60,9 @@ struct batched_jacobi_blocks_storage_scheme {
* Returns the (local) offset of the block with id: "block_id" within its
* batch entry
*
* @param block_id the id of the block from the perspective of individual
* batch entry
* @param blocks_storage_cumulative the cumulative block storage array
* @param block_id the id of the block from the perspective of individual
* batch item
* @param blocks_storage_cumulative the cumulative block storage array
*
* @return the offset of the block with id: `block_id` within its batch
* entry
Expand All @@ -79,9 +79,9 @@ struct batched_jacobi_blocks_storage_scheme {
* with index = batch_id and has local id = "block_id" within its batch
* entry
*
* @param batch_id the index of the batch entry in the batch
* @param num_blocks number of blocks in an individual matrix entry
* @param block_id the id of the block from the perspective of individual
* @param batch_id the index of the batch entry in the batch
* @param num_blocks number of blocks in an individual matrix entry
* @param block_id the id of the block from the perspective of individual
* batch entry
* @param block_storage_cumulative the cumulative block storage array
*
Expand All @@ -102,9 +102,9 @@ struct batched_jacobi_blocks_storage_scheme {
/**
* Returns the stride between the rows of the block.
*
* @param block_idx the id of the block from the perspective of individual
* @param block_idx the id of the block from the perspective of individual
* batch entry
* @param block_ptrs the block pointers array
* @param block_ptrs the block pointers array
*
* @return stride between rows of the block
*/
Expand All @@ -121,9 +121,20 @@ struct batched_jacobi_blocks_storage_scheme {
* by inverting the diagonal blocks (stored in a dense row major fashion) of the
* source operator.
*
* Note: Batched Preconditioners do not support user facing apply.
* With the batched preconditioners, it is required that all items in the batch
* have the same sparsity pattern. The detection of the blocks and the block
* pointers require that the sparsity pattern of all the items be the same.
* Any other case is undefined behaviour. The input batch matrix must be in
* batch::Csr matrix format or must be convertible to batch::Csr matrix format.
* The block detection algorithm and the conversion to dense blocks kernels
* require this assumption.
*
* @tparam ValueType precision of matrix elements
* @note In a fashion similar to the non-batched Jacobi preconditioner, the
* maximum possible size of the diagonal blocks is equal to the maximum warp
* size on the device (32 for NVIDIA GPUs, 64 for AMD GPUs).
*
* @tparam ValueType value precision of matrix elements
* @tparam IndexType index precision of matrix elements
*
* @ingroup jacobi
* @ingroup precond
Expand Down Expand Up @@ -244,13 +255,13 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
}

/**
* Returns the number of elements explicitly stored in the matrix.
* Returns the number of elements explicitly stored in the dense blocks.
*
* @note Returns 0 in case of scalar jacobi preconditioner as the
* preconditioner is generated inside the batched solver kernels, hence,
* blocks array storage is not required.
*
* @return the number of elements explicitly stored in the matrix.
* @return the number of elements explicitly stored in the dense blocks.
*/
size_type get_num_stored_elements() const noexcept
{
Expand All @@ -269,8 +280,12 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
* efficiency, when the max_block_size is set to 1, specialized kernels
* are used and the additional objects (block_ptrs etc) are set to null
* values.
*
* @note Unlike the regular block Jacobi preconditioner, for the batched
* preconditioner, smaller blocks are more efficient, as the matrices
* themselves are considerably smaller.
*/
uint32 GKO_FACTORY_PARAMETER_SCALAR(max_block_size, 32u);
uint32 GKO_FACTORY_PARAMETER_SCALAR(max_block_size, 8u);

/**
* Starting (row / column) indexes of individual blocks.
Expand Down Expand Up @@ -304,11 +319,6 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
GKO_ENABLE_BUILD_METHOD(Factory);

protected:
/**
* Creates an empty Jacobi preconditioner.
*
* @param exec the executor this object is assigned to
*/
explicit Jacobi(std::shared_ptr<const Executor> exec)
: EnableBatchLinOp<Jacobi>(exec),
num_blocks_{},
Expand All @@ -321,13 +331,6 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
parameters_.block_pointers.set_executor(this->get_executor());
}

/**
* Creates a Jacobi preconditioner from a matrix using a Jacobi::Factory.
*
* @param factory the factory to use to create the preconditioner
* @param system_matrix the matrix this preconditioner should be created
* from
*/
explicit Jacobi(const Factory* factory,
std::shared_ptr<const BatchLinOp> system_matrix)
: EnableBatchLinOp<Jacobi>(factory->get_executor(),
Expand All @@ -347,19 +350,10 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
this->generate_precond(system_matrix.get());
}

/**
* Generates the preconditioner.
*
*/
void generate_precond(const BatchLinOp* const system_matrix);

private:
/**
* Computes the storage space required for the requested number of blocks.
*
* @return the total memory (as the number of elements) that need to be
* allocated for the scheme
*
* @note To simplify using the method in situations where the number of
* blocks is not known, for a special input `size_type{} - 1`
* the method returns `0` to avoid overallocation of memory.
Expand All @@ -374,9 +368,6 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
num_blocks_));
}

/**
* Detects the diagonal blocks
*/
void detect_blocks(
const size_type num_batch,
const gko::matrix::Csr<ValueType, IndexType>* system_matrix);
Expand Down
3 changes: 1 addition & 2 deletions reference/preconditioner/batch_block_jacobi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace host {


/**
* BlockBlockJacobi preconditioner for batch solvers.
* BatchBlockJacobi preconditioner for batch solvers.
*/
template <typename ValueType, typename IndexType>
class BatchBlockJacobi final {
Expand Down Expand Up @@ -57,7 +57,6 @@ class BatchBlockJacobi final {
blocks_arr_batch_{blocks_arr_batch},
block_ptrs_arr_{block_ptrs_arr},
blocks_arr_entry_{}

{}

/**
Expand Down
40 changes: 22 additions & 18 deletions reference/preconditioner/batch_jacobi_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ inline void swap_numbers(T& a, T& b)
b = tmp;
}


template <typename ValueType>
inline void swap_rows(const int size, std::vector<ValueType>& mat,
const int row1, const int row2)
Expand All @@ -58,6 +59,7 @@ inline void swap_rows(const int size, std::vector<ValueType>& mat,
}
}


template <typename ValueType>
inline int choose_pivot(const int k, const int block_size,
const std::vector<ValueType>& temp_dense_block)
Expand All @@ -75,43 +77,44 @@ inline int choose_pivot(const int k, const int block_size,
return piv;
}


template <typename ValueType>
inline void invert_dense_block(const int block_size,
std::vector<ValueType>& temp_dense_block,
std::vector<int>& col_perm)
{
// the block is stored in the row-major order, with row stride = block_size

// Gauss-Jordan elimination with pivoting

// the block is stored in the row-major order, with row stride = block_size
for (int k = 0; k < block_size; k++) {
const int piv = choose_pivot(k, block_size, temp_dense_block);
if (k != piv) {
swap_rows(block_size, temp_dense_block, k, piv);
swap_numbers(col_perm[piv], col_perm[k]);
}

const ValueType d = temp_dense_block[k * block_size + k];

// SCAL (scaling the kth col)
// uncoalesced accesses
for (int r = 0; r < block_size; r++) {
temp_dense_block[r * block_size + k] /= -d;
} // uncoalesced accesses
}
temp_dense_block[k * block_size + k] = zero<ValueType>();

// GER (Note: the kth row and kth col do not change)
// data-locality
for (int r = 0; r < block_size; r++) {
for (int c = 0; c < block_size; c++) {
temp_dense_block[r * block_size + c] +=
temp_dense_block[r * block_size + k] *
temp_dense_block[k * block_size + c];
}
} // data-locality
}

// SCAL (scaling the kth row)
// data-locality
for (int c = 0; c < block_size; c++) {
temp_dense_block[k * block_size + c] /= d;
} // data-locality
}
temp_dense_block[k * block_size + k] = one<ValueType>() / d;
}
}
Expand All @@ -131,15 +134,14 @@ inline void compute_block_jacobi_impl(
const auto offset_indiv =
storage_scheme.get_block_offset(block_idx, cumulative_block_storage);
const auto stride = storage_scheme.get_stride(block_idx, block_pointers);
ValueType* dense_block_ptr =
blocks + offset_batch +
offset_indiv; // the blocks are in row-major order
// the blocks are in row-major order
ValueType* dense_block_ptr = blocks + offset_batch + offset_indiv;

const auto bsize =
block_pointers[block_idx + 1] - block_pointers[block_idx];

std::vector<ValueType> temp_dense_block(bsize *
bsize); // also in row-major order
// also in row-major order
std::vector<ValueType> temp_dense_block(bsize * bsize);
std::vector<int> col_perm(bsize);
for (int i = 0; i < bsize; i++) {
col_perm[i] = i;
Expand All @@ -148,16 +150,18 @@ inline void compute_block_jacobi_impl(
for (int row = 0; row < bsize; row++) {
for (int col = 0; col < bsize; col++) {
auto to_write = zero<ValueType>();
// data locality
const auto idx =
blocks_pattern[offset_indiv +
row * storage_scheme.get_stride(block_idx,
block_pointers) +
col]; // data locality
col];
if (idx >= 0) {
assert(idx < A_entry.get_single_item_num_nnz());
to_write = A_entry.values[idx];
}
temp_dense_block[row * bsize + col] = to_write; // data locality
// data locality
temp_dense_block[row * bsize + col] = to_write;
}
}

Expand Down Expand Up @@ -190,10 +194,10 @@ void batch_jacobi_apply_helper(
const auto rub = gko::kernels::host::get_batch_struct(r);
const auto zub = gko::kernels::host::get_batch_struct(z);

using scalar_jacobi_prec =
gko::kernels::host::BatchScalarJacobi<ValueType, int>;
using block_jacobi_prec =
gko::kernels::host::BatchBlockJacobi<ValueType, int>;
using scalar_jacobi_prec = gko::kernels::host::BatchScalarJacobi<
ValueType, typename BatchMatrixType::index_type>;
using block_jacobi_prec = gko::kernels::host::BatchBlockJacobi<
ValueType, typename BatchMatrixType::index_type>;

if (max_block_size == 1) {
scalar_jacobi_prec prec;
Expand Down
Loading

0 comments on commit 612b423

Please sign in to comment.