Format tests and fix docs

ginkgo-project · Feb 14, 2024 · 612b423 · 612b423
1 parent e5775f2
commit 612b423
Show file tree

Hide file tree

Showing 8 changed files with 186 additions and 203 deletions.
diff --git a/core/preconditioner/batch_jacobi.cpp b/core/preconditioner/batch_jacobi.cpp
@@ -107,7 +107,8 @@ void Jacobi<ValueType, IndexType>::generate_precond(
 
     // Note: Row-major order offers advantage in terms of
     // performance in both preconditioner generation and application for both
-    // reference and cuda backend. Note: The pattern blocks in block_pattern are
+    // reference and cuda backend.
+    // Note: The pattern blocks in block_pattern are
     // also stored in a similar way.
 
     // array for storing the common pattern of the diagonal blocks

diff --git a/core/test/preconditioner/batch_jacobi.cpp b/core/test/preconditioner/batch_jacobi.cpp
@@ -38,9 +38,11 @@ class BatchJacobiFactory : public ::testing::Test {
     gko::array<index_type> block_pointers;
 };
 
+
 TEST_F(BatchJacobiFactory, KnowsItsExecutor)
 {
     auto batch_jacobi_factory = batch_jacobi_prec::build().on(this->exec);
+
     ASSERT_EQ(batch_jacobi_factory->get_executor(), this->exec);
 }
 

diff --git a/hip/preconditioner/batch_jacobi_kernels.hip.cpp b/hip/preconditioner/batch_jacobi_kernels.hip.cpp
@@ -5,16 +5,14 @@
 #include "core/preconditioner/batch_jacobi_kernels.hpp"
 
 
-#include <hip/base/batch_struct.hpp>
-#include <hip/matrix/batch_struct.hpp>
-
-
 #include <ginkgo/core/base/exception_helpers.hpp>
 
 
 #include "core/base/batch_struct.hpp"
 #include "core/base/utils.hpp"
 #include "core/matrix/batch_struct.hpp"
+#include "hip/base/batch_struct.hip.hpp"
+#include "hip/matrix/batch_struct.hip.hpp"
 
 
 namespace gko {

diff --git a/include/ginkgo/core/preconditioner/batch_jacobi.hpp b/include/ginkgo/core/preconditioner/batch_jacobi.hpp
@@ -43,8 +43,8 @@ struct batched_jacobi_blocks_storage_scheme {
     /**
      * Returns the offset of the batch with id "batch_id"
      *
-     * @param batch_id the index of the batch entry in the batch
-     * @param num_blocks  number of blocks in an individual matrix entry
+     * @param batch_id  the index of the batch entry in the batch
+     * @param num_blocks  number of blocks in an individual matrix item
      * @param block_storage_cumulative  the cumulative block storage array
      *
      * @return the offset of the group belonging to block with ID `block_id`
@@ -60,9 +60,9 @@ struct batched_jacobi_blocks_storage_scheme {
      * Returns the (local) offset of the block with id: "block_id" within its
      * batch entry
      *
-     * @param block_id the id of the block from the perspective of individual
-     * batch entry
-     * @param blocks_storage_cumulative the cumulative block storage array
+     * @param block_id  the id of the block from the perspective of individual
+     *                  batch item
+     * @param blocks_storage_cumulative  the cumulative block storage array
      *
      * @return the offset of the block with id: `block_id` within its batch
      * entry
@@ -79,9 +79,9 @@ struct batched_jacobi_blocks_storage_scheme {
      * with index = batch_id and has local id = "block_id" within its batch
      * entry
      *
-     * @param batch_id the index of the batch entry in the batch
-     * @param num_blocks number of blocks in an individual matrix entry
-     * @param block_id the id of the block from the perspective of individual
+     * @param batch_id  the index of the batch entry in the batch
+     * @param num_blocks  number of blocks in an individual matrix item
+     * @param block_id  the id of the block from the perspective of individual
      * batch entry
      * @param block_storage_cumulative  the cumulative block storage array
      *
@@ -102,9 +102,9 @@ struct batched_jacobi_blocks_storage_scheme {
     /**
      * Returns the stride between the rows of the block.
      *
-     * @param block_idx the id of the block from the perspective of individual
+     * @param block_idx  the id of the block from the perspective of individual
      * batch entry
-     * @param block_ptrs the block pointers array
+     * @param block_ptrs  the block pointers array
      *
      * @return stride between rows of the block
      */
@@ -121,9 +121,20 @@ struct batched_jacobi_blocks_storage_scheme {
  * by inverting the diagonal blocks (stored in a dense row major fashion) of the
  * source operator.
  *
- * Note: Batched Preconditioners do not support user facing apply.
+ * With the batched preconditioners, it is required that all items in the batch
+ * have the same sparsity pattern. The detection of the blocks and the block
+ * pointers require that the sparsity pattern of all the items be the same.
+ * Other cases are undefined behaviour. The input batch matrix must be in
+ * batch::Csr matrix format or must be convertible to batch::Csr matrix format.
+ * The block detection algorithm and the conversion to dense blocks kernels
+ * require this assumption.
  *
- * @tparam ValueType  precision of matrix elements
+ * @note In a fashion similar to the non-batched Jacobi preconditioner, the
+ * maximum possible size of the diagonal blocks is equal to the maximum warp
+ * size on the device (32 for NVIDIA GPUs, 64 for AMD GPUs).
+ *
+ * @tparam ValueType  value precision of matrix elements
+ * @tparam IndexType  index precision of matrix elements
  *
  * @ingroup jacobi
  * @ingroup precond
@@ -244,13 +255,13 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
     }
 
     /**
-     * Returns the number of elements explicitly stored in the matrix.
+     * Returns the number of elements explicitly stored in the dense blocks.
      *
      * @note Returns 0 in case of scalar jacobi preconditioner as the
      * preconditioner is generated inside the batched solver kernels, hence,
      * blocks array storage is not required.
      *
-     * @return the number of elements explicitly stored in the matrix.
+     * @return the number of elements explicitly stored in the dense blocks.
      */
     size_type get_num_stored_elements() const noexcept
     {
@@ -269,8 +280,12 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
          * efficiency, when the max_block_size is set to 1, specialized kernels
          * are used and the additional objects (block_ptrs etc) are set to null
          * values.
+         *
+         * @note Unlike the regular block Jacobi preconditioner, for the batched
+         * preconditioner, smaller blocks are more efficient, as the matrices
+         * themselves are considerably smaller.
          */
-        uint32 GKO_FACTORY_PARAMETER_SCALAR(max_block_size, 32u);
+        uint32 GKO_FACTORY_PARAMETER_SCALAR(max_block_size, 8u);
 
         /**
          * Starting (row / column) indexes of individual blocks.
@@ -304,11 +319,6 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
     GKO_ENABLE_BUILD_METHOD(Factory);
 
 protected:
-    /**
-     * Creates an empty Jacobi preconditioner.
-     *
-     * @param exec  the executor this object is assigned to
-     */
     explicit Jacobi(std::shared_ptr<const Executor> exec)
         : EnableBatchLinOp<Jacobi>(exec),
           num_blocks_{},
@@ -321,13 +331,6 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
         parameters_.block_pointers.set_executor(this->get_executor());
     }
 
-    /**
-     * Creates a Jacobi preconditioner from a matrix using a Jacobi::Factory.
-     *
-     * @param factory  the factory to use to create the preconditioner
-     * @param system_matrix  the matrix this preconditioner should be created
-     *                       from
-     */
     explicit Jacobi(const Factory* factory,
                     std::shared_ptr<const BatchLinOp> system_matrix)
         : EnableBatchLinOp<Jacobi>(factory->get_executor(),
@@ -347,19 +350,10 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
         this->generate_precond(system_matrix.get());
     }
 
-    /**
-     * Generates the preconditioner.
-     *
-     */
     void generate_precond(const BatchLinOp* const system_matrix);
 
 private:
     /**
-     * Computes the storage space required for the requested number of blocks.
-     *
-     * @return the total memory (as the number of elements) that need to be
-     *         allocated for the scheme
-     *
      * @note  To simplify using the method in situations where the number of
      *        blocks is not known, for a special input `size_type{} - 1`
      *        the method returns `0` to avoid overallocation of memory.
@@ -374,9 +368,6 @@ class Jacobi : public EnableBatchLinOp<Jacobi<ValueType, IndexType>> {
                              num_blocks_));
     }
 
-    /**
-     * Detects the diagonal blocks
-     */
     void detect_blocks(
         const size_type num_batch,
         const gko::matrix::Csr<ValueType, IndexType>* system_matrix);

diff --git a/reference/preconditioner/batch_block_jacobi.hpp b/reference/preconditioner/batch_block_jacobi.hpp
@@ -19,7 +19,7 @@ namespace host {
 
 
 /**
- * BlockBlockJacobi preconditioner for batch solvers.
+ * BatchBlockJacobi preconditioner for batch solvers.
  */
 template <typename ValueType, typename IndexType>
 class BatchBlockJacobi final {
@@ -57,7 +57,6 @@ class BatchBlockJacobi final {
           blocks_arr_batch_{blocks_arr_batch},
           block_ptrs_arr_{block_ptrs_arr},
           blocks_arr_entry_{}
-
     {}
 
     /**

diff --git a/reference/preconditioner/batch_jacobi_kernels.hpp.inc b/reference/preconditioner/batch_jacobi_kernels.hpp.inc
@@ -49,6 +49,7 @@ inline void swap_numbers(T& a, T& b)
     b = tmp;
 }
 
+
 template <typename ValueType>
 inline void swap_rows(const int size, std::vector<ValueType>& mat,
                       const int row1, const int row2)
@@ -58,6 +59,7 @@ inline void swap_rows(const int size, std::vector<ValueType>& mat,
     }
 }
 
+
 template <typename ValueType>
 inline int choose_pivot(const int k, const int block_size,
                         const std::vector<ValueType>& temp_dense_block)
@@ -75,43 +77,44 @@ inline int choose_pivot(const int k, const int block_size,
     return piv;
 }
 
+
 template <typename ValueType>
 inline void invert_dense_block(const int block_size,
                                std::vector<ValueType>& temp_dense_block,
                                std::vector<int>& col_perm)
 {
-    // the block is stored in the row-major order, with row stride = block_size
-
     // Gauss-Jordan elimination with pivoting
-
+    // the block is stored in the row-major order, with row stride = block_size
     for (int k = 0; k < block_size; k++) {
         const int piv = choose_pivot(k, block_size, temp_dense_block);
         if (k != piv) {
             swap_rows(block_size, temp_dense_block, k, piv);
             swap_numbers(col_perm[piv], col_perm[k]);
         }
-
         const ValueType d = temp_dense_block[k * block_size + k];
 
         // SCAL (scaling the kth col)
+        // uncoalesced accesses
         for (int r = 0; r < block_size; r++) {
             temp_dense_block[r * block_size + k] /= -d;
-        }  // uncoalesced accesses
+        }
         temp_dense_block[k * block_size + k] = zero<ValueType>();
 
         // GER (Note: the kth row and kth col do not change)
+        // data-locality
         for (int r = 0; r < block_size; r++) {
             for (int c = 0; c < block_size; c++) {
                 temp_dense_block[r * block_size + c] +=
                     temp_dense_block[r * block_size + k] *
                     temp_dense_block[k * block_size + c];
             }
-        }  // data-locality
+        }
 
         // SCAL (scaling the kth row)
+        // data-locality
         for (int c = 0; c < block_size; c++) {
             temp_dense_block[k * block_size + c] /= d;
-        }  // data-locality
+        }
         temp_dense_block[k * block_size + k] = one<ValueType>() / d;
     }
 }
@@ -131,15 +134,14 @@ inline void compute_block_jacobi_impl(
     const auto offset_indiv =
         storage_scheme.get_block_offset(block_idx, cumulative_block_storage);
     const auto stride = storage_scheme.get_stride(block_idx, block_pointers);
-    ValueType* dense_block_ptr =
-        blocks + offset_batch +
-        offset_indiv;  // the blocks are in row-major order
+    // the blocks are in row-major order
+    ValueType* dense_block_ptr = blocks + offset_batch + offset_indiv;
 
     const auto bsize =
         block_pointers[block_idx + 1] - block_pointers[block_idx];
 
-    std::vector<ValueType> temp_dense_block(bsize *
-                                            bsize);  // also in row-major order
+    // also in row-major order
+    std::vector<ValueType> temp_dense_block(bsize * bsize);
     std::vector<int> col_perm(bsize);
     for (int i = 0; i < bsize; i++) {
         col_perm[i] = i;
@@ -148,16 +150,18 @@ inline void compute_block_jacobi_impl(
     for (int row = 0; row < bsize; row++) {
         for (int col = 0; col < bsize; col++) {
             auto to_write = zero<ValueType>();
+            // data locality
             const auto idx =
                 blocks_pattern[offset_indiv +
                                row * storage_scheme.get_stride(block_idx,
                                                                block_pointers) +
-                               col];  // data locality
+                               col];
             if (idx >= 0) {
                 assert(idx < A_entry.get_single_item_num_nnz());
                 to_write = A_entry.values[idx];
             }
-            temp_dense_block[row * bsize + col] = to_write;  // data locality
+            // data locality
+            temp_dense_block[row * bsize + col] = to_write;
         }
     }
 
@@ -190,10 +194,10 @@ void batch_jacobi_apply_helper(
     const auto rub = gko::kernels::host::get_batch_struct(r);
     const auto zub = gko::kernels::host::get_batch_struct(z);
 
-    using scalar_jacobi_prec =
-        gko::kernels::host::BatchScalarJacobi<ValueType, int>;
-    using block_jacobi_prec =
-        gko::kernels::host::BatchBlockJacobi<ValueType, int>;
+    using scalar_jacobi_prec = gko::kernels::host::BatchScalarJacobi<
+        ValueType, typename BatchMatrixType::index_type>;
+    using block_jacobi_prec = gko::kernels::host::BatchBlockJacobi<
+        ValueType, typename BatchMatrixType::index_type>;
 
     if (max_block_size == 1) {
         scalar_jacobi_prec prec;