19
19
#include " in_reg_array.cuh"
20
20
21
21
#include < cub/cub.cuh>
22
+ #include < cuda/functional>
22
23
23
24
#include < cstdint>
24
25
@@ -213,7 +214,7 @@ struct DispatchFSM : DeviceFSMPolicy {
213
214
214
215
// Kernel invocation
215
216
uint32_t grid_size = std::max (
216
- 1u , CUB_QUOTIENT_CEILING (num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD));
217
+ 1u , cuda::ceil_div< uint32_t > (num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD));
217
218
218
219
dfa_kernel<<<grid_size, PolicyT::BLOCK_THREADS, 0 , stream>>> (dfa,
219
220
d_chars_in,
@@ -349,8 +350,9 @@ struct DispatchFSM : DeviceFSMPolicy {
349
350
NUM_SYMBOLS_PER_BLOCK = BLOCK_THREADS * SYMBOLS_PER_THREAD
350
351
};
351
352
352
- BlockOffsetT num_blocks = std::max (1u , CUB_QUOTIENT_CEILING (num_chars, NUM_SYMBOLS_PER_BLOCK));
353
- size_t num_threads = num_blocks * BLOCK_THREADS;
353
+ BlockOffsetT num_blocks =
354
+ std::max<uint32_t >(1u , cuda::ceil_div<uint32_t >(num_chars, NUM_SYMBOLS_PER_BLOCK));
355
+ size_t num_threads = num_blocks * BLOCK_THREADS;
354
356
355
357
// ------------------------------------------------------------------------------
356
358
// TEMPORARY MEMORY REQUIREMENTS
@@ -416,7 +418,7 @@ struct DispatchFSM : DeviceFSMPolicy {
416
418
num_blocks, allocations[MEM_FST_OFFSET], allocation_sizes[MEM_FST_OFFSET]);
417
419
if (error != cudaSuccess) return error;
418
420
constexpr uint32_t FST_INIT_TPB = 256 ;
419
- uint32_t num_fst_init_blocks = CUB_QUOTIENT_CEILING (num_blocks, FST_INIT_TPB);
421
+ uint32_t num_fst_init_blocks = cuda::ceil_div (num_blocks, FST_INIT_TPB);
420
422
initialization_pass_kernel<<<num_fst_init_blocks, FST_INIT_TPB, 0 , stream>>> (
421
423
fst_offset_tile_state, num_blocks);
422
424
}
@@ -431,7 +433,7 @@ struct DispatchFSM : DeviceFSMPolicy {
431
433
num_blocks, allocations[MEM_SINGLE_PASS_STV], allocation_sizes[MEM_SINGLE_PASS_STV]);
432
434
if (error != cudaSuccess) return error;
433
435
constexpr uint32_t STV_INIT_TPB = 256 ;
434
- uint32_t num_stv_init_blocks = CUB_QUOTIENT_CEILING (num_blocks, STV_INIT_TPB);
436
+ uint32_t num_stv_init_blocks = cuda::ceil_div (num_blocks, STV_INIT_TPB);
435
437
initialization_pass_kernel<<<num_stv_init_blocks, STV_INIT_TPB, 0 , stream>>> (stv_tile_state,
436
438
num_blocks);
437
439
} else {
0 commit comments