Skip to content

Commit

Permalink
Revert "Drop pre CUDA 11 macro guards in occupancy calculation"
Browse files (browse the repository at this point in the history)
This reverts commit d34c751.
  • Loading branch information
dalg24 committed Jan 6, 2023
1 parent 1fd8589 commit d4bd012
Showing 1 changed file with 17 additions and 0 deletions.
17 changes: 17 additions & 0 deletions core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -51,7 +51,24 @@ inline int cuda_max_active_blocks_per_sm(cudaDeviceProp const& properties,
: max_blocks_regs);

// Limits due to blocks/SM
#if CUDA_VERSION >= 11000
int const max_blocks_per_sm = properties.maxBlocksPerMultiProcessor;
#else
int const max_blocks_per_sm = [&properties]() {
switch (properties.major) {
case 3: return 16;
case 5:
case 6: return 32;
case 7: {
int isTuring = properties.minor == 5;
return (isTuring) ? 16 : 32;
}
default:
throw_runtime_exception("Unknown device in cuda block size deduction");
return 0;
}
}();
#endif

// Overall occupancy in blocks
return std::min({max_blocks_regs, max_blocks_shmem, max_blocks_per_sm});
Expand Down

0 comments on commit d4bd012

Please sign in to comment.