forked from facebookresearch/xformers
Add specialized kernel for cutlassB / K<96
ghstack-source-id: fe298b6c7f897aff850298e301a07289e019e7b1
Pull Request resolved: https://github.com/fairinternal/xformers/pull/455
__original_commit__ = fairinternal/xformers@81e708e30a9b4dc44c1b1c19d2cfde5989f52221
danthe3rd authored and xFormers Bot committed on Feb 2, 2023
1 parent 615175f · commit 82d5881
Showing 6 changed files with 140 additions and 65 deletions.
xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k96.cu (24 additions, 0 deletions)
@@ -0,0 +1,24 @@
#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
// This file is auto-generated. See "generate_kernels.py"
#include "../kernel_backward.h"

__global__ void __launch_bounds__(
    AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 128, 64, 96>::kNumThreads,
    AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 128, 64, 96>::kMinBlocksPerSm)
fmha_cutlassB_bf16_aligned_128x64_k96_sm80(typename AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 128, 64, 96>::Params p) {
#ifdef __CUDA_ARCH__
#if __CUDA_ARCH__ >= 800
#if __CUDA_ARCH__ < 900
  if (!p.advance_to_block()) {
    return;
  }
  AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::bfloat16_t, true, false, true, 128, 64, 96>::attention_kernel(p);
  return;
#endif
#endif
  printf(
      "FATAL: kernel `fmha_cutlassB_bf16_aligned_128x64_k96_sm80` is for sm80-sm90, but was built for sm%d\n",
      int(__CUDA_ARCH__ + 0) / 10);
#endif
}
#endif // XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k96.cu (24 additions, 0 deletions)
@@ -0,0 +1,24 @@
#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
// This file is auto-generated. See "generate_kernels.py"
#include "../kernel_backward.h"

__global__ void __launch_bounds__(
    AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 128, 64, 96>::kNumThreads,
    AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 128, 64, 96>::kMinBlocksPerSm)
fmha_cutlassB_f16_aligned_128x64_k96_sm80(typename AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 128, 64, 96>::Params p) {
#ifdef __CUDA_ARCH__
#if __CUDA_ARCH__ >= 800
#if __CUDA_ARCH__ < 900
  if (!p.advance_to_block()) {
    return;
  }
  AttentionBackwardKernel<cutlass::arch::Sm80, cutlass::half_t, true, false, true, 128, 64, 96>::attention_kernel(p);
  return;
#endif
#endif
  printf(
      "FATAL: kernel `fmha_cutlassB_f16_aligned_128x64_k96_sm80` is for sm80-sm90, but was built for sm%d\n",
      int(__CUDA_ARCH__ + 0) / 10);
#endif
}
#endif // XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
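For orientation, a host-side launch of the f16 kernel above could look roughly like the sketch below. The grid size and dynamic shared-memory amount are placeholders, launch_cutlassB_f16_k96 is a hypothetical helper, and the include path is simply the one the generated files use; the actual dispatch logic is generated elsewhere and is not part of this diff.

// Hypothetical host-side launch sketch; NOT the xFormers dispatcher.
#include <cuda_runtime.h>
#include "../kernel_backward.h"  // assumed include path, as in the generated files above

using KernelF16 = AttentionBackwardKernel<
    cutlass::arch::Sm80, cutlass::half_t, true, false, true, 128, 64, 96>;

// Declaration matching the definition in cutlassB_f16_aligned_k96.cu.
__global__ void fmha_cutlassB_f16_aligned_128x64_k96_sm80(KernelF16::Params p);

void launch_cutlassB_f16_k96(KernelF16::Params p, cudaStream_t stream) {
  dim3 grid(1, 1, 1);                  // placeholder; a real dispatcher derives this from p
  dim3 block(KernelF16::kNumThreads);  // threads per block from the kernel traits
  size_t smem = 0;                     // placeholder dynamic shared-memory size
  fmha_cutlassB_f16_aligned_128x64_k96_sm80<<<grid, block, smem, stream>>>(p);
}

A real dispatcher would typically also raise the kernel's dynamic shared-memory limit (e.g. via cudaFuncSetAttribute) when the kernel needs more than the 48 KB default; that step is omitted here because the required size is not visible in this diff.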