diff --git a/.github/workflows/linters_reusable.yml b/.github/workflows/linters_reusable.yml
index a1209a3a40..6a2cf4803b 100644
--- a/.github/workflows/linters_reusable.yml
+++ b/.github/workflows/linters_reusable.yml
@@ -46,4 +46,4 @@ jobs:
         clang-format --version
         # apply to our files - excluding autogenerated files
-        ./.circleci/run-clang-format.py -e "*fmha/kernels" -r xformers/csrc
+        ./.circleci/run-clang-format.py -e "*fmha/autogen" -r xformers/csrc
diff --git a/xformers/csrc/attention/cuda/fmha/attention_backward_generic.cu b/xformers/csrc/attention/cuda/fmha/attention_backward_generic.cu
index 7ef097113b..9b7ff47ffe 100644
--- a/xformers/csrc/attention/cuda/fmha/attention_backward_generic.cu
+++ b/xformers/csrc/attention/cuda/fmha/attention_backward_generic.cu
@@ -10,9 +10,9 @@
 #include 
 
 #include "ATen/ops/empty_like.h"
+#include "autogen/cutlassB.h"
 #include "gemm_kernel_utils.h"
 #include "kernel_backward.h"
-#include "kernels/cutlassB.h"
 #include "pytorch_utils.h"
 
 namespace {
diff --git a/xformers/csrc/attention/cuda/fmha/attention_forward_generic.cu b/xformers/csrc/attention/cuda/fmha/attention_forward_generic.cu
index d5fb4927e3..e61c35fac2 100644
--- a/xformers/csrc/attention/cuda/fmha/attention_forward_generic.cu
+++ b/xformers/csrc/attention/cuda/fmha/attention_forward_generic.cu
@@ -12,8 +12,8 @@
 #include 
 #include 
 
+#include "autogen/cutlassF.h"
 #include "kernel_forward.h"
-#include "kernels/cutlassF.h"
 #include "pytorch_utils.h"
 
 namespace {
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB.h b/xformers/csrc/attention/cuda/fmha/autogen/cutlassB.h
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB.h
rename to xformers/csrc/attention/cuda/fmha/autogen/cutlassB.h
index 06159a973e..d1d84fa4e5 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB.h
+++ b/xformers/csrc/attention/cuda/fmha/autogen/cutlassB.h
@@ -1,8 +1,7 @@
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
 #pragma once
 #ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../kernel_backward.h"
 // ======== f16 / sm50 ========
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF.h b/xformers/csrc/attention/cuda/fmha/autogen/cutlassF.h
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassF.h
rename to xformers/csrc/attention/cuda/fmha/autogen/cutlassF.h
index 63c87d06e1..7d9eea298a 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF.h
+++ b/xformers/csrc/attention/cuda/fmha/autogen/cutlassF.h
@@ -1,8 +1,7 @@
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_forward.h"
-
 #pragma once
 #ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
+#include "../kernel_forward.h"
 // ======== bf16 / sm80 ========
 __global__ void __launch_bounds__(
     AttentionKernel::kNumThreads,
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k128.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k128.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k128.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k128.cu
index ab9b7e3de3..b6e3a248c6 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k128.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k128.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k128_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k128_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k128_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k128_dropout.cu
index 6e68ee3e3f..92d14d2668 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k128_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k128_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k32.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k32.cu
similarity index 96%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k32.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k32.cu
index 2bffac489d..224c085cd8 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k32.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k32.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k32_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k32_dropout.cu
similarity index 96%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k32_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k32_dropout.cu
index b6687e0714..36fbbf4951 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k32_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k32_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k64.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k64.cu
similarity index 96%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k64.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k64.cu
index 4b38ad4ebf..7558851608 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k64.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k64.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k64_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k64_dropout.cu
similarity index 96%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k64_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k64_dropout.cu
index f3114dd6b9..8b5f5a4cf8 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k64_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k64_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k65536.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k65536.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k65536.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k65536.cu
index 2d3fca1693..eabcae5405 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k65536.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k65536.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k65536_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k65536_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k65536_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k65536_dropout.cu
index 72f21f5c20..829bb827d8 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k65536_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k65536_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k96.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k96.cu
similarity index 96%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k96.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k96.cu
index 48376625bd..6c33c93f9a 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_bf16_aligned_k96.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_bf16_aligned_k96.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k128.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k128.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k128.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k128.cu
index e7928afb4e..3294c12622 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k128.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k128.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k128_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k128_dropout.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k128_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k128_dropout.cu
index 79c0bef310..30eb954a68 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k128_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k128_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k32.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k32.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k32.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k32.cu
index 33993f3099..6e16baafd5 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k32.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k32.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k32_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k32_dropout.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k32_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k32_dropout.cu
index 28bee7d67e..124ab1d145 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k32_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k32_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k64.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k64.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k64.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k64.cu
index b981ea6cf8..bca1bb0152 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k64.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k64.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k64_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k64_dropout.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k64_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k64_dropout.cu
index ef2c7f4868..3822630a0d 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k64_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k64_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k65536.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k65536.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k65536.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k65536.cu
index 8ff8b416ab..67e83d7ac6 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k65536.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k65536.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k65536_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k65536_dropout.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k65536_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k65536_dropout.cu
index f47f8a827d..5a4c82a05f 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k65536_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k65536_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k96.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k96.cu
similarity index 96%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k96.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k96.cu
index 385a57825d..80cc604fcb 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_aligned_k96.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_aligned_k96.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k128.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k128.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k128.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k128.cu
index e2329af74c..5ef7bafa5a 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k128.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k128.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k128_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k128_dropout.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k128_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k128_dropout.cu
index 3731f7a029..3c77af7e8e 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k128_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k128_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k32.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k32.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k32.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k32.cu
index ccdd2956d3..f7485b306c 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k32.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k32.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k32_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k32_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k32_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k32_dropout.cu
index c7e2eb4c90..8fa5f1c5c5 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k32_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k32_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k64.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k64.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k64.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k64.cu
index 9f6012deae..14950d066b 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k64.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k64.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k64_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k64_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k64_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k64_dropout.cu
index df59acafa7..b3f36a6550 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k64_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k64_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k65536.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k65536.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k65536.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k65536.cu
index 627b7e7488..e598676406 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k65536.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k65536.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k65536_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k65536_dropout.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k65536_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k65536_dropout.cu
index c80117e654..3cf074dcca 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f16_notaligned_k65536_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f16_notaligned_k65536_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k128.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k128.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k128.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k128.cu
index 493353dc37..165302fb4f 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k128.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k128.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k128_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k128_dropout.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k128_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k128_dropout.cu
index c112440155..004c4e6022 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k128_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k128_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k32.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k32.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k32.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k32.cu
index 945e893ace..93f4c3e8c9 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k32.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k32.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k32_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k32_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k32_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k32_dropout.cu
index 0f8ad347e8..99841bedf1 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k32_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k32_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k64.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k64.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k64.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k64.cu
index 9960471007..0450de921e 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k64.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k64.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k64_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k64_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k64_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k64_dropout.cu
index 246079dda2..d1060f58dd 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k64_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k64_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k65536.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k65536.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k65536.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k65536.cu
index 45bc862139..54c5edffdc 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k65536.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k65536.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k65536_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k65536_dropout.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k65536_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k65536_dropout.cu
index fa9db51269..2c5188d6b9 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_aligned_k65536_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_aligned_k65536_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k128.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k128.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k128.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k128.cu
index a14970b709..add111c092 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k128.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k128.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k128_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k128_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k128_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k128_dropout.cu
index 1cdfba5c8c..16fa2465e9 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k128_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k128_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k32.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k32.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k32.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k32.cu
index ffcb779fe3..a9b4bb7892 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k32.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k32.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k32_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k32_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k32_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k32_dropout.cu
index 34e685d590..3c0b9c08d2 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k32_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k32_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k64.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k64.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k64.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k64.cu
index c5f0dbb388..de2d1a1e2f 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k64.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k64.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k64_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k64_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k64_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k64_dropout.cu
index ae8ca1cf55..65a669dd81 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k64_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k64_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k65536.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k65536.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k65536.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k65536.cu
index 891b4cc471..f39508b734 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k65536.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k65536.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k65536_dropout.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k65536_dropout.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k65536_dropout.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k65536_dropout.cu
index 9d45186d02..e99d4bcf36 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassB_f32_notaligned_k65536_dropout.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassB_f32_notaligned_k65536_dropout.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_backward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD
+#include "../../kernel_backward.h"
 __global__ void __launch_bounds__(
     AttentionBackwardKernel::kNumThreads,
     AttentionBackwardKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_bf16_aligned.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_bf16_aligned.cu
similarity index 98%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassF_bf16_aligned.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_bf16_aligned.cu
index b32e91dd5e..d86e344826 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_bf16_aligned.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_bf16_aligned.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_forward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
+#include "../../kernel_forward.h"
 __global__ void __launch_bounds__(
     AttentionKernel::kNumThreads,
     AttentionKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f16_aligned.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f16_aligned.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f16_aligned.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f16_aligned.cu
index 73f60913fb..f8674d67ed 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f16_aligned.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f16_aligned.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_forward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
+#include "../../kernel_forward.h"
 __global__ void __launch_bounds__(
     AttentionKernel::kNumThreads,
     AttentionKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f16_notaligned.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f16_notaligned.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f16_notaligned.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f16_notaligned.cu
index 68eef9bc50..996460d0cc 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f16_notaligned.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f16_notaligned.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_forward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
+#include "../../kernel_forward.h"
 __global__ void __launch_bounds__(
     AttentionKernel::kNumThreads,
     AttentionKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f32_aligned.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f32_aligned.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f32_aligned.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f32_aligned.cu
index 7df7ad5a02..299c0cb7b0 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f32_aligned.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f32_aligned.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_forward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
+#include "../../kernel_forward.h"
 __global__ void __launch_bounds__(
     AttentionKernel::kNumThreads,
     AttentionKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f32_notaligned.cu b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f32_notaligned.cu
similarity index 99%
rename from xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f32_notaligned.cu
rename to xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f32_notaligned.cu
index 13adba725c..fa138fb1c1 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/cutlassF_f32_notaligned.cu
+++ b/xformers/csrc/attention/cuda/fmha/autogen/impl/cutlassF_f32_notaligned.cu
@@ -1,7 +1,6 @@
-#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
 // This file is auto-generated. See "generate_kernels.py"
-#include "../kernel_forward.h"
-
+#ifndef XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD
+#include "../../kernel_forward.h"
 __global__ void __launch_bounds__(
     AttentionKernel::kNumThreads,
     AttentionKernel::kMinBlocksPerSm)
diff --git a/xformers/csrc/attention/cuda/fmha/kernels/generate_kernels.py b/xformers/csrc/attention/cuda/fmha/generate_kernels.py
similarity index 95%
rename from xformers/csrc/attention/cuda/fmha/kernels/generate_kernels.py
rename to xformers/csrc/attention/cuda/fmha/generate_kernels.py
index 6c832eb2b7..5425b25807 100644
--- a/xformers/csrc/attention/cuda/fmha/kernels/generate_kernels.py
+++ b/xformers/csrc/attention/cuda/fmha/generate_kernels.py
@@ -277,9 +277,7 @@ def get_all(cls) -> List["BwdKernel"]:
 def write_decl_impl(
     kernels: List[T], family_name: str, impl_file: str, disable_def: str
 ) -> None:
-    cpp_file_header = f"""// This file is auto-generated. See "generate_kernels.py"
-#include "{impl_file}"
-
+    cpp_file_header = """// This file is auto-generated. See "generate_kernels.py"
 """
 
     kernels.sort()
@@ -290,6 +288,7 @@ def write_decl_impl(
     dispatch_all = ""
     declarations = cpp_file_header + "#pragma once\n"
     declarations += f"#ifndef {disable_def}\n"
+    declarations += f"""#include "../{impl_file}"\n"""
 
     # Declaration of kernel functions
     for k in kernels:
@@ -323,25 +322,29 @@
 }}
 """
     declarations += f"#endif // {disable_def}\n"
-    Path(f"{family_name}.h").write_text(declarations)
+
+    autogen_dir = Path(__file__).parent / "autogen"
+    (autogen_dir / f"{family_name}.h").write_text(declarations)
 
     for f, f_kernels in implfile_to_kernels.items():
-        impl_cu = f"#ifndef {disable_def}\n{cpp_file_header}"
+        impl_cu = cpp_file_header
+        impl_cu += f"#ifndef {disable_def}\n"
+        impl_cu += f"""#include "../../{impl_file}"\n"""
        for k in f_kernels:
             impl_cu += k.cpp_impl
         impl_cu += f"#endif // {disable_def}\n"
-        Path(f"{family_name}_{f}.cu").write_text(impl_cu)
+        (autogen_dir / "impl" / f"{family_name}_{f}.cu").write_text(impl_cu)
 
 
 write_decl_impl(
     FwdKernel.get_all(),
     "cutlassF",
-    impl_file="../kernel_forward.h",
+    impl_file="kernel_forward.h",
    disable_def="XFORMERS_MEM_EFF_ATTENTION_DISABLE_FORWARD",
 )
 write_decl_impl(
     BwdKernel.get_all(),
     "cutlassB",
-    impl_file="../kernel_backward.h",
+    impl_file="kernel_backward.h",
     disable_def="XFORMERS_MEM_EFF_ATTENTION_DISABLE_BACKWARD",
 )
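Note on the new layout (reviewer sketch, not part of the patch): generate_kernels.py now writes declarations to autogen/<family>.h, which includes "../kernel_*.h" (one directory up from autogen/), and implementations to autogen/impl/<family>_<variant>.cu, which include "../../kernel_*.h" (two directories up from autogen/impl/), so each generated file's relative include depth matches its output directory. A minimal Python sketch of that path arithmetic follows; output_paths is an illustrative helper, not a function in the actual script.

    from pathlib import Path
    from typing import List

    # Sketch only: mirrors where write_decl_impl now writes its outputs,
    # relative to generate_kernels.py's own directory.
    AUTOGEN_DIR = Path(__file__).parent / "autogen"

    def output_paths(family_name: str, variants: List[str]) -> List[Path]:
        # Declarations land in autogen/<family>.h and therefore include
        # "../kernel_*.h" (one directory up from autogen/).
        paths = [AUTOGEN_DIR / f"{family_name}.h"]
        # Implementations land in autogen/impl/<family>_<variant>.cu and
        # therefore include "../../kernel_*.h" (two directories up).
        paths += [AUTOGEN_DIR / "impl" / f"{family_name}_{v}.cu" for v in variants]
        return paths

    print(output_paths("cutlassF", ["f16_aligned", "f32_notaligned"]))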