Skip to content

Commit

Permalink
Support the SPV_EXT_shader_atomic_float_add extension
Browse files Browse the repository at this point in the history
The new `AtomicFAddEXT` instruction will be mapped onto
`__spirv_AtomicFAddEXT()` external calls in LLVM IR. No
additional logic is required to facilitate this - the
existing infrastructure does the replacement by default
based on the `__spirv` prefix.

The full specification can be found at
github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/EXT/SPV_EXT_shader_atomic_float_add.asciidoc

Signed-off-by: Artem Gindinson <artem.gindinson@intel.com>
  • Loading branch information
AGindinson authored and vmaksimo committed Nov 30, 2020
1 parent c3485f8 commit 4fdbfae
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 5 deletions.
1 change: 1 addition & 0 deletions llvm-spirv/include/LLVMSPIRVExtensions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#error "EXT macro must be defined"
#endif

EXT(SPV_EXT_shader_atomic_float_add)
EXT(SPV_KHR_no_integer_wrap_decoration)
EXT(SPV_KHR_float_controls)
EXT(SPV_INTEL_subgroups)
Expand Down
32 changes: 27 additions & 5 deletions llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -2721,30 +2721,51 @@ _SPIRV_OP(PowN, true, 10)

class SPIRVAtomicInstBase : public SPIRVInstTemplateBase {
public:
llvm::Optional<ExtensionID> getRequiredExtension() const override {
if (getOpCode() == OpAtomicFAddEXT)
return ExtensionID::SPV_EXT_shader_atomic_float_add;
return {};
}

SPIRVCapVec getRequiredCapability() const override {
SPIRVCapVec CapVec;
// Most of atomic instructions do not require any capabilities
// ... unless they operate on 64-bit integers.
if (hasType() && getType()->isTypeInt(64)) {
if (!hasType())
return CapVec;

// Most atomic instructionsrequire a specific capability when
// operating on 64-bit integers.
if (getType()->isTypeInt(64)) {
// In SPIRV 1.2 spec only 2 atomic instructions have no result type:
// 1. OpAtomicStore - need to check type of the Value operand
// 2. OpAtomicFlagClear - doesn't require Int64Atomics capability.
CapVec.push_back(CapabilityInt64Atomics);
} else if (getOpCode() == OpAtomicFAddEXT) {
if (getType()->isTypeFloat(32))
CapVec.push_back(CapabilityAtomicFloat32AddEXT);
else if (getType()->isTypeFloat(64))
CapVec.push_back(CapabilityAtomicFloat64AddEXT);
else
llvm_unreachable(
"AtomicFAddEXT can only be generated for f32 or f64 types");
}
// Per the spec OpAtomicCompareExchangeWeak, OpAtomicFlagTestAndSet and
// OpAtomicFlagClear instructions require kernel capability. But this
// capability should be added by setting OpenCL memory model.
return CapVec;
}

// Overriding the following method only because of OpAtomicStore.
// We have to declare Int64Atomics capability if the Value operand is int64.
// Overriding the following method because of particular OpAtomic*
// instructions. We may have to declare additional capabilities,
// e.g. based on operand types.
void setOpWords(const std::vector<SPIRVWord> &TheOps) override {
SPIRVInstTemplateBase::setOpWords(TheOps);
static const unsigned ValueOperandIndex = 3;
if (getOpCode() == OpAtomicStore &&
getOperand(ValueOperandIndex)->getType()->isTypeInt(64))
Module->addCapability(CapabilityInt64Atomics);
if (getOpCode() == OpAtomicFAddEXT)
for (auto RC : getRequiredCapability())
Module->addCapability(RC);
}
};

Expand All @@ -2769,6 +2790,7 @@ _SPIRV_OP(AtomicSMax, true, 7)
_SPIRV_OP(AtomicAnd, true, 7)
_SPIRV_OP(AtomicOr, true, 7)
_SPIRV_OP(AtomicXor, true, 7)
_SPIRV_OP(AtomicFAddEXT, true, 7)
_SPIRV_OP(MemoryBarrier, false, 3)
#undef _SPIRV_OP

Expand Down
2 changes: 2 additions & 0 deletions llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,8 @@ template <> inline void SPIRVMap<Capability, std::string>::init() {
"PhysicalStorageBufferAddresses");
add(CapabilityPhysicalStorageBufferAddressesEXT,
"PhysicalStorageBufferAddressesEXT");
add(CapabilityAtomicFloat32AddEXT, "AtomicFloat32AddEXT");
add(CapabilityAtomicFloat64AddEXT, "AtomicFloat64AddEXT");
add(CapabilityComputeDerivativeGroupLinearNV,
"ComputeDerivativeGroupLinearNV");
add(CapabilityCooperativeMatrixNV, "CooperativeMatrixNV");
Expand Down
1 change: 1 addition & 0 deletions llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h
Original file line number Diff line number Diff line change
Expand Up @@ -528,4 +528,5 @@ _SPIRV_OP(CrossWorkgroupCastToPtrINTEL, 5938)
_SPIRV_OP(ReadPipeBlockingINTEL, 5946)
_SPIRV_OP(WritePipeBlockingINTEL, 5947)
_SPIRV_OP(FPGARegINTEL, 5949)
_SPIRV_OP(AtomicFAddEXT, 6035)
_SPIRV_OP(TypeBufferSurfaceINTEL, 6086)
4 changes: 4 additions & 0 deletions llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -991,6 +991,8 @@ enum Capability {
CapabilityUSMStorageClassesINTEL = 5935,
CapabilityFPGAMemoryAccessesINTEL = 5898,
CapabilityIOPipeINTEL = 5943,
CapabilityAtomicFloat32AddEXT = 6033,
CapabilityAtomicFloat64AddEXT = 6034,
CapabilityMax = 0x7fffffff,
};

Expand Down Expand Up @@ -1589,6 +1591,7 @@ enum Op {
OpReadPipeBlockingINTEL = 5946,
OpWritePipeBlockingINTEL = 5947,
OpFPGARegINTEL = 5949,
OpAtomicFAddEXT = 6035,
OpTypeBufferSurfaceINTEL = 6086,
OpMax = 0x7fffffff,
};
Expand Down Expand Up @@ -1989,6 +1992,7 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break;
case OpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break;
case OpUCountTrailingZerosINTEL: *hasResult = true; *hasResultType = true; break;
case OpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break;
case OpAbsISubINTEL: *hasResult = true; *hasResultType = true; break;
case OpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break;
case OpIAddSatINTEL: *hasResult = true; *hasResultType = true; break;
Expand Down
119 changes: 119 additions & 0 deletions llvm-spirv/test/AtomicFAddExt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
; RUN: llvm-as %s -o %t.bc
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_add -o %t.spv
; RUN: llvm-spirv -to-text %t.spv -o %t.spt
; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV

; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown-sycldevice"

%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }
%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }

$_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any

$_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37 = comdat any

@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32

; CHECK-SPIRV: Capability AtomicFloat32AddEXT
; CHECK-SPIRV: Capability AtomicFloat64AddEXT
; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_add"
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64

; Function Attrs: convergent norecurse mustprogress
define weak_odr dso_local spir_kernel void @_ZTSZZ3addIfEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(float addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, float addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
entry:
%0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
%1 = load i64, i64* %0, align 8
%add.ptr.i29 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %1
%2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
%3 = load i64, i64* %2, align 8
%add.ptr.i = getelementptr inbounds float, float addrspace(1)* %_arg_4, i64 %3
%4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !5
%5 = extractelement <3 x i64> %4, i64 0
; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]]
; CHECK-LLVM: call spir_func float @[[FLOAT_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}})
%call3.i.i.i.i = tail call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)* %add.ptr.i29, i32 1, i32 896, float 1.000000e+00) #2
%add.i.i = fadd float %call3.i.i.i.i, 1.000000e+00
%sext.i = shl i64 %5, 32
%conv5.i = ashr exact i64 %sext.i, 32
%ptridx.i.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i, i64 %conv5.i
%ptridx.ascast.i.i = addrspacecast float addrspace(1)* %ptridx.i.i to float addrspace(4)*
store float %add.i.i, float addrspace(4)* %ptridx.ascast.i.i, align 4, !tbaa !14
ret void
}

; Function Attrs: convergent
; CHECK-LLVM: declare {{.*}}spir_func float @[[FLOAT_FUNC_NAME]](float addrspace(1)*, i32, i32, float)
declare dso_local spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEf(float addrspace(1)*, i32, i32, float) local_unnamed_addr #1

; Function Attrs: convergent norecurse mustprogress
define weak_odr dso_local spir_kernel void @_ZTSZZ3addIdEvvENKUlRN2cl4sycl7handlerEE19_14clES3_EUlNS1_4itemILi1ELb1EEEE23_37(double addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3, double addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 {
entry:
%0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
%1 = load i64, i64* %0, align 8
%add.ptr.i29 = getelementptr inbounds double, double addrspace(1)* %_arg_, i64 %1
%2 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_8, i64 0, i32 0, i32 0, i64 0
%3 = load i64, i64* %2, align 8
%add.ptr.i = getelementptr inbounds double, double addrspace(1)* %_arg_4, i64 %3
%4 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !18
%5 = extractelement <3 x i64> %4, i64 0
; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]]
; CHECK-LLVM: call spir_func double @[[DOUBLE_FUNC_NAME:_Z21__spirv_AtomicFAddEXT[[:alnum:]]+]]({{.*}})
%call3.i.i.i.i = tail call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)* %add.ptr.i29, i32 1, i32 896, double 1.000000e+00) #2
%add.i.i = fadd double %call3.i.i.i.i, 1.000000e+00
%sext.i = shl i64 %5, 32
%conv5.i = ashr exact i64 %sext.i, 32
%ptridx.i.i = getelementptr inbounds double, double addrspace(1)* %add.ptr.i, i64 %conv5.i
%ptridx.ascast.i.i = addrspacecast double addrspace(1)* %ptridx.i.i to double addrspace(4)*
store double %add.i.i, double addrspace(4)* %ptridx.ascast.i.i, align 8, !tbaa !27
ret void
}

; Function Attrs: convergent
; CHECK-LLVM: declare {{.*}}spir_func double @[[DOUBLE_FUNC_NAME]](double addrspace(1)*, i32, i32, double)
declare dso_local spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1dN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEd(double addrspace(1)*, i32, i32, double) local_unnamed_addr #1

attributes #0 = { convergent norecurse mustprogress "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="fadd.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { convergent nounwind }

!llvm.module.flags = !{!0}
!opencl.spir.version = !{!1}
!spirv.Source = !{!2}
!llvm.ident = !{!3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, i32 2}
!2 = !{i32 4, i32 100000}
!3 = !{!"clang version 12.0.0"}
!4 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
!5 = !{!6, !8, !10, !12}
!6 = distinct !{!6, !7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
!7 = distinct !{!7, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
!8 = distinct !{!8, !9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
!9 = distinct !{!9, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
!10 = distinct !{!10, !11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
!11 = distinct !{!11, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
!14 = !{!15, !15, i64 0}
!15 = !{!"float", !16, i64 0}
!16 = !{!"omnipotent char", !17, i64 0}
!17 = !{!"Simple C++ TBAA"}
!18 = !{!19, !21, !23, !25}
!19 = distinct !{!19, !20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv: %agg.result"}
!20 = distinct !{!20, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEE8initSizeEv"}
!21 = distinct !{!21, !22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v: %agg.result"}
!22 = distinct !{!22, !"_ZN7__spirvL22initGlobalInvocationIdILi1EN2cl4sycl2idILi1EEEEET0_v"}
!23 = distinct !{!23, !24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv: %agg.result"}
!24 = distinct !{!24, !"_ZN2cl4sycl6detail7Builder7getItemILi1ELb1EEENSt9enable_ifIXT0_EKNS0_4itemIXT_EXT0_EEEE4typeEv"}
!25 = distinct !{!25, !26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE: %agg.result"}
!26 = distinct !{!26, !"_ZN2cl4sycl6detail7Builder10getElementILi1ELb1EEEDTcl7getItemIXT_EXT0_EEEEPNS0_4itemIXT_EXT0_EEE"}
!27 = !{!28, !28, i64 0}
!28 = !{!"double", !16, i64 0}

0 comments on commit 4fdbfae

Please sign in to comment.