Skip to content

Commit

Permalink
[AMDGPU] Restore non-buffer atomic tests lost in llvm#93801 (llvm#94978)
Browse files Browse the repository at this point in the history
  • Loading branch information
jayfoad authored and Lukacma committed Jun 12, 2024
1 parent 2ac3c9a commit 9a23d26
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 0 deletions.
45 changes: 45 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.csub.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,PREGFX12
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,PREGFX12
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX12PLUS

declare i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1), i32)

; GCN-LABEL: {{^}}global_atomic_csub_rtn:
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9:]+}}, s{{\[[0-9]+:[0-9]+\]}} glc
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v0, v1, s[0:1] th:TH_ATOMIC_RETURN
define amdgpu_kernel void @global_atomic_csub_rtn(ptr addrspace(1) %ptr, i32 %data) {
main_body:
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_csub_no_rtn:
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1]
define amdgpu_kernel void @global_atomic_csub_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
main_body:
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_csub_off4_rtn:
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 glc
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v0, v1, s[0:1] offset:4 th:TH_ATOMIC_RETURN
define amdgpu_kernel void @global_atomic_csub_off4_rtn(ptr addrspace(1) %ptr, i32 %data) {
main_body:
%p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_csub_off4_no_rtn:
; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1] offset:4
define amdgpu_kernel void @global_atomic_csub_off4_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
main_body:
%p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
ret void
}

attributes #0 = { "target-features"="+atomic-csub-no-rtn-insts" }
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.gfx90a.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A

declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)

; GFX90A-LABEL: {{^}}global_atomic_add_f32:
; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc
define amdgpu_ps float @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) {
main_body:
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret float %ret
}

; GFX90A-LABEL: {{^}}global_atomic_add_f32_off4:
; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:4 glc
define amdgpu_ps float @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) {
main_body:
%p = getelementptr float, ptr addrspace(1) %ptr, i64 1
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
ret float %ret
}

; GFX90A-LABEL: {{^}}global_atomic_add_f32_offneg4:
; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:-4 glc
define amdgpu_ps float @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) {
main_body:
%p = getelementptr float, ptr addrspace(1) %ptr, i64 -1
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
ret float %ret
}

; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16:
; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc
define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) {
main_body:
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
ret <2 x half> %ret
}

; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_off4:
; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:4 glc
define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) {
main_body:
%p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
ret <2 x half> %ret
}

; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-4 glc
define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) {
main_body:
%p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -1
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
ret <2 x half> %ret
}
77 changes: 77 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN

declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr, float)

; GCN-LABEL: {{^}}global_atomic_add_f32:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) {
main_body:
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_add_f32_off4:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
define amdgpu_kernel void @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) {
main_body:
%p = getelementptr float, ptr addrspace(1) %ptr, i64 1
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4
define amdgpu_kernel void @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) {
main_body:
%p = getelementptr float, ptr addrspace(1) %ptr, i64 -1
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) {
main_body:
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_off4:
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) {
main_body:
%p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}}
define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) {
main_body:
%p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -1
%ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
ret void
}

; Make sure this artificially selects with an incorrect subtarget, but
; the feature set.
; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(ptr addrspace(1) %ptr, float %data) #0 {
%ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret void
}

; GCN-LABEL: {{^}}flat_atomic_fadd_f32_wrong_subtarget:
; GCN: flat_atomic_add_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(ptr %ptr, float %data) #1 {
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data)
ret void
}

attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts"}
attributes #1 = { "target-cpu"="gfx803" "target-features"="+flat-atomic-fadd-f32-inst"}

0 comments on commit 9a23d26

Please sign in to comment.