From 70023cd80614a49ea3d6262facba19c6484869d2 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 10 Jun 2024 14:29:03 +0100 Subject: [PATCH 1/2] [AMDGPU] Restore non-buffer atomic tests lost in #93801 --- .../CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll | 45 +++++++++++ .../AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll | 56 ++++++++++++++ .../CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll | 77 +++++++++++++++++++ 3 files changed, 178 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll new file mode 100644 index 00000000000000..d7dd0ce58a08f3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,PREGFX12 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,PREGFX12 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX12PLUS + +declare i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1), i32) + +; GCN-LABEL: {{^}}global_atomic_csub_rtn: +; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9:]+}}, s{{\[[0-9]+:[0-9]+\]}} glc +; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v0, v1, s[0:1] th:TH_ATOMIC_RETURN +define amdgpu_kernel void @global_atomic_csub_rtn(ptr addrspace(1) %ptr, i32 %data) { +main_body: + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_csub_no_rtn: +; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1] +define amdgpu_kernel void 
@global_atomic_csub_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 { +main_body: + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_csub_off4_rtn: +; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 glc +; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v0, v1, s[0:1] offset:4 th:TH_ATOMIC_RETURN +define amdgpu_kernel void @global_atomic_csub_off4_rtn(ptr addrspace(1) %ptr, i32 %data) { +main_body: + %p = getelementptr i32, ptr addrspace(1) %ptr, i64 1 + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_csub_off4_no_rtn: +; PREGFX12: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 +; GFX12PLUS: global_atomic_sub_clamp_u32 v0, v1, s[0:1] offset:4 +define amdgpu_kernel void @global_atomic_csub_off4_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 { +main_body: + %p = getelementptr i32, ptr addrspace(1) %ptr, i64 1 + %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data) + ret void +} + +attributes #0 = { "target-features"="+atomic-csub-no-rtn-insts" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll new file mode 100644 index 00000000000000..af841057471891 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A + +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) + +; GFX90A-LABEL: {{^}}global_atomic_add_f32: +; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc +define amdgpu_ps float @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) { +main_body: + %ret = 
call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret float %ret +} + +; GFX90A-LABEL: {{^}}global_atomic_add_f32_off4: +; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:4 glc +define amdgpu_ps float @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) { +main_body: + %p = getelementptr float, ptr addrspace(1) %ptr, i64 1 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data) + ret float %ret +} + +; GFX90A-LABEL: {{^}}global_atomic_add_f32_offneg4: +; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:-4 glc +define amdgpu_ps float @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) { +main_body: + %p = getelementptr float, ptr addrspace(1) %ptr, i64 -1 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data) + ret float %ret +} + +; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16: +; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc +define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) + ret <2 x half> %ret +} + +; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_off4: +; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:4 glc +define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1 + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data) + ret <2 x half> %ret +} + +; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4: +; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-4 glc +define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %p = getelementptr <2 x half>, ptr 
addrspace(1) %ptr, i64 -1 + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data) + ret <2 x half> %ret +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll new file mode 100644 index 00000000000000..0c3ce3308dd8fe --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN + +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr, float) + +; GCN-LABEL: {{^}}global_atomic_add_f32: +; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +define amdgpu_kernel void @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) { +main_body: + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_add_f32_off4: +; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 +define amdgpu_kernel void @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) { +main_body: + %p = getelementptr float, ptr addrspace(1) %ptr, i64 1 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4: +; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4 +define amdgpu_kernel void @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) { +main_body: + %p = getelementptr float, ptr addrspace(1) 
%ptr, i64 -1 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16: +; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_off4: +; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 +define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1 + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data) + ret void +} + +; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4: +; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}} +define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) { +main_body: + %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -1 + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data) + ret void +} + +; Make sure this artificially selects with an incorrect subtarget, but +; the correct feature set. 
+; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget: +; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(ptr addrspace(1) %ptr, float %data) #0 { + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) + ret void +} + +; GCN-LABEL: {{^}}flat_atomic_fadd_f32_wrong_subtarget: +; GCN: flat_atomic_add_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} +define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(ptr %ptr, float %data) #1 { + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) + ret void +} + +attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts"} +attributes #1 = { "target-cpu"="gfx803" "target-features"="+flat-atomic-fadd-f32-inst"} From 5737cfd08197f1efe9433889ea5718191d258574 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 10 Jun 2024 14:54:02 +0100 Subject: [PATCH 2/2] Renames --- ...vm.amdgcn.atomic.csub.ll => llvm.amdgcn.global.atomic.csub.ll} | 0 ...ic.fadd.gfx90a.ll => llvm.amdgcn.global.atomic.fadd.gfx90a.ll} | 0 ...vm.amdgcn.atomic.fadd.ll => llvm.amdgcn.global.atomic.fadd.ll} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/AMDGPU/{llvm.amdgcn.atomic.csub.ll => llvm.amdgcn.global.atomic.csub.ll} (100%) rename llvm/test/CodeGen/AMDGPU/{llvm.amdgcn.atomic.fadd.gfx90a.ll => llvm.amdgcn.global.atomic.fadd.gfx90a.ll} (100%) rename llvm/test/CodeGen/AMDGPU/{llvm.amdgcn.atomic.fadd.ll => llvm.amdgcn.global.atomic.fadd.ll} (100%) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.csub.ll similarity index 100% rename from llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll rename to llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.csub.ll diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.gfx90a.ll similarity index 100% rename from llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll rename to llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.gfx90a.ll diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.ll similarity index 100% rename from llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll rename to llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.fadd.ll