From bb7143f6669345825c214b26fbe336857f4bf523 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 00:39:12 +0400 Subject: [PATCH] AMDGPU: Avoid creating unnecessary block split in atomic expansion (#102440) This was creating a new block to insert the is.shared check, but we can just do that in the original block. --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 ---- llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll | 4 +- .../CodeGen/AMDGPU/flat-atomicrmw-fadd.ll | 44 +++++++++---------- .../AtomicExpand/AMDGPU/expand-atomic-mmra.ll | 2 - ...and-atomic-rmw-fadd-flat-specialization.ll | 24 ++++------ .../AMDGPU/expand-atomic-rmw-fadd.ll | 6 +-- 6 files changed, 34 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 8497aa33449e99..899465d66d957b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16609,9 +16609,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { // // With this expansion we produce the following code: // [...] - // br label %atomicrmw.check.shared - // - // atomicrmw.check.shared: // %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %addr) // br i1 %is.shared, label %atomicrmw.shared, label %atomicrmw.check.private // @@ -16654,8 +16651,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { Function *F = BB->getParent(); BasicBlock *ExitBB = BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); - BasicBlock *CheckSharedBB = - BasicBlock::Create(Ctx, "atomicrmw.check.shared", F, ExitBB); BasicBlock *SharedBB = BasicBlock::Create(Ctx, "atomicrmw.shared", F, ExitBB); BasicBlock *CheckPrivateBB = BasicBlock::Create(Ctx, "atomicrmw.check.private", F, ExitBB); @@ -16682,9 +16677,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - Builder.CreateBr(CheckSharedBB); - - Builder.SetInsertPoint(CheckSharedBB); CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {}, {Addr}, nullptr, "is.shared"); Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB); diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll index 8bf7a1cc42f642..4f0bc512565d13 100644 --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -222,7 +222,7 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 { define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 { ; GFX908-LABEL: syncscope_workgroup_nortn: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -272,7 +272,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 { ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: syncscope_workgroup_nortn: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll index 896b85ea14da11..422c8a0be23b49 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll @@ -630,7 +630,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -682,7 +682,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -839,7 +839,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -893,7 +893,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -1062,7 +1062,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc @@ -1116,7 +1116,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc @@ -1469,7 +1469,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -1525,7 +1525,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -2006,7 +2006,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -2060,7 +2060,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -2950,7 +2950,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -3002,7 +3002,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -3159,7 +3159,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -3213,7 +3213,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -3382,7 +3382,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc @@ -3436,7 +3436,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc @@ -3789,7 +3789,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -3845,7 +3845,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -4198,7 +4198,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -4254,7 +4254,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -5239,7 +5239,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -5291,7 +5291,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll index d51e9291a6119c..78969839efcb8a 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll @@ -127,8 +127,6 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in, define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 { ; GFX90A-LABEL: define void @syncscope_workgroup_nortn( ; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll index 70dc5b267f73b9..96cbf057b490a8 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll @@ -22,8 +22,6 @@ define float @syncscope_system(ptr %addr, float %val) { ; GFX908-NEXT: ret float [[TMP5]] ; ; GFX90A-LABEL: @syncscope_system( -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: @@ -36,8 +34,8 @@ define float @syncscope_system(ptr %addr, float %val) { ; GFX90A: atomicrmw.private: ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5) ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] -; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) @@ -94,8 +92,6 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) { ; GFX908-NEXT: ret float [[TMP5]] ; ; GFX90A-LABEL: @syncscope_workgroup_rtn( -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: @@ -108,8 +104,8 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) { ; GFX90A: atomicrmw.private: ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5) ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] -; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) @@ -150,8 +146,6 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) { define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX908-LABEL: @syncscope_workgroup_nortn( -; GFX908-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX908: atomicrmw.check.shared: ; GFX908-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]]) ; GFX908-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX908: atomicrmw.shared: @@ -164,8 +158,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX908: atomicrmw.private: ; GFX908-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5) ; GFX908-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX908-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] -; GFX908-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX908-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX908-NEXT: br label [[ATOMICRMW_PHI]] ; GFX908: atomicrmw.global: ; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) @@ -178,8 +172,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX908-NEXT: ret void ; ; GFX90A-LABEL: @syncscope_workgroup_nortn( -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: @@ -192,8 +184,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX90A: atomicrmw.private: ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5) ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] -; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll index def9522077004f..7eaaf2ae1ec997 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll @@ -595,8 +595,6 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 { ; GFX908-NEXT: ret float [[TMP5]] ; ; GFX90A-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe( -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PTR:%.*]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: @@ -609,8 +607,8 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 { ; GFX90A: atomicrmw.private: ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]] -; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]] +; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)