diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp index 28235cf137c186..f237e2ea1b9545 100644 --- a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp +++ b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp @@ -126,12 +126,14 @@ void print_header() { LIBC_NAMESPACE::printf("Running Suite: %-10s\n", benchmarks[0]->get_suite_name().data()); LIBC_NAMESPACE::printf("%s", RESET); - LIBC_NAMESPACE::printf( + cpp::string titles = "Benchmark | Cycles | Min | Max | " - "Iterations | Time / Iteration | Stddev | Threads |\n"); - LIBC_NAMESPACE::printf( - "---------------------------------------------------------------------" - "--------------------------------\n"); + "Iterations | Time / Iteration | Stddev | Threads |\n"; + LIBC_NAMESPACE::printf(titles.data()); + + cpp::string separator(titles.size(), '-'); + separator[titles.size() - 1] = '\n'; + LIBC_NAMESPACE::printf(separator.data()); } void Benchmark::run_benchmarks() { diff --git a/libc/benchmarks/gpu/src/math/sin_benchmark.cpp b/libc/benchmarks/gpu/src/math/sin_benchmark.cpp index e86961790b9438..0dc3eae2a24a09 100644 --- a/libc/benchmarks/gpu/src/math/sin_benchmark.cpp +++ b/libc/benchmarks/gpu/src/math/sin_benchmark.cpp @@ -48,8 +48,8 @@ BENCH(NvSinVeryLarge, LIBC_NAMESPACE::__nv_sin, 30, 1000); #endif #ifdef AMDGPU_MATH_FOUND -BENCH(AmdgpuSin, LIBC_NAMESPACE::__ocml_sin_f64, -1023, 1023); -BENCH(AmdgpuSinTwoPi, LIBC_NAMESPACE::__ocml_sin_f64, -10, 3); -BENCH(AmdgpuSinTwoPow30, LIBC_NAMESPACE::__ocml_sin_f64, 0, 30); -BENCH(AmdgpuSinVeryLarge, LIBC_NAMESPACE::__ocml_sin_f64, 30, 1000); +BENCH(AmdSin, LIBC_NAMESPACE::__ocml_sin_f64, -1023, 1023); +BENCH(AmdSinTwoPi, LIBC_NAMESPACE::__ocml_sin_f64, -10, 3); +BENCH(AmdSinTwoPow30, LIBC_NAMESPACE::__ocml_sin_f64, 0, 30); +BENCH(AmdSinVeryLarge, LIBC_NAMESPACE::__ocml_sin_f64, 30, 1000); #endif diff --git a/libc/newhdrgen/yaml_to_classes.py b/libc/newhdrgen/yaml_to_classes.py index 37a4f78ec4a7b6..3eb5e4ef2546c1 100644 --- a/libc/newhdrgen/yaml_to_classes.py +++ b/libc/newhdrgen/yaml_to_classes.py @@ -190,7 +190,15 @@ def add_function_to_yaml(yaml_file, function_details): if new_function.attributes: function_dict["attributes"] = new_function.attributes - yaml_data["functions"].append(function_dict) + insert_index = 0 + for i, func in enumerate(yaml_data["functions"]): + if func["name"] > new_function.name: + insert_index = i + break + else: + insert_index = len(yaml_data["functions"]) + + yaml_data["functions"].insert(insert_index, function_dict) class IndentYamlListDumper(yaml.Dumper): def increase_indent(self, flow=False, indentless=False): diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 5b2214fa66c40b..9ccdbab008aec8 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3843,10 +3843,6 @@ class TargetLowering : public TargetLoweringBase { /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. virtual unsigned getJumpTableEncoding() const; - virtual MVT getJumpTableRegTy(const DataLayout &DL) const { - return getPointerTy(DL); - } - virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index d766445737819d..f92316023404c1 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -16,7 +16,7 @@ /* Indicate that this is LLVM compiled from the amd-gfx branch. */ #define LLVM_HAVE_BRANCH_AMD_GFX -#define LLVM_MAIN_REVISION 507869 +#define LLVM_MAIN_REVISION 507873 /* Define if LLVM_ENABLE_DUMP is enabled */ #cmakedefine LLVM_ENABLE_DUMP diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 37ba62911ec70b..1f4436fb3a4966 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2977,7 +2977,7 @@ void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) { // Emit the code for the jump table assert(JT.SL && "Should set SDLoc for SelectionDAG!"); assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = DAG.getTargetLoweringInfo().getJumpTableRegTy(DAG.getDataLayout()); + EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue Index = DAG.getCopyFromReg(getControlRoot(), *JT.SL, JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, *JT.SL, MVT::Other, @@ -3005,13 +3005,12 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SwitchOp = - DAG.getZExtOrTrunc(Sub, dl, TLI.getJumpTableRegTy(DAG.getDataLayout())); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout())); unsigned JumpTableReg = - FuncInfo.CreateReg(TLI.getJumpTableRegTy(DAG.getDataLayout())); - SDValue CopyTo = - DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); + FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout())); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, + JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; if (!JTH.FallthroughUnreachable) { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1a905ed0510fa5..b9bac1d6fb0ec2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16655,9 +16655,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { // // With this expansion we produce the following code: // [...] - // br label %atomicrmw.check.shared - // - // atomicrmw.check.shared: // %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %addr) // br i1 %is.shared, label %atomicrmw.shared, label %atomicrmw.check.private // @@ -16700,8 +16697,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { Function *F = BB->getParent(); BasicBlock *ExitBB = BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); - BasicBlock *CheckSharedBB = - BasicBlock::Create(Ctx, "atomicrmw.check.shared", F, ExitBB); BasicBlock *SharedBB = BasicBlock::Create(Ctx, "atomicrmw.shared", F, ExitBB); BasicBlock *CheckPrivateBB = BasicBlock::Create(Ctx, "atomicrmw.check.private", F, ExitBB); @@ -16728,9 +16723,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - Builder.CreateBr(CheckSharedBB); - - Builder.SetInsertPoint(CheckSharedBB); CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {}, {Addr}, nullptr, "is.shared"); Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index bf647c88f00e28..516fc7339a4bf3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -583,7 +582,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::ROTR, MVT::i8, Expand); setOperationAction(ISD::BSWAP, MVT::i16, Expand); - setOperationAction(ISD::BR_JT, MVT::Other, Custom); + // Indirect branch is not supported. + // This also disables Jump Table creation. + setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); @@ -944,9 +945,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(NVPTXISD::Dummy) MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED) MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED) - MAKE_CASE(NVPTXISD::BrxEnd) - MAKE_CASE(NVPTXISD::BrxItem) - MAKE_CASE(NVPTXISD::BrxStart) MAKE_CASE(NVPTXISD::Tex1DFloatS32) MAKE_CASE(NVPTXISD::Tex1DFloatFloat) MAKE_CASE(NVPTXISD::Tex1DFloatFloatLevel) @@ -2787,8 +2785,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::BR_JT: - return LowerBR_JT(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VASTART: @@ -2814,41 +2810,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { } } -SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - SDValue Chain = Op.getOperand(0); - const auto *JT = cast(Op.getOperand(1)); - SDValue Index = Op.getOperand(2); - - unsigned JId = JT->getIndex(); - MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); - ArrayRef MBBs = MJTI->getJumpTables()[JId].MBBs; - - SDValue IdV = DAG.getConstant(JId, DL, MVT::i32); - - // Generate BrxStart node - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(NVPTXISD::BrxStart, DL, VTs, Chain, IdV); - - // Generate BrxItem nodes - assert(!MBBs.empty()); - for (MachineBasicBlock *MBB : MBBs.drop_back()) - Chain = DAG.getNode(NVPTXISD::BrxItem, DL, VTs, Chain.getValue(0), - DAG.getBasicBlock(MBB), Chain.getValue(1)); - - // Generate BrxEnd nodes - SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index, - IdV, Chain.getValue(1)}; - SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, VTs, EndOps); - - return BrxEnd; -} - -// This will prevent AsmPrinter from trying to print the jump tables itself. -unsigned NVPTXTargetLowering::getJumpTableEncoding() const { - return MachineJumpTableInfo::EK_Inline; -} - // This function is almost a copy of SelectionDAG::expandVAArg(). // The only diff is that this one produces loads from local address space. SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 32e6b044b0de1f..63262961b363ed 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -62,9 +62,6 @@ enum NodeType : unsigned { BFI, PRMT, DYNAMIC_STACKALLOC, - BrxStart, - BrxItem, - BrxEnd, Dummy, LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, @@ -583,11 +580,6 @@ class NVPTXTargetLowering : public TargetLowering { return true; } - // The default is the same as pointer type, but brx.idx only accepts i32 - MVT getJumpTableRegTy(const DataLayout &) const override { return MVT::i32; } - - unsigned getJumpTableEncoding() const override; - bool enableAggressiveFMAFusion(EVT VT) const override { return true; } // The default is to transform llvm.ctlz(x, false) (where false indicates that @@ -645,8 +637,6 @@ class NVPTXTargetLowering : public TargetLowering { SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 904574b2e1d660..6a096fa5acea7c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -3880,44 +3880,6 @@ def DYNAMIC_STACKALLOC64 : [(set Int64Regs:$ptr, (dyn_alloca Int64Regs:$size, (i32 timm:$align)))]>, Requires<[hasPTX<73>, hasSM<52>]>; - -// -// BRX -// - -def SDTBrxStartProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDTBrxItemProfile : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; -def SDTBrxEndProfile : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisInt<1>, SDTCisInt<2>]>; - -def brx_start : - SDNode<"NVPTXISD::BrxStart", SDTBrxStartProfile, - [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; -def brx_item : - SDNode<"NVPTXISD::BrxItem", SDTBrxItemProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def brx_end : - SDNode<"NVPTXISD::BrxEnd", SDTBrxEndProfile, - [SDNPHasChain, SDNPInGlue, SDNPSideEffect]>; - -let isTerminator = 1, isBranch = 1, isIndirectBranch = 1 in { - - def BRX_START : - NVPTXInst<(outs), (ins i32imm:$id), - "$$L_brx_$id: .branchtargets", - [(brx_start (i32 imm:$id))]>; - - def BRX_ITEM : - NVPTXInst<(outs), (ins brtarget:$target), - "\t$target,", - [(brx_item bb:$target)]>; - - def BRX_END : - NVPTXInst<(outs), (ins brtarget:$target, Int32Regs:$val, i32imm:$id), - "\t$target;\n\tbrx.idx \t$val, $$L_brx_$id;", - [(brx_end bb:$target, (i32 Int32Regs:$val), (i32 imm:$id))]>; -} - - include "NVPTXIntrinsics.td" //----------------------------------- diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll index 8bf7a1cc42f642..4f0bc512565d13 100644 --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -222,7 +222,7 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 { define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 { ; GFX908-LABEL: syncscope_workgroup_nortn: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -272,7 +272,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 { ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: syncscope_workgroup_nortn: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll index 896b85ea14da11..422c8a0be23b49 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll @@ -630,7 +630,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -682,7 +682,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -839,7 +839,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -893,7 +893,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -1062,7 +1062,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc @@ -1116,7 +1116,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc @@ -1469,7 +1469,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -1525,7 +1525,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -2006,7 +2006,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -2060,7 +2060,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -2950,7 +2950,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -3002,7 +3002,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -3159,7 +3159,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -3213,7 +3213,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -3382,7 +3382,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc @@ -3436,7 +3436,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc @@ -3789,7 +3789,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -3845,7 +3845,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -4198,7 +4198,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -4254,7 +4254,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0 ; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc @@ -5239,7 +5239,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 @@ -5291,7 +5291,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode: -; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base ; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 diff --git a/llvm/test/CodeGen/NVPTX/jump-table.ll b/llvm/test/CodeGen/NVPTX/jump-table.ll deleted file mode 100644 index 867e171a5840ae..00000000000000 --- a/llvm/test/CodeGen/NVPTX/jump-table.ll +++ /dev/null @@ -1,69 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s | FileCheck %s -; RUN: %if ptxas %{ llc < %s | %ptxas-verify %} - -target triple = "nvptx64-nvidia-cuda" - -@out = addrspace(1) global i32 0, align 4 - -define void @foo(i32 %i) { -; CHECK-LABEL: foo( -; CHECK: { -; CHECK-NEXT: .reg .pred %p<2>; -; CHECK-NEXT: .reg .b32 %r<7>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: ld.param.u32 %r2, [foo_param_0]; -; CHECK-NEXT: setp.gt.u32 %p1, %r2, 3; -; CHECK-NEXT: @%p1 bra $L__BB0_6; -; CHECK-NEXT: // %bb.1: // %entry -; CHECK-NEXT: $L_brx_0: .branchtargets -; CHECK-NEXT: $L__BB0_2, -; CHECK-NEXT: $L__BB0_3, -; CHECK-NEXT: $L__BB0_4, -; CHECK-NEXT: $L__BB0_5; -; CHECK-NEXT: brx.idx %r2, $L_brx_0; -; CHECK-NEXT: $L__BB0_2: // %case0 -; CHECK-NEXT: mov.b32 %r6, 0; -; CHECK-NEXT: st.global.u32 [out], %r6; -; CHECK-NEXT: bra.uni $L__BB0_6; -; CHECK-NEXT: $L__BB0_4: // %case2 -; CHECK-NEXT: mov.b32 %r4, 2; -; CHECK-NEXT: st.global.u32 [out], %r4; -; CHECK-NEXT: bra.uni $L__BB0_6; -; CHECK-NEXT: $L__BB0_5: // %case3 -; CHECK-NEXT: mov.b32 %r3, 3; -; CHECK-NEXT: st.global.u32 [out], %r3; -; CHECK-NEXT: bra.uni $L__BB0_6; -; CHECK-NEXT: $L__BB0_3: // %case1 -; CHECK-NEXT: mov.b32 %r5, 1; -; CHECK-NEXT: st.global.u32 [out], %r5; -; CHECK-NEXT: $L__BB0_6: // %end -; CHECK-NEXT: ret; -entry: - switch i32 %i, label %end [ - i32 0, label %case0 - i32 1, label %case1 - i32 2, label %case2 - i32 3, label %case3 - ] - -case0: - store i32 0, ptr addrspace(1) @out, align 4 - br label %end - -case1: - store i32 1, ptr addrspace(1) @out, align 4 - br label %end - -case2: - store i32 2, ptr addrspace(1) @out, align 4 - br label %end - -case3: - store i32 3, ptr addrspace(1) @out, align 4 - br label %end - -end: - ret void -} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll index d51e9291a6119c..78969839efcb8a 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll @@ -127,8 +127,6 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in, define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 { ; GFX90A-LABEL: define void @syncscope_workgroup_nortn( ; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] { -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll index 70dc5b267f73b9..96cbf057b490a8 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll @@ -22,8 +22,6 @@ define float @syncscope_system(ptr %addr, float %val) { ; GFX908-NEXT: ret float [[TMP5]] ; ; GFX90A-LABEL: @syncscope_system( -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: @@ -36,8 +34,8 @@ define float @syncscope_system(ptr %addr, float %val) { ; GFX90A: atomicrmw.private: ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5) ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] -; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) @@ -94,8 +92,6 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) { ; GFX908-NEXT: ret float [[TMP5]] ; ; GFX90A-LABEL: @syncscope_workgroup_rtn( -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: @@ -108,8 +104,8 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) { ; GFX90A: atomicrmw.private: ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5) ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] -; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) @@ -150,8 +146,6 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) { define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX908-LABEL: @syncscope_workgroup_nortn( -; GFX908-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX908: atomicrmw.check.shared: ; GFX908-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]]) ; GFX908-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX908: atomicrmw.shared: @@ -164,8 +158,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX908: atomicrmw.private: ; GFX908-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5) ; GFX908-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX908-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] -; GFX908-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX908-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX908-NEXT: br label [[ATOMICRMW_PHI]] ; GFX908: atomicrmw.global: ; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) @@ -178,8 +172,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX908-NEXT: ret void ; ; GFX90A-LABEL: @syncscope_workgroup_nortn( -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: @@ -192,8 +184,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) { ; GFX90A: atomicrmw.private: ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5) ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] -; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll index def9522077004f..7eaaf2ae1ec997 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll @@ -595,8 +595,6 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 { ; GFX908-NEXT: ret float [[TMP5]] ; ; GFX90A-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe( -; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] -; GFX90A: atomicrmw.check.shared: ; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PTR:%.*]]) ; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] ; GFX90A: atomicrmw.shared: @@ -609,8 +607,8 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 { ; GFX90A: atomicrmw.private: ; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) ; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4 -; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]] -; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4 +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]] +; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] ; GFX90A: atomicrmw.global: ; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)