From 15a34d5ae6aace9b6e68d857596099915363985d Mon Sep 17 00:00:00 2001 From: Vang Thao Date: Wed, 17 Apr 2024 21:06:43 -0400 Subject: [PATCH 1/5] [AMDGPU] Fix negative immediate offset for unbuffered smem loads For unbuffered smem loads, it is illegal for the immediate offset to be negative if the resulting IOFFSET + (SGPR[Offset] or M0 or zero) is negative. New PR of https://github.com/llvm/llvm-project/pull/79553. --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 55 ++++- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 12 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 19 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 + .../GlobalISel/inst-select-load-constant.mir | 20 +- llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll | 6 +- .../AMDGPU/cgp-addressing-modes-smem.ll | 12 +- .../AMDGPU/gfx12_scalar_subword_loads.ll | 68 ++++-- llvm/test/CodeGen/AMDGPU/global-saddr-load.ll | 204 +++++++++++++----- llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll | 38 +++- llvm/test/CodeGen/AMDGPU/smrd.ll | 6 +- 11 files changed, 345 insertions(+), 99 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index bba7682cd7a0d1..bf65244255f341 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1984,8 +1984,10 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, // not null) offset. If Imm32Only is true, match only 32-bit immediate // offsets available on CI. 
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, - SDValue *SOffset, SDValue *Offset, - bool Imm32Only, bool IsBuffer) const { + SDValue *SBase, SDValue *SOffset, + SDValue *Offset, bool Imm32Only, + bool IsBuffer, + bool HasSOffset) const { assert((!SOffset || !Offset) && "Cannot match both soffset and offset at the same time!"); @@ -2016,7 +2018,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer); if (EncodedOffset && Offset && !Imm32Only) { *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); - return true; + if (EncodedOffset >= 0 || IsBuffer || HasSOffset || + !Subtarget->hasSignedSMRDImmOffset()) + return true; + // For unbuffered smem loads, it is illegal for the Immediate Offset to be + // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. + // Handle the case where the Immediate Offset is negative and there is no + // SOffset. + return false; } // SGPR and literal offsets are unsigned. @@ -2072,13 +2081,34 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { // true, match only 32-bit immediate offsets available on CI. 
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset, SDValue *Offset, - bool Imm32Only, - bool IsBuffer) const { + bool Imm32Only, bool IsBuffer, + bool HasSOffset) const { if (SOffset && Offset) { assert(!Imm32Only && !IsBuffer); SDValue B; - return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) && - SelectSMRDBaseOffset(B, SBase, SOffset, nullptr); + if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true)) + return false; + + if (!SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true)) + return false; + + if (IsBuffer || Imm32Only || !Subtarget->hasSignedSMRDImmOffset()) + return true; + + // For unbuffered smem loads, it is illegal for the Immediate Offset to be + // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. + // Handle the case where the Immediate Offset + SOffset is negative. + if (ConstantSDNode *C = dyn_cast(*Offset)) { + int64_t ByteOffset = C->getSExtValue(); + if (ByteOffset >= 0) + return true; + + KnownBits SKnown = CurDAG->computeKnownBits(*SOffset); + if (ByteOffset + SKnown.getMinValue().getSExtValue() < 0) + return false; + } + + return true; } // A 32-bit (address + offset) should not cause unsigned 32-bit integer @@ -2097,11 +2127,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, } if (!N0 || !N1) return false; - if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) { + + if (SelectSMRDOffset(N1, &N0, SOffset, Offset, Imm32Only, IsBuffer, + HasSOffset)) { SBase = N0; return true; } - if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) { + if (SelectSMRDOffset(N0, &N1, SOffset, Offset, Imm32Only, IsBuffer, + HasSOffset)) { SBase = N1; return true; } @@ -2149,14 +2182,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, } bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const { - return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset, + return 
SelectSMRDOffset(N, /*SBase=*/nullptr, /* SOffset */ nullptr, &Offset, /* Imm32Only */ false, /* IsBuffer */ true); } bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const { assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); - return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset, + return SelectSMRDOffset(N, /*SBase=*/nullptr, /* SOffset */ nullptr, &Offset, /* Imm32Only */ true, /* IsBuffer */ true); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index f987b747c0e21b..b1ad16af3c35a2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -183,13 +183,15 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &SAddr, SDValue &Offset) const; - bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset, - SDValue *Offset, bool Imm32Only = false, - bool IsBuffer = false) const; + bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SBase, + SDValue *SOffset, SDValue *Offset, + bool Imm32Only = false, bool IsBuffer = false, + bool HasSOffset = false) const; SDValue Expand32BitAddress(SDValue Addr) const; bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset, SDValue *Offset, bool Imm32Only = false, - bool IsBuffer = false) const; + bool IsBuffer = false, + bool HasSOffset = false) const; bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset, SDValue *Offset, bool Imm32Only = false) const; bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; @@ -201,6 +203,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const; bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset, SDValue &Offset) const; + bool SelectSMRDPrefetchImm(SDValue Addr, SDValue &SBase, + SDValue &Offset) const; bool SelectMOVRELOffset(SDValue Index, SDValue 
&Base, SDValue &Offset) const; bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index e13c13913d4e82..10dda8a9e1eaac 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4211,6 +4211,17 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, Base = GEPI2.SgprParts[0]; *SOffset = OffsetReg; *Offset = *EncodedImm; + if (*Offset >= 0 || !STI.hasSignedSMRDImmOffset()) + return true; + + // For unbuffered smem loads, it is illegal for the Immediate Offset + // to be negative if the resulting (Offset + (M0 or SOffset or zero) + // is negative. Handle the case where the Immediate Offset + SOffset + // is negative. + auto SKnown = KB->getKnownBits(*SOffset); + if (*Offset + SKnown.getMinValue().getSExtValue() < 0) + return false; + return true; } } @@ -4221,7 +4232,13 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) { Base = GEPI.SgprParts[0]; *Offset = *EncodedImm; - return true; + if (*Offset >= 0 || !STI.hasSignedSMRDImmOffset()) + return true; + // For unbuffered smem loads, it is illegal for the Immediate Offset to be + // negative if the resulting (Offset + (M0 or SOffset or zero is negative. + // Handle the case where the Immediate Offset is negative and there is no + // SOffset. + return false; } // SGPR offset is unsigned. diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 8a4a46ce50d1d7..25c24c924f0a20 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1315,6 +1315,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // of sign-extending. 
bool hasGetPCZeroExtension() const { return GFX12Insts; } + // \returns true if the target supports signed immediate offset for SMRD + // instructions. + bool hasSignedSMRDImmOffset() const { return getGeneration() >= GFX9; } + /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { return AMDGPU::IsaInfo::getSGPRAllocGranule(this); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir index c44477273dad09..504f7697a0fcca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir @@ -1234,7 +1234,15 @@ body: | ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -1 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -1 @@ -1304,7 +1312,15 @@ body: | ; GFX10: liveins: $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} 
; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load (s32), addrspace 4) + ; GFX10-NEXT: [[S_MOV_B:%[0-9]+]]:sreg_64 = S_MOV_B64_IMM_PSEUDO -524288 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub0 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B]].sub1 + ; GFX10-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc + ; GFX10-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def dead $scc, implicit $scc + ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1 + ; GFX10-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load (s32), addrspace 4) ; GFX10-NEXT: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]] %0:sgpr(p4) = COPY $sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 -524288 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll index 139f82b3dc9f7a..9ee0acf2aa2db6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll @@ -88,11 +88,13 @@ entry: ret void } -; GFX9_10 can use a signed immediate byte offset +; GFX9+ can use a signed immediate byte offset but not without sgpr[offset] ; GCN-LABEL: {{^}}smrd6: ; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4 ; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 -; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], -0x4 +; GFX9_10: s_add_u32 s2, s2, -4 +; GFX9_10: s_addc_u32 s3, s3, -1 +; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 { entry: %tmp = getelementptr i32, ptr addrspace(4) %ptr, 
i64 -1 diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll index 54dc5b8b9d3dd6..41d2360dd5e1e6 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll @@ -297,9 +297,11 @@ define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: .LBB5_1: ; %loop ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dword s3, s[0:1], -0x190 ; GFX9-NEXT: s_add_i32 s2, s2, -1 +; GFX9-NEXT: s_add_u32 s4, s0, 0xfffffe70 +; GFX9-NEXT: s_addc_u32 s5, s1, -1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s3, s[4:5], 0x0 ; GFX9-NEXT: s_cmp_lg_u32 s2, 0 ; GFX9-NEXT: s_cbranch_scc1 .LBB5_1 ; GFX9-NEXT: ; %bb.2: ; %end @@ -307,10 +309,14 @@ define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, ; ; GFX12-LABEL: test_sink_smem_offset_neg400: ; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_movk_i32 s4, 0xfe70 +; GFX12-NEXT: s_mov_b32 s5, -1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5] ; GFX12-NEXT: .LBB5_1: ; %loop ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_load_b32 s3, s[0:1], -0x190 +; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0 ; GFX12-NEXT: s_add_co_i32 s2, s2, -1 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-NEXT: s_cmp_lg_u32 s2, 0 diff --git a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll index c69207c0472e7c..08da89ec0fb229 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll @@ -19,15 +19,31 @@ define amdgpu_ps void @test_s_load_i8(ptr addrspace(4) inreg %in, ptr addrspace( } define amdgpu_ps void @test_s_load_i8_imm(ptr addrspace(4) inreg %in, 
ptr addrspace(1) %out) { -; GCN-LABEL: test_s_load_i8_imm: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_i8 s0, s[0:1], -0x64 -; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: global_store_b32 v[0:1], v2, off -; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GCN-NEXT: s_endpgm +; DAG-LABEL: test_s_load_i8_imm: +; DAG: ; %bb.0: +; DAG-NEXT: s_movk_i32 s2, 0xff9c +; DAG-NEXT: s_mov_b32 s3, -1 +; DAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; DAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] +; DAG-NEXT: s_load_i8 s0, s[0:1], 0x0 +; DAG-NEXT: s_wait_kmcnt 0x0 +; DAG-NEXT: v_mov_b32_e32 v2, s0 +; DAG-NEXT: global_store_b32 v[0:1], v2, off +; DAG-NEXT: s_nop 0 +; DAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; DAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_s_load_i8_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_add_co_u32 s0, s0, 0xffffff9c +; GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1 +; GISEL-NEXT: s_load_i8 s0, s[0:1], 0x0 +; GISEL-NEXT: s_wait_kmcnt 0x0 +; GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-NEXT: s_endpgm %gep = getelementptr i8, ptr addrspace(4) %in, i64 -100 %ld = load i8, ptr addrspace(4) %gep %sext = sext i8 %ld to i32 @@ -195,15 +211,31 @@ define amdgpu_ps void @test_s_load_i16(ptr addrspace(4) inreg %in, ptr addrspace } define amdgpu_ps void @test_s_load_i16_imm(ptr addrspace(4) inreg %in, ptr addrspace(1) %out) { -; GCN-LABEL: test_s_load_i16_imm: -; GCN: ; %bb.0: -; GCN-NEXT: s_load_i16 s0, s[0:1], -0xc8 -; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: global_store_b32 v[0:1], v2, off -; GCN-NEXT: s_nop 0 -; GCN-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GCN-NEXT: s_endpgm +; DAG-LABEL: test_s_load_i16_imm: +; DAG: ; %bb.0: +; DAG-NEXT: s_movk_i32 s2, 0xff38 +; DAG-NEXT: s_mov_b32 s3, -1 +; DAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; DAG-NEXT: s_add_nc_u64 s[0:1], 
s[0:1], s[2:3] +; DAG-NEXT: s_load_i16 s0, s[0:1], 0x0 +; DAG-NEXT: s_wait_kmcnt 0x0 +; DAG-NEXT: v_mov_b32_e32 v2, s0 +; DAG-NEXT: global_store_b32 v[0:1], v2, off +; DAG-NEXT: s_nop 0 +; DAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; DAG-NEXT: s_endpgm +; +; GISEL-LABEL: test_s_load_i16_imm: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_add_co_u32 s0, s0, 0xffffff38 +; GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1 +; GISEL-NEXT: s_load_i16 s0, s[0:1], 0x0 +; GISEL-NEXT: s_wait_kmcnt 0x0 +; GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GISEL-NEXT: s_nop 0 +; GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GISEL-NEXT: s_endpgm %gep = getelementptr i16, ptr addrspace(4) %in, i64 -100 %ld = load i16, ptr addrspace(4) %gep %sext = sext i16 %ld to i32 diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll index d9cbbc11f9a738..2f7e91faa41847 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll @@ -157,12 +157,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4096(ptr addrspace(1) inr ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: ; return to shader part epilog ; -; GFX12-LABEL: global_load_saddr_i8_offset_neg4096: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x1000 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg4096: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xf000 +; GFX12-SDAG-NEXT: s_mov_b32 s1, -1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] +; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg4096: +; GFX12-GISEL: ; %bb.0: +; 
GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xfffff000 +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4096 %load = load i8, ptr addrspace(1) %gep0 %zext = zext i8 %load to i32 @@ -198,12 +211,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4097(ptr addrspace(1) inr ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: ; return to shader part epilog ; -; GFX12-LABEL: global_load_saddr_i8_offset_neg4097: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x1001 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg4097: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xefff +; GFX12-SDAG-NEXT: s_mov_b32 s1, -1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] +; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg4097: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xffffefff +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4097 %load = load i8, ptr addrspace(1) %gep0 %zext = zext i8 %load to i32 @@ -239,12 +265,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4098(ptr addrspace(1) inr ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: ; return to shader part epilog ; -; GFX12-LABEL: 
global_load_saddr_i8_offset_neg4098: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x1002 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg4098: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xeffe +; GFX12-SDAG-NEXT: s_mov_b32 s1, -1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] +; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg4098: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xffffeffe +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -4098 %load = load i8, ptr addrspace(1) %gep0 %zext = zext i8 %load to i32 @@ -376,12 +415,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2048(ptr addrspace(1) inr ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: ; return to shader part epilog ; -; GFX12-LABEL: global_load_saddr_i8_offset_neg2048: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x800 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg2048: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xf800 +; GFX12-SDAG-NEXT: s_mov_b32 s1, -1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] +; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-SDAG-NEXT: ; 
return to shader part epilog +; +; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg2048: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xfffff800 +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2048 %load = load i8, ptr addrspace(1) %gep0 %zext = zext i8 %load to i32 @@ -413,12 +465,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2049(ptr addrspace(1) inr ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: ; return to shader part epilog ; -; GFX12-LABEL: global_load_saddr_i8_offset_neg2049: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x801 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg2049: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xf7ff +; GFX12-SDAG-NEXT: s_mov_b32 s1, -1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] +; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg2049: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xfffff7ff +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2049 %load = load i8, ptr addrspace(1) %gep0 %zext = zext i8 %load to i32 @@ -450,12 +515,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2050(ptr addrspace(1) 
inr ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: ; return to shader part epilog ; -; GFX12-LABEL: global_load_saddr_i8_offset_neg2050: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x802 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_neg2050: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_movk_i32 s0, 0xf7fe +; GFX12-SDAG-NEXT: s_mov_b32 s1, -1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] +; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_neg2050: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xfffff7fe +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -2050 %load = load i8, ptr addrspace(1) %gep0 %zext = zext i8 %load to i32 @@ -525,12 +603,25 @@ define amdgpu_ps float @global_load_saddr_i8_offset_0xFFFFFF(ptr addrspace(1) in ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: ; return to shader part epilog ; -; GFX12-LABEL: global_load_saddr_i8_offset_0xFFFFFF: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_load_u8 s0, s[2:3], -0x800000 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: global_load_saddr_i8_offset_0xFFFFFF: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_mov_b32 s0, 0xff800000 +; GFX12-SDAG-NEXT: s_mov_b32 s1, -1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] +; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 
0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: global_load_saddr_i8_offset_0xFFFFFF: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, 0xff800000 +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, -1 +; GFX12-GISEL-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 -8388608 %load = load i8, ptr addrspace(1) %gep0 %zext = zext i8 %load to i32 @@ -1721,12 +1812,29 @@ define amdgpu_ps float @global_load_saddr_i8_zext_uniform_offset_immoffset(ptr a ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: ; return to shader part epilog ; -; GFX12-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_load_u8 s0, s[2:3], s4 offset:-0x18 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_mov_b32 s5, 0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[4:5] +; GFX12-SDAG-NEXT: s_movk_i32 s2, 0xffe8 +; GFX12-SDAG-NEXT: s_mov_b32 s3, -1 +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX12-SDAG-NEXT: s_load_u8 s0, s[0:1], 0x0 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: global_load_saddr_i8_zext_uniform_offset_immoffset: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, s4 +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s3, 0 +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xffffffe8 +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1 +; GFX12-GISEL-NEXT: 
s_load_u8 s0, s[0:1], 0x0 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %zext.offset = zext i32 %soffset to i64 %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset %gep1 = getelementptr inbounds i8, ptr addrspace(1) %gep0, i64 -24 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll index 77fd0bc058aca5..2b517736ecff32 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll @@ -53,14 +53,25 @@ entry: } define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) { -; GFX12-LABEL: prefetch_data_sgpr_min_offset: -; GFX12: ; %bb.0: ; %entry -; GFX12-NEXT: s_prefetch_data s[0:1], -0x800000, null, 0 -; GFX12-NEXT: s_endpgm +; GFX12-SDAG-LABEL: prefetch_data_sgpr_min_offset: +; GFX12-SDAG: ; %bb.0: ; %entry +; GFX12-SDAG-NEXT: s_mov_b32 s2, 0xff800000 +; GFX12-SDAG-NEXT: s_mov_b32 s3, -1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX12-SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_data_sgpr_min_offset: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: prefetch_data_sgpr_min_offset: +; GFX12-GISEL: ; %bb.0: ; %entry +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000 +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1 +; GFX12-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1) @@ -215,14 +226,25 @@ entry: } define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) { -; GFX12-LABEL: prefetch_inst_sgpr_min_offset: -; GFX12: ; %bb.0: ; %entry -; GFX12-NEXT: s_prefetch_inst s[0:1], -0x800000, null, 0 -; GFX12-NEXT: 
s_endpgm +; GFX12-SDAG-LABEL: prefetch_inst_sgpr_min_offset: +; GFX12-SDAG: ; %bb.0: ; %entry +; GFX12-SDAG-NEXT: s_mov_b32 s2, 0xff800000 +; GFX12-SDAG-NEXT: s_mov_b32 s3, -1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 +; GFX12-SDAG-NEXT: s_endpgm ; ; GFX11-LABEL: prefetch_inst_sgpr_min_offset: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: prefetch_inst_sgpr_min_offset: +; GFX12-GISEL: ; %bb.0: ; %entry +; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000 +; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1 +; GFX12-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 +; GFX12-GISEL-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll index 4ce9260b8d53de..52db7fea08e053 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -88,11 +88,13 @@ entry: ret void } -; GFX9_10 can use a signed immediate byte offset +; GFX9+ can use a signed immediate byte offset but not without sgpr[offset] ; GCN-LABEL: {{^}}smrd6: ; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4 ; SICIVI: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0 -; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, -0x4 +; GFX9_10: s_add_u32 s2, s2, -4 +; GFX9_10: s_addc_u32 s3, s3, -1 +; GFX9_10: s_load_dword s{{[0-9]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0 define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 { entry: %tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 -1 From c369787b0890965c023955f8f28333f3d3c8dd2e Mon Sep 17 00:00:00 2001 From: Vang Thao Date: Wed, 8 May 2024 15:27:13 -0400 Subject: [PATCH 2/5] Restructured some parts of the changes --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 81 
+++++++++---------- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 12 +-- .../AMDGPU/AMDGPUInstructionSelector.cpp | 17 ++-- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 19 +++-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 4 +- 5 files changed, 66 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index bf65244255f341..8310b034ec4298 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1984,10 +1984,10 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, // not null) offset. If Imm32Only is true, match only 32-bit immediate // offsets available on CI. bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, - SDValue *SBase, SDValue *SOffset, - SDValue *Offset, bool Imm32Only, - bool IsBuffer, - bool HasSOffset) const { + SDValue *SOffset, SDValue *Offset, + bool Imm32Only, bool IsBuffer, + bool HasSOffset, + int64_t ImmOffset) const { assert((!SOffset || !Offset) && "Cannot match both soffset and offset at the same time!"); @@ -1995,18 +1995,28 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, if (!C) { if (!SOffset) return false; + bool Changed = false; if (ByteOffsetNode.getValueType().isScalarInteger() && ByteOffsetNode.getValueType().getSizeInBits() == 32) { *SOffset = ByteOffsetNode; - return true; - } - if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { + Changed = true; + } else if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) { *SOffset = ByteOffsetNode.getOperand(0); - return true; + Changed = true; } } - return false; + // For unbuffered smem loads, it is illegal for the Immediate Offset to be + // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. + // Handle the case where the Immediate Offset + SOffset is negative. 
+ if (AMDGPU::hasSMRDSignedImmOffset(*Subtarget) && Changed && + !IsBuffer & !Imm32Only && ImmOffset < 0) { + KnownBits SKnown = CurDAG->computeKnownBits(*SOffset); + if (ImmOffset + SKnown.getMinValue().getSExtValue() < 0) + return false; + } + + return Changed; } SDLoc SL(ByteOffsetNode); @@ -2014,18 +2024,11 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, // GFX9 and GFX10 have signed byte immediate offsets. The immediate // offset for S_BUFFER instructions is unsigned. int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue(); - std::optional EncodedOffset = - AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer); + std::optional EncodedOffset = AMDGPU::getSMRDEncodedOffset( + *Subtarget, ByteOffset, IsBuffer, HasSOffset); if (EncodedOffset && Offset && !Imm32Only) { *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); - if (EncodedOffset >= 0 || IsBuffer || HasSOffset || - !Subtarget->hasSignedSMRDImmOffset()) - return true; - // For unbuffered smem loads, it is illegal for the Immediate Offset to be - // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. - // Handle the case where the Immediate Offset is negative and there is no - // SOffset. - return false; + return true; } // SGPR and literal offsets are unsigned. 
@@ -2082,33 +2085,21 @@ SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset, SDValue *Offset, bool Imm32Only, bool IsBuffer, - bool HasSOffset) const { + bool HasSOffset, + int64_t ImmOffset) const { if (SOffset && Offset) { assert(!Imm32Only && !IsBuffer); SDValue B; - if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true)) - return false; - if (!SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true)) + if (!SelectSMRDBaseOffset(Addr, B, nullptr, Offset, false, false, true)) return false; - if (IsBuffer || Imm32Only || !Subtarget->hasSignedSMRDImmOffset()) - return true; + int64_t ImmOff = 0; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset)) + ImmOff = C->getSExtValue(); - // For unbuffered smem loads, it is illegal for the Immediate Offset to be - // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. - // Handle the case where the Immediate Offset + SOffset is negative.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(*Offset)) { - int64_t ByteOffset = C->getSExtValue(); - if (ByteOffset >= 0) - return true; - - KnownBits SKnown = CurDAG->computeKnownBits(*SOffset); - if (ByteOffset + SKnown.getMinValue().getSExtValue() < 0) - return false; - } - - return true; + return SelectSMRDBaseOffset(B, SBase, SOffset, nullptr, false, false, true, + ImmOff); } // A 32-bit (address + offset) should not cause unsigned 32-bit integer @@ -2128,13 +2119,13 @@ bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, if (!N0 || !N1) return false; - if (SelectSMRDOffset(N1, &N0, SOffset, Offset, Imm32Only, IsBuffer, - HasSOffset)) { + if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset, + ImmOffset)) { SBase = N0; return true; } - if (SelectSMRDOffset(N0, &N1, SOffset, Offset, Imm32Only, IsBuffer, - HasSOffset)) { + if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset, + ImmOffset)) { SBase = N1; return true; } @@ -2182,14 +2173,14 @@ bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, } bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const { - return SelectSMRDOffset(N, /*SBase=*/nullptr, /* SOffset */ nullptr, &Offset, + return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset, /* Imm32Only */ false, /* IsBuffer */ true); } bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const { assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); - return SelectSMRDOffset(N, /*SBase=*/nullptr, /* SOffset */ nullptr, &Offset, + return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset, /* Imm32Only */ true, /* IsBuffer */ true); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index b1ad16af3c35a2..d8bb2a9d11e4f9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -183,15 +183,15 @@ class AMDGPUDAGToDAGISel : public
SelectionDAGISel { bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &SAddr, SDValue &Offset) const; - bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SBase, - SDValue *SOffset, SDValue *Offset, - bool Imm32Only = false, bool IsBuffer = false, - bool HasSOffset = false) const; + bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset, + SDValue *Offset, bool Imm32Only = false, + bool IsBuffer = false, bool HasSOffset = false, + int64_t ImmOffset = 0) const; SDValue Expand32BitAddress(SDValue Addr) const; bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset, SDValue *Offset, bool Imm32Only = false, - bool IsBuffer = false, - bool HasSOffset = false) const; + bool IsBuffer = false, bool HasSOffset = false, + int64_t ImmOffset = 0) const; bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset, SDValue *Offset, bool Imm32Only = false) const; bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 10dda8a9e1eaac..9f238795cbb7c8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4198,10 +4198,11 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, return false; const GEPInfo &GEPI = AddrInfo[0]; - std::optional<int64_t> EncodedImm = - AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false); + std::optional<int64_t> EncodedImm; if (SOffset && Offset) { + EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false, + /*HasSOffset=*/true); if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm && AddrInfo.size() > 1) { const GEPInfo &GEPI2 = AddrInfo[1]; @@ -4211,7 +4212,7 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, Base = GEPI2.SgprParts[0]; *SOffset = OffsetReg; *Offset = *EncodedImm; - if (*Offset >= 0 || !STI.hasSignedSMRDImmOffset()) +
if (*Offset >= 0 || !AMDGPU::hasSMRDSignedImmOffset(STI)) return true; // For unbuffered smem loads, it is illegal for the Immediate Offset @@ -4229,16 +4230,12 @@ bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, return false; } + EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false, + /*HasSOffset=*/false); if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) { Base = GEPI.SgprParts[0]; *Offset = *EncodedImm; - if (*Offset >= 0 || !STI.hasSignedSMRDImmOffset()) - return true; - // For unbuffered smem loads, it is illegal for the Immediate Offset to be - // negative if the resulting (Offset + (M0 or SOffset or zero is negative. - // Handle the case where the Immediate Offset is negative and there is no - // SOffset. - return false; + return true; } // SGPR offset is unsigned. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 4e0074451aa58c..2f10e8b6e9935c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -158,6 +158,12 @@ namespace llvm { namespace AMDGPU { +/// \returns true if the target supports signed immediate offset for SMRD +/// instructions. +bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { + return isGFX9Plus(ST); +} + /// \returns True if \p STI is AMDHSA. 
bool isHsaAbi(const MCSubtargetInfo &STI) { return STI.getTargetTriple().getOS() == Triple::AMDHSA; } @@ -2874,10 +2880,6 @@ static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) { return isGCN3Encoding(ST) || isGFX10Plus(ST); } -static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) { - return isGFX9Plus(ST); -} - bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset) { if (isGFX12Plus(ST)) @@ -2912,7 +2914,14 @@ uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, } std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, - int64_t ByteOffset, bool IsBuffer) { + int64_t ByteOffset, bool IsBuffer, + bool HasSOffset) { + // For unbuffered smem loads, it is illegal for the Immediate Offset to be + // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. + // Handle case where SOffset is not present. + if (!IsBuffer && hasSMRDSignedImmOffset(ST) && !HasSOffset && ByteOffset < 0) + return std::nullopt; + if (isGFX12Plus(ST)) // 24 bit signed offsets return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset) : std::nullopt; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 943588fe701cc8..a326ac927ef6c3 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1300,6 +1300,7 @@ bool hasVOPD(const MCSubtargetInfo &STI); bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI); int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR); unsigned hasKernargPreload(const MCSubtargetInfo &STI); +bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST); /// Is Reg - scalar register bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); @@ -1472,7 +1473,8 @@ uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset); /// S_LOAD instructions have a signed offset, on other subtargets it is /// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, - int64_t ByteOffset, bool IsBuffer); + int64_t ByteOffset, bool IsBuffer, + bool HasSOffset = false); /// \return The encoding that can be used for a 32-bit literal offset in an SMRD /// instruction. This is only useful on CI.s From 7ce94de3ba1b83fd8141bba00f4c5e597602c059 Mon Sep 17 00:00:00 2001 From: Vang Thao Date: Tue, 28 May 2024 15:09:49 -0400 Subject: [PATCH 3/5] Restructure SOffset check in SelectSMRDBaseOffset --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 40 ++++++++++++------- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 2 + 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 8310b034ec4298..08804e4a863582 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1980,6 +1980,23 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, return true; } +// For unbuffered smem loads, it is illegal for the Immediate Offset to be +// negative if the resulting (Offset + (M0 or SOffset or zero) is negative. +// Handle the case where the Immediate Offset + SOffset is negative. +bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset, + bool Imm32Only, + bool IsBuffer, + int64_t ImmOffset) const { + if (AMDGPU::hasSMRDSignedImmOffset(*Subtarget) && !IsBuffer & !Imm32Only && + ImmOffset < 0) { + KnownBits SKnown = CurDAG->computeKnownBits(*SOffset); + if (ImmOffset + SKnown.getMinValue().getSExtValue() < 0) + return false; + } + + return true; +} + // Match an immediate (if Offset is not null) or an SGPR (if SOffset is // not null) offset. If Imm32Only is true, match only 32-bit immediate // offsets available on CI.
@@ -1995,28 +2012,21 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, if (!C) { if (!SOffset) return false; - bool Changed = false; + if (ByteOffsetNode.getValueType().isScalarInteger() && ByteOffsetNode.getValueType().getSizeInBits() == 32) { *SOffset = ByteOffsetNode; - Changed = true; - } else if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { + return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer, + ImmOffset); + } + if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) { *SOffset = ByteOffsetNode.getOperand(0); - Changed = true; + return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer, + ImmOffset); } } - // For unbuffered smem loads, it is illegal for the Immediate Offset to be - // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. - // Handle the case where the Immediate Offset + SOffset is negative. - if (AMDGPU::hasSMRDSignedImmOffset(*Subtarget) && Changed && - !IsBuffer & !Imm32Only && ImmOffset < 0) { - KnownBits SKnown = CurDAG->computeKnownBits(*SOffset); - if (ImmOffset + SKnown.getMinValue().getSExtValue() < 0) - return false; - } - - return Changed; + return false; } SDLoc SL(ByteOffsetNode); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index d8bb2a9d11e4f9..d145511ccaae4f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -143,6 +143,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { bool isFlatScratchBaseLegal(SDValue Addr) const; bool isFlatScratchBaseLegalSV(SDValue Addr) const; bool isFlatScratchBaseLegalSVImm(SDValue Addr) const; + bool isSOffsetLegalWithImmOffset(SDValue *SOffset, bool Imm32Only, + bool IsBuffer, int64_t ImmOffset = 0) const; bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue 
&Offset0, From 2ec61da1a5df20808b1da448d5019035f57a6978 Mon Sep 17 00:00:00 2001 From: Vang Thao Date: Fri, 21 Jun 2024 14:08:53 -0400 Subject: [PATCH 4/5] Move hasSMRDSignedImmOffset() check to last. --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 08804e4a863582..72b0b499068323 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1987,8 +1987,8 @@ bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset, bool Imm32Only, bool IsBuffer, int64_t ImmOffset) const { - if (AMDGPU::hasSMRDSignedImmOffset(*Subtarget) && !IsBuffer & !Imm32Only && - ImmOffset < 0) { + if (!IsBuffer && !Imm32Only && ImmOffset < 0 && + AMDGPU::hasSMRDSignedImmOffset(*Subtarget)) { KnownBits SKnown = CurDAG->computeKnownBits(*SOffset); if (ImmOffset + SKnown.getMinValue().getSExtValue() < 0) return false; From e50ac1894dfba7b528b3613c1f861f8077803177 Mon Sep 17 00:00:00 2001 From: Vang Thao Date: Fri, 21 Jun 2024 17:46:15 -0400 Subject: [PATCH 5/5] Remove unused hasSignedSMRDImmOffset in GCNSubtarget and move hasSMRDSignedImmOffset --- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ---- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 25c24c924f0a20..8a4a46ce50d1d7 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1315,10 +1315,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, // of sign-extending. bool hasGetPCZeroExtension() const { return GFX12Insts; } - // \returns true if the target supports signed immediate offset for SMRD - // instructions. 
- bool hasSignedSMRDImmOffset() const { return getGeneration() >= GFX9; } - /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { return AMDGPU::IsaInfo::getSGPRAllocGranule(this); } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 2f10e8b6e9935c..1578fb7d015c97 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2919,7 +2919,7 @@ std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST, // For unbuffered smem loads, it is illegal for the Immediate Offset to be // negative if the resulting (Offset + (M0 or SOffset or zero) is negative. // Handle case where SOffset is not present. - if (!IsBuffer && hasSMRDSignedImmOffset(ST) && !HasSOffset && ByteOffset < 0) + if (!IsBuffer && !HasSOffset && ByteOffset < 0 && hasSMRDSignedImmOffset(ST)) return std::nullopt; if (isGFX12Plus(ST)) // 24 bit signed offsets