Skip to content

Commit

Permalink
[AMDGPU] Disallow negative s_load offsets in isLegalAddressingMode (p…
Browse files Browse the repository at this point in the history
…relim)

This is a cherry-pick of an unmerged upstream change
llvm#91327

Once the upstream change is finished and merged, this will need reverting.

Change-Id: Ie89236494ed38063856d881b5c2944e650ee17c4
  • Loading branch information
jayfoad authored and dstutt committed May 17, 2024
1 parent 0c0d406 commit d4000e3
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 27 deletions.
10 changes: 10 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1605,6 +1605,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
}

if ((AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
AM.BaseOffs < 0) {
// Scalar (non-buffer) loads can only use a negative offset if
// soffset+offset is non-negative. Since the compiler can only prove that
// in a few special cases, it is safer to claim that negative offsets are
// not supported.
return false;
}

if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;

Expand Down
89 changes: 62 additions & 27 deletions llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -279,33 +279,19 @@ end:
}

define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX678-LABEL: test_sink_smem_offset_neg400:
; GFX678: ; %bb.0: ; %entry
; GFX678-NEXT: s_add_u32 s0, s0, 0xfffffe70
; GFX678-NEXT: s_addc_u32 s1, s1, -1
; GFX678-NEXT: .LBB5_1: ; %loop
; GFX678-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX678-NEXT: s_waitcnt lgkmcnt(0)
; GFX678-NEXT: s_load_dword s3, s[0:1], 0x0
; GFX678-NEXT: s_add_i32 s2, s2, -1
; GFX678-NEXT: s_cmp_lg_u32 s2, 0
; GFX678-NEXT: s_cbranch_scc1 .LBB5_1
; GFX678-NEXT: ; %bb.2: ; %end
; GFX678-NEXT: s_endpgm
;
; GFX9-LABEL: test_sink_smem_offset_neg400:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: .LBB5_1: ; %loop
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_add_i32 s2, s2, -1
; GFX9-NEXT: s_add_u32 s4, s0, 0xfffffe70
; GFX9-NEXT: s_addc_u32 s5, s1, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s3, s[4:5], 0x0
; GFX9-NEXT: s_cmp_lg_u32 s2, 0
; GFX9-NEXT: s_cbranch_scc1 .LBB5_1
; GFX9-NEXT: ; %bb.2: ; %end
; GFX9-NEXT: s_endpgm
; GFX6789-LABEL: test_sink_smem_offset_neg400:
; GFX6789: ; %bb.0: ; %entry
; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70
; GFX6789-NEXT: s_addc_u32 s1, s1, -1
; GFX6789-NEXT: .LBB5_1: ; %loop
; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
; GFX6789-NEXT: s_add_i32 s2, s2, -1
; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
; GFX6789-NEXT: s_cbranch_scc1 .LBB5_1
; GFX6789-NEXT: ; %bb.2: ; %end
; GFX6789-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400:
; GFX12: ; %bb.0: ; %entry
Expand Down Expand Up @@ -337,3 +323,52 @@ loop:
end:
ret void
}

; Same for address space 6, constant 32-bit.
define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX6789: ; %bb.0: ; %entry
; GFX6789-NEXT: s_add_i32 s2, s0, 0xfffffe70
; GFX6789-NEXT: s_mov_b32 s3, 0
; GFX6789-NEXT: .LBB6_1: ; %loop
; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
; GFX6789-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX6789-NEXT: s_add_i32 s1, s1, -1
; GFX6789-NEXT: s_cmp_lg_u32 s1, 0
; GFX6789-NEXT: s_cbranch_scc1 .LBB6_1
; GFX6789-NEXT: ; %bb.2: ; %end
; GFX6789-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_add_co_i32 s2, s0, 0xfffffe70
; GFX12-NEXT: s_mov_b32 s3, 0
; GFX12-NEXT: .LBB6_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x0
; GFX12-NEXT: s_add_co_i32 s1, s1, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s1, 0
; GFX12-NEXT: s_cbranch_scc1 .LBB6_1
; GFX12-NEXT: ; %bb.2: ; %end
; GFX12-NEXT: s_endpgm
entry:
%gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
br label %loop

loop:
%count = phi i32 [ %dec, %loop ], [ %val, %entry ]
%dec = sub i32 %count, 1
%load = load volatile i32, ptr addrspace(6) %gep
%cond = icmp eq i32 %dec, 0
br i1 %cond, label %end, label %loop

end:
ret void
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX678: {{.*}}
; GFX9: {{.*}}

0 comments on commit d4000e3

Please sign in to comment.