Skip to content

Commit

Permalink
Revert "[AMDGPU]Optimize SGPR spills (llvm#93668)"
Browse files Browse the repository at this point in the history
This reverts commit 4b9112e. A separate issue (llvm#96353) describing the
problem has been opened to keep track of it.
  • Loading branch information
vg0204 authored and AlexisPerry committed Jun 27, 2024
1 parent 5704654 commit 79354df
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 126 deletions.
5 changes: 0 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1437,11 +1437,6 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
// since FastRegAlloc does the replacements itself.
addPass(createVirtRegRewriter(false));

// At this point, the sgpr-regalloc has been done and it is good to have the
// stack slot coloring to try to optimize the SGPR spill stack indices before
// attempting the custom SGPR spill lowering.
addPass(&StackSlotColoringID);

// Equivalent of PEI for SGPRs.
addPass(&SILowerSGPRSpillsID);
addPass(&SIPreAllocateWWMRegsID);
Expand Down
9 changes: 2 additions & 7 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1775,13 +1775,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,

if (SpillToVGPR) {

// Since the stack slot coloring pass tries to optimize SGPR spills,
// VGPR lanes (mapped from a spill stack slot) may be shared by SGPR
// spills of different sizes. The number of VGPR lanes allotted is
// therefore equal to the size of the largest SGPR spilled into them.
assert(SB.NumSubRegs <= VGPRSpills.size() &&
"Num of SGPRs spilled should be less than or equal to num of "
"the VGPR lanes.");
assert(SB.NumSubRegs == VGPRSpills.size() &&
"Num of VGPR lanes should be equal to num of SGPRs spilled");

for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
Register SubReg =
Expand Down
8 changes: 0 additions & 8 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -366,12 +366,10 @@
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: Greedy Register Allocator
; GCN-O1-NEXT: Virtual Register Rewriter
; GCN-O1-NEXT: Stack Slot Coloring
; GCN-O1-NEXT: SI lower SGPR spill instructions
; GCN-O1-NEXT: Virtual Register Map
; GCN-O1-NEXT: Live Register Matrix
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
; GCN-O1-NEXT: Live Stack Slot Analysis
; GCN-O1-NEXT: Greedy Register Allocator
; GCN-O1-NEXT: SI Lower WWM Copies
; GCN-O1-NEXT: GCN NSA Reassign
Expand Down Expand Up @@ -673,12 +671,10 @@
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
; GCN-O1-OPTS-NEXT: Stack Slot Coloring
; GCN-O1-OPTS-NEXT: SI lower SGPR spill instructions
; GCN-O1-OPTS-NEXT: Virtual Register Map
; GCN-O1-OPTS-NEXT: Live Register Matrix
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
; GCN-O1-OPTS-NEXT: Live Stack Slot Analysis
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
; GCN-O1-OPTS-NEXT: SI Lower WWM Copies
; GCN-O1-OPTS-NEXT: GCN NSA Reassign
Expand Down Expand Up @@ -986,12 +982,10 @@
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: Greedy Register Allocator
; GCN-O2-NEXT: Virtual Register Rewriter
; GCN-O2-NEXT: Stack Slot Coloring
; GCN-O2-NEXT: SI lower SGPR spill instructions
; GCN-O2-NEXT: Virtual Register Map
; GCN-O2-NEXT: Live Register Matrix
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
; GCN-O2-NEXT: Live Stack Slot Analysis
; GCN-O2-NEXT: Greedy Register Allocator
; GCN-O2-NEXT: SI Lower WWM Copies
; GCN-O2-NEXT: GCN NSA Reassign
Expand Down Expand Up @@ -1311,12 +1305,10 @@
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: Greedy Register Allocator
; GCN-O3-NEXT: Virtual Register Rewriter
; GCN-O3-NEXT: Stack Slot Coloring
; GCN-O3-NEXT: SI lower SGPR spill instructions
; GCN-O3-NEXT: Virtual Register Map
; GCN-O3-NEXT: Live Register Matrix
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
; GCN-O3-NEXT: Live Stack Slot Analysis
; GCN-O3-NEXT: Greedy Register Allocator
; GCN-O3-NEXT: SI Lower WWM Copies
; GCN-O3-NEXT: GCN NSA Reassign
Expand Down
72 changes: 36 additions & 36 deletions llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,15 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX906-NEXT: ; def s29
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX906-NEXT: v_writelane_b32 v40, s21, 12
; GFX906-NEXT: v_writelane_b32 v40, s22, 13
; GFX906-NEXT: v_writelane_b32 v40, s23, 14
; GFX906-NEXT: v_writelane_b32 v40, s24, 15
; GFX906-NEXT: v_writelane_b32 v40, s25, 16
; GFX906-NEXT: v_writelane_b32 v40, s26, 17
; GFX906-NEXT: v_writelane_b32 v40, s27, 18
; GFX906-NEXT: v_writelane_b32 v40, s28, 19
; GFX906-NEXT: v_writelane_b32 v40, s29, 20
; GFX906-NEXT: v_writelane_b32 v40, s21, 24
; GFX906-NEXT: v_writelane_b32 v40, s22, 25
; GFX906-NEXT: v_writelane_b32 v40, s23, 26
; GFX906-NEXT: v_writelane_b32 v40, s24, 27
; GFX906-NEXT: v_writelane_b32 v40, s25, 28
; GFX906-NEXT: v_writelane_b32 v40, s26, 29
; GFX906-NEXT: v_writelane_b32 v40, s27, 30
; GFX906-NEXT: v_writelane_b32 v40, s28, 31
; GFX906-NEXT: v_writelane_b32 v40, s29, 32
; GFX906-NEXT: v_readlane_b32 s4, v40, 10
; GFX906-NEXT: v_readlane_b32 s6, v40, 0
; GFX906-NEXT: v_readlane_b32 s8, v40, 8
Expand All @@ -249,39 +249,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
; GFX906-NEXT: v_readlane_b32 s21, v40, 12
; GFX906-NEXT: v_readlane_b32 s21, v40, 24
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s21
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: v_readlane_b32 s22, v40, 13
; GFX906-NEXT: v_readlane_b32 s22, v40, 25
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s22
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: v_readlane_b32 s23, v40, 14
; GFX906-NEXT: v_readlane_b32 s23, v40, 26
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s23
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: v_readlane_b32 s24, v40, 15
; GFX906-NEXT: v_readlane_b32 s24, v40, 27
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s24
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: v_readlane_b32 s25, v40, 16
; GFX906-NEXT: v_readlane_b32 s25, v40, 28
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s25
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: v_readlane_b32 s26, v40, 17
; GFX906-NEXT: v_readlane_b32 s26, v40, 29
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s26
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: v_readlane_b32 s27, v40, 18
; GFX906-NEXT: v_readlane_b32 s27, v40, 30
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s27
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: v_readlane_b32 s28, v40, 19
; GFX906-NEXT: v_readlane_b32 s28, v40, 31
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s28
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: v_readlane_b32 s29, v40, 20
; GFX906-NEXT: v_readlane_b32 s29, v40, 32
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s29
; GFX906-NEXT: ;;#ASMEND
Expand Down Expand Up @@ -602,15 +602,15 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: ; def s29
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX908-NEXT: v_writelane_b32 v40, s21, 12
; GFX908-NEXT: v_writelane_b32 v40, s22, 13
; GFX908-NEXT: v_writelane_b32 v40, s23, 14
; GFX908-NEXT: v_writelane_b32 v40, s24, 15
; GFX908-NEXT: v_writelane_b32 v40, s25, 16
; GFX908-NEXT: v_writelane_b32 v40, s26, 17
; GFX908-NEXT: v_writelane_b32 v40, s27, 18
; GFX908-NEXT: v_writelane_b32 v40, s28, 19
; GFX908-NEXT: v_writelane_b32 v40, s29, 20
; GFX908-NEXT: v_writelane_b32 v40, s21, 24
; GFX908-NEXT: v_writelane_b32 v40, s22, 25
; GFX908-NEXT: v_writelane_b32 v40, s23, 26
; GFX908-NEXT: v_writelane_b32 v40, s24, 27
; GFX908-NEXT: v_writelane_b32 v40, s25, 28
; GFX908-NEXT: v_writelane_b32 v40, s26, 29
; GFX908-NEXT: v_writelane_b32 v40, s27, 30
; GFX908-NEXT: v_writelane_b32 v40, s28, 31
; GFX908-NEXT: v_writelane_b32 v40, s29, 32
; GFX908-NEXT: v_readlane_b32 s4, v40, 10
; GFX908-NEXT: v_readlane_b32 s6, v40, 0
; GFX908-NEXT: v_readlane_b32 s8, v40, 8
Expand All @@ -630,39 +630,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
; GFX908-NEXT: v_readlane_b32 s21, v40, 12
; GFX908-NEXT: v_readlane_b32 s21, v40, 24
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s21
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_readlane_b32 s22, v40, 13
; GFX908-NEXT: v_readlane_b32 s22, v40, 25
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s22
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_readlane_b32 s23, v40, 14
; GFX908-NEXT: v_readlane_b32 s23, v40, 26
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s23
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_readlane_b32 s24, v40, 15
; GFX908-NEXT: v_readlane_b32 s24, v40, 27
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s24
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_readlane_b32 s25, v40, 16
; GFX908-NEXT: v_readlane_b32 s25, v40, 28
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s25
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_readlane_b32 s26, v40, 17
; GFX908-NEXT: v_readlane_b32 s26, v40, 29
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s26
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_readlane_b32 s27, v40, 18
; GFX908-NEXT: v_readlane_b32 s27, v40, 30
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s27
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_readlane_b32 s28, v40, 19
; GFX908-NEXT: v_readlane_b32 s28, v40, 31
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s28
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: v_readlane_b32 s29, v40, 20
; GFX908-NEXT: v_readlane_b32 s29, v40, 32
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s29
; GFX908-NEXT: ;;#ASMEND
Expand Down
8 changes: 0 additions & 8 deletions llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@

; DEFAULT: Greedy Register Allocator
; DEFAULT-NEXT: Virtual Register Rewriter
; DEFAULT-NEXT: Stack Slot Coloring
; DEFAULT-NEXT: SI lower SGPR spill instructions
; DEFAULT-NEXT: Virtual Register Map
; DEFAULT-NEXT: Live Register Matrix
; DEFAULT-NEXT: SI Pre-allocate WWM Registers
; DEFAULT-NEXT: Live Stack Slot Analysis
; DEFAULT-NEXT: Greedy Register Allocator
; DEFAULT-NEXT: SI Lower WWM Copies
; DEFAULT-NEXT: GCN NSA Reassign
Expand Down Expand Up @@ -52,12 +50,10 @@
; BASIC-DEFAULT-NEXT: Live Register Matrix
; BASIC-DEFAULT-NEXT: Basic Register Allocator
; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
; BASIC-DEFAULT-NEXT: Stack Slot Coloring
; BASIC-DEFAULT-NEXT: SI lower SGPR spill instructions
; BASIC-DEFAULT-NEXT: Virtual Register Map
; BASIC-DEFAULT-NEXT: Live Register Matrix
; BASIC-DEFAULT-NEXT: SI Pre-allocate WWM Registers
; BASIC-DEFAULT-NEXT: Live Stack Slot Analysis
; BASIC-DEFAULT-NEXT: Bundle Machine CFG Edges
; BASIC-DEFAULT-NEXT: Spill Code Placement Analysis
; BASIC-DEFAULT-NEXT: Lazy Machine Block Frequency Analysis
Expand All @@ -73,12 +69,10 @@

; DEFAULT-BASIC: Greedy Register Allocator
; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
; DEFAULT-BASIC-NEXT: Stack Slot Coloring
; DEFAULT-BASIC-NEXT: SI lower SGPR spill instructions
; DEFAULT-BASIC-NEXT: Virtual Register Map
; DEFAULT-BASIC-NEXT: Live Register Matrix
; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers
; DEFAULT-BASIC-NEXT: Live Stack Slot Analysis
; DEFAULT-BASIC-NEXT: Basic Register Allocator
; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
; DEFAULT-BASIC-NEXT: GCN NSA Reassign
Expand All @@ -96,12 +90,10 @@
; BASIC-BASIC-NEXT: Live Register Matrix
; BASIC-BASIC-NEXT: Basic Register Allocator
; BASIC-BASIC-NEXT: Virtual Register Rewriter
; BASIC-BASIC-NEXT: Stack Slot Coloring
; BASIC-BASIC-NEXT: SI lower SGPR spill instructions
; BASIC-BASIC-NEXT: Virtual Register Map
; BASIC-BASIC-NEXT: Live Register Matrix
; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers
; BASIC-BASIC-NEXT: Live Stack Slot Analysis
; BASIC-BASIC-NEXT: Basic Register Allocator
; BASIC-BASIC-NEXT: SI Lower WWM Copies
; BASIC-BASIC-NEXT: GCN NSA Reassign
Expand Down
Loading

0 comments on commit 79354df

Please sign in to comment.