release/21.x: [LoongArch] Strengthen stack size estimation for LSX/LASX extension (#146455) #149777
Open
llvmbot wants to merge 2 commits into llvm:release/21.x from llvmbot:issue146455
+783 −416
Conversation
[LoongArch] Strengthen stack size estimation for LSX/LASX extension (llvm#146455)

This patch adds an emergency spill slot for the case where the register scavenger runs out of registers. PR llvm#139201 introduced `vstelm` instructions, which carry only an 8-bit immediate offset, so without the extra slot there may be no reachable spill slot in which to store the spilled registers.

(cherry picked from commit 64a0478)
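For context, here is a minimal C++ sketch of the strengthened check, not the actual LLVM sources: `fitsSignedBits` and `scavengingSlotsNeeded` are hypothetical stand-ins for `llvm::isInt<N>` and for the logic the patch adds to `LoongArchFrameLowering::processFunctionBeforeFrameFinalized` (shown in the diff below).

```cpp
#include <algorithm>
#include <cstdint>

// Hypothetical helper mirroring llvm::isInt<N>(): true if V fits in an
// N-bit signed immediate.
template <unsigned N> bool fitsSignedBits(int64_t V) {
  return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
}

// Hypothetical free function standing in for the patched logic.
unsigned scavengingSlotsNeeded(uint64_t EstimatedStackSize, bool HasExtLSX,
                               unsigned ScavSlotsNum) {
  // Base rule: reserve an emergency spill slot when the estimated stack size
  // may not be representable in a 12-bit signed offset (checked against an
  // 11-bit margin, per the existing comment in the file).
  // Strengthened rule: with LSX/LASX, [x]vstelm.{b/h/w/d} only carries an
  // 8-bit immediate offset, so fall back to a 7-bit signed margin.
  if (!fitsSignedBits<11>(int64_t(EstimatedStackSize)) ||
      (HasExtLSX && !fitsSignedBits<7>(int64_t(EstimatedStackSize))))
    ScavSlotsNum = std::max(ScavSlotsNum, 1u);
  return ScavSlotsNum;
}
```

With LSX enabled, an estimated stack size of 112 bytes already exceeds the 7-bit signed range ([-64, 63]), which is why the new `frame.ll` tests in the diff reserve an extra 16-byte emergency slot (112 + 16 = 128).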
@heiher What do you think about merging this PR to the release branch?
@llvm/pr-subscribers-backend-loongarch

Author: None (llvmbot)

Changes: Backport 64a0478, requested by @tangaac.

Patch is 83.60 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/149777.diff

16 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index ac5e7f3891c72..1493bf4cba695 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -158,7 +158,12 @@ void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
// estimateStackSize has been observed to under-estimate the final stack
// size, so give ourselves wiggle-room by checking for stack size
// representable an 11-bit signed field rather than 12-bits.
- if (!isInt<11>(MFI.estimateStackSize(MF)))
+ // For [x]vstelm.{b/h/w/d} memory instructions with 8 imm offset, 7-bit
+ // signed field is fine.
+ unsigned EstimateStackSize = MFI.estimateStackSize(MF);
+ if (!isInt<11>(EstimateStackSize) ||
+ (MF.getSubtarget<LoongArchSubtarget>().hasExtLSX() &&
+ !isInt<7>(EstimateStackSize)))
ScavSlotsNum = std::max(ScavSlotsNum, 1u);
// For CFR spill.
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
index d07e2914c753a..f7653af1fa9ba 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
@@ -122,23 +122,23 @@ define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
define i64 @caller_large_scalars() nounwind {
; CHECK-LABEL: caller_large_scalars:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -80
-; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $zero, $sp, 24
+; CHECK-NEXT: addi.d $sp, $sp, -96
+; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $zero, $sp, 40
; CHECK-NEXT: vrepli.b $vr0, 0
-; CHECK-NEXT: vst $vr0, $sp, 8
+; CHECK-NEXT: vst $vr0, $sp, 24
; CHECK-NEXT: ori $a0, $zero, 2
-; CHECK-NEXT: st.d $a0, $sp, 0
-; CHECK-NEXT: st.d $zero, $sp, 56
-; CHECK-NEXT: vst $vr0, $sp, 40
+; CHECK-NEXT: st.d $a0, $sp, 16
+; CHECK-NEXT: st.d $zero, $sp, 72
+; CHECK-NEXT: vst $vr0, $sp, 56
; CHECK-NEXT: ori $a2, $zero, 1
-; CHECK-NEXT: addi.d $a0, $sp, 32
-; CHECK-NEXT: addi.d $a1, $sp, 0
-; CHECK-NEXT: st.d $a2, $sp, 32
+; CHECK-NEXT: addi.d $a0, $sp, 48
+; CHECK-NEXT: addi.d $a1, $sp, 16
+; CHECK-NEXT: st.d $a2, $sp, 48
; CHECK-NEXT: pcaddu18i $ra, %call36(callee_large_scalars)
; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 80
+; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%1 = call i64 @callee_large_scalars(i256 1, i256 2)
ret i64 %1
@@ -177,20 +177,20 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d,
define i64 @caller_large_scalars_exhausted_regs() nounwind {
; CHECK-LABEL: caller_large_scalars_exhausted_regs:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $a0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $sp, -112
+; CHECK-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $a0, $sp, 32
; CHECK-NEXT: st.d $a0, $sp, 8
; CHECK-NEXT: ori $a0, $zero, 9
; CHECK-NEXT: st.d $a0, $sp, 0
-; CHECK-NEXT: st.d $zero, $sp, 40
+; CHECK-NEXT: st.d $zero, $sp, 56
; CHECK-NEXT: vrepli.b $vr0, 0
-; CHECK-NEXT: vst $vr0, $sp, 24
+; CHECK-NEXT: vst $vr0, $sp, 40
; CHECK-NEXT: ori $a0, $zero, 10
-; CHECK-NEXT: st.d $a0, $sp, 16
-; CHECK-NEXT: st.d $zero, $sp, 72
+; CHECK-NEXT: st.d $a0, $sp, 32
+; CHECK-NEXT: st.d $zero, $sp, 88
; CHECK-NEXT: ori $a0, $zero, 8
-; CHECK-NEXT: st.d $a0, $sp, 48
+; CHECK-NEXT: st.d $a0, $sp, 64
; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: ori $a1, $zero, 2
; CHECK-NEXT: ori $a2, $zero, 3
@@ -198,12 +198,12 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
; CHECK-NEXT: ori $a4, $zero, 5
; CHECK-NEXT: ori $a5, $zero, 6
; CHECK-NEXT: ori $a6, $zero, 7
-; CHECK-NEXT: addi.d $a7, $sp, 48
-; CHECK-NEXT: vst $vr0, $sp, 56
+; CHECK-NEXT: addi.d $a7, $sp, 64
+; CHECK-NEXT: vst $vr0, $sp, 72
; CHECK-NEXT: pcaddu18i $ra, %call36(callee_large_scalars_exhausted_regs)
; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 112
; CHECK-NEXT: ret
%1 = call i64 @callee_large_scalars_exhausted_regs(
i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9,
diff --git a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
index c88b67f13d1e7..da8c3e93f6842 100644
--- a/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
+++ b/llvm/test/CodeGen/LoongArch/calling-conv-half.ll
@@ -1252,8 +1252,8 @@ define i32 @caller_half_on_stack() nounwind {
;
; LA64F-LP64S-LABEL: caller_half_on_stack:
; LA64F-LP64S: # %bb.0:
-; LA64F-LP64S-NEXT: addi.d $sp, $sp, -80
-; LA64F-LP64S-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64F-LP64S-NEXT: addi.d $sp, $sp, -96
+; LA64F-LP64S-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; LA64F-LP64S-NEXT: lu12i.w $a0, -12
; LA64F-LP64S-NEXT: ori $a1, $a0, 3200
; LA64F-LP64S-NEXT: lu32i.d $a1, 0
@@ -1292,8 +1292,8 @@ define i32 @caller_half_on_stack() nounwind {
; LA64F-LP64S-NEXT: st.w $t0, $sp, 0
; LA64F-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_on_stack)
; LA64F-LP64S-NEXT: jirl $ra, $ra, 0
-; LA64F-LP64S-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64F-LP64S-NEXT: addi.d $sp, $sp, 80
+; LA64F-LP64S-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64F-LP64S-NEXT: addi.d $sp, $sp, 96
; LA64F-LP64S-NEXT: ret
;
; LA64F-LP64D-LABEL: caller_half_on_stack:
@@ -1336,8 +1336,8 @@ define i32 @caller_half_on_stack() nounwind {
;
; LA64D-LP64S-LABEL: caller_half_on_stack:
; LA64D-LP64S: # %bb.0:
-; LA64D-LP64S-NEXT: addi.d $sp, $sp, -80
-; LA64D-LP64S-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
+; LA64D-LP64S-NEXT: addi.d $sp, $sp, -96
+; LA64D-LP64S-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; LA64D-LP64S-NEXT: lu12i.w $a0, -12
; LA64D-LP64S-NEXT: ori $a1, $a0, 3200
; LA64D-LP64S-NEXT: lu32i.d $a1, 0
@@ -1376,8 +1376,8 @@ define i32 @caller_half_on_stack() nounwind {
; LA64D-LP64S-NEXT: st.w $t0, $sp, 0
; LA64D-LP64S-NEXT: pcaddu18i $ra, %call36(callee_half_on_stack)
; LA64D-LP64S-NEXT: jirl $ra, $ra, 0
-; LA64D-LP64S-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
-; LA64D-LP64S-NEXT: addi.d $sp, $sp, 80
+; LA64D-LP64S-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
+; LA64D-LP64S-NEXT: addi.d $sp, $sp, 96
; LA64D-LP64S-NEXT: ret
;
; LA64D-LP64D-LABEL: caller_half_on_stack:
diff --git a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
index 52d8dd05aaa4c..1a9de3b0ef3d1 100644
--- a/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
+++ b/llvm/test/CodeGen/LoongArch/can-not-realign-stack.ll
@@ -14,41 +14,41 @@
define dso_local noundef signext i32 @main() nounwind {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -272
-; CHECK-NEXT: st.d $ra, $sp, 264 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $sp, $sp, -288
+; CHECK-NEXT: st.d $ra, $sp, 280 # 8-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
; CHECK-NEXT: xvld $xr0, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: xvst $xr0, $sp, 96 # 32-byte Folded Spill
+; CHECK-NEXT: xvst $xr0, $sp, 112 # 32-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
-; CHECK-NEXT: xvst $xr1, $sp, 64 # 32-byte Folded Spill
+; CHECK-NEXT: xvst $xr1, $sp, 80 # 32-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_2)
; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_2)
-; CHECK-NEXT: xvst $xr2, $sp, 32 # 32-byte Folded Spill
+; CHECK-NEXT: xvst $xr2, $sp, 48 # 32-byte Folded Spill
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_3)
; CHECK-NEXT: xvld $xr3, $a0, %pc_lo12(.LCPI0_3)
-; CHECK-NEXT: xvst $xr3, $sp, 0 # 32-byte Folded Spill
-; CHECK-NEXT: xvst $xr0, $sp, 136
-; CHECK-NEXT: xvst $xr1, $sp, 168
-; CHECK-NEXT: xvst $xr2, $sp, 200
-; CHECK-NEXT: xvst $xr3, $sp, 232
-; CHECK-NEXT: addi.d $a0, $sp, 136
+; CHECK-NEXT: xvst $xr3, $sp, 16 # 32-byte Folded Spill
+; CHECK-NEXT: xvst $xr0, $sp, 152
+; CHECK-NEXT: xvst $xr1, $sp, 184
+; CHECK-NEXT: xvst $xr2, $sp, 216
+; CHECK-NEXT: xvst $xr3, $sp, 248
+; CHECK-NEXT: addi.d $a0, $sp, 152
; CHECK-NEXT: pcaddu18i $ra, %call36(foo)
; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: xvld $xr0, $sp, 96 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 136
-; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 168
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 200
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
-; CHECK-NEXT: xvst $xr0, $sp, 232
-; CHECK-NEXT: addi.d $a0, $sp, 136
+; CHECK-NEXT: xvld $xr0, $sp, 112 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 152
+; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 184
+; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 216
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 248
+; CHECK-NEXT: addi.d $a0, $sp, 152
; CHECK-NEXT: pcaddu18i $ra, %call36(bar)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: move $a0, $zero
-; CHECK-NEXT: ld.d $ra, $sp, 264 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 272
+; CHECK-NEXT: ld.d $ra, $sp, 280 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 288
; CHECK-NEXT: ret
entry:
%s = alloca %struct.S, align 2
diff --git a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll
index ccc5c703e71ed..15ac95dfc6c55 100644
--- a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll
+++ b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll
@@ -28,12 +28,12 @@ define void @func() {
; CHECK-NEXT: ld.w $a3, $a1, 0
; CHECK-NEXT: ld.w $a2, $a1, 0
; CHECK-NEXT: ld.w $a0, $a1, 0
-; CHECK-NEXT: st.d $fp, $sp, 0
+; CHECK-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill
; CHECK-NEXT: lu12i.w $fp, 1
; CHECK-NEXT: ori $fp, $fp, 12
; CHECK-NEXT: add.d $fp, $sp, $fp
; CHECK-NEXT: st.w $t8, $fp, 0
-; CHECK-NEXT: ld.d $fp, $sp, 0
+; CHECK-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload
; CHECK-NEXT: st.w $t8, $a1, 0
; CHECK-NEXT: st.w $t7, $a1, 0
; CHECK-NEXT: st.w $t6, $a1, 0
diff --git a/llvm/test/CodeGen/LoongArch/frame.ll b/llvm/test/CodeGen/LoongArch/frame.ll
index 048703029d8c6..b29d8634854f3 100644
--- a/llvm/test/CodeGen/LoongArch/frame.ll
+++ b/llvm/test/CodeGen/LoongArch/frame.ll
@@ -1,5 +1,6 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc --mtriple=loongarch64 -mattr=+d < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-lsx < %s | FileCheck %s --check-prefixes=CHECK,NOLSX
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LSX
%struct.key_t = type { i32, [16 x i8] }
@@ -7,20 +8,35 @@ declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
declare void @test1(ptr)
define i32 @test() nounwind {
-; CHECK-LABEL: test:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -32
-; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; CHECK-NEXT: st.w $zero, $sp, 16
-; CHECK-NEXT: vrepli.b $vr0, 0
-; CHECK-NEXT: vst $vr0, $sp, 0
-; CHECK-NEXT: addi.d $a0, $sp, 4
-; CHECK-NEXT: pcaddu18i $ra, %call36(test1)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: move $a0, $zero
-; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 32
-; CHECK-NEXT: ret
+; NOLSX-LABEL: test:
+; NOLSX: # %bb.0:
+; NOLSX-NEXT: addi.d $sp, $sp, -32
+; NOLSX-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; NOLSX-NEXT: st.w $zero, $sp, 16
+; NOLSX-NEXT: st.d $zero, $sp, 8
+; NOLSX-NEXT: st.d $zero, $sp, 0
+; NOLSX-NEXT: addi.d $a0, $sp, 4
+; NOLSX-NEXT: pcaddu18i $ra, %call36(test1)
+; NOLSX-NEXT: jirl $ra, $ra, 0
+; NOLSX-NEXT: move $a0, $zero
+; NOLSX-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; NOLSX-NEXT: addi.d $sp, $sp, 32
+; NOLSX-NEXT: ret
+;
+; LSX-LABEL: test:
+; LSX: # %bb.0:
+; LSX-NEXT: addi.d $sp, $sp, -32
+; LSX-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LSX-NEXT: st.w $zero, $sp, 16
+; LSX-NEXT: vrepli.b $vr0, 0
+; LSX-NEXT: vst $vr0, $sp, 0
+; LSX-NEXT: addi.d $a0, $sp, 4
+; LSX-NEXT: pcaddu18i $ra, %call36(test1)
+; LSX-NEXT: jirl $ra, $ra, 0
+; LSX-NEXT: move $a0, $zero
+; LSX-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LSX-NEXT: addi.d $sp, $sp, 32
+; LSX-NEXT: ret
%key = alloca %struct.key_t, align 4
call void @llvm.memset.p0.i64(ptr %key, i8 0, i64 20, i1 false)
%1 = getelementptr inbounds %struct.key_t, ptr %key, i64 0, i32 1, i64 0
@@ -98,3 +114,62 @@ define void @test_large_frame_size_1234576() "frame-pointer"="all" {
%1 = alloca i8, i32 1234567
ret void
}
+
+;; Note: will create an emergency spill slot, if (!isInt<7>(StackSize)).
+;; Should involve only one SP-adjusting addi per adjustment.
+;; LSX 112 + 16(emergency solt) = 128
+define void @test_frame_size_112() {
+; NOLSX-LABEL: test_frame_size_112:
+; NOLSX: # %bb.0:
+; NOLSX-NEXT: addi.d $sp, $sp, -112
+; NOLSX-NEXT: .cfi_def_cfa_offset 112
+; NOLSX-NEXT: addi.d $sp, $sp, 112
+; NOLSX-NEXT: ret
+;
+; LSX-LABEL: test_frame_size_112:
+; LSX: # %bb.0:
+; LSX-NEXT: addi.d $sp, $sp, -128
+; LSX-NEXT: .cfi_def_cfa_offset 128
+; LSX-NEXT: addi.d $sp, $sp, 128
+; LSX-NEXT: ret
+ %1 = alloca i8, i32 112
+ ret void
+}
+
+;; LSX 128 + 16(emergency solt) = 144
+define void @test_frame_size_128() {
+; NOLSX-LABEL: test_frame_size_128:
+; NOLSX: # %bb.0:
+; NOLSX-NEXT: addi.d $sp, $sp, -128
+; NOLSX-NEXT: .cfi_def_cfa_offset 128
+; NOLSX-NEXT: addi.d $sp, $sp, 128
+; NOLSX-NEXT: ret
+;
+; LSX-LABEL: test_frame_size_128:
+; LSX: # %bb.0:
+; LSX-NEXT: addi.d $sp, $sp, -144
+; LSX-NEXT: .cfi_def_cfa_offset 144
+; LSX-NEXT: addi.d $sp, $sp, 144
+; LSX-NEXT: ret
+ %1 = alloca i8, i32 128
+ ret void
+}
+
+;; LSX 144 + 16(emergency solt) = 160
+define void @test_frame_size_144() {
+; NOLSX-LABEL: test_frame_size_144:
+; NOLSX: # %bb.0:
+; NOLSX-NEXT: addi.d $sp, $sp, -144
+; NOLSX-NEXT: .cfi_def_cfa_offset 144
+; NOLSX-NEXT: addi.d $sp, $sp, 144
+; NOLSX-NEXT: ret
+;
+; LSX-LABEL: test_frame_size_144:
+; LSX: # %bb.0:
+; LSX-NEXT: addi.d $sp, $sp, -160
+; LSX-NEXT: .cfi_def_cfa_offset 160
+; LSX-NEXT: addi.d $sp, $sp, 160
+; LSX-NEXT: ret
+ %1 = alloca i8, i32 144
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
index 402ddb9ad941b..5a55b253c77bb 100644
--- a/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
+++ b/llvm/test/CodeGen/LoongArch/intrinsic-memcpy.ll
@@ -6,11 +6,11 @@
define void @box(ptr noalias nocapture noundef writeonly sret(%Box) align 16 dereferenceable(48) %b, i64 %i) {
; CHECK-LABEL: box:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: addi.d $sp, $sp, -112
+; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: slli.d $a2, $a1, 5
; CHECK-NEXT: alsl.d $a1, $a1, $a2, 4
-; CHECK-NEXT: addi.d $a2, $sp, 0
+; CHECK-NEXT: addi.d $a2, $sp, 16
; CHECK-NEXT: add.d $a3, $a2, $a1
; CHECK-NEXT: vldx $vr0, $a1, $a2
; CHECK-NEXT: vld $vr1, $a3, 32
@@ -18,7 +18,7 @@ define void @box(ptr noalias nocapture noundef writeonly sret(%Box) align 16 der
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: vst $vr1, $a0, 32
; CHECK-NEXT: vst $vr2, $a0, 16
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: addi.d $sp, $sp, 112
; CHECK-NEXT: ret
%1 = alloca [2 x %Box], align 16
%2 = getelementptr inbounds [2 x %Box], ptr %1, i64 0, i64 %i
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
index 789b51d9b5e5b..9528280d181a3 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
@@ -6,10 +6,10 @@ declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32)
define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; CHECK-LABEL: powi_v8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -80
-; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
-; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
+; CHECK-NEXT: addi.d $sp, $sp, -96
+; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
+; CHECK-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
; CHECK-NEXT: movgr2fr.w $fa0, $a0
@@ -18,79 +18,79 @@ define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
+; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; CHECK-NEXT: xvld $xr0, $sp, 16 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
; CHECK-NEXT: jirl $ra, $ra, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
-; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
+; CHECK-NEXT: xvld $x...
[truncated]