-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
release/19.x: Revert " [LICM] Fold associative binary ops to promote code hoisting (#81608)" #100094
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-powerpc Author: None (llvmbot) ChangesBackport b48819d Requested by: @nikic Patch is 26.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100094.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index fe264503dee9e..91ef2b4b7c183 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -113,8 +113,6 @@ STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
STATISTIC(NumIntAssociationsHoisted,
"Number of invariant int expressions "
"reassociated and hoisted out of the loop");
-STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
- "reassociated and hoisted out of the loop");
/// Memory promotion is enabled by default.
static cl::opt<bool>
@@ -2781,60 +2779,6 @@ static bool hoistMulAddAssociation(Instruction &I, Loop &L,
return true;
}
-/// Reassociate general associative binary expressions of the form
-///
-/// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
-///
-/// where op is an associative binary op, LV is a loop variant, and C1 and C2
-/// are loop invariants that we want to hoist.
-///
-/// TODO: This can be extended to more cases such as
-/// 2. "C1 op (C2 op LV)" ==> "(C1 op C2) op LV"
-/// 3. "(C1 op LV) op C2" ==> "LV op (C1 op C2)" if op is commutative
-/// 4. "C1 op (LV op C2)" ==> "(C1 op C2) op LV" if op is commutative
-static bool hoistBOAssociation(Instruction &I, Loop &L,
- ICFLoopSafetyInfo &SafetyInfo,
- MemorySSAUpdater &MSSAU, AssumptionCache *AC,
- DominatorTree *DT) {
- BinaryOperator *BO = dyn_cast<BinaryOperator>(&I);
- if (!BO || !BO->isAssociative())
- return false;
-
- Instruction::BinaryOps Opcode = BO->getOpcode();
- BinaryOperator *Op0 = dyn_cast<BinaryOperator>(BO->getOperand(0));
-
- // Transform: "(LV op C1) op C2" ==> "LV op (C1 op C2)"
- if (Op0 && Op0->getOpcode() == Opcode) {
- Value *LV = Op0->getOperand(0);
- Value *C1 = Op0->getOperand(1);
- Value *C2 = BO->getOperand(1);
-
- if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) ||
- !L.isLoopInvariant(C2))
- return false;
-
- auto *Preheader = L.getLoopPreheader();
- assert(Preheader && "Loop is not in simplify form?");
- IRBuilder<> Builder(Preheader->getTerminator());
- Value *Inv = Builder.CreateBinOp(Opcode, C1, C2, "invariant.op");
-
- auto *NewBO =
- BinaryOperator::Create(Opcode, LV, Inv, BO->getName() + ".reass", BO);
- NewBO->copyIRFlags(BO);
- BO->replaceAllUsesWith(NewBO);
- eraseInstruction(*BO, SafetyInfo, MSSAU);
-
- // Note: (LV op C1) might not be erased if it has more uses than the one we
- // just replaced.
- if (Op0->use_empty())
- eraseInstruction(*Op0, SafetyInfo, MSSAU);
-
- return true;
- }
-
- return false;
-}
-
static bool hoistArithmetics(Instruction &I, Loop &L,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
@@ -2872,12 +2816,6 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
return true;
}
- if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
- ++NumHoisted;
- ++NumBOAssociationsHoisted;
- return true;
- }
-
return false;
}
diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index ccf0e4520f468..5f8c21e30f8fd 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -642,8 +642,8 @@ define i64 @two_chain_two_bases_succ(ptr %p, i64 %offset, i64 %base1, i64 %base2
; CHECK-NEXT: cmpdi r7, 0
; CHECK-NEXT: ble cr0, .LBB6_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: add r5, r5, r4
; CHECK-NEXT: add r6, r6, r4
+; CHECK-NEXT: add r5, r5, r4
; CHECK-NEXT: mtctr r7
; CHECK-NEXT: sldi r4, r4, 1
; CHECK-NEXT: add r5, r3, r5
@@ -743,219 +743,214 @@ define signext i32 @spill_reduce_succ(ptr %input1, ptr %input2, ptr %output, i64
; CHECK-NEXT: std r9, -184(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r8, -176(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r7, -168(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r4, -160(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r3, -160(r1) # 8-byte Folded Spill
; CHECK-NEXT: ble cr0, .LBB7_7
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: sldi r4, r6, 2
-; CHECK-NEXT: li r6, 1
-; CHECK-NEXT: mr r0, r10
-; CHECK-NEXT: std r10, -192(r1) # 8-byte Folded Spill
-; CHECK-NEXT: cmpdi r4, 1
-; CHECK-NEXT: iselgt r4, r4, r6
-; CHECK-NEXT: addi r7, r4, -1
-; CHECK-NEXT: clrldi r6, r4, 63
-; CHECK-NEXT: cmpldi r7, 3
+; CHECK-NEXT: sldi r6, r6, 2
+; CHECK-NEXT: li r7, 1
+; CHECK-NEXT: mr r30, r10
+; CHECK-NEXT: cmpdi r6, 1
+; CHECK-NEXT: iselgt r7, r6, r7
+; CHECK-NEXT: addi r8, r7, -1
+; CHECK-NEXT: clrldi r6, r7, 63
+; CHECK-NEXT: cmpldi r8, 3
; CHECK-NEXT: blt cr0, .LBB7_4
; CHECK-NEXT: # %bb.2: # %for.body.preheader.new
-; CHECK-NEXT: ld r0, -192(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r30, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r8, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: rldicl r7, r4, 62, 2
-; CHECK-NEXT: ld r9, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r11, r0, r30
-; CHECK-NEXT: add r4, r0, r0
-; CHECK-NEXT: mulli r23, r0, 24
-; CHECK-NEXT: add r14, r0, r8
-; CHECK-NEXT: sldi r12, r0, 5
-; CHECK-NEXT: add r31, r0, r9
-; CHECK-NEXT: sldi r9, r9, 3
-; CHECK-NEXT: sldi r18, r0, 4
-; CHECK-NEXT: sldi r8, r8, 3
-; CHECK-NEXT: add r10, r4, r4
-; CHECK-NEXT: sldi r4, r30, 3
-; CHECK-NEXT: sldi r11, r11, 3
-; CHECK-NEXT: add r26, r12, r9
-; CHECK-NEXT: add r16, r18, r9
-; CHECK-NEXT: add r29, r12, r8
-; CHECK-NEXT: add r19, r18, r8
-; CHECK-NEXT: add r30, r12, r4
-; CHECK-NEXT: mr r20, r4
-; CHECK-NEXT: std r4, -200(r1) # 8-byte Folded Spill
-; CHECK-NEXT: ld r4, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r15, r5, r11
-; CHECK-NEXT: sldi r11, r14, 3
-; CHECK-NEXT: add r29, r5, r29
-; CHECK-NEXT: add r28, r3, r26
-; CHECK-NEXT: add r19, r5, r19
-; CHECK-NEXT: add r21, r23, r9
-; CHECK-NEXT: add r24, r23, r8
-; CHECK-NEXT: add r14, r5, r11
-; CHECK-NEXT: sldi r11, r31, 3
-; CHECK-NEXT: add r25, r23, r20
-; CHECK-NEXT: add r20, r18, r20
-; CHECK-NEXT: add r30, r5, r30
-; CHECK-NEXT: add r18, r3, r16
-; CHECK-NEXT: add r24, r5, r24
-; CHECK-NEXT: add r23, r3, r21
-; CHECK-NEXT: add r27, r4, r26
-; CHECK-NEXT: add r22, r4, r21
-; CHECK-NEXT: add r17, r4, r16
-; CHECK-NEXT: add r2, r4, r11
-; CHECK-NEXT: rldicl r4, r7, 2, 1
-; CHECK-NEXT: sub r7, r8, r9
-; CHECK-NEXT: ld r8, -200(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r14, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: mulli r24, r30, 24
+; CHECK-NEXT: ld r16, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r15, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r3, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: rldicl r0, r7, 62, 2
+; CHECK-NEXT: sldi r11, r30, 5
+; CHECK-NEXT: sldi r19, r30, 4
+; CHECK-NEXT: sldi r7, r14, 3
+; CHECK-NEXT: add r14, r30, r14
+; CHECK-NEXT: sldi r10, r16, 3
+; CHECK-NEXT: sldi r12, r15, 3
+; CHECK-NEXT: add r16, r30, r16
+; CHECK-NEXT: add r15, r30, r15
+; CHECK-NEXT: add r27, r11, r7
+; CHECK-NEXT: add r22, r24, r7
+; CHECK-NEXT: add r17, r19, r7
+; CHECK-NEXT: sldi r2, r14, 3
+; CHECK-NEXT: add r26, r24, r10
+; CHECK-NEXT: add r25, r24, r12
+; CHECK-NEXT: add r21, r19, r10
+; CHECK-NEXT: add r20, r19, r12
+; CHECK-NEXT: add r8, r11, r10
+; CHECK-NEXT: sldi r16, r16, 3
+; CHECK-NEXT: add r29, r5, r27
+; CHECK-NEXT: add r28, r4, r27
+; CHECK-NEXT: add r27, r3, r27
+; CHECK-NEXT: add r24, r5, r22
+; CHECK-NEXT: add r23, r4, r22
+; CHECK-NEXT: add r22, r3, r22
+; CHECK-NEXT: add r19, r5, r17
+; CHECK-NEXT: add r18, r4, r17
+; CHECK-NEXT: add r17, r3, r17
+; CHECK-NEXT: add r14, r5, r2
+; CHECK-NEXT: add r31, r4, r2
+; CHECK-NEXT: add r2, r3, r2
+; CHECK-NEXT: add r9, r5, r8
+; CHECK-NEXT: add r8, r11, r12
; CHECK-NEXT: add r26, r5, r26
; CHECK-NEXT: add r25, r5, r25
; CHECK-NEXT: add r21, r5, r21
; CHECK-NEXT: add r20, r5, r20
; CHECK-NEXT: add r16, r5, r16
-; CHECK-NEXT: add r31, r5, r11
-; CHECK-NEXT: add r11, r3, r11
-; CHECK-NEXT: addi r4, r4, -4
-; CHECK-NEXT: rldicl r4, r4, 62, 2
-; CHECK-NEXT: sub r8, r8, r9
-; CHECK-NEXT: li r9, 0
-; CHECK-NEXT: addi r4, r4, 1
-; CHECK-NEXT: mtctr r4
+; CHECK-NEXT: add r8, r5, r8
+; CHECK-NEXT: rldicl r3, r0, 2, 1
+; CHECK-NEXT: addi r3, r3, -4
+; CHECK-NEXT: sub r0, r12, r7
+; CHECK-NEXT: sub r12, r10, r7
+; CHECK-NEXT: li r7, 0
+; CHECK-NEXT: mr r10, r30
+; CHECK-NEXT: sldi r15, r15, 3
+; CHECK-NEXT: add r15, r5, r15
+; CHECK-NEXT: rldicl r3, r3, 62, 2
+; CHECK-NEXT: addi r3, r3, 1
+; CHECK-NEXT: mtctr r3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_3: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lfd f0, 0(r11)
-; CHECK-NEXT: lfd f1, 0(r2)
-; CHECK-NEXT: add r0, r0, r10
-; CHECK-NEXT: xsmuldp f0, f0, f1
+; CHECK-NEXT: lfd f0, 0(r2)
; CHECK-NEXT: lfd f1, 0(r31)
+; CHECK-NEXT: add r3, r10, r30
+; CHECK-NEXT: add r3, r3, r30
+; CHECK-NEXT: xsmuldp f0, f0, f1
+; CHECK-NEXT: lfd f1, 0(r14)
+; CHECK-NEXT: add r3, r3, r30
+; CHECK-NEXT: add r10, r3, r30
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfd f0, 0(r31)
-; CHECK-NEXT: add r31, r31, r12
-; CHECK-NEXT: lfdx f0, r11, r7
-; CHECK-NEXT: lfdx f1, r2, r7
+; CHECK-NEXT: stfd f0, 0(r14)
+; CHECK-NEXT: add r14, r14, r11
+; CHECK-NEXT: lfdx f0, r2, r0
+; CHECK-NEXT: lfdx f1, r31, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r14, r9
+; CHECK-NEXT: lfdx f1, r15, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r14, r9
-; CHECK-NEXT: lfdx f0, r11, r8
-; CHECK-NEXT: lfdx f1, r2, r8
-; CHECK-NEXT: add r11, r11, r12
-; CHECK-NEXT: add r2, r2, r12
+; CHECK-NEXT: stfdx f0, r15, r7
+; CHECK-NEXT: lfdx f0, r2, r12
+; CHECK-NEXT: lfdx f1, r31, r12
+; CHECK-NEXT: add r2, r2, r11
+; CHECK-NEXT: add r31, r31, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r15, r9
+; CHECK-NEXT: lfdx f1, r16, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r15, r9
-; CHECK-NEXT: lfd f0, 0(r18)
-; CHECK-NEXT: lfd f1, 0(r17)
+; CHECK-NEXT: stfdx f0, r16, r7
+; CHECK-NEXT: lfd f0, 0(r17)
+; CHECK-NEXT: lfd f1, 0(r18)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r16, r9
+; CHECK-NEXT: lfdx f1, r19, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r16, r9
-; CHECK-NEXT: lfdx f0, r18, r7
-; CHECK-NEXT: lfdx f1, r17, r7
+; CHECK-NEXT: stfdx f0, r19, r7
+; CHECK-NEXT: lfdx f0, r17, r0
+; CHECK-NEXT: lfdx f1, r18, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r19, r9
+; CHECK-NEXT: lfdx f1, r20, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r19, r9
-; CHECK-NEXT: lfdx f0, r18, r8
-; CHECK-NEXT: lfdx f1, r17, r8
-; CHECK-NEXT: add r18, r18, r12
-; CHECK-NEXT: add r17, r17, r12
+; CHECK-NEXT: stfdx f0, r20, r7
+; CHECK-NEXT: lfdx f0, r17, r12
+; CHECK-NEXT: lfdx f1, r18, r12
+; CHECK-NEXT: add r17, r17, r11
+; CHECK-NEXT: add r18, r18, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r20, r9
+; CHECK-NEXT: lfdx f1, r21, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r20, r9
-; CHECK-NEXT: lfd f0, 0(r23)
-; CHECK-NEXT: lfd f1, 0(r22)
+; CHECK-NEXT: stfdx f0, r21, r7
+; CHECK-NEXT: lfd f0, 0(r22)
+; CHECK-NEXT: lfd f1, 0(r23)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r21, r9
+; CHECK-NEXT: lfdx f1, r24, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r21, r9
-; CHECK-NEXT: lfdx f0, r23, r7
-; CHECK-NEXT: lfdx f1, r22, r7
+; CHECK-NEXT: stfdx f0, r24, r7
+; CHECK-NEXT: lfdx f0, r22, r0
+; CHECK-NEXT: lfdx f1, r23, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r24, r9
+; CHECK-NEXT: lfdx f1, r25, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r24, r9
-; CHECK-NEXT: lfdx f0, r23, r8
-; CHECK-NEXT: lfdx f1, r22, r8
-; CHECK-NEXT: add r23, r23, r12
-; CHECK-NEXT: add r22, r22, r12
+; CHECK-NEXT: stfdx f0, r25, r7
+; CHECK-NEXT: lfdx f0, r22, r12
+; CHECK-NEXT: lfdx f1, r23, r12
+; CHECK-NEXT: add r22, r22, r11
+; CHECK-NEXT: add r23, r23, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r25, r9
+; CHECK-NEXT: lfdx f1, r26, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r25, r9
-; CHECK-NEXT: lfd f0, 0(r28)
-; CHECK-NEXT: lfd f1, 0(r27)
+; CHECK-NEXT: stfdx f0, r26, r7
+; CHECK-NEXT: lfd f0, 0(r27)
+; CHECK-NEXT: lfd f1, 0(r28)
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r26, r9
+; CHECK-NEXT: lfdx f1, r29, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r26, r9
-; CHECK-NEXT: lfdx f0, r28, r7
-; CHECK-NEXT: lfdx f1, r27, r7
+; CHECK-NEXT: stfdx f0, r29, r7
+; CHECK-NEXT: lfdx f0, r27, r0
+; CHECK-NEXT: lfdx f1, r28, r0
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r29, r9
+; CHECK-NEXT: lfdx f1, r8, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r29, r9
-; CHECK-NEXT: lfdx f0, r28, r8
-; CHECK-NEXT: lfdx f1, r27, r8
-; CHECK-NEXT: add r28, r28, r12
-; CHECK-NEXT: add r27, r27, r12
+; CHECK-NEXT: stfdx f0, r8, r7
+; CHECK-NEXT: lfdx f0, r27, r12
+; CHECK-NEXT: lfdx f1, r28, r12
+; CHECK-NEXT: add r27, r27, r11
+; CHECK-NEXT: add r28, r28, r11
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r30, r9
+; CHECK-NEXT: lfdx f1, r9, r7
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r30, r9
-; CHECK-NEXT: add r9, r9, r12
+; CHECK-NEXT: stfdx f0, r9, r7
+; CHECK-NEXT: add r7, r7, r11
; CHECK-NEXT: bdnz .LBB7_3
; CHECK-NEXT: .LBB7_4: # %for.cond.cleanup.loopexit.unr-lcssa
-; CHECK-NEXT: ld r7, -192(r1) # 8-byte Folded Reload
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: beq cr0, .LBB7_7
; CHECK-NEXT: # %bb.5: # %for.body.epil.preheader
-; CHECK-NEXT: ld r4, -184(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, -160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: sldi r7, r7, 3
-; CHECK-NEXT: add r4, r0, r4
-; CHECK-NEXT: sldi r4, r4, 3
-; CHECK-NEXT: add r3, r5, r4
-; CHECK-NEXT: add r8, r29, r4
-; CHECK-NEXT: add r9, r30, r4
-; CHECK-NEXT: ld r4, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r4, r0, r4
-; CHECK-NEXT: sldi r4, r4, 3
-; CHECK-NEXT: add r10, r5, r4
-; CHECK-NEXT: add r11, r29, r4
-; CHECK-NEXT: add r12, r30, r4
-; CHECK-NEXT: ld r4, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT: add r4, r0, r4
-; CHECK-NEXT: sldi r0, r4, 3
-; CHECK-NEXT: add r5, r5, r0
-; CHECK-NEXT: add r4, r29, r0
-; CHECK-NEXT: add r30, r30, r0
-; CHECK-NEXT: li r0, 0
+; CHECK-NEXT: ld r3, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r0, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: sldi r8, r30, 3
+; CHECK-NEXT: add r3, r10, r3
+; CHECK-NEXT: sldi r3, r3, 3
+; CHECK-NEXT: add r7, r5, r3
+; CHECK-NEXT: add r9, r4, r3
+; CHECK-NEXT: add r11, r0, r3
+; CHECK-NEXT: ld r3, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r3, r10, r3
+; CHECK-NEXT: sldi r3, r3, 3
+; CHECK-NEXT: add r12, r5, r3
+; CHECK-NEXT: add r30, r4, r3
+; CHECK-NEXT: add r29, r0, r3
+; CHECK-NEXT: ld r3, -168(r1) # 8-byte Folded Reload
+; CHECK-NEXT: add r3, r10, r3
+; CHECK-NEXT: li r10, 0
+; CHECK-NEXT: sldi r3, r3, 3
+; CHECK-NEXT: add r5, r5, r3
+; CHECK-NEXT: add r4, r4, r3
+; CHECK-NEXT: add r3, r0, r3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB7_6: # %for.body.epil
; CHECK-NEXT: #
-; CHECK-NEXT: lfdx f0, r30, r0
-; CHECK-NEXT: lfdx f1, r4, r0
+; CHECK-NEXT: lfdx f0, r3, r10
+; CHECK-NEXT: lfdx f1, r4, r10
; CHECK-NEXT: addi r6, r6, -1
; CHECK-NEXT: cmpldi r6, 0
; CHECK-NEXT: xsmuldp f0, f0, f1
; CHECK-NEXT: lfd f1, 0(r5)
; CHECK-NEXT: xsadddp f0, f1, f0
; CHECK-NEXT: stfd f0, 0(r5)
-; CHECK-NEXT: add r5, r5, r7
-; CHECK-NEXT: lfdx f0, r12, r0
-; CHECK-NEXT: lfdx f1, r11, r0
+; CHECK-NEXT: add r5, r5, r8
+; CHECK-NEXT: lfdx f0, r29, r10
+; CHECK-NEXT: lfdx f1, r30, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r10, r0
+; CHECK-NEXT: lfdx f1, r12, r10
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r10, r0
-; CHECK-NEXT: lfdx f0, r9, r0
-; CHECK-NEXT: lfdx f1, r8, r0
+; CHECK-NEXT: stfdx f0, r12, r10
+; CHECK-NEXT: lfdx f0, r11, r10
+; CHECK-NEXT: lfdx f1, r9, r10
; CHECK-NEXT: xsmuldp f0, f0, f1
-; CHECK-NEXT: lfdx f1, r3, r0
+; CHECK-NEXT: lfdx f1, r7, r10
; CHECK-NEXT: xsadddp f0, f1, f0
-; CHECK-NEXT: stfdx f0, r3, r0
-; CHECK-NEXT: add r0, r0, r7
+; CHECK-NEXT: stfdx f0, r7, r10
+; CHECK-NEXT: add r10, r10, r8
; CHECK-NEXT: bne cr0, .LBB7_6
; CHECK-NEXT: .LBB7_7: # %for.cond.cleanup
; CHECK-NEXT: ld r2, -152(r1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
index c733a01950603..4b032781c3764 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
@@ -30,16 +30,14 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stw r12, 8(r1)
-; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r29, -24
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: .cfi_offset cr2, 8
; CHECK-NEXT: .cfi_offset cr3, 8
; CHECK-NEXT: .cfi_offset cr4, 8
-; CHECK-NEXT: std r29, 40(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, 48(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, 32(r1) # 8-byte Folded Spill
; CHECK-NEXT: bl call_2@notoc
; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_13
; CHECK-NEXT: # %bb.1: # %bb
@@ -67,11 +65,10 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
; CHECK-NEXT: bc 12, 4*cr3+eq, .LBB0_11
; CHECK-NEXT: # %bb.6: # %bb32
; CHECK-NEXT: #
+; CHECK-NEXT: rlwinm r30, r30, 0, 24, 22
; CHECK-NEXT: andi. r3, r30, 2
-; CHECK-NEXT: rlwinm r29, r30, 0, 24, 22
; CHECK-NEXT: mcrf cr2, cr0
; CHECK-NEXT: bl call_4@notoc
-; CHECK-NEXT: mr r30, r29
; CHECK-NEXT: beq+ cr2, .LBB0_3
; CHECK-NEXT: # %bb.7: # %bb37
; CHECK-NEXT: .LBB0_8: # %bb22
@@ -92,13 +89,11 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
; CHECK-BE-NEXT: stdu r1, -144(r1)
; CHECK-BE-NEXT: .cfi_def_cfa_offset 144
; CHECK-BE-NEXT: .cfi_offset lr, 16
-; CHECK-BE-NEXT: .cfi_offset r28, -32
; CHECK-BE-NEXT: .cfi_offset r29, -24
; CHECK-BE-NEXT: .cfi_offset r30, -16
; CHECK-BE-NEXT: .cfi_offset cr2, 8
; CHECK-BE-NEXT: .cfi_offset cr2, 8
; CHECK-BE-NEXT: .cfi_offset cr2, 8
-; CHECK-BE-NEXT: std r28, 112(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: std r29, 120(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: bl call_2
@@ -131,12 +126,11 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
; CHECK-BE-NEXT: bc 12, 4*cr3+eq, .LBB0_11
; CHECK-BE-NEXT: # %bb.6: # %bb32
; CHECK-BE-NEXT: #
+; CHECK-BE-NEXT: rlwinm r29, r29, 0, 24, 22
; CHECK-BE-NEXT: andi. r3, r29, 2
-; CHECK-BE-NEXT: rlwinm r28, r29, 0, 24, 22
; CHECK-BE-NEXT: mcrf cr2, cr0
; CHECK-BE-NEXT: bl call_4
; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: mr r29, r28
; CHECK-BE-NEXT: beq+ cr2, .LBB0_3
; CHECK-BE-NEXT: # %bb.7: # %bb37
; CHECK-BE-NEXT: .LBB0_8: # %bb22
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
deleted f...
[truncated]
|
…lvm#81608)" This reverts commit f2ccf80. The flag propagation code is incorrect. (cherry picked from commit b48819d)
@nikic (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport b48819d
Requested by: @nikic