-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CVP] Check whether the default case is reachable #79993
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) ChangesThis patch eliminates unreachable default cases using context-sensitive range information. Full diff: https://github.com/llvm/llvm-project/pull/79993.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 9235850de92f3..e1a5d83e4b96c 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -371,6 +371,7 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
{ // Scope for SwitchInstProfUpdateWrapper. It must not live during
// ConstantFoldTerminator() as the underlying SwitchInst can be changed.
SwitchInstProfUpdateWrapper SI(*I);
+ unsigned ReachableCaseCount = 0;
for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
@@ -407,6 +408,35 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// Increment the case iterator since we didn't delete it.
++CI;
+ ++ReachableCaseCount;
+ }
+
+ BasicBlock *DefaultDest = SI->getDefaultDest();
+ if (!isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())) {
+ ConstantRange CR = LVI->getConstantRangeAtUse(I->getOperandUse(0),
+ /*UndefAllowed*/ false);
+ // The default dest is unreachable if all cases are covered.
+ if (!CR.isSizeLargerThan(ReachableCaseCount)) {
+ BasicBlock *NewUnreachableBB =
+ BasicBlock::Create(BB->getContext(), "default.unreachable",
+ BB->getParent(), DefaultDest);
+ new UnreachableInst(BB->getContext(), NewUnreachableBB);
+
+ bool RemoveOldBB = --SuccessorsCount[DefaultDest] == 0;
+
+ if (RemoveOldBB)
+ DefaultDest->removePredecessor(BB);
+ SI->setDefaultDest(NewUnreachableBB);
+
+ if (RemoveOldBB)
+ DTU.applyUpdatesPermissive(
+ {{DominatorTree::Delete, BB, DefaultDest}});
+ DTU.applyUpdatesPermissive(
+ {{DominatorTree::Insert, BB, NewUnreachableBB}});
+
+ ++NumDeadCases;
+ Changed = true;
+ }
}
}
@@ -1227,6 +1257,7 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!Changed) {
PA = PreservedAnalyses::all();
} else {
+ assert(DT->verify());
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LazyValueAnalysis>();
}
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
index 6227a5c822b10..9941d4c070d39 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -442,7 +442,7 @@ define i32 @switch_range(i32 %cond) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[S:%.*]] = urem i32 [[COND:%.*]], 3
; CHECK-NEXT: [[S1:%.*]] = add nuw nsw i32 [[S]], 1
-; CHECK-NEXT: switch i32 [[S1]], label [[UNREACHABLE:%.*]] [
+; CHECK-NEXT: switch i32 [[S1]], label [[DEFAULT_UNREACHABLE:%.*]] [
; CHECK-NEXT: i32 1, label [[EXIT1:%.*]]
; CHECK-NEXT: i32 2, label [[EXIT2:%.*]]
; CHECK-NEXT: i32 3, label [[EXIT1]]
@@ -451,6 +451,8 @@ define i32 @switch_range(i32 %cond) {
; CHECK-NEXT: ret i32 1
; CHECK: exit2:
; CHECK-NEXT: ret i32 2
+; CHECK: default.unreachable:
+; CHECK-NEXT: unreachable
; CHECK: unreachable:
; CHECK-NEXT: ret i32 0
;
@@ -513,10 +515,9 @@ define i8 @switch_defaultdest_multipleuse(i8 %t0) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[O:%.*]] = or i8 [[T0:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = srem i8 1, [[O]]
-; CHECK-NEXT: switch i8 [[R]], label [[EXIT:%.*]] [
-; CHECK-NEXT: i8 0, label [[EXIT]]
-; CHECK-NEXT: i8 1, label [[EXIT]]
-; CHECK-NEXT: ]
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: default.unreachable:
+; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret i8 0
;
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll b/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll
new file mode 100644
index 0000000000000..4d396b5422401
--- /dev/null
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/switch.ll
@@ -0,0 +1,299 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s
+
+define i32 @test_unreachable_default(i32 noundef %num) {
+; CHECK-LABEL: define i32 @test_unreachable_default(
+; CHECK-SAME: i32 noundef [[NUM:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[NUM]], -120
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[SUB]], 3
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 2
+; CHECK-NEXT: switch i32 [[COND]], label [[DEFAULT_UNREACHABLE:%.*]] [
+; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT: i32 1, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 2, label [[SW_BB4:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb:
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @call0()
+; CHECK-NEXT: br label [[CLEANUP:%.*]]
+; CHECK: sw.bb2:
+; CHECK-NEXT: [[CALL3:%.*]] = call i32 @call1()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: sw.bb4:
+; CHECK-NEXT: [[CALL5:%.*]] = call i32 @call2()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: default.unreachable:
+; CHECK-NEXT: unreachable
+; CHECK: sw.default:
+; CHECK-NEXT: [[CALL6:%.*]] = call i32 @call3()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL6]], [[SW_DEFAULT:%.*]] ], [ [[CALL5]], [[SW_BB4]] ], [ [[CALL3]], [[SW_BB2]] ], [ [[CALL]], [[SW_BB]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ %sub = add i32 %num, -120
+ %cmp = icmp ult i32 %sub, 3
+ %cond = select i1 %cmp, i32 %sub, i32 2
+ switch i32 %cond, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb2
+ i32 2, label %sw.bb4
+ ]
+
+sw.bb:
+ %call = call i32 @call0()
+ br label %cleanup
+
+sw.bb2:
+ %call3 = call i32 @call1()
+ br label %cleanup
+
+sw.bb4:
+ %call5 = call i32 @call2()
+ br label %cleanup
+
+sw.default:
+ %call6 = call i32 @call3()
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i32 [ %call6, %sw.default ], [ %call5, %sw.bb4 ], [ %call3, %sw.bb2 ], [ %call, %sw.bb ]
+ ret i32 %retval.0
+}
+
+define i32 @test_unreachable_default_shared_edge(i32 noundef %num) {
+; CHECK-LABEL: define i32 @test_unreachable_default_shared_edge(
+; CHECK-SAME: i32 noundef [[NUM:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[NUM]], -120
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[SUB]], 3
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 2
+; CHECK-NEXT: switch i32 [[COND]], label [[DEFAULT_UNREACHABLE:%.*]] [
+; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT: i32 1, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 2, label [[SW_BB4:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb:
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @call0()
+; CHECK-NEXT: br label [[CLEANUP:%.*]]
+; CHECK: sw.bb2:
+; CHECK-NEXT: [[CALL3:%.*]] = call i32 @call1()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: default.unreachable:
+; CHECK-NEXT: unreachable
+; CHECK: sw.bb4:
+; CHECK-NEXT: [[CALL5:%.*]] = call i32 @call2()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL5]], [[SW_BB4]] ], [ [[CALL3]], [[SW_BB2]] ], [ [[CALL]], [[SW_BB]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ %sub = add i32 %num, -120
+ %cmp = icmp ult i32 %sub, 3
+ %cond = select i1 %cmp, i32 %sub, i32 2
+ switch i32 %cond, label %sw.bb4 [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb2
+ i32 2, label %sw.bb4
+ ]
+
+sw.bb:
+ %call = call i32 @call0()
+ br label %cleanup
+
+sw.bb2:
+ %call3 = call i32 @call1()
+ br label %cleanup
+
+sw.bb4:
+ %call5 = call i32 @call2()
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i32 [ %call5, %sw.bb4 ], [ %call3, %sw.bb2 ], [ %call, %sw.bb ]
+ ret i32 %retval.0
+}
+
+; Negative tests
+
+define i32 @test_reachable_default(i32 noundef %num) {
+; CHECK-LABEL: define i32 @test_reachable_default(
+; CHECK-SAME: i32 noundef [[NUM:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[NUM]], -120
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[SUB]], 3
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 4
+; CHECK-NEXT: switch i32 [[COND]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT: i32 1, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 2, label [[SW_BB4:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb:
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @call0()
+; CHECK-NEXT: br label [[CLEANUP:%.*]]
+; CHECK: sw.bb2:
+; CHECK-NEXT: [[CALL3:%.*]] = call i32 @call1()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: sw.bb4:
+; CHECK-NEXT: [[CALL5:%.*]] = call i32 @call2()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: sw.default:
+; CHECK-NEXT: [[CALL6:%.*]] = call i32 @call3()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL6]], [[SW_DEFAULT]] ], [ [[CALL5]], [[SW_BB4]] ], [ [[CALL3]], [[SW_BB2]] ], [ [[CALL]], [[SW_BB]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ %sub = add i32 %num, -120
+ %cmp = icmp ult i32 %sub, 3
+ %cond = select i1 %cmp, i32 %sub, i32 4
+ switch i32 %cond, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb2
+ i32 2, label %sw.bb4
+ ]
+
+sw.bb:
+ %call = call i32 @call0()
+ br label %cleanup
+
+sw.bb2:
+ %call3 = call i32 @call1()
+ br label %cleanup
+
+sw.bb4:
+ %call5 = call i32 @call2()
+ br label %cleanup
+
+sw.default:
+ %call6 = call i32 @call3()
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i32 [ %call6, %sw.default ], [ %call5, %sw.bb4 ], [ %call3, %sw.bb2 ], [ %call, %sw.bb ]
+ ret i32 %retval.0
+}
+
+define i32 @test_unreachable_default_cond_may_be_undef(i32 %num) {
+; CHECK-LABEL: define i32 @test_unreachable_default_cond_may_be_undef(
+; CHECK-SAME: i32 [[NUM:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[NUM]], -120
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[SUB]], 3
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 2
+; CHECK-NEXT: switch i32 [[COND]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT: i32 1, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 2, label [[SW_BB4:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb:
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @call0()
+; CHECK-NEXT: br label [[CLEANUP:%.*]]
+; CHECK: sw.bb2:
+; CHECK-NEXT: [[CALL3:%.*]] = call i32 @call1()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: sw.bb4:
+; CHECK-NEXT: [[CALL5:%.*]] = call i32 @call2()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: sw.default:
+; CHECK-NEXT: [[CALL6:%.*]] = call i32 @call3()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL6]], [[SW_DEFAULT]] ], [ [[CALL5]], [[SW_BB4]] ], [ [[CALL3]], [[SW_BB2]] ], [ [[CALL]], [[SW_BB]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ %sub = add i32 %num, -120
+ %cmp = icmp ult i32 %sub, 3
+ %cond = select i1 %cmp, i32 %sub, i32 2
+ switch i32 %cond, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb2
+ i32 2, label %sw.bb4
+ ]
+
+sw.bb:
+ %call = call i32 @call0()
+ br label %cleanup
+
+sw.bb2:
+ %call3 = call i32 @call1()
+ br label %cleanup
+
+sw.bb4:
+ %call5 = call i32 @call2()
+ br label %cleanup
+
+sw.default:
+ %call6 = call i32 @call3()
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i32 [ %call6, %sw.default ], [ %call5, %sw.bb4 ], [ %call3, %sw.bb2 ], [ %call, %sw.bb ]
+ ret i32 %retval.0
+}
+
+define i32 @test_default_is_already_unreachable(i32 %num) {
+; CHECK-LABEL: define i32 @test_default_is_already_unreachable(
+; CHECK-SAME: i32 [[NUM:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[NUM]], -120
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[SUB]], 3
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 2
+; CHECK-NEXT: switch i32 [[COND]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT: i32 1, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 2, label [[SW_BB4:%.*]]
+; CHECK-NEXT: ]
+; CHECK: sw.bb:
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @call0()
+; CHECK-NEXT: br label [[CLEANUP:%.*]]
+; CHECK: sw.bb2:
+; CHECK-NEXT: [[CALL3:%.*]] = call i32 @call1()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: sw.bb4:
+; CHECK-NEXT: [[CALL5:%.*]] = call i32 @call2()
+; CHECK-NEXT: br label [[CLEANUP]]
+; CHECK: sw.default:
+; CHECK-NEXT: unreachable
+; CHECK: cleanup:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL5]], [[SW_BB4]] ], [ [[CALL3]], [[SW_BB2]] ], [ [[CALL]], [[SW_BB]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ %sub = add i32 %num, -120
+ %cmp = icmp ult i32 %sub, 3
+ %cond = select i1 %cmp, i32 %sub, i32 2
+ switch i32 %cond, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb2
+ i32 2, label %sw.bb4
+ ]
+
+sw.bb:
+ %call = call i32 @call0()
+ br label %cleanup
+
+sw.bb2:
+ %call3 = call i32 @call1()
+ br label %cleanup
+
+sw.bb4:
+ %call5 = call i32 @call2()
+ br label %cleanup
+
+sw.default:
+ unreachable
+
+cleanup:
+ %retval.0 = phi i32 [ %call5, %sw.bb4 ], [ %call3, %sw.bb2 ], [ %call, %sw.bb ]
+ ret i32 %retval.0
+}
+
+declare i32 @call0()
+declare i32 @call1()
+declare i32 @call2()
+declare i32 @call3()
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Heads-up - this might be the reason for the significant increase of generated protocol buffers compilation times (~5x). We are looking for a reproducer, but maybe you have ideas right away? |
Well, actually it's not a 5x increase, it's an increase from ~7s to "I got bored to wait" minutes (thus, either a superlinear from code size or an infinite loop). I ran clang under
|
Probably another instance of #78582. |
It finally finished after 1031s (increased from 7s before this patch). |
A bit longer run under perf gives:
|
Could you please share the reproducer? |
I've been trying to prepare a shareable test case, but so far I only have a few megabytes of C++ with internal code. |
Could you check the difference between your code and #78578? |
The original code is coming from a different file from what @dwblaikie used to prepare the reduced example in #76669 (comment). |
I've got an example IR that shows the compile time regression (not as dramatic as 7->1000 s, but still around 25x increase):
|
Hopefully, this helps understanding what's wrong. Please let me know if you have ideas on how to proceed or need a longer time to resolve the problem. |
When I'm on vacation, I usually do something in the evening. If you want to revert it, you'd better ask @dtcxzyw. @dtcxzyw should be more familiar with the backend than I am. Perhaps he would like to look into this as well (but I'm sure he's on vacation as well). Happy too! :3 |
I'll add you to #78582, and I'll update the follow-up here. The similarity between the two IRs is that the replicated BBs have many predecessors and successors. It's just that this IR is represented as a loop. (This number has also increased from 128 to 1547.) |
I fixed the regression by changing the default case from |
An “arbitrarily meaningless” BB? |
Yeah :) |
I confirmed that the regression can be fixed by #78582. |
I have considered a similar approach in my optimization PR. I'm pretty sure this has to be the wrong approach. Since unreachable provides more facts, this will definitely help with the final machine code. Also see #78582 (comment). |
I'm just saying that the key to this problem is the unreachable default case. |
I get it. Please ignore me. :3 |
I found this PR does the same thing as https://reviews.llvm.org/D106056. |
Hi, this is causing quite a significant number of failures in our internal testing - many protocol buffer generated files are timing out. We really appreciate it if you submit the fix soon or revert this until the fix is ready. Thanks! |
Potential fix: #78582 |
Yes, I see, but reading through comments, I don't understand how close it is to being submitted. And we really need to mitigate the issue soon. |
That is fair. I will revert it today. |
This reverts commit a034e65.
Thank you! We appreciate it. |
I'm also seeing an issue from this patch. I have a switch with all values 0-126 and a default. All 128 cases jump to basic blocks that contain a branch to a common merge point. The common merge point contains a phi that picks a constant based on how it was reached. I believe this constant is the original switch value + 5. Except for the default case which is 0. This makes it a candidate for This patch determines the default case is unreachable. Correction, it turns out I had modified JumpThreading in my downstream to prevent it from threading this kind of lookup table. I guess the unreachable broke my modified JumpThreading. So this isn't a regression upstream. |
#78582 should have unlocked this PR. |
This patch eliminates unreachable default cases using context-sensitive range information.
…96089) This patch reverts #81585 as #78582 has been landed. Now clang works well with reproducer #79993 (comment).
…" (llvm#96089) This patch reverts llvm#81585 as llvm#78582 has been landed. Now clang works well with reproducer llvm#79993 (comment).
This patch eliminates unreachable default cases using context-sensitive range information.
Related patch: #76295