-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SimplifyCFG] When only one case value is missing, replace default with that case #76669
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Quentin Dian (DianQK) ChangesCloses #73446. Full diff: https://github.com/llvm/llvm-project/pull/76669.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 55e375670cc61e..a808861e97f3b7 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5414,11 +5414,13 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
}
static void createUnreachableSwitchDefault(SwitchInst *Switch,
- DomTreeUpdater *DTU) {
+ DomTreeUpdater *DTU,
+ bool RemoveOrigDefaultBlock = true) {
LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
auto *BB = Switch->getParent();
auto *OrigDefaultBlock = Switch->getDefaultDest();
- OrigDefaultBlock->removePredecessor(BB);
+ if (RemoveOrigDefaultBlock)
+ OrigDefaultBlock->removePredecessor(BB);
BasicBlock *NewDefaultBlock = BasicBlock::Create(
BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
OrigDefaultBlock);
@@ -5427,7 +5429,8 @@ static void createUnreachableSwitchDefault(SwitchInst *Switch,
if (DTU) {
SmallVector<DominatorTree::UpdateType, 2> Updates;
Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
- if (!is_contained(successors(BB), OrigDefaultBlock))
+ if (RemoveOrigDefaultBlock &&
+ !is_contained(successors(BB), OrigDefaultBlock))
Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
DTU->applyUpdates(Updates);
}
@@ -5609,10 +5612,30 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
Known.getBitWidth() - (Known.Zero | Known.One).popcount();
assert(NumUnknownBits <= Known.getBitWidth());
if (HasDefault && DeadCases.empty() &&
- NumUnknownBits < 64 /* avoid overflow */ &&
- SI->getNumCases() == (1ULL << NumUnknownBits)) {
- createUnreachableSwitchDefault(SI, DTU);
- return true;
+ NumUnknownBits < 64 /* avoid overflow */) {
+ uint64_t AllNumCases = 1ULL << NumUnknownBits;
+ if (SI->getNumCases() == AllNumCases) {
+ createUnreachableSwitchDefault(SI, DTU);
+ return true;
+ }
+ // When only one case value is missing, replace default with that case.
+ if (SI->getNumCases() == AllNumCases - 1) {
+ uint64_t MissingCaseVal = 0;
+ for (const auto &Case : SI->cases())
+ MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
+ for (uint64_t I = 0; I < AllNumCases; I++)
+ MissingCaseVal ^= I;
+ auto *MissingCase =
+ cast<ConstantInt>(ConstantInt::get(Cond->getType(), MissingCaseVal));
+ SI->addCase(MissingCase, SI->getDefaultDest());
+ SwitchInstProfUpdateWrapper SIW(*SI);
+ auto DefaultCaseWeight = SIW.getSuccessorWeight(0);
+ SIW.setSuccessorWeight(
+ SI->findCaseValue(MissingCase)->getSuccessorIndex(),
+ DefaultCaseWeight);
+ createUnreachableSwitchDefault(SI, DTU, false);
+ return true;
+ }
}
if (DeadCases.empty())
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
index 1662bb99f27bcc..c0893a4893bfc6 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -77,14 +77,14 @@ default:
ret void
}
-; This one is a negative test - we know the value of the default,
-; but that's about it
+; We can replace the default branch with case 3 since it is the only case that is missing.
define void @test3(i2 %a) {
; CHECK-LABEL: @test3(
-; CHECK-NEXT: switch i2 [[A:%.*]], label [[DEFAULT:%.*]] [
-; CHECK-NEXT: i2 0, label [[CASE0:%.*]]
-; CHECK-NEXT: i2 1, label [[CASE1:%.*]]
-; CHECK-NEXT: i2 -2, label [[CASE2:%.*]]
+; CHECK-NEXT: switch i2 [[A:%.*]], label [[DOTUNREACHABLEDEFAULT:%.*]] [
+; CHECK-NEXT: i2 0, label [[CASE0:%.*]]
+; CHECK-NEXT: i2 1, label [[CASE1:%.*]]
+; CHECK-NEXT: i2 -2, label [[CASE2:%.*]]
+; CHECK-NEXT: i2 -1, label [[DEFAULT:%.*]]
; CHECK-NEXT: ]
; CHECK: common.ret:
; CHECK-NEXT: ret void
@@ -97,6 +97,8 @@ define void @test3(i2 %a) {
; CHECK: case2:
; CHECK-NEXT: call void @foo(i32 2)
; CHECK-NEXT: br label [[COMMON_RET]]
+; CHECK: .unreachabledefault:
+; CHECK-NEXT: unreachable
; CHECK: default:
; CHECK-NEXT: call void @foo(i32 3)
; CHECK-NEXT: br label [[COMMON_RET]]
|
Emm, this patch seems to increase the inline cost estimation of switch insts. Not sure it is a regression. |
44b251b
to
4afd32e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Please wait for additional approval from other reviewers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the purpose of adding the profile-data to the test case that's been updated, does it not work without that?
The original author apparently wished there to be a negative test case, if it's possible to keep a negative case and just add another test function for the new behaviour, IMHO that's preferred. (Perhaps that's not possible, but it would mean that the test coverage strictly improves).
The change here looks good -- however all the rationale for why this change is being made is in the rust bug report you link to. Could you please elaborate the comment on the added code ("When only one case value is missing...") to explain why this is a good thing to do. There's a risk that someone in the future tries to delete the code if it's not clear to them why it's beneficial.
To confirm my own understanding: adding an unreachable default is fine because we know the bits being switched upon are in a small finite set of cases; the new form has a better structure / is easier to codegen for? If so, sounds like a good change, LGTM.
4afd32e
to
4e93999
Compare
This is just to verify that we are updating the profile-data correctly. I've added a test case without profile-data.
As I was adding test cases, I found this one that needs to be updated. Retrieved from 98a2dab, cc @preames.
I have added detailed explanations in PR.
Yes, we no longer need to think about the default branch. |
4e93999
to
a77ed9d
Compare
I'd suggest adding a test variant that allows conversion to lookup table, as that's where we expect to see the main improvement. |
a77ed9d
to
cdeee33
Compare
I added two test variants. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
llvm/test/Transforms/SimplifyCFG/X86/switch-dead-default-lookup-table.ll
Outdated
Show resolved
Hide resolved
…th that case Eliminating the default branch will provide more opportunities for optimization, such as lookup tables.
cdeee33
to
878c1f4
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM; the test consideration was just to see if there was an obvious way to preserve existing coverage, it sounds like that's not easy or maybe possible, so never mind.
actually, this seems to be uncovering some bad behavior during codegen, still investigating |
To expound a bit on this - the test case we're looking at goes from 300MB to 18GB peak RAM usage. 81% of that peak memory usage is coming from SSAUpdaterImpl. (TailDuplicateBase, TailDuplicator, MachineSSAUpdater) |
& the test case in question has a rather long/machine generated switch statement - so something related to that is probably triggering some pathology |
Could you share a test case? This looks like a different issue than #77831? |
Will share as soon as we have something shareable - it's proving a bit difficult to reduce something to a shareable/non-private thing. We're hoping that by sharing the info we can it might be enough to motivate a revert or help inspire you/others to figure out the bug and/or guess at a representative test case yourself in the mean time. But, yeah, we'll keep working on something shareable in the mean time. |
I am adding here a reproducer where we see a 5x memory increase due to this commit. To reproduce: Could you please revert? |
I did finally get something a bit smaller (exact file attached)
|
If it does block something important, that's fine. :) But I'll prioritize investigating this one. I think this example could probably be reduced to two switch statements. (Perhaps there are multiple issues here.) I can also manually change the default branch to unreachable to reproduce in clang 17. e.g. case 127: out1 = b[2] >> 31; break; \
default: __builtin_unreachable(); break; \ |
…fault with that case (#76669)" This reverts commit 7d81e07, which introduces a compiler memory usage regression. See #76669 (comment)
I've prepared #78469 to revert both commits. Waiting for premerge checks. |
…the Condition is Too Wide" (#78469) Reverts #77831, which depends on #76669, which seriously regresses compilation time / memory usage see #76669 (comment).
Certainly plausible - I tried my best to reduce it as much as possible with both automatic and manual reduction - but no doubt there's more that could be done (yeah, didn't try merging the switches to just make larger switches) & I'd set my boundary at 10x memory usage, so the problem probably occurs at smaller amounts - but wanted to make sure something that made the issue clear/wasn't only marginally problematic.
Yeah, since this wasn't in the area you changed, it's presumably a case where this change caused some IR that tickles an existing problem in another pass (tail deduplication) - so, yeah, that it's reachable before your change through other codepaths sounds plausible/believable too. But causing the problem to show up more often might be enough reason to hold off on this change until the follow-on issue might be resolved or at least worked around. |
This example will make the problem more obvious than the two switch statements. I'll create an issue that using a small case and link back to this one.
Yeah, I often see commits being reverted. :) |
…the Condition is Too Wide" (llvm#78469) Reverts llvm#77831, which depends on llvm#76669, which seriously regresses compilation time / memory usage see llvm#76669 (comment).
…ition is Too Wide (llvm#77831) llvm#76669 taught SimplifyCFG to handle switches when `default` has only one case. When the `switch`'s condition is wider than 64 bit, the current implementation can calculate the wrong default value. This PR skips cases where the condition is too wide.
…fault with that case (#76669)" When the default branch is the last case, we can transform that branch into a concrete branch with an unreachable default branch. ```llvm target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define i64 @src(i64 %0) { %2 = urem i64 %0, 4 switch i64 %2, label %5 [ i64 1, label %3 i64 2, label %3 i64 3, label %4 ] 3: ; preds = %1, %1 br label %5 4: ; preds = %1 br label %5 5: ; preds = %1, %4, %3 %.0 = phi i64 [ 2, %4 ], [ 1, %3 ], [ 0, %1 ] ret i64 %.0 } define i64 @tgt(i64 %0) { %2 = urem i64 %0, 4 switch i64 %2, label %unreachable [ i64 0, label %5 i64 1, label %3 i64 2, label %3 i64 3, label %4 ] unreachable: ; preds = %1 unreachable 3: ; preds = %1, %1 br label %5 4: ; preds = %1 br label %5 5: ; preds = %1, %4, %3 %.0 = phi i64 [ 2, %4 ], [ 1, %3 ], [ 0, %1 ] ret i64 %.0 } ``` Alive2: https://alive2.llvm.org/ce/z/Y-PGXv After transform to a lookup table, I believe `tgt` is better code. The final instructions are as follows: ```asm src: # @src and edi, 3 lea rax, [rdi - 1] cmp rax, 2 ja .LBB0_1 mov rax, qword ptr [8*rdi + .Lswitch.table.src-8] ret .LBB0_1: xor eax, eax ret tgt: # @tgt and edi, 3 mov rax, qword ptr [8*rdi + .Lswitch.table.tgt] ret .Lswitch.table.src: .quad 1 # 0x1 .quad 1 # 0x1 .quad 2 # 0x2 .Lswitch.table.tgt: .quad 0 # 0x0 .quad 1 # 0x1 .quad 1 # 0x1 .quad 2 # 0x2 ``` Godbolt: https://llvm.godbolt.org/z/borME8znd Closes #73446. (cherry picked from commit 7d81e07)
…the Condition is Too Wide (#77831)" #76669 taught SimplifyCFG to handle switches when `default` has only one case. When the `switch`'s condition is wider than 64 bit, the current implementation can calculate the wrong default value. This PR skips cases where the condition is too wide. (cherry picked from commit 39bb790)
When the default branch is the last case, we can transform that branch into a concrete branch with an unreachable default branch.
Alive2: https://alive2.llvm.org/ce/z/Y-PGXv
After transform to a lookup table, I believe
tgt
is better code.The final instructions are as follows:
Godbolt: https://llvm.godbolt.org/z/borME8znd
Closes #73446.