[SimplifyCFG] Simplify uncond br with icmp & select (#165580)
Previously, SimplifyCFG only simplified unconditional branches when they
met a pattern (`switch` -> `icmp` -> `br` -> `phi`) as follows:
```LLVM
switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
DEFAULT:
%tmp = icmp eq i8 %A, 92
br label %end
end:
... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
```
This PR supports a new and more generic pattern (`switch` -> `icmp` ->
`select` -> `br` -> `phi` ) to simplify unconditional branches as
follows:
```LLVM
; BEFORE
case1:
switch i32 %x, label %DEFAULT [
i32 0, label %end
i32 1, label %case2
]
case2:
br label %end
DEFAULT:
%tmp = icmp eq i32 %x, 2
%val = select i1 %tmp, i32 V3, i32 V4
br label %end
end:
... = phi i32 [ V1, %case1 ], [ V2, %case2 ], [ %val, %DEFAULT ]
```
We prefer to split the edge to 'end' so that there are TWO entries of
V3/V4 to the PHI, merging the icmp & select into the switch, as follows:
```LLVM
; AFTER
case1:
switch i32 %x, label %DEFAULT [
i32 0, label %end
i32 1, label %case2
i32 2, label %case3
]
case2:
br label %end
case3:
br label %end
DEFAULT:
br label %end
end:
... = phi i32 [ V1, %case1 ], [ V2, %case2 ], [ V3, %case3 ], [ V4, %DEFAULT]
```
Alive2 Proof: https://alive2.llvm.org/ce/z/jYHM4f
Promising Optimization Impact:
https://github.com/dtcxzyw/llvm-opt-benchmark/pull/3006
This commit is contained in:
parent
ffb5831fce
commit
a0e222f7c7
@ -302,7 +302,9 @@ class SimplifyCFGOpt {
|
||||
|
||||
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
|
||||
IRBuilder<> &Builder);
|
||||
|
||||
bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
|
||||
SelectInst *Select,
|
||||
IRBuilder<> &Builder);
|
||||
bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
|
||||
bool hoistSuccIdenticalTerminatorToSwitchOrIf(
|
||||
Instruction *TI, Instruction *I1,
|
||||
@ -5023,16 +5025,65 @@ bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
|
||||
/// the PHI, merging the third icmp into the switch.
|
||||
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
|
||||
ICmpInst *ICI, IRBuilder<> &Builder) {
|
||||
// Select == nullptr means we assume that there is a hidden no-op select
|
||||
// instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
|
||||
return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
|
||||
}
|
||||
|
||||
/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
|
||||
/// case. This is called when we find an icmp instruction (a seteq/setne with a
|
||||
/// constant) and its following select instruction as the only TWO instructions
|
||||
/// in a block that ends with an uncond branch. We are looking for a very
|
||||
/// specific pattern that occurs when "
|
||||
/// if (A == 1) return C1;
|
||||
/// if (A == 2) return C2;
|
||||
/// if (A < 3) return C3;
|
||||
/// return C4;
|
||||
/// " gets simplified. In this case, we merge the first two "branches of icmp"
|
||||
/// into a switch, but then the default value goes to an uncond block with a lt
|
||||
/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
|
||||
/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
|
||||
/// get something like:
|
||||
///
|
||||
/// case1:
|
||||
/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
|
||||
/// case2:
|
||||
/// br label %end
|
||||
/// DEFAULT:
|
||||
/// %tmp = icmp eq i8 %A, 2
|
||||
/// %val = select i1 %tmp, i8 C3, i8 C4
|
||||
/// br label %end
|
||||
/// end:
|
||||
/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
|
||||
///
|
||||
/// We prefer to split the edge to 'end' so that there are TWO entries of C3/C4
|
||||
/// to the PHI, merging the icmp & select into the switch, as follows:
|
||||
///
|
||||
/// case1:
|
||||
/// switch i8 %A, label %DEFAULT [
|
||||
/// i8 0, label %end
|
||||
/// i8 1, label %case2
|
||||
/// i8 2, label %case3
|
||||
/// ]
|
||||
/// case2:
|
||||
/// br label %end
|
||||
/// case3:
|
||||
/// br label %end
|
||||
/// DEFAULT:
|
||||
/// br label %end
|
||||
/// end:
|
||||
/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case3 ], [ C4, %DEFAULT]
|
||||
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
|
||||
ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
|
||||
BasicBlock *BB = ICI->getParent();
|
||||
|
||||
// If the block has any PHIs in it or the icmp has multiple uses, it is too
|
||||
// complex.
|
||||
if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
|
||||
// If the block has any PHIs in it or the icmp/select has multiple uses, it is
|
||||
// too complex.
|
||||
/// TODO: support multi-phis in succ BB of select's BB.
|
||||
if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
|
||||
(Select && !Select->hasOneUse()))
|
||||
return false;
|
||||
|
||||
Value *V = ICI->getOperand(0);
|
||||
ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
|
||||
|
||||
// The pattern we're looking for is where our only predecessor is a switch on
|
||||
// 'V' and this block is the default case for the switch. In this case we can
|
||||
// fold the compared value into the switch to simplify things.
|
||||
@ -5040,8 +5091,36 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
|
||||
if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
|
||||
return false;
|
||||
|
||||
Value *IcmpCond;
|
||||
ConstantInt *NewCaseVal;
|
||||
CmpPredicate Predicate;
|
||||
|
||||
// Match icmp X, C
|
||||
if (!match(ICI,
|
||||
m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
|
||||
return false;
|
||||
|
||||
Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
|
||||
Instruction *User;
|
||||
if (!Select) {
|
||||
// If Select == nullptr, we can assume that there is a hidden no-op select
|
||||
// just after icmp
|
||||
SelectCond = ICI;
|
||||
SelectTrueVal = Builder.getTrue();
|
||||
SelectFalseVal = Builder.getFalse();
|
||||
User = ICI->user_back();
|
||||
} else {
|
||||
SelectCond = Select->getCondition();
|
||||
// Check if the select condition is the same as the icmp condition.
|
||||
if (SelectCond != ICI)
|
||||
return false;
|
||||
SelectTrueVal = Select->getTrueValue();
|
||||
SelectFalseVal = Select->getFalseValue();
|
||||
User = Select->user_back();
|
||||
}
|
||||
|
||||
SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
|
||||
if (SI->getCondition() != V)
|
||||
if (SI->getCondition() != IcmpCond)
|
||||
return false;
|
||||
|
||||
// If BB is reachable on a non-default case, then we simply know the value of
|
||||
@ -5063,9 +5142,9 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
|
||||
// Ok, the block is reachable from the default dest. If the constant we're
|
||||
// comparing exists in one of the other edges, then we can constant fold ICI
|
||||
// and zap it.
|
||||
if (SI->findCaseValue(Cst) != SI->case_default()) {
|
||||
if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
|
||||
Value *V;
|
||||
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
|
||||
if (Predicate == ICmpInst::ICMP_EQ)
|
||||
V = ConstantInt::getFalse(BB->getContext());
|
||||
else
|
||||
V = ConstantInt::getTrue(BB->getContext());
|
||||
@ -5076,25 +5155,30 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
|
||||
return requestResimplify();
|
||||
}
|
||||
|
||||
// The use of the icmp has to be in the 'end' block, by the only PHI node in
|
||||
// The use of the select has to be in the 'end' block, by the only PHI node in
|
||||
// the block.
|
||||
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
|
||||
PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
|
||||
PHINode *PHIUse = dyn_cast<PHINode>(User);
|
||||
if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
|
||||
isa<PHINode>(++BasicBlock::iterator(PHIUse)))
|
||||
return false;
|
||||
|
||||
// If the icmp is a SETEQ, then the default dest gets false, the new edge gets
|
||||
// true in the PHI.
|
||||
Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
|
||||
Constant *NewCst = ConstantInt::getFalse(BB->getContext());
|
||||
// If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
|
||||
// edge gets SelectTrueVal in the PHI.
|
||||
Value *DefaultCst = SelectFalseVal;
|
||||
Value *NewCst = SelectTrueVal;
|
||||
|
||||
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
|
||||
if (ICI->getPredicate() == ICmpInst::ICMP_NE)
|
||||
std::swap(DefaultCst, NewCst);
|
||||
|
||||
// Replace ICI (which is used by the PHI for the default value) with true or
|
||||
// false depending on if it is EQ or NE.
|
||||
ICI->replaceAllUsesWith(DefaultCst);
|
||||
// Replace Select (which is used by the PHI for the default value) with
|
||||
// SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
|
||||
if (Select) {
|
||||
Select->replaceAllUsesWith(DefaultCst);
|
||||
Select->eraseFromParent();
|
||||
} else {
|
||||
ICI->replaceAllUsesWith(DefaultCst);
|
||||
}
|
||||
ICI->eraseFromParent();
|
||||
|
||||
SmallVector<DominatorTree::UpdateType, 2> Updates;
|
||||
@ -5111,7 +5195,7 @@ bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
|
||||
NewW = ((uint64_t(*W0) + 1) >> 1);
|
||||
SIW.setSuccessorWeight(0, *NewW);
|
||||
}
|
||||
SIW.addCase(Cst, NewBB, NewW);
|
||||
SIW.addCase(NewCaseVal, NewBB, NewW);
|
||||
if (DTU)
|
||||
Updates.push_back({DominatorTree::Insert, Pred, NewBB});
|
||||
}
|
||||
@ -8302,13 +8386,18 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
|
||||
|
||||
// If the only instruction in the block is a seteq/setne comparison against a
|
||||
// constant, try to simplify the block.
|
||||
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
|
||||
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
|
||||
if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
|
||||
++I;
|
||||
if (I->isTerminator() &&
|
||||
tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
|
||||
return true;
|
||||
if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
|
||||
tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
|
||||
Builder))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// See if we can merge an empty landing pad block with another which is
|
||||
// equivalent.
|
||||
|
||||
@ -15,8 +15,8 @@
|
||||
; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @c1, ptr @c2, ptr @c3]
|
||||
; ENABLE: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @g1, ptr @g2, ptr @g3]
|
||||
; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @g1, ptr @g2, ptr @g3]
|
||||
; ENABLE: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @f1, ptr @f2, ptr @f3]
|
||||
; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [3 x ptr] [ptr @f1, ptr @f2, ptr @f3]
|
||||
; ENABLE: @{{.*}} = private unnamed_addr constant [4 x ptr] [ptr @f1, ptr @f2, ptr @f3, ptr @f4]
|
||||
; DISABLE-NOT: @{{.*}} = private unnamed_addr constant [4 x ptr] [ptr @f1, ptr @f2, ptr @f3, ptr @f4]
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "armv7a--none-eabi"
|
||||
|
||||
@ -410,13 +410,12 @@ define i1 @single_value_with_mask(i32 %x) {
|
||||
; OPTNOLUT-NEXT: i32 21, label %[[END]]
|
||||
; OPTNOLUT-NEXT: i32 48, label %[[END]]
|
||||
; OPTNOLUT-NEXT: i32 16, label %[[END]]
|
||||
; OPTNOLUT-NEXT: i32 80, label %[[END]]
|
||||
; OPTNOLUT-NEXT: ]
|
||||
; OPTNOLUT: [[DEFAULT]]:
|
||||
; OPTNOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80
|
||||
; OPTNOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true
|
||||
; OPTNOLUT-NEXT: br label %[[END]]
|
||||
; OPTNOLUT: [[END]]:
|
||||
; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ [[SEL]], %[[DEFAULT]] ]
|
||||
; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ true, %[[DEFAULT]] ], [ false, %[[ENTRY]] ]
|
||||
; OPTNOLUT-NEXT: ret i1 [[RES]]
|
||||
;
|
||||
; TTINOLUT-LABEL: define i1 @single_value_with_mask(
|
||||
|
||||
@ -1314,6 +1314,136 @@ if.end:
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @switch_with_icmp_select_after_it(i32 %x) {
|
||||
; CHECK-LABEL: @switch_with_icmp_select_after_it(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [
|
||||
; CHECK-NEXT: i32 18, label [[END:%.*]]
|
||||
; CHECK-NEXT: i32 21, label [[END]]
|
||||
; CHECK-NEXT: i32 48, label [[END]]
|
||||
; CHECK-NEXT: i32 16, label [[END]]
|
||||
; CHECK-NEXT: i32 80, label [[SWITCH_EDGE:%.*]]
|
||||
; CHECK-NEXT: ]
|
||||
; CHECK: switch.edge:
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: default:
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 3, [[DEFAULT]] ], [ 2, [[SWITCH_EDGE]] ]
|
||||
; CHECK-NEXT: ret i32 [[RES]]
|
||||
;
|
||||
entry:
|
||||
switch i32 %x, label %default [
|
||||
i32 18, label %end
|
||||
i32 21, label %end
|
||||
i32 48, label %end
|
||||
i32 16, label %end
|
||||
]
|
||||
default:
|
||||
%cmp = icmp eq i32 %x, 80
|
||||
; Create a new switch case BB for case 80.
|
||||
%sel = select i1 %cmp, i32 2, i32 3
|
||||
br label %end
|
||||
end:
|
||||
%res = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ]
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @switch_with_icmp_select_after_it2(i32 %x) {
|
||||
; CHECK-LABEL: @switch_with_icmp_select_after_it2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [
|
||||
; CHECK-NEXT: i32 18, label [[END:%.*]]
|
||||
; CHECK-NEXT: i32 21, label [[END]]
|
||||
; CHECK-NEXT: i32 48, label [[END]]
|
||||
; CHECK-NEXT: i32 16, label [[END]]
|
||||
; CHECK-NEXT: i32 80, label [[END]]
|
||||
; CHECK-NEXT: ]
|
||||
; CHECK: default:
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 3, [[DEFAULT]] ], [ 1, [[ENTRY]] ]
|
||||
; CHECK-NEXT: ret i32 [[RES]]
|
||||
;
|
||||
entry:
|
||||
switch i32 %x, label %default [
|
||||
i32 18, label %end
|
||||
i32 21, label %end
|
||||
i32 48, label %end
|
||||
i32 16, label %end
|
||||
]
|
||||
default:
|
||||
%cmp = icmp eq i32 %x, 80
|
||||
; Should not create new case BB
|
||||
%sel = select i1 %cmp, i32 1, i32 3
|
||||
br label %end
|
||||
end:
|
||||
%res = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ]
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
define i32 @switch_with_icmp_select_after_it3(i32 %x) {
|
||||
; CHECK-LABEL: @switch_with_icmp_select_after_it3(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 80
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 3, i32 1
|
||||
; CHECK-NEXT: ret i32 [[SEL]]
|
||||
;
|
||||
entry:
|
||||
switch i32 %x, label %default [
|
||||
i32 18, label %end
|
||||
i32 21, label %end
|
||||
i32 48, label %end
|
||||
i32 16, label %end
|
||||
]
|
||||
default:
|
||||
%cmp = icmp eq i32 %x, 80
|
||||
; Should not create new case BB
|
||||
%sel = select i1 %cmp, i32 3, i32 1
|
||||
br label %end
|
||||
end:
|
||||
%res = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ]
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
; TODO: support this case (multi-phis).
|
||||
define i32 @switch_with_icmp_select_after_it_multi_phis(i32 %x) {
|
||||
; CHECK-LABEL: @switch_with_icmp_select_after_it_multi_phis(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT:%.*]] [
|
||||
; CHECK-NEXT: i32 18, label [[END:%.*]]
|
||||
; CHECK-NEXT: i32 21, label [[END]]
|
||||
; CHECK-NEXT: i32 48, label [[END]]
|
||||
; CHECK-NEXT: i32 16, label [[END]]
|
||||
; CHECK-NEXT: ]
|
||||
; CHECK: default:
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80
|
||||
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 2, i32 3
|
||||
; CHECK-NEXT: br label [[END]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: [[RES1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[ENTRY]] ], [ 0, [[ENTRY]] ], [ 0, [[ENTRY]] ], [ 100, [[DEFAULT]] ]
|
||||
; CHECK-NEXT: [[RES2:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ 1, [[ENTRY]] ], [ [[SEL]], [[DEFAULT]] ]
|
||||
; CHECK-NEXT: [[RES:%.*]] = xor i32 [[RES1]], [[RES2]]
|
||||
; CHECK-NEXT: ret i32 [[RES]]
|
||||
;
|
||||
entry:
|
||||
switch i32 %x, label %default [
|
||||
i32 18, label %end
|
||||
i32 21, label %end
|
||||
i32 48, label %end
|
||||
i32 16, label %end
|
||||
]
|
||||
default:
|
||||
%cmp = icmp eq i32 %x, 80
|
||||
%sel = select i1 %cmp, i32 2, i32 3
|
||||
br label %end
|
||||
end:
|
||||
%res1 = phi i32 [ 0, %entry ], [ 0, %entry ], [ 0, %entry ], [ 0, %entry ], [ 100, %default ]
|
||||
%res2 = phi i32 [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ 1, %entry ], [ %sel, %default ]
|
||||
%res = xor i32 %res1, %res2
|
||||
ret i32 %res
|
||||
}
|
||||
|
||||
!0 = !{!"function_entry_count", i32 100}
|
||||
!1 = !{!"branch_weights", i32 6, i32 10}
|
||||
;.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user