[AArch64] Add intra-block CSINC optimization to AArch64ConditionOptimizer (#173734)

This patch extends the AArch64ConditionOptimizer pass to handle CSINC
instructions within a single basic block, complementing the existing
cross-block branch optimization.

The optimization finds two CMP+CSINC pairs comparing the same register
with immediates differing by 1, and adjusts one comparison to enable CSE
to eliminate the redundant CMP instruction.

Example transformation:
```
  cmp  w8, #10
  csinc w9, w0, w1, gt     ; w9 = (w8 > 10) ? w0 : w1+1
  cmp  w8, #9              ; Removed by CSE after adjustment
  csinc w10, w0, w1, gt    ; w10 = (w8 > 9) ? w0 : w1+1
```

After optimization:
```
  cmp  w8, #10
  csinc w9, w0, w1, gt     ; w9 = (w8 > 10) ? w0 : w1+1
  csinc w10, w0, w1, ge    ; w10 = (w8 >= 10) ? w0 : w1+1
```

The existing cross-block logic has also been extracted into its own
method.

Any feedback on code quality and better practices is highly welcome.

Co-authored-by: Hussam Alhassan <hsm.link@proton.me>
This commit is contained in:
Hussam A. 2026-01-08 18:02:34 +00:00 committed by GitHub
parent f8b68c7b74
commit 4f7da2f136
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 466 additions and 182 deletions

View File

@ -6,15 +6,17 @@
//
//===----------------------------------------------------------------------===//
//
// This pass tries to make consecutive compares of values use same operands to
// allow CSE pass to remove duplicated instructions. For this it analyzes
// branches and adjusts comparisons with immediate values by converting:
// * GE -> GT
// * GT -> GE
// * LT -> LE
// * LE -> LT
// and adjusting immediate values appropriately. It basically corrects two
// immediate values towards each other to make them equal.
//
// This pass tries to make consecutive comparisons of values use the same
// operands to allow the CSE pass to remove duplicate instructions. It adjusts
// comparisons with immediate values by converting between inclusive and
// exclusive forms (GE <-> GT, LE <-> LT) and correcting immediate values to
// make them equal.
//
// The pass handles:
// * Cross-block: SUBS/ADDS followed by conditional branches
// * Intra-block: CSINC conditional instructions
//
//
// Consider the following example in C:
//
@ -49,11 +51,16 @@
// b.le .LBB0_6
// ...
//
// Currently only SUBS and ADDS followed by b.?? are supported.
// See optimizeCrossBlock() and optimizeIntraBlock() for implementation details.
//
// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0"
// TODO: handle other conditional instructions (e.g. CSET)
// TODO: allow second branching to be anything if it doesn't require adjusting
// TODO: For cross-block:
// - handle other conditional instructions (e.g. CSET)
// - allow second branching to be anything if it doesn't require adjusting
// TODO: For intra-block:
// - handle CINC and CSET (CSINC aliases) as their conditions are inverted
// compared to CSINC.
// - handle other non-CSINC conditional instructions
//
//===----------------------------------------------------------------------===//
@ -111,6 +118,9 @@ public:
void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info);
bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
int ToImm);
bool isPureCmp(MachineInstr &CmpMI);
bool optimizeIntraBlock(MachineBasicBlock &MBB);
bool optimizeCrossBlock(MachineBasicBlock &HBB);
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@ -323,6 +333,274 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
return false;
}
bool AArch64ConditionOptimizer::isPureCmp(MachineInstr &CmpMI) {
unsigned ShiftAmt = AArch64_AM::getShiftValue(CmpMI.getOperand(3).getImm());
if (!CmpMI.getOperand(2).isImm()) {
LLVM_DEBUG(dbgs() << "Immediate of cmp is symbolic, " << CmpMI << '\n');
return false;
} else if (CmpMI.getOperand(2).getImm() << ShiftAmt >= 0xfff) {
LLVM_DEBUG(dbgs() << "Immediate of cmp may be out of range, " << CmpMI
<< '\n');
return false;
} else if (!MRI->use_nodbg_empty(CmpMI.getOperand(0).getReg())) {
LLVM_DEBUG(dbgs() << "Destination of cmp is not dead, " << CmpMI << '\n');
return false;
}
return true;
}
// This function transforms two CMP+CSINC pairs within the same basic block
// when both conditions are the same (GT/GT or LT/LT) and immediates differ
// by 1.
//
// Example transformation:
// cmp w8, #10
// csinc w9, w0, w1, gt ; w9 = (w8 > 10) ? w0 : w1+1
// cmp w8, #9
// csinc w10, w0, w1, gt ; w10 = (w8 > 9) ? w0 : w1+1
//
// Into:
// cmp w8, #10
// csinc w9, w0, w1, gt ; w9 = (w8 > 10) ? w0 : w1+1
// csinc w10, w0, w1, ge ; w10 = (w8 >= 10) ? w0 : w1+1
//
// The second CMP is eliminated, enabling CSE to remove the redundant
// comparison.
bool AArch64ConditionOptimizer::optimizeIntraBlock(MachineBasicBlock &MBB) {
MachineInstr *FirstCmp = nullptr;
MachineInstr *FirstCSINC = nullptr;
MachineInstr *SecondCmp = nullptr;
MachineInstr *SecondCSINC = nullptr;
// Find two CMP + CSINC pairs
for (MachineInstr &MI : MBB) {
switch (MI.getOpcode()) {
// cmp is an alias for subs with a dead destination register.
case AArch64::SUBSWri:
case AArch64::SUBSXri:
// cmn is an alias for adds with a dead destination register.
case AArch64::ADDSWri:
case AArch64::ADDSXri: {
if (!FirstCmp) {
FirstCmp = &MI;
} else if (FirstCSINC && !SecondCmp) {
SecondCmp = &MI;
}
break;
}
case AArch64::CSINCWr:
case AArch64::CSINCXr: {
// Found a CSINC, ensure it comes after the corresponding comparison
if (FirstCmp && !FirstCSINC) {
FirstCSINC = &MI;
} else if (SecondCmp && !SecondCSINC) {
SecondCSINC = &MI;
}
break;
}
}
if (SecondCSINC)
break;
}
if (!SecondCmp || !SecondCSINC) {
LLVM_DEBUG(dbgs() << "Didn't find two CMP+CSINC pairs\n");
return false;
}
if (FirstCmp->getOperand(1).getReg() != SecondCmp->getOperand(1).getReg()) {
LLVM_DEBUG(dbgs() << "CMPs compare different registers\n");
return false;
}
if (!isPureCmp(*FirstCmp) || !isPureCmp(*SecondCmp)) {
LLVM_DEBUG(dbgs() << "One or both CMPs are not pure\n");
return false;
}
// Check that nothing else modifies the flags between the first CMP and second
// conditional
for (auto It = std::next(MachineBasicBlock::iterator(FirstCmp));
It != std::next(MachineBasicBlock::iterator(SecondCSINC)); ++It) {
if (&*It != SecondCmp &&
It->modifiesRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
LLVM_DEBUG(dbgs() << "Flags modified between CMPs by: " << *It << '\n');
return false;
}
}
// Check flags aren't read after second conditional within the same block
for (auto It = std::next(MachineBasicBlock::iterator(SecondCSINC));
It != MBB.end(); ++It) {
if (It->readsRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
LLVM_DEBUG(dbgs() << "Flags read after second CSINC by: " << *It << '\n');
return false;
}
}
// Since we may modify a cmp in this MBB, make sure NZCV does not live out.
for (auto *SuccBB : MBB.successors())
if (SuccBB->isLiveIn(AArch64::NZCV))
return false;
// Extract condition codes from both CSINCs (operand 3)
AArch64CC::CondCode FirstCond =
(AArch64CC::CondCode)(int)FirstCSINC->getOperand(3).getImm();
AArch64CC::CondCode SecondCond =
(AArch64CC::CondCode)(int)SecondCSINC->getOperand(3).getImm();
const int FirstImm = (int)FirstCmp->getOperand(2).getImm();
const int SecondImm = (int)SecondCmp->getOperand(2).getImm();
LLVM_DEBUG(dbgs() << "Comparing intra-block CSINCs: "
<< AArch64CC::getCondCodeName(FirstCond) << " #" << FirstImm
<< " and " << AArch64CC::getCondCodeName(SecondCond) << " #"
<< SecondImm << '\n');
// Check if both conditions are the same and immediates differ by 1
if (((FirstCond == AArch64CC::GT && SecondCond == AArch64CC::GT) ||
(FirstCond == AArch64CC::LT && SecondCond == AArch64CC::LT)) &&
std::abs(SecondImm - FirstImm) == 1) {
// Pick which comparison to adjust to match the other
// For GT: adjust the one with smaller immediate
// For LT: adjust the one with larger immediate
bool adjustFirst = (FirstImm < SecondImm);
if (FirstCond == AArch64CC::LT) {
adjustFirst = !adjustFirst;
}
MachineInstr *CmpToAdjust = adjustFirst ? FirstCmp : SecondCmp;
MachineInstr *CSINCToAdjust = adjustFirst ? FirstCSINC : SecondCSINC;
AArch64CC::CondCode CondToAdjust = adjustFirst ? FirstCond : SecondCond;
int TargetImm = adjustFirst ? SecondImm : FirstImm;
CmpInfo AdjustedInfo = adjustCmp(CmpToAdjust, CondToAdjust);
if (std::get<0>(AdjustedInfo) == TargetImm &&
std::get<1>(AdjustedInfo) ==
(adjustFirst ? SecondCmp : FirstCmp)->getOpcode()) {
LLVM_DEBUG(dbgs() << "Successfully optimizing intra-block CSINC pair\n");
// Modify the selected CMP and CSINC
CmpToAdjust->getOperand(2).setImm(std::get<0>(AdjustedInfo));
CmpToAdjust->setDesc(TII->get(std::get<1>(AdjustedInfo)));
CSINCToAdjust->getOperand(3).setImm(std::get<2>(AdjustedInfo));
return true;
}
}
return false;
}
// Optimize across blocks
bool AArch64ConditionOptimizer::optimizeCrossBlock(MachineBasicBlock &HBB) {
SmallVector<MachineOperand, 4> HeadCond;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
if (TII->analyzeBranch(HBB, TBB, FBB, HeadCond)) {
return false;
}
// Equivalence check is to skip loops.
if (!TBB || TBB == &HBB) {
return false;
}
SmallVector<MachineOperand, 4> TrueCond;
MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
return false;
}
MachineInstr *HeadCmpMI = findSuitableCompare(&HBB);
if (!HeadCmpMI) {
return false;
}
MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
if (!TrueCmpMI) {
return false;
}
AArch64CC::CondCode HeadCmp;
if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
return false;
}
AArch64CC::CondCode TrueCmp;
if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
return false;
}
const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
LLVM_DEBUG(dbgs() << "Head branch:\n");
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
<< '\n');
LLVM_DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
LLVM_DEBUG(dbgs() << "True branch:\n");
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp)
<< '\n');
LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
std::abs(TrueImm - HeadImm) == 2) {
// This branch transforms machine instructions that correspond to
//
// 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
// 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
//
// into
//
// 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
// 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
modifyCmp(HeadCmpMI, HeadCmpInfo);
modifyCmp(TrueCmpMI, TrueCmpInfo);
return true;
}
} else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
std::abs(TrueImm - HeadImm) == 1) {
// This branch transforms machine instructions that correspond to
//
// 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
// 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
//
// into
//
// 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
// 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)
// GT -> GE transformation increases immediate value, so picking the
// smaller one; LT -> LE decreases immediate value so invert the choice.
bool adjustHeadCond = (HeadImm < TrueImm);
if (HeadCmp == AArch64CC::LT) {
adjustHeadCond = !adjustHeadCond;
}
if (adjustHeadCond) {
return adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
} else {
return adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
}
}
// Other transformation cases almost never occur due to generation of < or >
// comparisons instead of <= and >=.
return false;
}
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
@ -342,106 +620,8 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
// child_begin() / child_end() until after a node has been visited.
for (MachineDomTreeNode *I : depth_first(DomTree)) {
MachineBasicBlock *HBB = I->getBlock();
SmallVector<MachineOperand, 4> HeadCond;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
if (TII->analyzeBranch(*HBB, TBB, FBB, HeadCond)) {
continue;
}
// Equivalence check is to skip loops.
if (!TBB || TBB == HBB) {
continue;
}
SmallVector<MachineOperand, 4> TrueCond;
MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
continue;
}
MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
if (!HeadCmpMI) {
continue;
}
MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
if (!TrueCmpMI) {
continue;
}
AArch64CC::CondCode HeadCmp;
if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
continue;
}
AArch64CC::CondCode TrueCmp;
if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
continue;
}
const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
LLVM_DEBUG(dbgs() << "Head branch:\n");
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
<< '\n');
LLVM_DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
LLVM_DEBUG(dbgs() << "True branch:\n");
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp)
<< '\n');
LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
std::abs(TrueImm - HeadImm) == 2) {
// This branch transforms machine instructions that correspond to
//
// 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
// 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
//
// into
//
// 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
// 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
modifyCmp(HeadCmpMI, HeadCmpInfo);
modifyCmp(TrueCmpMI, TrueCmpInfo);
Changed = true;
}
} else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
std::abs(TrueImm - HeadImm) == 1) {
// This branch transforms machine instructions that correspond to
//
// 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
// 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
//
// into
//
// 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
// 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)
// GT -> GE transformation increases immediate value, so picking the
// smaller one; LT -> LE decreases immediate value so invert the choice.
bool adjustHeadCond = (HeadImm < TrueImm);
if (HeadCmp == AArch64CC::LT) {
adjustHeadCond = !adjustHeadCond;
}
if (adjustHeadCond) {
Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
} else {
Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
}
}
// Other transformation cases almost never occur due to generation of < or >
// comparisons instead of <= and >=.
Changed |= optimizeIntraBlock(*HBB);
Changed |= optimizeCrossBlock(*HBB);
}
return Changed;

View File

@ -7,6 +7,110 @@
@c = external global i32
@d = external global i32
; Test intra-block CSINC optimization with (a > 10) and (a >= 10)
; Two CSINC instructions should share a single CMP after optimization
define void @intra_block_csinc(i32 %x, i32 %y, ptr %out1, ptr %out2) #0 {
; CHECK-LABEL: intra_block_csinc:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #10
; CHECK-NEXT: csinc w8, w0, w1, gt
; CHECK-NEXT: csinc w9, w0, w1, ge
; CHECK-NEXT: str w8, [x2]
; CHECK-NEXT: str w9, [x3]
; CHECK-NEXT: ret
entry:
%val = load i32, ptr @a, align 4
; First: result1 = (a > 10) ? x : (y + 1)
%cond1 = icmp sgt i32 %val, 10
%y_inc1 = add i32 %y, 1
%result1 = select i1 %cond1, i32 %x, i32 %y_inc1
store i32 %result1, ptr %out1
; Second: result2 = (a >= 10) ? x : (y + 1)
; Canonicalizes to (a > 9), then optimizes to reuse first CMP with adjusted condition
%cond2 = icmp sge i32 %val, 10
%y_inc2 = add i32 %y, 1
%result2 = select i1 %cond2, i32 %x, i32 %y_inc2
store i32 %result2, ptr %out2
ret void
}
; Negative test: different registers should not be optimized
define void @intra_block_csinc_different_regs(i32 %x, i32 %y, ptr %out1, ptr %out2) #0 {
; CHECK-LABEL: intra_block_csinc_different_regs:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: adrp x9, :got:b
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr x9, [x9, :got_lo12:b]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, #10
; CHECK-NEXT: csinc w8, w0, w1, gt
; CHECK-NEXT: cmp w9, #9
; CHECK-NEXT: str w8, [x2]
; CHECK-NEXT: csinc w8, w0, w1, gt
; CHECK-NEXT: str w8, [x3]
; CHECK-NEXT: ret
entry:
%val1 = load i32, ptr @a, align 4
%val2 = load i32, ptr @b, align 4
; First: result1 = (a > 10) ? x : (y + 1)
%cond1 = icmp sgt i32 %val1, 10
%y_inc1 = add i32 %y, 1
%result1 = select i1 %cond1, i32 %x, i32 %y_inc1
store i32 %result1, ptr %out1
; Second: result2 = (b > 9) ? x : (y + 1) - compares DIFFERENT register
; Should NOT optimize - need both CMPs
%cond2 = icmp sgt i32 %val2, 9
%y_inc2 = add i32 %y, 1
%result2 = select i1 %cond2, i32 %x, i32 %y_inc2
store i32 %result2, ptr %out2
ret void
}
; Test intra-block CSINC optimization with (a < 5) and (a < 6)
; LT/LT pattern - symmetric to GT/GT case
define void @intra_block_csinc_lt(i32 %x, i32 %y, ptr %out1, ptr %out2) #0 {
; CHECK-LABEL: intra_block_csinc_lt:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:a
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #5
; CHECK-NEXT: csinc w8, w0, w1, lt
; CHECK-NEXT: csinc w9, w0, w1, le
; CHECK-NEXT: str w8, [x2]
; CHECK-NEXT: str w9, [x3]
; CHECK-NEXT: ret
entry:
%val = load i32, ptr @a, align 4
; First: result1 = (a < 5) ? x : (y + 1)
%cond1 = icmp slt i32 %val, 5
%y_inc1 = add i32 %y, 1
%result1 = select i1 %cond1, i32 %x, i32 %y_inc1
store i32 %result1, ptr %out1
; Second: result2 = (a < 6) ? x : (y + 1)
; Optimizes to reuse first CMP (#5) with adjusted condition (le)
%cond2 = icmp slt i32 %val, 6
%y_inc2 = add i32 %y, 1
%result2 = select i1 %cond2, i32 %x, i32 %y_inc2
store i32 %result2, ptr %out2
ret void
}
; (a > 10 && b == c) || (a >= 10 && b == d)
define i32 @combine_gt_ge_10() #0 {
; CHECK-LABEL: combine_gt_ge_10:
@ -17,30 +121,30 @@ define i32 @combine_gt_ge_10() #0 {
; CHECK-NEXT: cmp w8, #10
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: b.le .LBB0_3
; CHECK-NEXT: b.le .LBB3_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
; CHECK-NEXT: ldr w10, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w10, w9
; CHECK-NEXT: b.ne .LBB0_4
; CHECK-NEXT: b.ne .LBB3_4
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_3: // %lor.lhs.false
; CHECK-NEXT: b.lt .LBB0_6
; CHECK-NEXT: .LBB0_4: // %land.lhs.true3
; CHECK-NEXT: .LBB3_3: // %lor.lhs.false
; CHECK-NEXT: b.lt .LBB3_6
; CHECK-NEXT: .LBB3_4: // %land.lhs.true3
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB0_6
; CHECK-NEXT: b.ne .LBB3_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_6: // %if.end
; CHECK-NEXT: .LBB3_6: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@ -80,7 +184,7 @@ define i32 @combine_gt_lt_5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #5
; CHECK-NEXT: b.le .LBB1_3
; CHECK-NEXT: b.le .LBB4_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
@ -89,12 +193,12 @@ define i32 @combine_gt_lt_5() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB1_6
; CHECK-NEXT: b.ne .LBB4_6
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_3: // %lor.lhs.false
; CHECK-NEXT: b.ge .LBB1_6
; CHECK-NEXT: .LBB4_3: // %lor.lhs.false
; CHECK-NEXT: b.ge .LBB4_6
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
@ -103,11 +207,11 @@ define i32 @combine_gt_lt_5() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB1_6
; CHECK-NEXT: b.ne .LBB4_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_6: // %if.end
; CHECK-NEXT: .LBB4_6: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@ -149,30 +253,30 @@ define i32 @combine_lt_ge_5() #0 {
; CHECK-NEXT: cmp w8, #5
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: b.ge .LBB5_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
; CHECK-NEXT: ldr w10, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w10, w9
; CHECK-NEXT: b.ne .LBB2_4
; CHECK-NEXT: b.ne .LBB5_4
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_3: // %lor.lhs.false
; CHECK-NEXT: b.gt .LBB2_6
; CHECK-NEXT: .LBB2_4: // %land.lhs.true3
; CHECK-NEXT: .LBB5_3: // %lor.lhs.false
; CHECK-NEXT: b.gt .LBB5_6
; CHECK-NEXT: .LBB5_4: // %land.lhs.true3
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB2_6
; CHECK-NEXT: b.ne .LBB5_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB2_6: // %if.end
; CHECK-NEXT: .LBB5_6: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@ -212,7 +316,7 @@ define i32 @combine_lt_gt_5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #5
; CHECK-NEXT: b.ge .LBB3_3
; CHECK-NEXT: b.ge .LBB6_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
@ -221,12 +325,12 @@ define i32 @combine_lt_gt_5() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB3_6
; CHECK-NEXT: b.ne .LBB6_6
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB3_3: // %lor.lhs.false
; CHECK-NEXT: b.le .LBB3_6
; CHECK-NEXT: .LBB6_3: // %lor.lhs.false
; CHECK-NEXT: b.le .LBB6_6
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
@ -235,11 +339,11 @@ define i32 @combine_lt_gt_5() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB3_6
; CHECK-NEXT: b.ne .LBB6_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB3_6: // %if.end
; CHECK-NEXT: .LBB6_6: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@ -279,7 +383,7 @@ define i32 @combine_gt_lt_n5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmn w8, #5
; CHECK-NEXT: b.le .LBB4_3
; CHECK-NEXT: b.le .LBB7_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
@ -288,12 +392,12 @@ define i32 @combine_gt_lt_n5() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB4_6
; CHECK-NEXT: b.ne .LBB7_6
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB4_3: // %lor.lhs.false
; CHECK-NEXT: b.ge .LBB4_6
; CHECK-NEXT: .LBB7_3: // %lor.lhs.false
; CHECK-NEXT: b.ge .LBB7_6
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
@ -302,11 +406,11 @@ define i32 @combine_gt_lt_n5() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB4_6
; CHECK-NEXT: b.ne .LBB7_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB4_6: // %if.end
; CHECK-NEXT: .LBB7_6: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@ -346,7 +450,7 @@ define i32 @combine_lt_gt_n5() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmn w8, #5
; CHECK-NEXT: b.ge .LBB5_3
; CHECK-NEXT: b.ge .LBB8_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
@ -355,12 +459,12 @@ define i32 @combine_lt_gt_n5() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB5_6
; CHECK-NEXT: b.ne .LBB8_6
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_3: // %lor.lhs.false
; CHECK-NEXT: b.le .LBB5_6
; CHECK-NEXT: .LBB8_3: // %lor.lhs.false
; CHECK-NEXT: b.le .LBB8_6
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
@ -369,11 +473,11 @@ define i32 @combine_lt_gt_n5() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB5_6
; CHECK-NEXT: b.ne .LBB8_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB5_6: // %if.end
; CHECK-NEXT: .LBB8_6: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry:
@ -428,19 +532,19 @@ define void @combine_non_adjacent_cmp_br(ptr nocapture readonly %hdCall) #0 {
; CHECK-NEXT: mov w19, #24 // =0x18
; CHECK-NEXT: adrp x22, glob
; CHECK-NEXT: add x21, x20, #2
; CHECK-NEXT: .LBB6_1: // %land.rhs
; CHECK-NEXT: .LBB9_1: // %land.rhs
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr x8, [x19]
; CHECK-NEXT: cmp x8, #1
; CHECK-NEXT: b.lt .LBB6_3
; CHECK-NEXT: b.lt .LBB9_3
; CHECK-NEXT: // %bb.2: // %while.body
; CHECK-NEXT: // in Loop: Header=BB6_1 Depth=1
; CHECK-NEXT: // in Loop: Header=BB9_1 Depth=1
; CHECK-NEXT: ldr x0, [x22, :lo12:glob]
; CHECK-NEXT: bl Update
; CHECK-NEXT: sub x21, x21, #2
; CHECK-NEXT: cmp x20, x21
; CHECK-NEXT: b.lt .LBB6_1
; CHECK-NEXT: .LBB6_3: // %while.end
; CHECK-NEXT: b.lt .LBB9_1
; CHECK-NEXT: .LBB9_3: // %while.end
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
@ -488,19 +592,19 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
; CHECK-NEXT: ldr x19, [x19, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x19]
; CHECK-NEXT: cmn w8, #2
; CHECK-NEXT: b.gt .LBB7_4
; CHECK-NEXT: b.gt .LBB10_4
; CHECK-NEXT: // %bb.1: // %while.body.preheader
; CHECK-NEXT: sub w20, w8, #1
; CHECK-NEXT: .LBB7_2: // %while.body
; CHECK-NEXT: .LBB10_2: // %while.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: bl do_something
; CHECK-NEXT: adds w20, w20, #1
; CHECK-NEXT: b.mi .LBB7_2
; CHECK-NEXT: b.mi .LBB10_2
; CHECK-NEXT: // %bb.3: // %while.cond.while.end_crit_edge
; CHECK-NEXT: ldr w8, [x19]
; CHECK-NEXT: .LBB7_4: // %while.end
; CHECK-NEXT: .LBB10_4: // %while.end
; CHECK-NEXT: cmp w8, #1
; CHECK-NEXT: b.gt .LBB7_7
; CHECK-NEXT: b.gt .LBB10_7
; CHECK-NEXT: // %bb.5: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
@ -509,13 +613,13 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB7_7
; CHECK-NEXT: b.ne .LBB10_7
; CHECK-NEXT: // %bb.6:
; CHECK-NEXT: mov w0, #123 // =0x7b
; CHECK-NEXT: b .LBB7_8
; CHECK-NEXT: .LBB7_7: // %if.end
; CHECK-NEXT: b .LBB10_8
; CHECK-NEXT: .LBB10_7: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: .LBB7_8: // %return
; CHECK-NEXT: .LBB10_8: // %return
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: .cfi_def_cfa_offset 0
@ -573,20 +677,20 @@ define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: b.gt .LBB8_3
; CHECK-NEXT: b.gt .LBB11_3
; CHECK-NEXT: // %bb.1: // %while.body.preheader
; CHECK-NEXT: sub w19, w8, #1
; CHECK-NEXT: .LBB8_2: // %while.body
; CHECK-NEXT: .LBB11_2: // %while.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: bl do_something
; CHECK-NEXT: adds w19, w19, #1
; CHECK-NEXT: b.mi .LBB8_2
; CHECK-NEXT: .LBB8_3: // %while.end
; CHECK-NEXT: b.mi .LBB11_2
; CHECK-NEXT: .LBB11_3: // %while.end
; CHECK-NEXT: adrp x8, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:c]
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: cmn w8, #2
; CHECK-NEXT: b.lt .LBB8_6
; CHECK-NEXT: b.lt .LBB11_6
; CHECK-NEXT: // %bb.4: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
@ -595,7 +699,7 @@ define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB8_6
; CHECK-NEXT: b.ne .LBB11_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #123 // =0x7b
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
@ -603,7 +707,7 @@ define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB8_6: // %if.end
; CHECK-NEXT: .LBB11_6: // %if.end
; CHECK-NEXT: .cfi_restore_state
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
@ -659,14 +763,14 @@ define i32 @fcmpri(i32 %argc, ptr nocapture readonly %argv) #0 {
; CHECK-LABEL: fcmpri:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cmp w0, #2
; CHECK-NEXT: b.lt .LBB9_3
; CHECK-NEXT: b.lt .LBB12_3
; CHECK-NEXT: // %bb.1: // %land.lhs.true
; CHECK-NEXT: ldr x8, [x1, #8]
; CHECK-NEXT: cbz x8, .LBB9_3
; CHECK-NEXT: cbz x8, .LBB12_3
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: mov w0, #3 // =0x3
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB9_3: // %if.end
; CHECK-NEXT: .LBB12_3: // %if.end
; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
@ -782,12 +886,12 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: cmp w8, #0
; CHECK-NEXT: csel x9, x0, xzr, gt
; CHECK-NEXT: str x9, [x1]
; CHECK-NEXT: b.le .LBB11_2
; CHECK-NEXT: b.le .LBB14_2
; CHECK-NEXT: // %bb.1: // %lor.lhs.false
; CHECK-NEXT: cmp w8, #2
; CHECK-NEXT: b.ge .LBB11_4
; CHECK-NEXT: b .LBB11_6
; CHECK-NEXT: .LBB11_2: // %land.lhs.true
; CHECK-NEXT: b.ge .LBB14_4
; CHECK-NEXT: b .LBB14_6
; CHECK-NEXT: .LBB14_2: // %land.lhs.true
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:c
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
@ -795,11 +899,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB11_4
; CHECK-NEXT: b.ne .LBB14_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB11_4: // %land.lhs.true3
; CHECK-NEXT: .LBB14_4: // %land.lhs.true3
; CHECK-NEXT: adrp x8, :got:b
; CHECK-NEXT: adrp x9, :got:d
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
@ -807,11 +911,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: ldr w9, [x9]
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne .LBB11_6
; CHECK-NEXT: b.ne .LBB14_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB11_6: // %if.end
; CHECK-NEXT: .LBB14_6: // %if.end
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
entry: