[AArch64] Add intra-block CSINC optimization to AArch64ConditionOptimizer (#173734)
This patch extends the AArch64ConditionOptimizer pass to handle CSINC instructions within a single basic block, complementing the existing cross-block branch optimization. The optimization finds two CMP+CSINC pairs comparing the same register with immediates differing by 1, and adjusts one comparison to enable CSE to eliminate the redundant CMP instruction. Example transformation: ``` cmp w8, #10 csinc w9, w0, w1, gt ; w9 = (w8 > 10) ? w0 : w1+1 cmp w8, #9 ; Removed by CSE after adjustment csinc w10, w0, w1, gt ; w10 = (w8 > 9) ? w0 : w1+1 ``` After optimization: ``` cmp w8, #10 csinc w9, w0, w1, gt ; w9 = (w8 > 10) ? w0 : w1+1 csinc w10, w0, w1, ge ; w10 = (w8 >= 10) ? w0 : w1+1 ``` The existing cross-block logic has also been extracted into its own method. Any feedback on code quality and better practices is highly welcome. Co-authored-by: Hussam Alhassan <hsm.link@proton.me>
This commit is contained in:
parent
f8b68c7b74
commit
4f7da2f136
@ -6,15 +6,17 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass tries to make consecutive compares of values use same operands to
|
||||
// allow CSE pass to remove duplicated instructions. For this it analyzes
|
||||
// branches and adjusts comparisons with immediate values by converting:
|
||||
// * GE -> GT
|
||||
// * GT -> GE
|
||||
// * LT -> LE
|
||||
// * LE -> LT
|
||||
// and adjusting immediate values appropriately. It basically corrects two
|
||||
// immediate values towards each other to make them equal.
|
||||
//
|
||||
// This pass tries to make consecutive comparisons of values use the same
|
||||
// operands to allow the CSE pass to remove duplicate instructions. It adjusts
|
||||
// comparisons with immediate values by converting between inclusive and
|
||||
// exclusive forms (GE <-> GT, LE <-> LT) and correcting immediate values to
|
||||
// make them equal.
|
||||
//
|
||||
// The pass handles:
|
||||
// * Cross-block: SUBS/ADDS followed by conditional branches
|
||||
// * Intra-block: CSINC conditional instructions
|
||||
//
|
||||
//
|
||||
// Consider the following example in C:
|
||||
//
|
||||
@ -49,11 +51,16 @@
|
||||
// b.le .LBB0_6
|
||||
// ...
|
||||
//
|
||||
// Currently only SUBS and ADDS followed by b.?? are supported.
|
||||
// See optimizeCrossBlock() and optimizeIntraBlock() for implementation details.
|
||||
//
|
||||
// TODO: maybe handle TBNZ/TBZ the same way as CMP when used instead for "a < 0"
|
||||
// TODO: handle other conditional instructions (e.g. CSET)
|
||||
// TODO: allow second branching to be anything if it doesn't require adjusting
|
||||
// TODO: For cross-block:
|
||||
// - handle other conditional instructions (e.g. CSET)
|
||||
// - allow second branching to be anything if it doesn't require adjusting
|
||||
// TODO: For intra-block:
|
||||
// - handle CINC and CSET (CSINC aliases) as their conditions are inverted
|
||||
// compared to CSINC.
|
||||
// - handle other non-CSINC conditional instructions
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
@ -111,6 +118,9 @@ public:
|
||||
void modifyCmp(MachineInstr *CmpMI, const CmpInfo &Info);
|
||||
bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
|
||||
int ToImm);
|
||||
bool isPureCmp(MachineInstr &CmpMI);
|
||||
bool optimizeIntraBlock(MachineBasicBlock &MBB);
|
||||
bool optimizeCrossBlock(MachineBasicBlock &HBB);
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
StringRef getPassName() const override {
|
||||
@ -323,6 +333,274 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true when CmpMI is a compare whose immediate this pass may safely
// adjust. Three things disqualify an instruction, each reported through
// LLVM_DEBUG:
//   * operand 2 is not a plain immediate (reported as "symbolic"),
//   * the immediate, after applying the shift encoded in operand 3, is at or
//     above 0xfff (reported as possibly out of range),
//   * the register defined by operand 0 still has non-debug uses, i.e. the
//     instruction is not just setting flags.
bool AArch64ConditionOptimizer::isPureCmp(MachineInstr &CmpMI) {
  unsigned ShiftAmt = AArch64_AM::getShiftValue(CmpMI.getOperand(3).getImm());
  const MachineOperand &ImmOp = CmpMI.getOperand(2);

  if (!ImmOp.isImm()) {
    LLVM_DEBUG(dbgs() << "Immediate of cmp is symbolic, " << CmpMI << '\n');
    return false;
  }

  if (ImmOp.getImm() << ShiftAmt >= 0xfff) {
    LLVM_DEBUG(dbgs() << "Immediate of cmp may be out of range, " << CmpMI
                      << '\n');
    return false;
  }

  const bool DestIsDead = MRI->use_nodbg_empty(CmpMI.getOperand(0).getReg());
  if (!DestIsDead) {
    LLVM_DEBUG(dbgs() << "Destination of cmp is not dead, " << CmpMI << '\n');
    return false;
  }

  return true;
}
|
||||
|
||||
// This function transforms two CMP+CSINC pairs within the same basic block
|
||||
// when both conditions are the same (GT/GT or LT/LT) and immediates differ
|
||||
// by 1.
|
||||
//
|
||||
// Example transformation:
|
||||
// cmp w8, #10
|
||||
// csinc w9, w0, w1, gt ; w9 = (w8 > 10) ? w0 : w1+1
|
||||
// cmp w8, #9
|
||||
// csinc w10, w0, w1, gt ; w10 = (w8 > 9) ? w0 : w1+1
|
||||
//
|
||||
// Into:
|
||||
// cmp w8, #10
|
||||
// csinc w9, w0, w1, gt ; w9 = (w8 > 10) ? w0 : w1+1
|
||||
// csinc w10, w0, w1, ge ; w10 = (w8 >= 10) ? w0 : w1+1
|
||||
//
|
||||
// The second CMP is eliminated, enabling CSE to remove the redundant
|
||||
// comparison.
|
||||
bool AArch64ConditionOptimizer::optimizeIntraBlock(MachineBasicBlock &MBB) {
|
||||
MachineInstr *FirstCmp = nullptr;
|
||||
MachineInstr *FirstCSINC = nullptr;
|
||||
MachineInstr *SecondCmp = nullptr;
|
||||
MachineInstr *SecondCSINC = nullptr;
|
||||
|
||||
// Find two CMP + CSINC pairs
|
||||
for (MachineInstr &MI : MBB) {
|
||||
switch (MI.getOpcode()) {
|
||||
// cmp is an alias for subs with a dead destination register.
|
||||
case AArch64::SUBSWri:
|
||||
case AArch64::SUBSXri:
|
||||
// cmn is an alias for adds with a dead destination register.
|
||||
case AArch64::ADDSWri:
|
||||
case AArch64::ADDSXri: {
|
||||
if (!FirstCmp) {
|
||||
FirstCmp = &MI;
|
||||
} else if (FirstCSINC && !SecondCmp) {
|
||||
SecondCmp = &MI;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case AArch64::CSINCWr:
|
||||
case AArch64::CSINCXr: {
|
||||
// Found a CSINC, ensure it comes after the corresponding comparison
|
||||
if (FirstCmp && !FirstCSINC) {
|
||||
FirstCSINC = &MI;
|
||||
} else if (SecondCmp && !SecondCSINC) {
|
||||
SecondCSINC = &MI;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (SecondCSINC)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!SecondCmp || !SecondCSINC) {
|
||||
LLVM_DEBUG(dbgs() << "Didn't find two CMP+CSINC pairs\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (FirstCmp->getOperand(1).getReg() != SecondCmp->getOperand(1).getReg()) {
|
||||
LLVM_DEBUG(dbgs() << "CMPs compare different registers\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!isPureCmp(*FirstCmp) || !isPureCmp(*SecondCmp)) {
|
||||
LLVM_DEBUG(dbgs() << "One or both CMPs are not pure\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that nothing else modifies the flags between the first CMP and second
|
||||
// conditional
|
||||
for (auto It = std::next(MachineBasicBlock::iterator(FirstCmp));
|
||||
It != std::next(MachineBasicBlock::iterator(SecondCSINC)); ++It) {
|
||||
if (&*It != SecondCmp &&
|
||||
It->modifiesRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
|
||||
LLVM_DEBUG(dbgs() << "Flags modified between CMPs by: " << *It << '\n');
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check flags aren't read after second conditional within the same block
|
||||
for (auto It = std::next(MachineBasicBlock::iterator(SecondCSINC));
|
||||
It != MBB.end(); ++It) {
|
||||
if (It->readsRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
|
||||
LLVM_DEBUG(dbgs() << "Flags read after second CSINC by: " << *It << '\n');
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Since we may modify a cmp in this MBB, make sure NZCV does not live out.
|
||||
for (auto *SuccBB : MBB.successors())
|
||||
if (SuccBB->isLiveIn(AArch64::NZCV))
|
||||
return false;
|
||||
|
||||
// Extract condition codes from both CSINCs (operand 3)
|
||||
AArch64CC::CondCode FirstCond =
|
||||
(AArch64CC::CondCode)(int)FirstCSINC->getOperand(3).getImm();
|
||||
AArch64CC::CondCode SecondCond =
|
||||
(AArch64CC::CondCode)(int)SecondCSINC->getOperand(3).getImm();
|
||||
|
||||
const int FirstImm = (int)FirstCmp->getOperand(2).getImm();
|
||||
const int SecondImm = (int)SecondCmp->getOperand(2).getImm();
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Comparing intra-block CSINCs: "
|
||||
<< AArch64CC::getCondCodeName(FirstCond) << " #" << FirstImm
|
||||
<< " and " << AArch64CC::getCondCodeName(SecondCond) << " #"
|
||||
<< SecondImm << '\n');
|
||||
|
||||
// Check if both conditions are the same and immediates differ by 1
|
||||
if (((FirstCond == AArch64CC::GT && SecondCond == AArch64CC::GT) ||
|
||||
(FirstCond == AArch64CC::LT && SecondCond == AArch64CC::LT)) &&
|
||||
std::abs(SecondImm - FirstImm) == 1) {
|
||||
// Pick which comparison to adjust to match the other
|
||||
// For GT: adjust the one with smaller immediate
|
||||
// For LT: adjust the one with larger immediate
|
||||
bool adjustFirst = (FirstImm < SecondImm);
|
||||
if (FirstCond == AArch64CC::LT) {
|
||||
adjustFirst = !adjustFirst;
|
||||
}
|
||||
|
||||
MachineInstr *CmpToAdjust = adjustFirst ? FirstCmp : SecondCmp;
|
||||
MachineInstr *CSINCToAdjust = adjustFirst ? FirstCSINC : SecondCSINC;
|
||||
AArch64CC::CondCode CondToAdjust = adjustFirst ? FirstCond : SecondCond;
|
||||
int TargetImm = adjustFirst ? SecondImm : FirstImm;
|
||||
|
||||
CmpInfo AdjustedInfo = adjustCmp(CmpToAdjust, CondToAdjust);
|
||||
|
||||
if (std::get<0>(AdjustedInfo) == TargetImm &&
|
||||
std::get<1>(AdjustedInfo) ==
|
||||
(adjustFirst ? SecondCmp : FirstCmp)->getOpcode()) {
|
||||
LLVM_DEBUG(dbgs() << "Successfully optimizing intra-block CSINC pair\n");
|
||||
|
||||
// Modify the selected CMP and CSINC
|
||||
CmpToAdjust->getOperand(2).setImm(std::get<0>(AdjustedInfo));
|
||||
CmpToAdjust->setDesc(TII->get(std::get<1>(AdjustedInfo)));
|
||||
CSINCToAdjust->getOperand(3).setImm(std::get<2>(AdjustedInfo));
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Optimize across blocks
|
||||
bool AArch64ConditionOptimizer::optimizeCrossBlock(MachineBasicBlock &HBB) {
|
||||
SmallVector<MachineOperand, 4> HeadCond;
|
||||
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
|
||||
if (TII->analyzeBranch(HBB, TBB, FBB, HeadCond)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Equivalence check is to skip loops.
|
||||
if (!TBB || TBB == &HBB) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SmallVector<MachineOperand, 4> TrueCond;
|
||||
MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
|
||||
if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineInstr *HeadCmpMI = findSuitableCompare(&HBB);
|
||||
if (!HeadCmpMI) {
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
|
||||
if (!TrueCmpMI) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AArch64CC::CondCode HeadCmp;
|
||||
if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
AArch64CC::CondCode TrueCmp;
|
||||
if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
|
||||
const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Head branch:\n");
|
||||
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
|
||||
<< '\n');
|
||||
LLVM_DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
|
||||
|
||||
LLVM_DEBUG(dbgs() << "True branch:\n");
|
||||
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp)
|
||||
<< '\n');
|
||||
LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
|
||||
|
||||
if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
|
||||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
|
||||
std::abs(TrueImm - HeadImm) == 2) {
|
||||
// This branch transforms machine instructions that correspond to
|
||||
//
|
||||
// 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
|
||||
// 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
|
||||
//
|
||||
// into
|
||||
//
|
||||
// 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
|
||||
// 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
|
||||
|
||||
CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
|
||||
CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
|
||||
if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
|
||||
std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
|
||||
modifyCmp(HeadCmpMI, HeadCmpInfo);
|
||||
modifyCmp(TrueCmpMI, TrueCmpInfo);
|
||||
return true;
|
||||
}
|
||||
} else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
|
||||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
|
||||
std::abs(TrueImm - HeadImm) == 1) {
|
||||
// This branch transforms machine instructions that correspond to
|
||||
//
|
||||
// 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
|
||||
// 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
|
||||
//
|
||||
// into
|
||||
//
|
||||
// 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
|
||||
// 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)
|
||||
|
||||
// GT -> GE transformation increases immediate value, so picking the
|
||||
// smaller one; LT -> LE decreases immediate value so invert the choice.
|
||||
bool adjustHeadCond = (HeadImm < TrueImm);
|
||||
if (HeadCmp == AArch64CC::LT) {
|
||||
adjustHeadCond = !adjustHeadCond;
|
||||
}
|
||||
|
||||
if (adjustHeadCond) {
|
||||
return adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
|
||||
} else {
|
||||
return adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
|
||||
}
|
||||
}
|
||||
// Other transformation cases almost never occur due to generation of < or >
|
||||
// comparisons instead of <= and >=.
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
|
||||
LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
|
||||
<< "********** Function: " << MF.getName() << '\n');
|
||||
@ -342,106 +620,8 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
|
||||
// child_begin() / child_end() until after a node has been visited.
|
||||
for (MachineDomTreeNode *I : depth_first(DomTree)) {
|
||||
MachineBasicBlock *HBB = I->getBlock();
|
||||
|
||||
SmallVector<MachineOperand, 4> HeadCond;
|
||||
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
|
||||
if (TII->analyzeBranch(*HBB, TBB, FBB, HeadCond)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Equivalence check is to skip loops.
|
||||
if (!TBB || TBB == HBB) {
|
||||
continue;
|
||||
}
|
||||
|
||||
SmallVector<MachineOperand, 4> TrueCond;
|
||||
MachineBasicBlock *TBB_TBB = nullptr, *TBB_FBB = nullptr;
|
||||
if (TII->analyzeBranch(*TBB, TBB_TBB, TBB_FBB, TrueCond)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
MachineInstr *HeadCmpMI = findSuitableCompare(HBB);
|
||||
if (!HeadCmpMI) {
|
||||
continue;
|
||||
}
|
||||
|
||||
MachineInstr *TrueCmpMI = findSuitableCompare(TBB);
|
||||
if (!TrueCmpMI) {
|
||||
continue;
|
||||
}
|
||||
|
||||
AArch64CC::CondCode HeadCmp;
|
||||
if (HeadCond.empty() || !parseCond(HeadCond, HeadCmp)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
AArch64CC::CondCode TrueCmp;
|
||||
if (TrueCond.empty() || !parseCond(TrueCond, TrueCmp)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
|
||||
const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Head branch:\n");
|
||||
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
|
||||
<< '\n');
|
||||
LLVM_DEBUG(dbgs() << "\timmediate: " << HeadImm << '\n');
|
||||
|
||||
LLVM_DEBUG(dbgs() << "True branch:\n");
|
||||
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(TrueCmp)
|
||||
<< '\n');
|
||||
LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
|
||||
|
||||
if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
|
||||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
|
||||
std::abs(TrueImm - HeadImm) == 2) {
|
||||
// This branch transforms machine instructions that correspond to
|
||||
//
|
||||
// 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
|
||||
// 2) (a < {TrueImm} && ...) || (a > {HeadImm} && ...)
|
||||
//
|
||||
// into
|
||||
//
|
||||
// 1) (a >= {NewImm} && ...) || (a <= {NewImm} && ...)
|
||||
// 2) (a <= {NewImm} && ...) || (a >= {NewImm} && ...)
|
||||
|
||||
CmpInfo HeadCmpInfo = adjustCmp(HeadCmpMI, HeadCmp);
|
||||
CmpInfo TrueCmpInfo = adjustCmp(TrueCmpMI, TrueCmp);
|
||||
if (std::get<0>(HeadCmpInfo) == std::get<0>(TrueCmpInfo) &&
|
||||
std::get<1>(HeadCmpInfo) == std::get<1>(TrueCmpInfo)) {
|
||||
modifyCmp(HeadCmpMI, HeadCmpInfo);
|
||||
modifyCmp(TrueCmpMI, TrueCmpInfo);
|
||||
Changed = true;
|
||||
}
|
||||
} else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
|
||||
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
|
||||
std::abs(TrueImm - HeadImm) == 1) {
|
||||
// This branch transforms machine instructions that correspond to
|
||||
//
|
||||
// 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
|
||||
// 2) (a < {TrueImm} && ...) || (a < {HeadImm} && ...)
|
||||
//
|
||||
// into
|
||||
//
|
||||
// 1) (a <= {NewImm} && ...) || (a > {NewImm} && ...)
|
||||
// 2) (a < {NewImm} && ...) || (a >= {NewImm} && ...)
|
||||
|
||||
// GT -> GE transformation increases immediate value, so picking the
|
||||
// smaller one; LT -> LE decreases immediate value so invert the choice.
|
||||
bool adjustHeadCond = (HeadImm < TrueImm);
|
||||
if (HeadCmp == AArch64CC::LT) {
|
||||
adjustHeadCond = !adjustHeadCond;
|
||||
}
|
||||
|
||||
if (adjustHeadCond) {
|
||||
Changed |= adjustTo(HeadCmpMI, HeadCmp, TrueCmpMI, TrueImm);
|
||||
} else {
|
||||
Changed |= adjustTo(TrueCmpMI, TrueCmp, HeadCmpMI, HeadImm);
|
||||
}
|
||||
}
|
||||
// Other transformation cases almost never occur due to generation of < or >
|
||||
// comparisons instead of <= and >=.
|
||||
Changed |= optimizeIntraBlock(*HBB);
|
||||
Changed |= optimizeCrossBlock(*HBB);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
|
||||
@ -7,6 +7,110 @@
|
||||
@c = external global i32
|
||||
@d = external global i32
|
||||
|
||||
|
||||
; Test intra-block CSINC optimization with (a > 10) and (a >= 10)
|
||||
; Two CSINC instructions should share a single CMP after optimization
|
||||
define void @intra_block_csinc(i32 %x, i32 %y, ptr %out1, ptr %out2) #0 {
|
||||
; CHECK-LABEL: intra_block_csinc:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: adrp x8, :got:a
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: cmp w8, #10
|
||||
; CHECK-NEXT: csinc w8, w0, w1, gt
|
||||
; CHECK-NEXT: csinc w9, w0, w1, ge
|
||||
; CHECK-NEXT: str w8, [x2]
|
||||
; CHECK-NEXT: str w9, [x3]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%val = load i32, ptr @a, align 4
|
||||
|
||||
; First: result1 = (a > 10) ? x : (y + 1)
|
||||
%cond1 = icmp sgt i32 %val, 10
|
||||
%y_inc1 = add i32 %y, 1
|
||||
%result1 = select i1 %cond1, i32 %x, i32 %y_inc1
|
||||
store i32 %result1, ptr %out1
|
||||
|
||||
; Second: result2 = (a >= 10) ? x : (y + 1)
|
||||
; Canonicalizes to (a > 9), then optimizes to reuse first CMP with adjusted condition
|
||||
%cond2 = icmp sge i32 %val, 10
|
||||
%y_inc2 = add i32 %y, 1
|
||||
%result2 = select i1 %cond2, i32 %x, i32 %y_inc2
|
||||
store i32 %result2, ptr %out2
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; Negative test: different registers should not be optimized
|
||||
define void @intra_block_csinc_different_regs(i32 %x, i32 %y, ptr %out1, ptr %out2) #0 {
|
||||
; CHECK-LABEL: intra_block_csinc_different_regs:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: adrp x8, :got:a
|
||||
; CHECK-NEXT: adrp x9, :got:b
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
|
||||
; CHECK-NEXT: ldr x9, [x9, :got_lo12:b]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, #10
|
||||
; CHECK-NEXT: csinc w8, w0, w1, gt
|
||||
; CHECK-NEXT: cmp w9, #9
|
||||
; CHECK-NEXT: str w8, [x2]
|
||||
; CHECK-NEXT: csinc w8, w0, w1, gt
|
||||
; CHECK-NEXT: str w8, [x3]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%val1 = load i32, ptr @a, align 4
|
||||
%val2 = load i32, ptr @b, align 4
|
||||
|
||||
; First: result1 = (a > 10) ? x : (y + 1)
|
||||
%cond1 = icmp sgt i32 %val1, 10
|
||||
%y_inc1 = add i32 %y, 1
|
||||
%result1 = select i1 %cond1, i32 %x, i32 %y_inc1
|
||||
store i32 %result1, ptr %out1
|
||||
|
||||
; Second: result2 = (b > 9) ? x : (y + 1) - compares DIFFERENT register
|
||||
; Should NOT optimize - need both CMPs
|
||||
%cond2 = icmp sgt i32 %val2, 9
|
||||
%y_inc2 = add i32 %y, 1
|
||||
%result2 = select i1 %cond2, i32 %x, i32 %y_inc2
|
||||
store i32 %result2, ptr %out2
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test intra-block CSINC optimization with (a < 5) and (a < 6)
|
||||
; LT/LT pattern - symmetric to GT/GT case
|
||||
define void @intra_block_csinc_lt(i32 %x, i32 %y, ptr %out1, ptr %out2) #0 {
|
||||
; CHECK-LABEL: intra_block_csinc_lt:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: adrp x8, :got:a
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: cmp w8, #5
|
||||
; CHECK-NEXT: csinc w8, w0, w1, lt
|
||||
; CHECK-NEXT: csinc w9, w0, w1, le
|
||||
; CHECK-NEXT: str w8, [x2]
|
||||
; CHECK-NEXT: str w9, [x3]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%val = load i32, ptr @a, align 4
|
||||
|
||||
; First: result1 = (a < 5) ? x : (y + 1)
|
||||
%cond1 = icmp slt i32 %val, 5
|
||||
%y_inc1 = add i32 %y, 1
|
||||
%result1 = select i1 %cond1, i32 %x, i32 %y_inc1
|
||||
store i32 %result1, ptr %out1
|
||||
|
||||
; Second: result2 = (a < 6) ? x : (y + 1)
|
||||
; Optimizes to reuse first CMP (#5) with adjusted condition (le)
|
||||
%cond2 = icmp slt i32 %val, 6
|
||||
%y_inc2 = add i32 %y, 1
|
||||
%result2 = select i1 %cond2, i32 %x, i32 %y_inc2
|
||||
store i32 %result2, ptr %out2
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; (a > 10 && b == c) || (a >= 10 && b == d)
|
||||
define i32 @combine_gt_ge_10() #0 {
|
||||
; CHECK-LABEL: combine_gt_ge_10:
|
||||
@ -17,30 +121,30 @@ define i32 @combine_gt_ge_10() #0 {
|
||||
; CHECK-NEXT: cmp w8, #10
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
|
||||
; CHECK-NEXT: b.le .LBB0_3
|
||||
; CHECK-NEXT: b.le .LBB3_3
|
||||
; CHECK-NEXT: // %bb.1: // %land.lhs.true
|
||||
; CHECK-NEXT: adrp x9, :got:c
|
||||
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
|
||||
; CHECK-NEXT: ldr w10, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w10, w9
|
||||
; CHECK-NEXT: b.ne .LBB0_4
|
||||
; CHECK-NEXT: b.ne .LBB3_4
|
||||
; CHECK-NEXT: // %bb.2:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB0_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.lt .LBB0_6
|
||||
; CHECK-NEXT: .LBB0_4: // %land.lhs.true3
|
||||
; CHECK-NEXT: .LBB3_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.lt .LBB3_6
|
||||
; CHECK-NEXT: .LBB3_4: // %land.lhs.true3
|
||||
; CHECK-NEXT: adrp x9, :got:d
|
||||
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB0_6
|
||||
; CHECK-NEXT: b.ne .LBB3_6
|
||||
; CHECK-NEXT: // %bb.5:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB0_6: // %if.end
|
||||
; CHECK-NEXT: .LBB3_6: // %if.end
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
@ -80,7 +184,7 @@ define i32 @combine_gt_lt_5() #0 {
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: cmp w8, #5
|
||||
; CHECK-NEXT: b.le .LBB1_3
|
||||
; CHECK-NEXT: b.le .LBB4_3
|
||||
; CHECK-NEXT: // %bb.1: // %land.lhs.true
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:c
|
||||
@ -89,12 +193,12 @@ define i32 @combine_gt_lt_5() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB1_6
|
||||
; CHECK-NEXT: b.ne .LBB4_6
|
||||
; CHECK-NEXT: // %bb.2:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB1_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.ge .LBB1_6
|
||||
; CHECK-NEXT: .LBB4_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.ge .LBB4_6
|
||||
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:d
|
||||
@ -103,11 +207,11 @@ define i32 @combine_gt_lt_5() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB1_6
|
||||
; CHECK-NEXT: b.ne .LBB4_6
|
||||
; CHECK-NEXT: // %bb.5:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB1_6: // %if.end
|
||||
; CHECK-NEXT: .LBB4_6: // %if.end
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
@ -149,30 +253,30 @@ define i32 @combine_lt_ge_5() #0 {
|
||||
; CHECK-NEXT: cmp w8, #5
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
|
||||
; CHECK-NEXT: b.ge .LBB2_3
|
||||
; CHECK-NEXT: b.ge .LBB5_3
|
||||
; CHECK-NEXT: // %bb.1: // %land.lhs.true
|
||||
; CHECK-NEXT: adrp x9, :got:c
|
||||
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
|
||||
; CHECK-NEXT: ldr w10, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w10, w9
|
||||
; CHECK-NEXT: b.ne .LBB2_4
|
||||
; CHECK-NEXT: b.ne .LBB5_4
|
||||
; CHECK-NEXT: // %bb.2:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB2_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.gt .LBB2_6
|
||||
; CHECK-NEXT: .LBB2_4: // %land.lhs.true3
|
||||
; CHECK-NEXT: .LBB5_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.gt .LBB5_6
|
||||
; CHECK-NEXT: .LBB5_4: // %land.lhs.true3
|
||||
; CHECK-NEXT: adrp x9, :got:d
|
||||
; CHECK-NEXT: ldr x9, [x9, :got_lo12:d]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB2_6
|
||||
; CHECK-NEXT: b.ne .LBB5_6
|
||||
; CHECK-NEXT: // %bb.5:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB2_6: // %if.end
|
||||
; CHECK-NEXT: .LBB5_6: // %if.end
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
@ -212,7 +316,7 @@ define i32 @combine_lt_gt_5() #0 {
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: cmp w8, #5
|
||||
; CHECK-NEXT: b.ge .LBB3_3
|
||||
; CHECK-NEXT: b.ge .LBB6_3
|
||||
; CHECK-NEXT: // %bb.1: // %land.lhs.true
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:c
|
||||
@ -221,12 +325,12 @@ define i32 @combine_lt_gt_5() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB3_6
|
||||
; CHECK-NEXT: b.ne .LBB6_6
|
||||
; CHECK-NEXT: // %bb.2:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB3_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.le .LBB3_6
|
||||
; CHECK-NEXT: .LBB6_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.le .LBB6_6
|
||||
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:d
|
||||
@ -235,11 +339,11 @@ define i32 @combine_lt_gt_5() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB3_6
|
||||
; CHECK-NEXT: b.ne .LBB6_6
|
||||
; CHECK-NEXT: // %bb.5:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB3_6: // %if.end
|
||||
; CHECK-NEXT: .LBB6_6: // %if.end
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
@ -279,7 +383,7 @@ define i32 @combine_gt_lt_n5() #0 {
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: cmn w8, #5
|
||||
; CHECK-NEXT: b.le .LBB4_3
|
||||
; CHECK-NEXT: b.le .LBB7_3
|
||||
; CHECK-NEXT: // %bb.1: // %land.lhs.true
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:c
|
||||
@ -288,12 +392,12 @@ define i32 @combine_gt_lt_n5() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB4_6
|
||||
; CHECK-NEXT: b.ne .LBB7_6
|
||||
; CHECK-NEXT: // %bb.2:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB4_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.ge .LBB4_6
|
||||
; CHECK-NEXT: .LBB7_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.ge .LBB7_6
|
||||
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:d
|
||||
@ -302,11 +406,11 @@ define i32 @combine_gt_lt_n5() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB4_6
|
||||
; CHECK-NEXT: b.ne .LBB7_6
|
||||
; CHECK-NEXT: // %bb.5:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB4_6: // %if.end
|
||||
; CHECK-NEXT: .LBB7_6: // %if.end
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
@ -346,7 +450,7 @@ define i32 @combine_lt_gt_n5() #0 {
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: cmn w8, #5
|
||||
; CHECK-NEXT: b.ge .LBB5_3
|
||||
; CHECK-NEXT: b.ge .LBB8_3
|
||||
; CHECK-NEXT: // %bb.1: // %land.lhs.true
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:c
|
||||
@ -355,12 +459,12 @@ define i32 @combine_lt_gt_n5() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB5_6
|
||||
; CHECK-NEXT: b.ne .LBB8_6
|
||||
; CHECK-NEXT: // %bb.2:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB5_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.le .LBB5_6
|
||||
; CHECK-NEXT: .LBB8_3: // %lor.lhs.false
|
||||
; CHECK-NEXT: b.le .LBB8_6
|
||||
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:d
|
||||
@ -369,11 +473,11 @@ define i32 @combine_lt_gt_n5() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB5_6
|
||||
; CHECK-NEXT: b.ne .LBB8_6
|
||||
; CHECK-NEXT: // %bb.5:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB5_6: // %if.end
|
||||
; CHECK-NEXT: .LBB8_6: // %if.end
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
@ -428,19 +532,19 @@ define void @combine_non_adjacent_cmp_br(ptr nocapture readonly %hdCall) #0 {
|
||||
; CHECK-NEXT: mov w19, #24 // =0x18
|
||||
; CHECK-NEXT: adrp x22, glob
|
||||
; CHECK-NEXT: add x21, x20, #2
|
||||
; CHECK-NEXT: .LBB6_1: // %land.rhs
|
||||
; CHECK-NEXT: .LBB9_1: // %land.rhs
|
||||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ldr x8, [x19]
|
||||
; CHECK-NEXT: cmp x8, #1
|
||||
; CHECK-NEXT: b.lt .LBB6_3
|
||||
; CHECK-NEXT: b.lt .LBB9_3
|
||||
; CHECK-NEXT: // %bb.2: // %while.body
|
||||
; CHECK-NEXT: // in Loop: Header=BB6_1 Depth=1
|
||||
; CHECK-NEXT: // in Loop: Header=BB9_1 Depth=1
|
||||
; CHECK-NEXT: ldr x0, [x22, :lo12:glob]
|
||||
; CHECK-NEXT: bl Update
|
||||
; CHECK-NEXT: sub x21, x21, #2
|
||||
; CHECK-NEXT: cmp x20, x21
|
||||
; CHECK-NEXT: b.lt .LBB6_1
|
||||
; CHECK-NEXT: .LBB6_3: // %while.end
|
||||
; CHECK-NEXT: b.lt .LBB9_1
|
||||
; CHECK-NEXT: .LBB9_3: // %while.end
|
||||
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
|
||||
@ -488,19 +592,19 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
|
||||
; CHECK-NEXT: ldr x19, [x19, :got_lo12:a]
|
||||
; CHECK-NEXT: ldr w8, [x19]
|
||||
; CHECK-NEXT: cmn w8, #2
|
||||
; CHECK-NEXT: b.gt .LBB7_4
|
||||
; CHECK-NEXT: b.gt .LBB10_4
|
||||
; CHECK-NEXT: // %bb.1: // %while.body.preheader
|
||||
; CHECK-NEXT: sub w20, w8, #1
|
||||
; CHECK-NEXT: .LBB7_2: // %while.body
|
||||
; CHECK-NEXT: .LBB10_2: // %while.body
|
||||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: bl do_something
|
||||
; CHECK-NEXT: adds w20, w20, #1
|
||||
; CHECK-NEXT: b.mi .LBB7_2
|
||||
; CHECK-NEXT: b.mi .LBB10_2
|
||||
; CHECK-NEXT: // %bb.3: // %while.cond.while.end_crit_edge
|
||||
; CHECK-NEXT: ldr w8, [x19]
|
||||
; CHECK-NEXT: .LBB7_4: // %while.end
|
||||
; CHECK-NEXT: .LBB10_4: // %while.end
|
||||
; CHECK-NEXT: cmp w8, #1
|
||||
; CHECK-NEXT: b.gt .LBB7_7
|
||||
; CHECK-NEXT: b.gt .LBB10_7
|
||||
; CHECK-NEXT: // %bb.5: // %land.lhs.true
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:d
|
||||
@ -509,13 +613,13 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB7_7
|
||||
; CHECK-NEXT: b.ne .LBB10_7
|
||||
; CHECK-NEXT: // %bb.6:
|
||||
; CHECK-NEXT: mov w0, #123 // =0x7b
|
||||
; CHECK-NEXT: b .LBB7_8
|
||||
; CHECK-NEXT: .LBB7_7: // %if.end
|
||||
; CHECK-NEXT: b .LBB10_8
|
||||
; CHECK-NEXT: .LBB10_7: // %if.end
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: .LBB7_8: // %return
|
||||
; CHECK-NEXT: .LBB10_8: // %return
|
||||
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
|
||||
; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 0
|
||||
@ -573,20 +677,20 @@ define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: b.gt .LBB8_3
|
||||
; CHECK-NEXT: b.gt .LBB11_3
|
||||
; CHECK-NEXT: // %bb.1: // %while.body.preheader
|
||||
; CHECK-NEXT: sub w19, w8, #1
|
||||
; CHECK-NEXT: .LBB8_2: // %while.body
|
||||
; CHECK-NEXT: .LBB11_2: // %while.body
|
||||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: bl do_something
|
||||
; CHECK-NEXT: adds w19, w19, #1
|
||||
; CHECK-NEXT: b.mi .LBB8_2
|
||||
; CHECK-NEXT: .LBB8_3: // %while.end
|
||||
; CHECK-NEXT: b.mi .LBB11_2
|
||||
; CHECK-NEXT: .LBB11_3: // %while.end
|
||||
; CHECK-NEXT: adrp x8, :got:c
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:c]
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: cmn w8, #2
|
||||
; CHECK-NEXT: b.lt .LBB8_6
|
||||
; CHECK-NEXT: b.lt .LBB11_6
|
||||
; CHECK-NEXT: // %bb.4: // %land.lhs.true
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:d
|
||||
@ -595,7 +699,7 @@ define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB8_6
|
||||
; CHECK-NEXT: b.ne .LBB11_6
|
||||
; CHECK-NEXT: // %bb.5:
|
||||
; CHECK-NEXT: mov w0, #123 // =0x7b
|
||||
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
|
||||
@ -603,7 +707,7 @@ define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
|
||||
; CHECK-NEXT: .cfi_restore w19
|
||||
; CHECK-NEXT: .cfi_restore w30
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB8_6: // %if.end
|
||||
; CHECK-NEXT: .LBB11_6: // %if.end
|
||||
; CHECK-NEXT: .cfi_restore_state
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
|
||||
@ -659,14 +763,14 @@ define i32 @fcmpri(i32 %argc, ptr nocapture readonly %argv) #0 {
|
||||
; CHECK-LABEL: fcmpri:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: cmp w0, #2
|
||||
; CHECK-NEXT: b.lt .LBB9_3
|
||||
; CHECK-NEXT: b.lt .LBB12_3
|
||||
; CHECK-NEXT: // %bb.1: // %land.lhs.true
|
||||
; CHECK-NEXT: ldr x8, [x1, #8]
|
||||
; CHECK-NEXT: cbz x8, .LBB9_3
|
||||
; CHECK-NEXT: cbz x8, .LBB12_3
|
||||
; CHECK-NEXT: // %bb.2:
|
||||
; CHECK-NEXT: mov w0, #3 // =0x3
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB9_3: // %if.end
|
||||
; CHECK-NEXT: .LBB12_3: // %if.end
|
||||
; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 32
|
||||
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
|
||||
@ -782,12 +886,12 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
|
||||
; CHECK-NEXT: cmp w8, #0
|
||||
; CHECK-NEXT: csel x9, x0, xzr, gt
|
||||
; CHECK-NEXT: str x9, [x1]
|
||||
; CHECK-NEXT: b.le .LBB11_2
|
||||
; CHECK-NEXT: b.le .LBB14_2
|
||||
; CHECK-NEXT: // %bb.1: // %lor.lhs.false
|
||||
; CHECK-NEXT: cmp w8, #2
|
||||
; CHECK-NEXT: b.ge .LBB11_4
|
||||
; CHECK-NEXT: b .LBB11_6
|
||||
; CHECK-NEXT: .LBB11_2: // %land.lhs.true
|
||||
; CHECK-NEXT: b.ge .LBB14_4
|
||||
; CHECK-NEXT: b .LBB14_6
|
||||
; CHECK-NEXT: .LBB14_2: // %land.lhs.true
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:c
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
|
||||
@ -795,11 +899,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB11_4
|
||||
; CHECK-NEXT: b.ne .LBB14_4
|
||||
; CHECK-NEXT: // %bb.3:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB11_4: // %land.lhs.true3
|
||||
; CHECK-NEXT: .LBB14_4: // %land.lhs.true3
|
||||
; CHECK-NEXT: adrp x8, :got:b
|
||||
; CHECK-NEXT: adrp x9, :got:d
|
||||
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
|
||||
@ -807,11 +911,11 @@ define i32 @combine_gt_ge_sel(i64 %v, ptr %p) #0 {
|
||||
; CHECK-NEXT: ldr w8, [x8]
|
||||
; CHECK-NEXT: ldr w9, [x9]
|
||||
; CHECK-NEXT: cmp w8, w9
|
||||
; CHECK-NEXT: b.ne .LBB11_6
|
||||
; CHECK-NEXT: b.ne .LBB14_6
|
||||
; CHECK-NEXT: // %bb.5:
|
||||
; CHECK-NEXT: mov w0, #1 // =0x1
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: .LBB11_6: // %if.end
|
||||
; CHECK-NEXT: .LBB14_6: // %if.end
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user