[CodeGen] Drop uses of BranchInst (#186391)
Largely a straightforward replacement with occasional simplifications. For AMDGPU, I assumed that unconditional branches are always uniform and therefore "simplified"/changed AMDGPUAnnotateUniformValues to only annotate conditional branches. Target-specific FastISel only selects conditional branches; unconditional branches are already handled by the non-target-specific code.
This commit is contained in:
parent
8885c37028
commit
01571f1b4a
@ -854,8 +854,7 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
|
||||
if (DT && !DT->isReachableFromEntry(BB))
|
||||
continue;
|
||||
|
||||
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
|
||||
if (Term && !Term->isConditional()) {
|
||||
if (isa<UncondBrInst>(SinglePred->getTerminator())) {
|
||||
Changed = true;
|
||||
LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
|
||||
|
||||
@ -885,8 +884,8 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
|
||||
/// Find a destination block from BB if BB is mergeable empty block.
|
||||
BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
|
||||
// If this block doesn't end with an uncond branch, ignore it.
|
||||
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
|
||||
if (!BI || !BI->isUnconditional())
|
||||
UncondBrInst *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
|
||||
if (!BI)
|
||||
return nullptr;
|
||||
|
||||
// If the instruction before the branch (skipping debug info) isn't a phi
|
||||
@ -899,7 +898,7 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
|
||||
}
|
||||
|
||||
// Do not break infinite loops.
|
||||
BasicBlock *DestBB = BI->getSuccessor(0);
|
||||
BasicBlock *DestBB = BI->getSuccessor();
|
||||
if (DestBB == BB)
|
||||
return nullptr;
|
||||
|
||||
@ -1121,8 +1120,8 @@ static void replaceAllUsesWith(Value *Old, Value *New,
|
||||
/// Eliminate a basic block that has only phi's and an unconditional branch in
|
||||
/// it.
|
||||
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
|
||||
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
|
||||
BasicBlock *DestBB = BI->getSuccessor(0);
|
||||
UncondBrInst *BI = cast<UncondBrInst>(BB->getTerminator());
|
||||
BasicBlock *DestBB = BI->getSuccessor();
|
||||
|
||||
LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
|
||||
<< *BB << *DestBB);
|
||||
@ -1939,10 +1938,10 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
|
||||
if (Pred != ICmpInst::ICMP_EQ)
|
||||
return false;
|
||||
|
||||
// If icmp eq has users other than BranchInst and SelectInst, converting it to
|
||||
// If icmp eq has users other than CondBrInst and SelectInst, converting it to
|
||||
// icmp slt/sgt would introduce more redundant LLVM IR.
|
||||
for (User *U : Cmp->users()) {
|
||||
if (isa<BranchInst>(U))
|
||||
if (isa<CondBrInst>(U))
|
||||
continue;
|
||||
if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
|
||||
continue;
|
||||
@ -1981,8 +1980,7 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
|
||||
// Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
|
||||
// And similarly for branches.
|
||||
for (User *U : Cmp->users()) {
|
||||
if (auto *BI = dyn_cast<BranchInst>(U)) {
|
||||
assert(BI->isConditional() && "Must be conditional");
|
||||
if (auto *BI = dyn_cast<CondBrInst>(U)) {
|
||||
BI->swapSuccessors();
|
||||
continue;
|
||||
}
|
||||
@ -3087,8 +3085,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
|
||||
for (auto const &TailCallBB : TailCallBBs) {
|
||||
// Make sure the call instruction is followed by an unconditional branch to
|
||||
// the return block.
|
||||
BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
|
||||
if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
|
||||
UncondBrInst *BI = dyn_cast<UncondBrInst>(TailCallBB->getTerminator());
|
||||
if (!BI || BI->getSuccessor() != BB)
|
||||
continue;
|
||||
|
||||
// Duplicate the return into TailCallBB.
|
||||
@ -7793,28 +7791,28 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
|
||||
BasicBlock *TrueBlock = nullptr;
|
||||
BasicBlock *FalseBlock = nullptr;
|
||||
BasicBlock *EndBlock = nullptr;
|
||||
BranchInst *TrueBranch = nullptr;
|
||||
BranchInst *FalseBranch = nullptr;
|
||||
UncondBrInst *TrueBranch = nullptr;
|
||||
UncondBrInst *FalseBranch = nullptr;
|
||||
if (TrueInstrs.size() == 0) {
|
||||
FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
|
||||
FalseBranch = cast<UncondBrInst>(SplitBlockAndInsertIfElse(
|
||||
CondFr, SplitPt, false, nullptr, nullptr, LI));
|
||||
FalseBlock = FalseBranch->getParent();
|
||||
EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
|
||||
} else if (FalseInstrs.size() == 0) {
|
||||
TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
|
||||
TrueBranch = cast<UncondBrInst>(SplitBlockAndInsertIfThen(
|
||||
CondFr, SplitPt, false, nullptr, nullptr, LI));
|
||||
TrueBlock = TrueBranch->getParent();
|
||||
EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
|
||||
EndBlock = TrueBranch->getSuccessor();
|
||||
} else {
|
||||
Instruction *ThenTerm = nullptr;
|
||||
Instruction *ElseTerm = nullptr;
|
||||
SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
|
||||
nullptr, nullptr, LI);
|
||||
TrueBranch = cast<BranchInst>(ThenTerm);
|
||||
FalseBranch = cast<BranchInst>(ElseTerm);
|
||||
TrueBranch = cast<UncondBrInst>(ThenTerm);
|
||||
FalseBranch = cast<UncondBrInst>(ElseTerm);
|
||||
TrueBlock = TrueBranch->getParent();
|
||||
FalseBlock = FalseBranch->getParent();
|
||||
EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
|
||||
EndBlock = TrueBranch->getSuccessor();
|
||||
}
|
||||
|
||||
EndBlock->setName("select.end");
|
||||
@ -9304,7 +9302,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
|
||||
m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
|
||||
continue;
|
||||
|
||||
auto *Br1 = cast<BranchInst>(BB.getTerminator());
|
||||
auto *Br1 = cast<CondBrInst>(BB.getTerminator());
|
||||
if (Br1->getMetadata(LLVMContext::MD_unpredictable))
|
||||
continue;
|
||||
|
||||
|
||||
@ -1739,8 +1739,8 @@ bool ComplexDeinterleavingGraph::collectPotentialReductions(BasicBlock *B) {
|
||||
if (Factor != 2)
|
||||
return false;
|
||||
|
||||
auto *Br = dyn_cast<BranchInst>(B->getTerminator());
|
||||
if (!Br || Br->getNumSuccessors() != 2)
|
||||
auto *Br = dyn_cast<CondBrInst>(B->getTerminator());
|
||||
if (!Br)
|
||||
return false;
|
||||
|
||||
// Identify simple one-block loop
|
||||
@ -2472,7 +2472,7 @@ void ComplexDeinterleavingGraph::processReductionOperation(
|
||||
auto *FinalReductionReal = ReductionInfo[Real].second;
|
||||
auto *FinalReductionImag = ReductionInfo[Imag].second;
|
||||
|
||||
auto *Br = cast<BranchInst>(BackEdge->getTerminator());
|
||||
auto *Br = cast<CondBrInst>(BackEdge->getTerminator());
|
||||
BasicBlock *ExitBB = Br->getSuccessor(Br->getSuccessor(0) == BackEdge);
|
||||
Builder.SetInsertPoint(&*ExitBB->getFirstInsertionPt());
|
||||
|
||||
|
||||
@ -278,7 +278,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
|
||||
// that feeds the _Unwind_Resume call.
|
||||
for (ResumeInst *RI : Resumes) {
|
||||
BasicBlock *Parent = RI->getParent();
|
||||
BranchInst::Create(UnwindBB, Parent);
|
||||
UncondBrInst::Create(UnwindBB, Parent);
|
||||
Updates.push_back({DominatorTree::Insert, Parent, UnwindBB});
|
||||
|
||||
Value *ExnObj = GetExceptionObject(RI);
|
||||
|
||||
@ -390,17 +390,14 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
|
||||
// next LoadCmpBlock,
|
||||
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
|
||||
ConstantInt::get(Diff->getType(), 0));
|
||||
BranchInst *CmpBr =
|
||||
BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
|
||||
Builder.Insert(CmpBr);
|
||||
Builder.CreateCondBr(Cmp, EndBlock, LoadCmpBlocks[BlockIndex + 1]);
|
||||
if (DTU)
|
||||
DTU->applyUpdates(
|
||||
{{DominatorTree::Insert, BB, EndBlock},
|
||||
{DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
|
||||
} else {
|
||||
// The last block has an unconditional branch to EndBlock.
|
||||
BranchInst *CmpBr = BranchInst::Create(EndBlock);
|
||||
Builder.Insert(CmpBr);
|
||||
Builder.CreateBr(EndBlock);
|
||||
if (DTU)
|
||||
DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
|
||||
}
|
||||
@ -488,10 +485,9 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
|
||||
// Early exit branch if difference found to ResultBlock. Otherwise,
|
||||
// continue to next LoadCmpBlock or EndBlock.
|
||||
BasicBlock *BB = Builder.GetInsertBlock();
|
||||
BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
|
||||
CondBrInst *CmpBr = Builder.CreateCondBr(Cmp, ResBlock.BB, NextBB);
|
||||
setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
|
||||
CI->getFunction());
|
||||
Builder.Insert(CmpBr);
|
||||
if (DTU)
|
||||
DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
|
||||
{DominatorTree::Insert, BB, NextBB}});
|
||||
@ -554,10 +550,9 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
|
||||
// Early exit branch if difference found to ResultBlock. Otherwise, continue
|
||||
// to next LoadCmpBlock or EndBlock.
|
||||
BasicBlock *BB = Builder.GetInsertBlock();
|
||||
BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
|
||||
CondBrInst *CmpBr = Builder.CreateCondBr(Cmp, NextBB, ResBlock.BB);
|
||||
setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
|
||||
CI->getFunction());
|
||||
Builder.Insert(CmpBr);
|
||||
if (DTU)
|
||||
DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
|
||||
{DominatorTree::Insert, BB, ResBlock.BB}});
|
||||
@ -582,8 +577,7 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
|
||||
Builder.SetInsertPoint(ResBlock.BB, InsertPt);
|
||||
Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
|
||||
PhiRes->addIncoming(Res, ResBlock.BB);
|
||||
BranchInst *NewBr = BranchInst::Create(EndBlock);
|
||||
Builder.Insert(NewBr);
|
||||
Builder.CreateBr(EndBlock);
|
||||
if (DTU)
|
||||
DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
|
||||
return;
|
||||
@ -601,8 +595,7 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
|
||||
DEBUG_TYPE, CI->getFunction());
|
||||
|
||||
PhiRes->addIncoming(Res, ResBlock.BB);
|
||||
BranchInst *NewBr = BranchInst::Create(EndBlock);
|
||||
Builder.Insert(NewBr);
|
||||
Builder.CreateBr(EndBlock);
|
||||
if (DTU)
|
||||
DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
|
||||
}
|
||||
|
||||
@ -391,11 +391,8 @@ static bool CanGenerateTest(Loop *L, Value *Count) {
|
||||
return false;
|
||||
|
||||
BasicBlock *Pred = Preheader->getSinglePredecessor();
|
||||
if (!isa<BranchInst>(Pred->getTerminator()))
|
||||
return false;
|
||||
|
||||
auto *BI = cast<BranchInst>(Pred->getTerminator());
|
||||
if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
|
||||
auto *BI = dyn_cast<CondBrInst>(Pred->getTerminator());
|
||||
if (!BI || !isa<ICmpInst>(BI->getCondition()))
|
||||
return false;
|
||||
|
||||
// Check that the icmp is checking for equality of Count and zero and that
|
||||
@ -453,7 +450,7 @@ Value *HardwareLoop::InitLoopCount() {
|
||||
|
||||
BasicBlock *BB = L->getLoopPreheader();
|
||||
if (UseLoopGuard && BB->getSinglePredecessor() &&
|
||||
cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
|
||||
isa<UncondBrInst>(BB->getTerminator())) {
|
||||
BasicBlock *Predecessor = BB->getSinglePredecessor();
|
||||
// If it's not safe to create a while loop then don't force it and create a
|
||||
// do-while loop instead
|
||||
@ -503,13 +500,9 @@ Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
|
||||
|
||||
// Use the return value of the intrinsic to control the entry of the loop.
|
||||
if (UseLoopGuard) {
|
||||
assert((isa<BranchInst>(BeginBB->getTerminator()) &&
|
||||
cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
|
||||
"Expected conditional branch");
|
||||
|
||||
Value *SetCount =
|
||||
UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
|
||||
auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
|
||||
auto *LoopGuard = cast<CondBrInst>(BeginBB->getTerminator());
|
||||
LoopGuard->setCondition(SetCount);
|
||||
if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
|
||||
LoopGuard->swapSuccessors();
|
||||
|
||||
@ -241,7 +241,7 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) {
|
||||
Updates.reserve(IndirectBrs.size() + 2 * IndirectBrSuccs.size());
|
||||
for (auto *IBr : IndirectBrs) {
|
||||
SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent());
|
||||
BranchInst::Create(SwitchBB, IBr->getIterator());
|
||||
UncondBrInst::Create(SwitchBB, IBr->getIterator());
|
||||
if (DTU) {
|
||||
Updates.push_back({DominatorTree::Insert, IBr->getParent(), SwitchBB});
|
||||
for (BasicBlock *SuccBB : IBr->successors())
|
||||
|
||||
@ -624,11 +624,11 @@ static bool expandCondLoop(Function &Intr) {
|
||||
for (User *U : llvm::make_early_inc_range(Intr.users())) {
|
||||
auto *Call = cast<CallInst>(U);
|
||||
|
||||
auto *Br = cast<BranchInst>(
|
||||
auto *Br = cast<UncondBrInst>(
|
||||
SplitBlockAndInsertIfThen(Call->getArgOperand(0), Call, false,
|
||||
getExplicitlyUnknownBranchWeightsIfProfiled(
|
||||
*Call->getFunction(), DEBUG_TYPE)));
|
||||
Br->setSuccessor(0, Br->getParent());
|
||||
Br->setSuccessor(Br->getParent());
|
||||
Call->eraseFromParent();
|
||||
}
|
||||
return true;
|
||||
@ -641,8 +641,8 @@ static bool expandLoopTrap(Function &Intr) {
|
||||
std::all_of(Call->getParent()->begin(), BasicBlock::iterator(Call),
|
||||
[](Instruction &I) { return !I.mayHaveSideEffects(); })) {
|
||||
for (auto *BB : predecessors(Call->getParent())) {
|
||||
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
|
||||
if (!BI || BI->isUnconditional())
|
||||
auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
|
||||
if (!BI)
|
||||
continue;
|
||||
IRBuilder<> B(BI);
|
||||
Value *Cond;
|
||||
|
||||
@ -659,7 +659,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
|
||||
// These are the new basic blocks for the conditional branch.
|
||||
// At least one will become an actual new basic block.
|
||||
BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
|
||||
BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
|
||||
UncondBrInst *TrueBranch = nullptr, *FalseBranch = nullptr;
|
||||
// Checks if select-like instruction would materialise on the given branch
|
||||
auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
|
||||
for (auto &SL : SG.Selects) {
|
||||
@ -671,7 +671,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
|
||||
if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
|
||||
TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink",
|
||||
EndBlock->getParent(), EndBlock);
|
||||
TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
|
||||
TrueBranch = UncondBrInst::Create(EndBlock, TrueBlock);
|
||||
TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
|
||||
for (Instruction *TrueInst : TrueSlicesInterleaved)
|
||||
TrueInst->moveBefore(TrueBranch->getIterator());
|
||||
@ -680,7 +680,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
|
||||
FalseBlock =
|
||||
BasicBlock::Create(EndBlock->getContext(), "select.false.sink",
|
||||
EndBlock->getParent(), EndBlock);
|
||||
FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
|
||||
FalseBranch = UncondBrInst::Create(EndBlock, FalseBlock);
|
||||
FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
|
||||
for (Instruction *FalseInst : FalseSlicesInterleaved)
|
||||
FalseInst->moveBefore(FalseBranch->getIterator());
|
||||
@ -693,7 +693,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
|
||||
|
||||
FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false",
|
||||
EndBlock->getParent(), EndBlock);
|
||||
auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
|
||||
auto *FalseBranch = UncondBrInst::Create(EndBlock, FalseBlock);
|
||||
FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
|
||||
}
|
||||
|
||||
|
||||
@ -2529,15 +2529,9 @@ static bool collectInstructionDeps(
|
||||
}
|
||||
|
||||
bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
|
||||
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
|
||||
const FunctionLoweringInfo &FuncInfo, const CondBrInst &I,
|
||||
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
|
||||
TargetLoweringBase::CondMergingParams Params) const {
|
||||
if (I.getNumSuccessors() != 2)
|
||||
return false;
|
||||
|
||||
if (!I.isConditional())
|
||||
return false;
|
||||
|
||||
if (Params.BaseCost < 0)
|
||||
return false;
|
||||
|
||||
|
||||
@ -46,12 +46,12 @@ class AtomicCmpXchgInst;
|
||||
class AtomicRMWInst;
|
||||
class AssumptionCache;
|
||||
class BasicBlock;
|
||||
class BranchInst;
|
||||
class CallInst;
|
||||
class CallBrInst;
|
||||
class CatchPadInst;
|
||||
class CatchReturnInst;
|
||||
class CatchSwitchInst;
|
||||
class CondBrInst;
|
||||
class CleanupPadInst;
|
||||
class CleanupReturnInst;
|
||||
class Constant;
|
||||
@ -398,7 +398,7 @@ public:
|
||||
}
|
||||
|
||||
bool shouldKeepJumpConditionsTogether(
|
||||
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
|
||||
const FunctionLoweringInfo &FuncInfo, const CondBrInst &I,
|
||||
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
|
||||
TargetLoweringBase::CondMergingParams Params) const;
|
||||
|
||||
|
||||
@ -385,7 +385,7 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) {
|
||||
if (Function *Callee = II->getCalledFunction())
|
||||
if (Callee->getIntrinsicID() == Intrinsic::donothing) {
|
||||
// Remove the NOP invoke.
|
||||
BranchInst::Create(II->getNormalDest(), II->getIterator());
|
||||
UncondBrInst::Create(II->getNormalDest(), II->getIterator());
|
||||
II->eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -733,7 +733,7 @@ bool InsertStackProtectors(const TargetLowering &TLI,
|
||||
/*Unreachable=*/false, Weights, DTU,
|
||||
/*LI=*/nullptr, /*ThenBlock=*/FailBB);
|
||||
|
||||
auto *BI = cast<BranchInst>(Cmp->getParent()->getTerminator());
|
||||
auto *BI = cast<CondBrInst>(Cmp->getParent()->getTerminator());
|
||||
BasicBlock *NewBB = BI->getSuccessor(1);
|
||||
NewBB->setName("SP_return");
|
||||
NewBB->moveAfter(&BB);
|
||||
|
||||
@ -1396,12 +1396,12 @@ void WinEHPrepareImpl::replaceUseWithLoad(
|
||||
// br label %PHIBlock
|
||||
// So move the terminators to each others' blocks and swap their
|
||||
// successors.
|
||||
BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
|
||||
UncondBrInst *Goto = cast<UncondBrInst>(IncomingBlock->getTerminator());
|
||||
Goto->removeFromParent();
|
||||
CatchRet->removeFromParent();
|
||||
CatchRet->insertInto(IncomingBlock, IncomingBlock->end());
|
||||
Goto->insertInto(NewBlock, NewBlock->end());
|
||||
Goto->setSuccessor(0, PHIBlock);
|
||||
Goto->setSuccessor(PHIBlock);
|
||||
CatchRet->setSuccessor(NewBlock);
|
||||
// Update the color mapping for the newly split edge.
|
||||
// Grab a reference to the ColorVector to be inserted before getting the
|
||||
|
||||
@ -211,7 +211,7 @@ private:
|
||||
bool WantResult = true);
|
||||
|
||||
// Emit functions.
|
||||
bool emitCompareAndBranch(const BranchInst *BI);
|
||||
bool emitCompareAndBranch(const CondBrInst *BI);
|
||||
bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
|
||||
bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
|
||||
bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
|
||||
@ -2251,7 +2251,7 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
|
||||
}
|
||||
|
||||
/// Try to emit a combined compare-and-branch instruction.
|
||||
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
|
||||
bool AArch64FastISel::emitCompareAndBranch(const CondBrInst *BI) {
|
||||
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
|
||||
// will not be produced, as they are conditional branch instructions that do
|
||||
// not set flags.
|
||||
@ -2377,12 +2377,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
|
||||
}
|
||||
|
||||
bool AArch64FastISel::selectBranch(const Instruction *I) {
|
||||
const BranchInst *BI = cast<BranchInst>(I);
|
||||
if (BI->isUnconditional()) {
|
||||
MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
|
||||
fastEmitBranch(MSucc, BI->getDebugLoc());
|
||||
return true;
|
||||
}
|
||||
const CondBrInst *BI = cast<CondBrInst>(I);
|
||||
|
||||
MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
|
||||
MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
|
||||
|
||||
@ -5242,7 +5242,7 @@ static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
|
||||
return false;
|
||||
|
||||
if (any_of(Blocks, [](BasicBlock *BB) {
|
||||
return !isa<BranchInst>(BB->getTerminator());
|
||||
return !isa<UncondBrInst, CondBrInst>(BB->getTerminator());
|
||||
}))
|
||||
return false;
|
||||
|
||||
@ -5371,10 +5371,9 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
|
||||
|
||||
// Try to runtime-unroll loops with early-continues depending on loop-varying
|
||||
// loads; this helps with branch-prediction for the early-continues.
|
||||
auto *Term = dyn_cast<BranchInst>(Header->getTerminator());
|
||||
auto *Term = dyn_cast<CondBrInst>(Header->getTerminator());
|
||||
SmallVector<BasicBlock *> Preds(predecessors(Latch));
|
||||
if (!Term || !Term->isConditional() || Preds.size() == 1 ||
|
||||
!llvm::is_contained(Preds, Header) ||
|
||||
if (!Term || Preds.size() == 1 || !llvm::is_contained(Preds, Header) ||
|
||||
none_of(Preds, [L](BasicBlock *Pred) { return L->contains(Pred); }))
|
||||
return;
|
||||
|
||||
@ -6530,8 +6529,7 @@ bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(
|
||||
// break point in the code - the end of a block with an unconditional
|
||||
// terminator.
|
||||
if (I->getOpcode() == Instruction::Or &&
|
||||
isa<BranchInst>(I->getNextNode()) &&
|
||||
cast<BranchInst>(I->getNextNode())->isUnconditional())
|
||||
isa<UncondBrInst>(I->getNextNode()))
|
||||
return true;
|
||||
|
||||
if (I->getOpcode() == Instruction::Add ||
|
||||
|
||||
@ -51,7 +51,7 @@ public:
|
||||
: UA(&UA), MSSA(&MSSA), AA(&AA),
|
||||
isEntryFunc(AMDGPU::isEntryFunctionCC(F.getCallingConv())) {}
|
||||
|
||||
void visitBranchInst(BranchInst &I);
|
||||
void visitCondBrInst(CondBrInst &I);
|
||||
void visitLoadInst(LoadInst &I);
|
||||
|
||||
bool changed() const { return Changed; }
|
||||
@ -59,7 +59,7 @@ public:
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
|
||||
void AMDGPUAnnotateUniformValues::visitCondBrInst(CondBrInst &I) {
|
||||
if (UA->isUniform(&I))
|
||||
setUniformMetadata(&I);
|
||||
}
|
||||
|
||||
@ -838,7 +838,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
|
||||
//
|
||||
// OriginalBB is known to have a branch as terminator because
|
||||
// SplitBlockAndInsertIfThen will have inserted one.
|
||||
BranchInst *Terminator = cast<BranchInst>(OriginalBB->getTerminator());
|
||||
CondBrInst *Terminator = cast<CondBrInst>(OriginalBB->getTerminator());
|
||||
B.SetInsertPoint(ComputeEnd);
|
||||
Terminator->removeFromParent();
|
||||
B.Insert(Terminator);
|
||||
|
||||
@ -167,8 +167,8 @@ void AMDGPUTTIImpl::getUnrollingPreferences(
|
||||
// if region and potentially even PHI itself, saving on both divergence
|
||||
// and registers used for the PHI.
|
||||
// Add a small bonus for each of such "if" statements.
|
||||
if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
|
||||
if (UP.Threshold < MaxBoost && Br->isConditional()) {
|
||||
if (const CondBrInst *Br = dyn_cast<CondBrInst>(&I)) {
|
||||
if (UP.Threshold < MaxBoost) {
|
||||
BasicBlock *Succ0 = Br->getSuccessor(0);
|
||||
BasicBlock *Succ1 = Br->getSuccessor(1);
|
||||
if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||
|
||||
|
||||
@ -164,7 +164,7 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
|
||||
|
||||
// Remove and delete the return inst.
|
||||
BB->getTerminator()->eraseFromParent();
|
||||
BranchInst::Create(NewRetBlock, BB);
|
||||
UncondBrInst::Create(NewRetBlock, BB);
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, NewRetBlock);
|
||||
}
|
||||
|
||||
@ -216,8 +216,8 @@ static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
|
||||
// Create a branch that will always branch to the transition block and
|
||||
// references DummyReturnBB.
|
||||
BB->getTerminator()->eraseFromParent();
|
||||
BranchInst::Create(TransitionBB, DummyReturnBB,
|
||||
ConstantInt::getTrue(F.getContext()), BB);
|
||||
CondBrInst::Create(ConstantInt::getTrue(F.getContext()), TransitionBB,
|
||||
DummyReturnBB, BB);
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
|
||||
}
|
||||
|
||||
@ -225,8 +225,8 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
|
||||
const PostDominatorTree &PDT,
|
||||
const UniformityInfo &UA) {
|
||||
if (PDT.root_size() == 0 ||
|
||||
(PDT.root_size() == 1 &&
|
||||
!isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
|
||||
(PDT.root_size() == 1 && !isa<UncondBrInst, CondBrInst, CallBrInst>(
|
||||
PDT.getRoot()->getTerminator())))
|
||||
return false;
|
||||
|
||||
// Loop over all of the blocks in a function, tracking all of the blocks that
|
||||
@ -250,35 +250,32 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
|
||||
PDT.roots(), [&](auto BB) { return !isUniformlyReached(UA, *BB); });
|
||||
|
||||
for (BasicBlock *BB : PDT.roots()) {
|
||||
if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
|
||||
Instruction *Term = BB->getTerminator();
|
||||
if (auto *RI = dyn_cast<ReturnInst>(Term)) {
|
||||
auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode());
|
||||
if (CI && CI->isMustTailCall())
|
||||
continue;
|
||||
if (HasDivergentExitBlock)
|
||||
ReturningBlocks.push_back(BB);
|
||||
} else if (isa<UnreachableInst>(BB->getTerminator())) {
|
||||
} else if (isa<UnreachableInst>(Term)) {
|
||||
if (HasDivergentExitBlock)
|
||||
UnreachableBlocks.push_back(BB);
|
||||
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
|
||||
} else if (UncondBrInst *BI = dyn_cast<UncondBrInst>(Term)) {
|
||||
if (!DummyReturnBB)
|
||||
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
|
||||
|
||||
if (BI->isUnconditional()) {
|
||||
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
|
||||
BI->eraseFromParent(); // Delete the unconditional branch.
|
||||
// Add a new conditional branch with a dummy edge to the return block.
|
||||
BranchInst::Create(LoopHeaderBB, DummyReturnBB,
|
||||
ConstantInt::getTrue(F.getContext()), BB);
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
|
||||
} else {
|
||||
handleNBranch(F, BB, BI, DummyReturnBB, Updates);
|
||||
}
|
||||
BasicBlock *LoopHeaderBB = BI->getSuccessor();
|
||||
BI->eraseFromParent(); // Delete the unconditional branch.
|
||||
// Add a new conditional branch with a dummy edge to the return block.
|
||||
CondBrInst::Create(ConstantInt::getTrue(F.getContext()), LoopHeaderBB,
|
||||
DummyReturnBB, BB);
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
|
||||
Changed = true;
|
||||
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
|
||||
} else if (isa<CondBrInst, CallBrInst>(Term)) {
|
||||
if (!DummyReturnBB)
|
||||
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
|
||||
|
||||
handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
|
||||
handleNBranch(F, BB, Term, DummyReturnBB, Updates);
|
||||
Changed = true;
|
||||
} else {
|
||||
llvm_unreachable("unsupported block terminator");
|
||||
@ -299,7 +296,7 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
|
||||
for (BasicBlock *BB : UnreachableBlocks) {
|
||||
// Remove and delete the unreachable inst.
|
||||
BB->getTerminator()->eraseFromParent();
|
||||
BranchInst::Create(UnreachableBlock, BB);
|
||||
UncondBrInst::Create(UnreachableBlock, BB);
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, UnreachableBlock);
|
||||
}
|
||||
Changed = true;
|
||||
|
||||
@ -66,7 +66,7 @@ private:
|
||||
|
||||
void initialize(const GCNSubtarget &ST);
|
||||
|
||||
bool isUniform(BranchInst *T);
|
||||
bool isUniform(CondBrInst *T);
|
||||
|
||||
bool isTopOfStack(BasicBlock *BB);
|
||||
|
||||
@ -80,15 +80,14 @@ private:
|
||||
|
||||
bool eraseIfUnused(PHINode *Phi);
|
||||
|
||||
bool openIf(BranchInst *Term);
|
||||
bool openIf(CondBrInst *Term);
|
||||
|
||||
bool insertElse(BranchInst *Term);
|
||||
bool insertElse(CondBrInst *Term);
|
||||
|
||||
Value *
|
||||
handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
|
||||
BranchInst *Term);
|
||||
Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
|
||||
CondBrInst *Term);
|
||||
|
||||
bool handleLoop(BranchInst *Term);
|
||||
bool handleLoop(CondBrInst *Term);
|
||||
|
||||
bool closeControlFlow(BasicBlock *BB);
|
||||
|
||||
@ -128,7 +127,7 @@ void SIAnnotateControlFlow::initialize(const GCNSubtarget &ST) {
|
||||
|
||||
/// Is the branch condition uniform or did the StructurizeCFG pass
|
||||
/// consider it as such?
|
||||
bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
|
||||
bool SIAnnotateControlFlow::isUniform(CondBrInst *T) {
|
||||
return UA->isUniform(T) || T->hasMetadata("structurizecfg.uniform");
|
||||
}
|
||||
|
||||
@ -184,7 +183,7 @@ bool SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
|
||||
}
|
||||
|
||||
/// Open a new "If" block
|
||||
bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
|
||||
bool SIAnnotateControlFlow::openIf(CondBrInst *Term) {
|
||||
if (isUniform(Term))
|
||||
return false;
|
||||
|
||||
@ -199,7 +198,7 @@ bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
|
||||
}
|
||||
|
||||
/// Close the last "If" block and open a new "Else" block
|
||||
bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
|
||||
bool SIAnnotateControlFlow::insertElse(CondBrInst *Term) {
|
||||
if (isUniform(Term)) {
|
||||
return false;
|
||||
}
|
||||
@ -215,8 +214,9 @@ bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
|
||||
}
|
||||
|
||||
/// Recursively handle the condition leading to a loop
|
||||
Value *SIAnnotateControlFlow::handleLoopCondition(
|
||||
Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
|
||||
Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
|
||||
llvm::Loop *L,
|
||||
CondBrInst *Term) {
|
||||
|
||||
auto CreateBreak = [this, Cond, Broken](Instruction *I) -> CallInst * {
|
||||
return IRBuilder<>(I).CreateCall(
|
||||
@ -257,7 +257,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
|
||||
}
|
||||
|
||||
/// Handle a back edge (loop)
|
||||
bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
|
||||
bool SIAnnotateControlFlow::handleLoop(CondBrInst *Term) {
|
||||
if (isUniform(Term))
|
||||
return false;
|
||||
|
||||
@ -347,9 +347,9 @@ bool SIAnnotateControlFlow::run() {
|
||||
E = df_end(&F->getEntryBlock());
|
||||
I != E; ++I) {
|
||||
BasicBlock *BB = *I;
|
||||
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
|
||||
CondBrInst *Term = dyn_cast<CondBrInst>(BB->getTerminator());
|
||||
|
||||
if (!Term || Term->isUnconditional()) {
|
||||
if (!Term) {
|
||||
if (isTopOfStack(BB))
|
||||
Changed |= closeControlFlow(BB);
|
||||
|
||||
|
||||
@ -1268,7 +1268,7 @@ static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
|
||||
}
|
||||
|
||||
bool ARMFastISel::SelectBranch(const Instruction *I) {
|
||||
const BranchInst *BI = cast<BranchInst>(I);
|
||||
const CondBrInst *BI = cast<CondBrInst>(I);
|
||||
MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
|
||||
MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
|
||||
|
||||
|
||||
@ -218,8 +218,8 @@ bool BPFAdjustOptImpl::serializeICMPCrossBB(BasicBlock &BB) {
|
||||
return false;
|
||||
|
||||
Instruction *TI = B2->getTerminator();
|
||||
auto *BI = dyn_cast<BranchInst>(TI);
|
||||
if (!BI || !BI->isConditional())
|
||||
auto *BI = dyn_cast<CondBrInst>(TI);
|
||||
if (!BI)
|
||||
return false;
|
||||
auto *Cond = dyn_cast<ICmpInst>(BI->getCondition());
|
||||
if (!Cond || &*B2->getFirstNonPHIIt() != Cond)
|
||||
@ -228,8 +228,8 @@ bool BPFAdjustOptImpl::serializeICMPCrossBB(BasicBlock &BB) {
|
||||
auto Cond2Op = Cond->getPredicate();
|
||||
|
||||
TI = B1->getTerminator();
|
||||
BI = dyn_cast<BranchInst>(TI);
|
||||
if (!BI || !BI->isConditional())
|
||||
BI = dyn_cast<CondBrInst>(TI);
|
||||
if (!BI)
|
||||
return false;
|
||||
Cond = dyn_cast<ICmpInst>(BI->getCondition());
|
||||
if (!Cond)
|
||||
|
||||
@ -480,7 +480,7 @@ static void translateInstructionMetadata(Module &M) {
|
||||
translateBranchMetadata(M, I);
|
||||
|
||||
for (auto &I : make_early_inc_range(BB)) {
|
||||
if (isa<BranchInst>(I))
|
||||
if (isa<UncondBrInst, CondBrInst>(I))
|
||||
if (MDNode *LoopMD = I.getMetadata(MDLoopKind))
|
||||
translateLoopMetadata(M, &I, LoopMD);
|
||||
I.dropUnknownNonDebugMetadata(DXILCompatibleMDs);
|
||||
|
||||
@ -2336,14 +2336,15 @@ void DXILBitcodeWriter::writeInstruction(const Instruction &I, unsigned InstID,
|
||||
}
|
||||
} break;
|
||||
case Instruction::UncondBr:
|
||||
Code = bitc::FUNC_CODE_INST_BR;
|
||||
Vals.push_back(VE.getValueID(cast<UncondBrInst>(I).getSuccessor()));
|
||||
break;
|
||||
case Instruction::CondBr: {
|
||||
Code = bitc::FUNC_CODE_INST_BR;
|
||||
const BranchInst &II = cast<BranchInst>(I);
|
||||
const CondBrInst &II = cast<CondBrInst>(I);
|
||||
Vals.push_back(VE.getValueID(II.getSuccessor(0)));
|
||||
if (II.isConditional()) {
|
||||
Vals.push_back(VE.getValueID(II.getSuccessor(1)));
|
||||
pushValue(II.getCondition(), InstID, Vals);
|
||||
}
|
||||
Vals.push_back(VE.getValueID(II.getSuccessor(1)));
|
||||
pushValue(II.getCondition(), InstID, Vals);
|
||||
} break;
|
||||
case Instruction::Switch: {
|
||||
Code = bitc::FUNC_CODE_INST_SWITCH;
|
||||
|
||||
@ -2342,7 +2342,7 @@ bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
|
||||
// instructions in it that are not involved in the original set Insts.
|
||||
for (auto *B : L->blocks()) {
|
||||
for (auto &In : *B) {
|
||||
if (isa<BranchInst>(In))
|
||||
if (isa<UncondBrInst, CondBrInst>(In))
|
||||
continue;
|
||||
if (!Worklist.count(&In) && In.mayHaveSideEffects())
|
||||
return false;
|
||||
|
||||
@ -950,7 +950,7 @@ bool MipsFastISel::selectStore(const Instruction *I) {
|
||||
// This can cause a redundant sltiu to be generated.
|
||||
// FIXME: try and eliminate this in a future patch.
|
||||
bool MipsFastISel::selectBranch(const Instruction *I) {
|
||||
const BranchInst *BI = cast<BranchInst>(I);
|
||||
const CondBrInst *BI = cast<CondBrInst>(I);
|
||||
MachineBasicBlock *BrBB = FuncInfo.MBB;
|
||||
//
|
||||
// TBB is the basic block for the case where the comparison is true.
|
||||
|
||||
@ -147,16 +147,9 @@ void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
|
||||
// live is actually unreachable and can be trivially eliminated by the
|
||||
// unreachable block elimination pass.
|
||||
for (Use &U : From->uses()) {
|
||||
if (BranchInst *BI = dyn_cast<BranchInst>(U)) {
|
||||
if (BI->isUnconditional()) continue;
|
||||
BasicBlock *Dest;
|
||||
if (To->isZero())
|
||||
// Get false block
|
||||
Dest = BI->getSuccessor(1);
|
||||
else
|
||||
// Get true block
|
||||
Dest = BI->getSuccessor(0);
|
||||
BranchInst::Create(Dest, BI->getIterator());
|
||||
if (CondBrInst *BI = dyn_cast<CondBrInst>(U)) {
|
||||
BasicBlock *Dest = BI->getSuccessor(To->isZero() ? 1 : 0);
|
||||
UncondBrInst::Create(Dest, BI->getIterator());
|
||||
InstrToDelete.push_back(BI);
|
||||
}
|
||||
}
|
||||
|
||||
@ -752,7 +752,7 @@ bool PPCFastISel::SelectStore(const Instruction *I) {
|
||||
|
||||
// Attempt to fast-select a branch instruction.
|
||||
bool PPCFastISel::SelectBranch(const Instruction *I) {
|
||||
const BranchInst *BI = cast<BranchInst>(I);
|
||||
const CondBrInst *BI = cast<CondBrInst>(I);
|
||||
MachineBasicBlock *BrBB = FuncInfo.MBB;
|
||||
MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
|
||||
MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));
|
||||
|
||||
@ -386,10 +386,9 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
|
||||
Instruction *TI = BB->getTerminator();
|
||||
if (!TI) continue;
|
||||
|
||||
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
|
||||
if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
|
||||
uint64_t TrueWeight = 0, FalseWeight = 0;
|
||||
if (!BI->isConditional() ||
|
||||
!extractBranchWeights(*BI, TrueWeight, FalseWeight))
|
||||
if (!extractBranchWeights(*BI, TrueWeight, FalseWeight))
|
||||
continue;
|
||||
|
||||
// If the exit path is more frequent than the loop path,
|
||||
|
||||
@ -3589,8 +3589,7 @@ bool RISCVTTIImpl::shouldTreatInstructionLikeSelect(
|
||||
// break point in the code - the end of a block with an unconditional
|
||||
// terminator.
|
||||
if (I->getOpcode() == Instruction::Or &&
|
||||
isa<BranchInst>(I->getNextNode()) &&
|
||||
cast<BranchInst>(I->getNextNode())->isUnconditional())
|
||||
isa<UncondBrInst>(I->getNextNode()))
|
||||
return true;
|
||||
|
||||
if (I->getOpcode() == Instruction::Add ||
|
||||
|
||||
@ -1499,12 +1499,7 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
|
||||
}
|
||||
|
||||
bool WebAssemblyFastISel::selectBr(const Instruction *I) {
|
||||
const auto *Br = cast<BranchInst>(I);
|
||||
if (Br->isUnconditional()) {
|
||||
MachineBasicBlock *MSucc = FuncInfo.getMBB(Br->getSuccessor(0));
|
||||
fastEmitBranch(MSucc, Br->getDebugLoc());
|
||||
return true;
|
||||
}
|
||||
const auto *Br = cast<CondBrInst>(I);
|
||||
|
||||
MachineBasicBlock *TBB = FuncInfo.getMBB(Br->getSuccessor(0));
|
||||
MachineBasicBlock *FBB = FuncInfo.getMBB(Br->getSuccessor(1));
|
||||
|
||||
@ -1646,7 +1646,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
|
||||
BasicBlock *OrigEntry = Entry->getNextNode();
|
||||
BasicBlock *SetjmpDispatchBB =
|
||||
BasicBlock::Create(C, "setjmp.dispatch", &F, OrigEntry);
|
||||
cast<BranchInst>(Entry->getTerminator())->setSuccessor(0, SetjmpDispatchBB);
|
||||
cast<UncondBrInst>(Entry->getTerminator())->setSuccessor(SetjmpDispatchBB);
|
||||
|
||||
// Create catch.dispatch.longjmp BB and a catchswitch instruction
|
||||
BasicBlock *CatchDispatchLongjmpBB =
|
||||
|
||||
@ -1643,7 +1643,7 @@ bool X86FastISel::X86SelectSExt(const Instruction *I) {
|
||||
bool X86FastISel::X86SelectBranch(const Instruction *I) {
|
||||
// Unconditional branches are selected by tablegen-generated code.
|
||||
// Handle a conditional branch.
|
||||
const BranchInst *BI = cast<BranchInst>(I);
|
||||
const CondBrInst *BI = cast<CondBrInst>(I);
|
||||
MachineBasicBlock *TrueMBB = FuncInfo.getMBB(BI->getSuccessor(0));
|
||||
MachineBasicBlock *FalseMBB = FuncInfo.getMBB(BI->getSuccessor(1));
|
||||
|
||||
|
||||
@ -120,8 +120,8 @@ BasicBlock *X86LowerAMXIntrinsics::createLoop(BasicBlock *Preheader,
|
||||
BasicBlock::Create(Ctx, Name + ".latch", Header->getParent(), Exit);
|
||||
|
||||
Type *I16Ty = Type::getInt16Ty(Ctx);
|
||||
BranchInst::Create(Body, Header);
|
||||
BranchInst::Create(Latch, Body);
|
||||
UncondBrInst::Create(Body, Header);
|
||||
UncondBrInst::Create(Latch, Body);
|
||||
PHINode *IV =
|
||||
PHINode::Create(I16Ty, 2, Name + ".iv", Header->getTerminator()->getIterator());
|
||||
IV->addIncoming(ConstantInt::get(I16Ty, 0), Preheader);
|
||||
@ -129,7 +129,7 @@ BasicBlock *X86LowerAMXIntrinsics::createLoop(BasicBlock *Preheader,
|
||||
B.SetInsertPoint(Latch);
|
||||
Value *Inc = B.CreateAdd(IV, Step, Name + ".step");
|
||||
Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond");
|
||||
auto *BR = BranchInst::Create(Header, Exit, Cond, Latch);
|
||||
auto *BR = CondBrInst::Create(Cond, Header, Exit, Latch);
|
||||
if (!ProfcheckDisableMetadataFixes) {
|
||||
if (auto *BoundInt = dyn_cast<ConstantInt>(Bound)) {
|
||||
assert(Step->getZExtValue() != 0 &&
|
||||
@ -144,9 +144,9 @@ BasicBlock *X86LowerAMXIntrinsics::createLoop(BasicBlock *Preheader,
|
||||
}
|
||||
IV->addIncoming(Inc, Latch);
|
||||
|
||||
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
|
||||
BasicBlock *Tmp = PreheaderBr->getSuccessor(0);
|
||||
PreheaderBr->setSuccessor(0, Header);
|
||||
UncondBrInst *PreheaderBr = cast<UncondBrInst>(Preheader->getTerminator());
|
||||
BasicBlock *Tmp = PreheaderBr->getSuccessor();
|
||||
PreheaderBr->setSuccessor(Header);
|
||||
DTU.applyUpdatesPermissive({
|
||||
{DominatorTree::Delete, Preheader, Tmp},
|
||||
{DominatorTree::Insert, Header, Body},
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -mtriple=amdgcn -mcpu=gfx900 -amdgpu-aa -amdgpu-aa-wrapper -amdgpu-annotate-uniform -S < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
@ -11,13 +10,13 @@
|
||||
define amdgpu_kernel void @simple_barrier(ptr addrspace(1) %arg) {
|
||||
; CHECK-LABEL: @simple_barrier(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0:![0-9]+]]
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
|
||||
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
|
||||
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
|
||||
@ -55,17 +54,17 @@ bb:
|
||||
define amdgpu_kernel void @memory_phi_no_clobber(ptr addrspace(1) %arg, i1 %cond) {
|
||||
; CHECK-LABEL: @memory_phi_no_clobber(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: br i1 %cond, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform [[META0]]
|
||||
; CHECK: if.then:
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: br label [[IF_END:%.*]]
|
||||
; CHECK: if.else:
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release
|
||||
; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: br label [[IF_END]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
|
||||
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
|
||||
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
|
||||
@ -121,17 +120,17 @@ if.end:
|
||||
define amdgpu_kernel void @memory_phi_clobber1(ptr addrspace(1) %arg, i1 %cond) {
|
||||
; CHECK-LABEL: @memory_phi_clobber1(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: br i1 %cond, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform [[META0]]
|
||||
; CHECK: if.then:
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3
|
||||
; CHECK-NEXT: store i32 1, ptr addrspace(1) [[GEP]], align 4
|
||||
; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: br label [[IF_END:%.*]]
|
||||
; CHECK: if.else:
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: br label [[IF_END]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4
|
||||
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
|
||||
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
|
||||
@ -190,17 +189,17 @@ if.end:
|
||||
define amdgpu_kernel void @memory_phi_clobber2(ptr addrspace(1) %arg, i1 %cond) {
|
||||
; CHECK-LABEL: @memory_phi_clobber2(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: br i1 %cond, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform [[META0]]
|
||||
; CHECK: if.then:
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: br label [[IF_END:%.*]]
|
||||
; CHECK: if.else:
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3
|
||||
; CHECK-NEXT: store i32 1, ptr addrspace(1) [[GEP]], align 4
|
||||
; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: br label [[IF_END]]
|
||||
; CHECK: if.end:
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4
|
||||
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
|
||||
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
|
||||
@ -259,16 +258,16 @@ if.end:
|
||||
define amdgpu_kernel void @no_clobbering_loop1(ptr addrspace(1) %arg, i1 %cc) {
|
||||
; CHECK-LABEL: @no_clobbering_loop1(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
|
||||
; CHECK: while.cond:
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
|
||||
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
|
||||
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
|
||||
; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform [[META0]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -316,18 +315,18 @@ end:
|
||||
define amdgpu_kernel void @no_clobbering_loop2(ptr addrspace(1) noalias %arg, ptr addrspace(1) noalias %out, i32 %n) {
|
||||
; CHECK-LABEL: @no_clobbering_loop2(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
|
||||
; CHECK: while.cond:
|
||||
; CHECK-NEXT: [[C:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
|
||||
; CHECK-NEXT: [[ACC:%.*]] = phi i32 [ [[I]], [[BB]] ], [ [[I3:%.*]], [[WHILE_COND]] ]
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i32 [[C]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i32 [[C]], !amdgpu.uniform [[META0]]
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: [[I3]] = add i32 [[I2]], [[ACC]]
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
|
||||
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[C]], 1
|
||||
; CHECK-NEXT: [[CC:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
|
||||
; CHECK-NEXT: br i1 [[CC]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: br i1 [[CC]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform [[META0]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
@ -377,16 +376,16 @@ end:
|
||||
define amdgpu_kernel void @clobbering_loop(ptr addrspace(1) %arg, ptr addrspace(1) %out, i1 %cc) {
|
||||
; CHECK-LABEL: @clobbering_loop(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
|
||||
; CHECK: while.cond:
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4
|
||||
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
|
||||
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT:%.*]], i64 1
|
||||
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
|
||||
; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform !0
|
||||
; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform [[META0]]
|
||||
; CHECK: end:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -433,10 +432,10 @@ end:
|
||||
define amdgpu_kernel void @clobber_by_atomic_load(ptr addrspace(1) %arg) {
|
||||
; CHECK-LABEL: @clobber_by_atomic_load(
|
||||
; CHECK-NEXT: bb:
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2, !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[GEP]] seq_cst, align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3, !amdgpu.uniform !0
|
||||
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2, !amdgpu.uniform [[META0]]
|
||||
; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[GEP]] seq_cst, align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3, !amdgpu.uniform [[META0]]
|
||||
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4
|
||||
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
|
||||
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 4
|
||||
@ -477,7 +476,7 @@ define protected amdgpu_kernel void @no_alias_store(ptr addrspace(1) %in, ptr ad
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -543,7 +542,7 @@ define protected amdgpu_kernel void @no_alias_volatile_store(ptr addrspace(1) %i
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -573,7 +572,7 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(ptr addrspace(1
|
||||
; CHECK-LABEL: @no_alias_atomic_rmw_relaxed(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add ptr addrspace(3) @LDS, i32 5 monotonic, align 4
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -603,7 +602,7 @@ define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(ptr addrspace(1) %i
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -640,7 +639,7 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw(ptr addrspace(1) %in, p
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@ -791,7 +790,7 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_then_no_alias_store(ptr
|
||||
; CHECK-NEXT: fence syncscope("workgroup") release
|
||||
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
|
||||
; CHECK-NEXT: fence syncscope("workgroup") acquire
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
|
||||
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
|
||||
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user