[CodeGen] Drop uses of BranchInst (#186391)

Largely a straightforward replacement with occasional simplifications.

For AMDGPU, I assumed that unconditional branches are always uniform and
therefore "simplified"/changed AMDGPUAnnotateUniformValues to only
annotate conditional branches.

Target-specific FastISel only selects conditional branches;
unconditional branches are already handled by the non-target-specific
code.
This commit is contained in:
Alexis Engelke 2026-03-13 22:51:38 +01:00 committed by GitHub
parent 8885c37028
commit 01571f1b4a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
35 changed files with 168 additions and 214 deletions

View File

@ -854,8 +854,7 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
if (DT && !DT->isReachableFromEntry(BB))
continue;
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
if (isa<UncondBrInst>(SinglePred->getTerminator())) {
Changed = true;
LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
@ -885,8 +884,8 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
/// Find a destination block from BB if BB is mergeable empty block.
BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
// If this block doesn't end with an uncond branch, ignore it.
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isUnconditional())
UncondBrInst *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
if (!BI)
return nullptr;
// If the instruction before the branch (skipping debug info) isn't a phi
@ -899,7 +898,7 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
}
// Do not break infinite loops.
BasicBlock *DestBB = BI->getSuccessor(0);
BasicBlock *DestBB = BI->getSuccessor();
if (DestBB == BB)
return nullptr;
@ -1121,8 +1120,8 @@ static void replaceAllUsesWith(Value *Old, Value *New,
/// Eliminate a basic block that has only phi's and an unconditional branch in
/// it.
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor(0);
UncondBrInst *BI = cast<UncondBrInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor();
LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
<< *BB << *DestBB);
@ -1939,10 +1938,10 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
if (Pred != ICmpInst::ICMP_EQ)
return false;
// If icmp eq has users other than BranchInst and SelectInst, converting it to
// If icmp eq has users other than CondBrInst and SelectInst, converting it to
// icmp slt/sgt would introduce more redundant LLVM IR.
for (User *U : Cmp->users()) {
if (isa<BranchInst>(U))
if (isa<CondBrInst>(U))
continue;
if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
continue;
@ -1981,8 +1980,7 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
// Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
// And similarly for branches.
for (User *U : Cmp->users()) {
if (auto *BI = dyn_cast<BranchInst>(U)) {
assert(BI->isConditional() && "Must be conditional");
if (auto *BI = dyn_cast<CondBrInst>(U)) {
BI->swapSuccessors();
continue;
}
@ -3087,8 +3085,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
for (auto const &TailCallBB : TailCallBBs) {
// Make sure the call instruction is followed by an unconditional branch to
// the return block.
BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
UncondBrInst *BI = dyn_cast<UncondBrInst>(TailCallBB->getTerminator());
if (!BI || BI->getSuccessor() != BB)
continue;
// Duplicate the return into TailCallBB.
@ -7793,28 +7791,28 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
BasicBlock *TrueBlock = nullptr;
BasicBlock *FalseBlock = nullptr;
BasicBlock *EndBlock = nullptr;
BranchInst *TrueBranch = nullptr;
BranchInst *FalseBranch = nullptr;
UncondBrInst *TrueBranch = nullptr;
UncondBrInst *FalseBranch = nullptr;
if (TrueInstrs.size() == 0) {
FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
FalseBranch = cast<UncondBrInst>(SplitBlockAndInsertIfElse(
CondFr, SplitPt, false, nullptr, nullptr, LI));
FalseBlock = FalseBranch->getParent();
EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
} else if (FalseInstrs.size() == 0) {
TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
TrueBranch = cast<UncondBrInst>(SplitBlockAndInsertIfThen(
CondFr, SplitPt, false, nullptr, nullptr, LI));
TrueBlock = TrueBranch->getParent();
EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
EndBlock = TrueBranch->getSuccessor();
} else {
Instruction *ThenTerm = nullptr;
Instruction *ElseTerm = nullptr;
SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
nullptr, nullptr, LI);
TrueBranch = cast<BranchInst>(ThenTerm);
FalseBranch = cast<BranchInst>(ElseTerm);
TrueBranch = cast<UncondBrInst>(ThenTerm);
FalseBranch = cast<UncondBrInst>(ElseTerm);
TrueBlock = TrueBranch->getParent();
FalseBlock = FalseBranch->getParent();
EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
EndBlock = TrueBranch->getSuccessor();
}
EndBlock->setName("select.end");
@ -9304,7 +9302,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
continue;
auto *Br1 = cast<BranchInst>(BB.getTerminator());
auto *Br1 = cast<CondBrInst>(BB.getTerminator());
if (Br1->getMetadata(LLVMContext::MD_unpredictable))
continue;

View File

@ -1739,8 +1739,8 @@ bool ComplexDeinterleavingGraph::collectPotentialReductions(BasicBlock *B) {
if (Factor != 2)
return false;
auto *Br = dyn_cast<BranchInst>(B->getTerminator());
if (!Br || Br->getNumSuccessors() != 2)
auto *Br = dyn_cast<CondBrInst>(B->getTerminator());
if (!Br)
return false;
// Identify simple one-block loop
@ -2472,7 +2472,7 @@ void ComplexDeinterleavingGraph::processReductionOperation(
auto *FinalReductionReal = ReductionInfo[Real].second;
auto *FinalReductionImag = ReductionInfo[Imag].second;
auto *Br = cast<BranchInst>(BackEdge->getTerminator());
auto *Br = cast<CondBrInst>(BackEdge->getTerminator());
BasicBlock *ExitBB = Br->getSuccessor(Br->getSuccessor(0) == BackEdge);
Builder.SetInsertPoint(&*ExitBB->getFirstInsertionPt());

View File

@ -278,7 +278,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
// that feeds the _Unwind_Resume call.
for (ResumeInst *RI : Resumes) {
BasicBlock *Parent = RI->getParent();
BranchInst::Create(UnwindBB, Parent);
UncondBrInst::Create(UnwindBB, Parent);
Updates.push_back({DominatorTree::Insert, Parent, UnwindBB});
Value *ExnObj = GetExceptionObject(RI);

View File

@ -390,17 +390,14 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
// next LoadCmpBlock,
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
ConstantInt::get(Diff->getType(), 0));
BranchInst *CmpBr =
BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
Builder.Insert(CmpBr);
Builder.CreateCondBr(Cmp, EndBlock, LoadCmpBlocks[BlockIndex + 1]);
if (DTU)
DTU->applyUpdates(
{{DominatorTree::Insert, BB, EndBlock},
{DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
} else {
// The last block has an unconditional branch to EndBlock.
BranchInst *CmpBr = BranchInst::Create(EndBlock);
Builder.Insert(CmpBr);
Builder.CreateBr(EndBlock);
if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
}
@ -488,10 +485,9 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
// Early exit branch if difference found to ResultBlock. Otherwise,
// continue to next LoadCmpBlock or EndBlock.
BasicBlock *BB = Builder.GetInsertBlock();
BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
CondBrInst *CmpBr = Builder.CreateCondBr(Cmp, ResBlock.BB, NextBB);
setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
CI->getFunction());
Builder.Insert(CmpBr);
if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
{DominatorTree::Insert, BB, NextBB}});
@ -554,10 +550,9 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
// Early exit branch if difference found to ResultBlock. Otherwise, continue
// to next LoadCmpBlock or EndBlock.
BasicBlock *BB = Builder.GetInsertBlock();
BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
CondBrInst *CmpBr = Builder.CreateCondBr(Cmp, NextBB, ResBlock.BB);
setExplicitlyUnknownBranchWeightsIfProfiled(*CmpBr, DEBUG_TYPE,
CI->getFunction());
Builder.Insert(CmpBr);
if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
{DominatorTree::Insert, BB, ResBlock.BB}});
@ -582,8 +577,7 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
Builder.SetInsertPoint(ResBlock.BB, InsertPt);
Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
PhiRes->addIncoming(Res, ResBlock.BB);
BranchInst *NewBr = BranchInst::Create(EndBlock);
Builder.Insert(NewBr);
Builder.CreateBr(EndBlock);
if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
return;
@ -601,8 +595,7 @@ void MemCmpExpansion::emitMemCmpResultBlock() {
DEBUG_TYPE, CI->getFunction());
PhiRes->addIncoming(Res, ResBlock.BB);
BranchInst *NewBr = BranchInst::Create(EndBlock);
Builder.Insert(NewBr);
Builder.CreateBr(EndBlock);
if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
}

View File

@ -391,11 +391,8 @@ static bool CanGenerateTest(Loop *L, Value *Count) {
return false;
BasicBlock *Pred = Preheader->getSinglePredecessor();
if (!isa<BranchInst>(Pred->getTerminator()))
return false;
auto *BI = cast<BranchInst>(Pred->getTerminator());
if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
auto *BI = dyn_cast<CondBrInst>(Pred->getTerminator());
if (!BI || !isa<ICmpInst>(BI->getCondition()))
return false;
// Check that the icmp is checking for equality of Count and zero and that
@ -453,7 +450,7 @@ Value *HardwareLoop::InitLoopCount() {
BasicBlock *BB = L->getLoopPreheader();
if (UseLoopGuard && BB->getSinglePredecessor() &&
cast<BranchInst>(BB->getTerminator())->isUnconditional()) {
isa<UncondBrInst>(BB->getTerminator())) {
BasicBlock *Predecessor = BB->getSinglePredecessor();
// If it's not safe to create a while loop then don't force it and create a
// do-while loop instead
@ -503,13 +500,9 @@ Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
// Use the return value of the intrinsic to control the entry of the loop.
if (UseLoopGuard) {
assert((isa<BranchInst>(BeginBB->getTerminator()) &&
cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
"Expected conditional branch");
Value *SetCount =
UsePhi ? Builder.CreateExtractValue(LoopSetup, 1) : LoopSetup;
auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
auto *LoopGuard = cast<CondBrInst>(BeginBB->getTerminator());
LoopGuard->setCondition(SetCount);
if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
LoopGuard->swapSuccessors();

View File

@ -241,7 +241,7 @@ bool runImpl(Function &F, const TargetLowering *TLI, DomTreeUpdater *DTU) {
Updates.reserve(IndirectBrs.size() + 2 * IndirectBrSuccs.size());
for (auto *IBr : IndirectBrs) {
SwitchPN->addIncoming(GetSwitchValue(IBr), IBr->getParent());
BranchInst::Create(SwitchBB, IBr->getIterator());
UncondBrInst::Create(SwitchBB, IBr->getIterator());
if (DTU) {
Updates.push_back({DominatorTree::Insert, IBr->getParent(), SwitchBB});
for (BasicBlock *SuccBB : IBr->successors())

View File

@ -624,11 +624,11 @@ static bool expandCondLoop(Function &Intr) {
for (User *U : llvm::make_early_inc_range(Intr.users())) {
auto *Call = cast<CallInst>(U);
auto *Br = cast<BranchInst>(
auto *Br = cast<UncondBrInst>(
SplitBlockAndInsertIfThen(Call->getArgOperand(0), Call, false,
getExplicitlyUnknownBranchWeightsIfProfiled(
*Call->getFunction(), DEBUG_TYPE)));
Br->setSuccessor(0, Br->getParent());
Br->setSuccessor(Br->getParent());
Call->eraseFromParent();
}
return true;
@ -641,8 +641,8 @@ static bool expandLoopTrap(Function &Intr) {
std::all_of(Call->getParent()->begin(), BasicBlock::iterator(Call),
[](Instruction &I) { return !I.mayHaveSideEffects(); })) {
for (auto *BB : predecessors(Call->getParent())) {
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || BI->isUnconditional())
auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
if (!BI)
continue;
IRBuilder<> B(BI);
Value *Cond;

View File

@ -659,7 +659,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
// These are the new basic blocks for the conditional branch.
// At least one will become an actual new basic block.
BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
UncondBrInst *TrueBranch = nullptr, *FalseBranch = nullptr;
// Checks if select-like instruction would materialise on the given branch
auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
for (auto &SL : SG.Selects) {
@ -671,7 +671,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink",
EndBlock->getParent(), EndBlock);
TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
TrueBranch = UncondBrInst::Create(EndBlock, TrueBlock);
TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
for (Instruction *TrueInst : TrueSlicesInterleaved)
TrueInst->moveBefore(TrueBranch->getIterator());
@ -680,7 +680,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
FalseBlock =
BasicBlock::Create(EndBlock->getContext(), "select.false.sink",
EndBlock->getParent(), EndBlock);
FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
FalseBranch = UncondBrInst::Create(EndBlock, FalseBlock);
FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
for (Instruction *FalseInst : FalseSlicesInterleaved)
FalseInst->moveBefore(FalseBranch->getIterator());
@ -693,7 +693,7 @@ void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false",
EndBlock->getParent(), EndBlock);
auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
auto *FalseBranch = UncondBrInst::Create(EndBlock, FalseBlock);
FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
}

View File

@ -2529,15 +2529,9 @@ static bool collectInstructionDeps(
}
bool SelectionDAGBuilder::shouldKeepJumpConditionsTogether(
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
const FunctionLoweringInfo &FuncInfo, const CondBrInst &I,
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
TargetLoweringBase::CondMergingParams Params) const {
if (I.getNumSuccessors() != 2)
return false;
if (!I.isConditional())
return false;
if (Params.BaseCost < 0)
return false;

View File

@ -46,12 +46,12 @@ class AtomicCmpXchgInst;
class AtomicRMWInst;
class AssumptionCache;
class BasicBlock;
class BranchInst;
class CallInst;
class CallBrInst;
class CatchPadInst;
class CatchReturnInst;
class CatchSwitchInst;
class CondBrInst;
class CleanupPadInst;
class CleanupReturnInst;
class Constant;
@ -398,7 +398,7 @@ public:
}
bool shouldKeepJumpConditionsTogether(
const FunctionLoweringInfo &FuncInfo, const BranchInst &I,
const FunctionLoweringInfo &FuncInfo, const CondBrInst &I,
Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs,
TargetLoweringBase::CondMergingParams Params) const;

View File

@ -385,7 +385,7 @@ bool SjLjEHPrepareImpl::setupEntryBlockAndCallSites(Function &F) {
if (Function *Callee = II->getCalledFunction())
if (Callee->getIntrinsicID() == Intrinsic::donothing) {
// Remove the NOP invoke.
BranchInst::Create(II->getNormalDest(), II->getIterator());
UncondBrInst::Create(II->getNormalDest(), II->getIterator());
II->eraseFromParent();
continue;
}

View File

@ -733,7 +733,7 @@ bool InsertStackProtectors(const TargetLowering &TLI,
/*Unreachable=*/false, Weights, DTU,
/*LI=*/nullptr, /*ThenBlock=*/FailBB);
auto *BI = cast<BranchInst>(Cmp->getParent()->getTerminator());
auto *BI = cast<CondBrInst>(Cmp->getParent()->getTerminator());
BasicBlock *NewBB = BI->getSuccessor(1);
NewBB->setName("SP_return");
NewBB->moveAfter(&BB);

View File

@ -1396,12 +1396,12 @@ void WinEHPrepareImpl::replaceUseWithLoad(
// br label %PHIBlock
// So move the terminators to each others' blocks and swap their
// successors.
BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
UncondBrInst *Goto = cast<UncondBrInst>(IncomingBlock->getTerminator());
Goto->removeFromParent();
CatchRet->removeFromParent();
CatchRet->insertInto(IncomingBlock, IncomingBlock->end());
Goto->insertInto(NewBlock, NewBlock->end());
Goto->setSuccessor(0, PHIBlock);
Goto->setSuccessor(PHIBlock);
CatchRet->setSuccessor(NewBlock);
// Update the color mapping for the newly split edge.
// Grab a reference to the ColorVector to be inserted before getting the

View File

@ -211,7 +211,7 @@ private:
bool WantResult = true);
// Emit functions.
bool emitCompareAndBranch(const BranchInst *BI);
bool emitCompareAndBranch(const CondBrInst *BI);
bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
@ -2251,7 +2251,7 @@ static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
}
/// Try to emit a combined compare-and-branch instruction.
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
bool AArch64FastISel::emitCompareAndBranch(const CondBrInst *BI) {
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
@ -2377,12 +2377,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
}
bool AArch64FastISel::selectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
if (BI->isUnconditional()) {
MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0));
fastEmitBranch(MSucc, BI->getDebugLoc());
return true;
}
const CondBrInst *BI = cast<CondBrInst>(I);
MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));

View File

@ -5242,7 +5242,7 @@ static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
return false;
if (any_of(Blocks, [](BasicBlock *BB) {
return !isa<BranchInst>(BB->getTerminator());
return !isa<UncondBrInst, CondBrInst>(BB->getTerminator());
}))
return false;
@ -5371,10 +5371,9 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
// Try to runtime-unroll loops with early-continues depending on loop-varying
// loads; this helps with branch-prediction for the early-continues.
auto *Term = dyn_cast<BranchInst>(Header->getTerminator());
auto *Term = dyn_cast<CondBrInst>(Header->getTerminator());
SmallVector<BasicBlock *> Preds(predecessors(Latch));
if (!Term || !Term->isConditional() || Preds.size() == 1 ||
!llvm::is_contained(Preds, Header) ||
if (!Term || Preds.size() == 1 || !llvm::is_contained(Preds, Header) ||
none_of(Preds, [L](BasicBlock *Pred) { return L->contains(Pred); }))
return;
@ -6530,8 +6529,7 @@ bool AArch64TTIImpl::shouldTreatInstructionLikeSelect(
// break point in the code - the end of a block with an unconditional
// terminator.
if (I->getOpcode() == Instruction::Or &&
isa<BranchInst>(I->getNextNode()) &&
cast<BranchInst>(I->getNextNode())->isUnconditional())
isa<UncondBrInst>(I->getNextNode()))
return true;
if (I->getOpcode() == Instruction::Add ||

View File

@ -51,7 +51,7 @@ public:
: UA(&UA), MSSA(&MSSA), AA(&AA),
isEntryFunc(AMDGPU::isEntryFunctionCC(F.getCallingConv())) {}
void visitBranchInst(BranchInst &I);
void visitCondBrInst(CondBrInst &I);
void visitLoadInst(LoadInst &I);
bool changed() const { return Changed; }
@ -59,7 +59,7 @@ public:
} // End anonymous namespace
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
void AMDGPUAnnotateUniformValues::visitCondBrInst(CondBrInst &I) {
if (UA->isUniform(&I))
setUniformMetadata(&I);
}

View File

@ -838,7 +838,7 @@ void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
//
// OriginalBB is known to have a branch as terminator because
// SplitBlockAndInsertIfThen will have inserted one.
BranchInst *Terminator = cast<BranchInst>(OriginalBB->getTerminator());
CondBrInst *Terminator = cast<CondBrInst>(OriginalBB->getTerminator());
B.SetInsertPoint(ComputeEnd);
Terminator->removeFromParent();
B.Insert(Terminator);

View File

@ -167,8 +167,8 @@ void AMDGPUTTIImpl::getUnrollingPreferences(
// if region and potentially even PHI itself, saving on both divergence
// and registers used for the PHI.
// Add a small bonus for each of such "if" statements.
if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
if (UP.Threshold < MaxBoost && Br->isConditional()) {
if (const CondBrInst *Br = dyn_cast<CondBrInst>(&I)) {
if (UP.Threshold < MaxBoost) {
BasicBlock *Succ0 = Br->getSuccessor(0);
BasicBlock *Succ1 = Br->getSuccessor(1);
if ((L->contains(Succ0) && L->isLoopExiting(Succ0)) ||

View File

@ -164,7 +164,7 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
// Remove and delete the return inst.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(NewRetBlock, BB);
UncondBrInst::Create(NewRetBlock, BB);
Updates.emplace_back(DominatorTree::Insert, BB, NewRetBlock);
}
@ -216,8 +216,8 @@ static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
// Create a branch that will always branch to the transition block and
// references DummyReturnBB.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(TransitionBB, DummyReturnBB,
ConstantInt::getTrue(F.getContext()), BB);
CondBrInst::Create(ConstantInt::getTrue(F.getContext()), TransitionBB,
DummyReturnBB, BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
}
@ -225,8 +225,8 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
const PostDominatorTree &PDT,
const UniformityInfo &UA) {
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
!isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
(PDT.root_size() == 1 && !isa<UncondBrInst, CondBrInst, CallBrInst>(
PDT.getRoot()->getTerminator())))
return false;
// Loop over all of the blocks in a function, tracking all of the blocks that
@ -250,35 +250,32 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
PDT.roots(), [&](auto BB) { return !isUniformlyReached(UA, *BB); });
for (BasicBlock *BB : PDT.roots()) {
if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
Instruction *Term = BB->getTerminator();
if (auto *RI = dyn_cast<ReturnInst>(Term)) {
auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode());
if (CI && CI->isMustTailCall())
continue;
if (HasDivergentExitBlock)
ReturningBlocks.push_back(BB);
} else if (isa<UnreachableInst>(BB->getTerminator())) {
} else if (isa<UnreachableInst>(Term)) {
if (HasDivergentExitBlock)
UnreachableBlocks.push_back(BB);
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
} else if (UncondBrInst *BI = dyn_cast<UncondBrInst>(Term)) {
if (!DummyReturnBB)
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
if (BI->isUnconditional()) {
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
BI->eraseFromParent(); // Delete the unconditional branch.
// Add a new conditional branch with a dummy edge to the return block.
BranchInst::Create(LoopHeaderBB, DummyReturnBB,
ConstantInt::getTrue(F.getContext()), BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
} else {
handleNBranch(F, BB, BI, DummyReturnBB, Updates);
}
BasicBlock *LoopHeaderBB = BI->getSuccessor();
BI->eraseFromParent(); // Delete the unconditional branch.
// Add a new conditional branch with a dummy edge to the return block.
CondBrInst::Create(ConstantInt::getTrue(F.getContext()), LoopHeaderBB,
DummyReturnBB, BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
Changed = true;
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
} else if (isa<CondBrInst, CallBrInst>(Term)) {
if (!DummyReturnBB)
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
handleNBranch(F, BB, Term, DummyReturnBB, Updates);
Changed = true;
} else {
llvm_unreachable("unsupported block terminator");
@ -299,7 +296,7 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
for (BasicBlock *BB : UnreachableBlocks) {
// Remove and delete the unreachable inst.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(UnreachableBlock, BB);
UncondBrInst::Create(UnreachableBlock, BB);
Updates.emplace_back(DominatorTree::Insert, BB, UnreachableBlock);
}
Changed = true;

View File

@ -66,7 +66,7 @@ private:
void initialize(const GCNSubtarget &ST);
bool isUniform(BranchInst *T);
bool isUniform(CondBrInst *T);
bool isTopOfStack(BasicBlock *BB);
@ -80,15 +80,14 @@ private:
bool eraseIfUnused(PHINode *Phi);
bool openIf(BranchInst *Term);
bool openIf(CondBrInst *Term);
bool insertElse(BranchInst *Term);
bool insertElse(CondBrInst *Term);
Value *
handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
BranchInst *Term);
Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L,
CondBrInst *Term);
bool handleLoop(BranchInst *Term);
bool handleLoop(CondBrInst *Term);
bool closeControlFlow(BasicBlock *BB);
@ -128,7 +127,7 @@ void SIAnnotateControlFlow::initialize(const GCNSubtarget &ST) {
/// Is the branch condition uniform or did the StructurizeCFG pass
/// consider it as such?
bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
bool SIAnnotateControlFlow::isUniform(CondBrInst *T) {
return UA->isUniform(T) || T->hasMetadata("structurizecfg.uniform");
}
@ -184,7 +183,7 @@ bool SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
}
/// Open a new "If" block
bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
bool SIAnnotateControlFlow::openIf(CondBrInst *Term) {
if (isUniform(Term))
return false;
@ -199,7 +198,7 @@ bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
}
/// Close the last "If" block and open a new "Else" block
bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
bool SIAnnotateControlFlow::insertElse(CondBrInst *Term) {
if (isUniform(Term)) {
return false;
}
@ -215,8 +214,9 @@ bool SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// Recursively handle the condition leading to a loop
Value *SIAnnotateControlFlow::handleLoopCondition(
Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term) {
Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
llvm::Loop *L,
CondBrInst *Term) {
auto CreateBreak = [this, Cond, Broken](Instruction *I) -> CallInst * {
return IRBuilder<>(I).CreateCall(
@ -257,7 +257,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(
}
/// Handle a back edge (loop)
bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
bool SIAnnotateControlFlow::handleLoop(CondBrInst *Term) {
if (isUniform(Term))
return false;
@ -347,9 +347,9 @@ bool SIAnnotateControlFlow::run() {
E = df_end(&F->getEntryBlock());
I != E; ++I) {
BasicBlock *BB = *I;
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
CondBrInst *Term = dyn_cast<CondBrInst>(BB->getTerminator());
if (!Term || Term->isUnconditional()) {
if (!Term) {
if (isTopOfStack(BB))
Changed |= closeControlFlow(BB);

View File

@ -1268,7 +1268,7 @@ static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
}
bool ARMFastISel::SelectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
const CondBrInst *BI = cast<CondBrInst>(I);
MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));

View File

@ -218,8 +218,8 @@ bool BPFAdjustOptImpl::serializeICMPCrossBB(BasicBlock &BB) {
return false;
Instruction *TI = B2->getTerminator();
auto *BI = dyn_cast<BranchInst>(TI);
if (!BI || !BI->isConditional())
auto *BI = dyn_cast<CondBrInst>(TI);
if (!BI)
return false;
auto *Cond = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cond || &*B2->getFirstNonPHIIt() != Cond)
@ -228,8 +228,8 @@ bool BPFAdjustOptImpl::serializeICMPCrossBB(BasicBlock &BB) {
auto Cond2Op = Cond->getPredicate();
TI = B1->getTerminator();
BI = dyn_cast<BranchInst>(TI);
if (!BI || !BI->isConditional())
BI = dyn_cast<CondBrInst>(TI);
if (!BI)
return false;
Cond = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cond)

View File

@ -480,7 +480,7 @@ static void translateInstructionMetadata(Module &M) {
translateBranchMetadata(M, I);
for (auto &I : make_early_inc_range(BB)) {
if (isa<BranchInst>(I))
if (isa<UncondBrInst, CondBrInst>(I))
if (MDNode *LoopMD = I.getMetadata(MDLoopKind))
translateLoopMetadata(M, &I, LoopMD);
I.dropUnknownNonDebugMetadata(DXILCompatibleMDs);

View File

@ -2336,14 +2336,15 @@ void DXILBitcodeWriter::writeInstruction(const Instruction &I, unsigned InstID,
}
} break;
case Instruction::UncondBr:
Code = bitc::FUNC_CODE_INST_BR;
Vals.push_back(VE.getValueID(cast<UncondBrInst>(I).getSuccessor()));
break;
case Instruction::CondBr: {
Code = bitc::FUNC_CODE_INST_BR;
const BranchInst &II = cast<BranchInst>(I);
const CondBrInst &II = cast<CondBrInst>(I);
Vals.push_back(VE.getValueID(II.getSuccessor(0)));
if (II.isConditional()) {
Vals.push_back(VE.getValueID(II.getSuccessor(1)));
pushValue(II.getCondition(), InstID, Vals);
}
Vals.push_back(VE.getValueID(II.getSuccessor(1)));
pushValue(II.getCondition(), InstID, Vals);
} break;
case Instruction::Switch: {
Code = bitc::FUNC_CODE_INST_SWITCH;

View File

@ -2342,7 +2342,7 @@ bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
// instructions in it that are not involved in the original set Insts.
for (auto *B : L->blocks()) {
for (auto &In : *B) {
if (isa<BranchInst>(In))
if (isa<UncondBrInst, CondBrInst>(In))
continue;
if (!Worklist.count(&In) && In.mayHaveSideEffects())
return false;

View File

@ -950,7 +950,7 @@ bool MipsFastISel::selectStore(const Instruction *I) {
// This can cause a redundant sltiu to be generated.
// FIXME: try and eliminate this in a future patch.
bool MipsFastISel::selectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
const CondBrInst *BI = cast<CondBrInst>(I);
MachineBasicBlock *BrBB = FuncInfo.MBB;
//
// TBB is the basic block for the case where the comparison is true.

View File

@ -147,16 +147,9 @@ void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
// live is actually unreachable and can be trivially eliminated by the
// unreachable block elimination pass.
for (Use &U : From->uses()) {
if (BranchInst *BI = dyn_cast<BranchInst>(U)) {
if (BI->isUnconditional()) continue;
BasicBlock *Dest;
if (To->isZero())
// Get false block
Dest = BI->getSuccessor(1);
else
// Get true block
Dest = BI->getSuccessor(0);
BranchInst::Create(Dest, BI->getIterator());
if (CondBrInst *BI = dyn_cast<CondBrInst>(U)) {
BasicBlock *Dest = BI->getSuccessor(To->isZero() ? 1 : 0);
UncondBrInst::Create(Dest, BI->getIterator());
InstrToDelete.push_back(BI);
}
}

View File

@ -752,7 +752,7 @@ bool PPCFastISel::SelectStore(const Instruction *I) {
// Attempt to fast-select a branch instruction.
bool PPCFastISel::SelectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
const CondBrInst *BI = cast<CondBrInst>(I);
MachineBasicBlock *BrBB = FuncInfo.MBB;
MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0));
MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1));

View File

@ -386,10 +386,9 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
Instruction *TI = BB->getTerminator();
if (!TI) continue;
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
uint64_t TrueWeight = 0, FalseWeight = 0;
if (!BI->isConditional() ||
!extractBranchWeights(*BI, TrueWeight, FalseWeight))
if (!extractBranchWeights(*BI, TrueWeight, FalseWeight))
continue;
// If the exit path is more frequent than the loop path,

View File

@ -3589,8 +3589,7 @@ bool RISCVTTIImpl::shouldTreatInstructionLikeSelect(
// break point in the code - the end of a block with an unconditional
// terminator.
if (I->getOpcode() == Instruction::Or &&
isa<BranchInst>(I->getNextNode()) &&
cast<BranchInst>(I->getNextNode())->isUnconditional())
isa<UncondBrInst>(I->getNextNode()))
return true;
if (I->getOpcode() == Instruction::Add ||

View File

@ -1499,12 +1499,7 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
}
bool WebAssemblyFastISel::selectBr(const Instruction *I) {
const auto *Br = cast<BranchInst>(I);
if (Br->isUnconditional()) {
MachineBasicBlock *MSucc = FuncInfo.getMBB(Br->getSuccessor(0));
fastEmitBranch(MSucc, Br->getDebugLoc());
return true;
}
const auto *Br = cast<CondBrInst>(I);
MachineBasicBlock *TBB = FuncInfo.getMBB(Br->getSuccessor(0));
MachineBasicBlock *FBB = FuncInfo.getMBB(Br->getSuccessor(1));

View File

@ -1646,7 +1646,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
BasicBlock *OrigEntry = Entry->getNextNode();
BasicBlock *SetjmpDispatchBB =
BasicBlock::Create(C, "setjmp.dispatch", &F, OrigEntry);
cast<BranchInst>(Entry->getTerminator())->setSuccessor(0, SetjmpDispatchBB);
cast<UncondBrInst>(Entry->getTerminator())->setSuccessor(SetjmpDispatchBB);
// Create catch.dispatch.longjmp BB and a catchswitch instruction
BasicBlock *CatchDispatchLongjmpBB =

View File

@ -1643,7 +1643,7 @@ bool X86FastISel::X86SelectSExt(const Instruction *I) {
bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Unconditional branches are selected by tablegen-generated code.
// Handle a conditional branch.
const BranchInst *BI = cast<BranchInst>(I);
const CondBrInst *BI = cast<CondBrInst>(I);
MachineBasicBlock *TrueMBB = FuncInfo.getMBB(BI->getSuccessor(0));
MachineBasicBlock *FalseMBB = FuncInfo.getMBB(BI->getSuccessor(1));

View File

@ -120,8 +120,8 @@ BasicBlock *X86LowerAMXIntrinsics::createLoop(BasicBlock *Preheader,
BasicBlock::Create(Ctx, Name + ".latch", Header->getParent(), Exit);
Type *I16Ty = Type::getInt16Ty(Ctx);
BranchInst::Create(Body, Header);
BranchInst::Create(Latch, Body);
UncondBrInst::Create(Body, Header);
UncondBrInst::Create(Latch, Body);
PHINode *IV =
PHINode::Create(I16Ty, 2, Name + ".iv", Header->getTerminator()->getIterator());
IV->addIncoming(ConstantInt::get(I16Ty, 0), Preheader);
@ -129,7 +129,7 @@ BasicBlock *X86LowerAMXIntrinsics::createLoop(BasicBlock *Preheader,
B.SetInsertPoint(Latch);
Value *Inc = B.CreateAdd(IV, Step, Name + ".step");
Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond");
auto *BR = BranchInst::Create(Header, Exit, Cond, Latch);
auto *BR = CondBrInst::Create(Cond, Header, Exit, Latch);
if (!ProfcheckDisableMetadataFixes) {
if (auto *BoundInt = dyn_cast<ConstantInt>(Bound)) {
assert(Step->getZExtValue() != 0 &&
@ -144,9 +144,9 @@ BasicBlock *X86LowerAMXIntrinsics::createLoop(BasicBlock *Preheader,
}
IV->addIncoming(Inc, Latch);
BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());
BasicBlock *Tmp = PreheaderBr->getSuccessor(0);
PreheaderBr->setSuccessor(0, Header);
UncondBrInst *PreheaderBr = cast<UncondBrInst>(Preheader->getTerminator());
BasicBlock *Tmp = PreheaderBr->getSuccessor();
PreheaderBr->setSuccessor(Header);
DTU.applyUpdatesPermissive({
{DominatorTree::Delete, Preheader, Tmp},
{DominatorTree::Insert, Header, Body},

View File

@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn -mcpu=gfx900 -amdgpu-aa -amdgpu-aa-wrapper -amdgpu-annotate-uniform -S < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None < %s | FileCheck -check-prefix=GCN %s
@ -11,13 +10,13 @@
define amdgpu_kernel void @simple_barrier(ptr addrspace(1) %arg) {
; CHECK-LABEL: @simple_barrier(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0:![0-9]+]]
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
@ -55,17 +54,17 @@ bb:
define amdgpu_kernel void @memory_phi_no_clobber(ptr addrspace(1) %arg, i1 %cond) {
; CHECK-LABEL: @memory_phi_no_clobber(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br i1 %cond, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform [[META0]]
; CHECK: if.then:
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: br label [[IF_END:%.*]]
; CHECK: if.else:
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
@ -121,17 +120,17 @@ if.end:
define amdgpu_kernel void @memory_phi_clobber1(ptr addrspace(1) %arg, i1 %cond) {
; CHECK-LABEL: @memory_phi_clobber1(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br i1 %cond, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform [[META0]]
; CHECK: if.then:
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3
; CHECK-NEXT: store i32 1, ptr addrspace(1) [[GEP]], align 4
; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: br label [[IF_END:%.*]]
; CHECK: if.else:
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
@ -190,17 +189,17 @@ if.end:
define amdgpu_kernel void @memory_phi_clobber2(ptr addrspace(1) %arg, i1 %cond) {
; CHECK-LABEL: @memory_phi_clobber2(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br i1 %cond, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]], !amdgpu.uniform [[META0]]
; CHECK: if.then:
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br label [[IF_END:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: br label [[IF_END:%.*]]
; CHECK: if.else:
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3
; CHECK-NEXT: store i32 1, ptr addrspace(1) [[GEP]], align 4
; CHECK-NEXT: br label [[IF_END]], !amdgpu.uniform !0
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
@ -259,16 +258,16 @@ if.end:
define amdgpu_kernel void @no_clobbering_loop1(ptr addrspace(1) %arg, i1 %cc) {
; CHECK-LABEL: @no_clobbering_loop1(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform [[META0]]
; CHECK: end:
; CHECK-NEXT: ret void
;
@ -316,18 +315,18 @@ end:
define amdgpu_kernel void @no_clobbering_loop2(ptr addrspace(1) noalias %arg, ptr addrspace(1) noalias %out, i32 %n) {
; CHECK-LABEL: @no_clobbering_loop2(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[C:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[INC:%.*]], [[WHILE_COND]] ]
; CHECK-NEXT: [[ACC:%.*]] = phi i32 [ [[I]], [[BB]] ], [ [[I3:%.*]], [[WHILE_COND]] ]
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i32 [[C]], !amdgpu.uniform !0
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i32 [[C]], !amdgpu.uniform [[META0]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: [[I3]] = add i32 [[I2]], [[ACC]]
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[C]], 1
; CHECK-NEXT: [[CC:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
; CHECK-NEXT: br i1 [[CC]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: br i1 [[CC]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform [[META0]]
; CHECK: end:
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
@ -377,16 +376,16 @@ end:
define amdgpu_kernel void @clobbering_loop(ptr addrspace(1) %arg, ptr addrspace(1) %out, i1 %cc) {
; CHECK-LABEL: @clobbering_loop(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: br label [[WHILE_COND:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: br label [[WHILE_COND:%.*]]
; CHECK: while.cond:
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform !0
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 1, !amdgpu.uniform [[META0]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT:%.*]], i64 1
; CHECK-NEXT: store i32 [[I3]], ptr addrspace(1) [[I4]], align 4
; CHECK-NEXT: tail call void @llvm.amdgcn.wave.barrier()
; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform !0
; CHECK-NEXT: br i1 [[CC:%.*]], label [[WHILE_COND]], label [[END:%.*]], !amdgpu.uniform [[META0]]
; CHECK: end:
; CHECK-NEXT: ret void
;
@ -433,10 +432,10 @@ end:
define amdgpu_kernel void @clobber_by_atomic_load(ptr addrspace(1) %arg) {
; CHECK-LABEL: @clobber_by_atomic_load(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2, !amdgpu.uniform !0
; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[GEP]] seq_cst, align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3, !amdgpu.uniform !0
; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(1) [[ARG:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 2, !amdgpu.uniform [[META0]]
; CHECK-NEXT: [[VAL:%.*]] = load atomic i32, ptr addrspace(1) [[GEP]] seq_cst, align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 3, !amdgpu.uniform [[META0]]
; CHECK-NEXT: [[I2:%.*]] = load i32, ptr addrspace(1) [[I1]], align 4
; CHECK-NEXT: [[I3:%.*]] = add i32 [[I2]], [[I]]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[ARG]], i64 4
@ -477,7 +476,7 @@ define protected amdgpu_kernel void @no_alias_store(ptr addrspace(1) %in, ptr ad
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@ -543,7 +542,7 @@ define protected amdgpu_kernel void @no_alias_volatile_store(ptr addrspace(1) %i
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@ -573,7 +572,7 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(ptr addrspace(1
; CHECK-LABEL: @no_alias_atomic_rmw_relaxed(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add ptr addrspace(3) @LDS, i32 5 monotonic, align 4
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@ -603,7 +602,7 @@ define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(ptr addrspace(1) %i
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@ -640,7 +639,7 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw(ptr addrspace(1) %in, p
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@ -791,7 +790,7 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_then_no_alias_store(ptr
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: [[LD:%.*]] = load i32, ptr addrspace(1) [[IN:%.*]], align 4, !amdgpu.noclobber [[META0]]
; CHECK-NEXT: store i32 [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;