Reapply: [AMDGPU][UnifyDivergentExitNodes][StructurizeCFG] Add support for callbr instruction with inline-asm (#152161) (#166195)
Reapply #152161 with fixed 'changed' flags.
This commit is contained in:
parent
dd45c060ff
commit
a8ea7f4580
@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
|
||||
return NewRetBlock;
|
||||
}
|
||||
|
||||
/// Append a new block named "DummyReturnBlock" to \p F whose only instruction
/// is a return, and record it in \p ReturningBlocks.
///
/// For a void function the return carries no operand; otherwise it returns a
/// poison value of the function's return type.
///
/// \returns the newly created block.
static BasicBlock *
createDummyReturnBlock(Function &F,
                       SmallVector<BasicBlock *, 4> &ReturningBlocks) {
  auto &Ctx = F.getContext();
  BasicBlock *NewRetBB = BasicBlock::Create(Ctx, "DummyReturnBlock", &F);

  // Pick the return operand: none for void, poison for everything else.
  Value *Operand = nullptr;
  if (Type *Ty = F.getReturnType(); !Ty->isVoidTy())
    Operand = PoisonValue::get(Ty);
  ReturnInst::Create(Ctx, Operand, NewRetBB);

  ReturningBlocks.push_back(NewRetBB);
  return NewRetBB;
}
|
||||
|
||||
/// Handle conditional branch instructions (-> 2 targets) and callbr
|
||||
/// instructions with N targets.
|
||||
static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
|
||||
BasicBlock *DummyReturnBB,
|
||||
std::vector<DominatorTree::UpdateType> &Updates) {
|
||||
SmallVector<BasicBlock *, 2> Successors(successors(BB));
|
||||
|
||||
// Create a new transition block to hold the conditional branch.
|
||||
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
|
||||
|
||||
Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
|
||||
|
||||
// 'Successors' become successors of TransitionBB instead of BB,
|
||||
// and TransitionBB becomes a single successor of BB.
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
|
||||
for (BasicBlock *Successor : Successors) {
|
||||
Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
|
||||
Updates.emplace_back(DominatorTree::Delete, BB, Successor);
|
||||
}
|
||||
|
||||
// Create a branch that will always branch to the transition block and
|
||||
// references DummyReturnBB.
|
||||
BB->getTerminator()->eraseFromParent();
|
||||
BranchInst::Create(TransitionBB, DummyReturnBB,
|
||||
ConstantInt::getTrue(F.getContext()), BB);
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
|
||||
}
|
||||
|
||||
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
|
||||
const PostDominatorTree &PDT,
|
||||
const UniformityInfo &UA) {
|
||||
assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
|
||||
|
||||
if (PDT.root_size() == 0 ||
|
||||
(PDT.root_size() == 1 &&
|
||||
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
|
||||
!isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
|
||||
return false;
|
||||
|
||||
// Loop over all of the blocks in a function, tracking all of the blocks that
|
||||
@ -222,46 +260,28 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
|
||||
if (HasDivergentExitBlock)
|
||||
UnreachableBlocks.push_back(BB);
|
||||
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
|
||||
|
||||
ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
|
||||
if (DummyReturnBB == nullptr) {
|
||||
DummyReturnBB =
|
||||
BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
|
||||
Type *RetTy = F.getReturnType();
|
||||
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
|
||||
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
|
||||
ReturningBlocks.push_back(DummyReturnBB);
|
||||
}
|
||||
if (!DummyReturnBB)
|
||||
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
|
||||
|
||||
if (BI->isUnconditional()) {
|
||||
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
|
||||
BI->eraseFromParent(); // Delete the unconditional branch.
|
||||
// Add a new conditional branch with a dummy edge to the return block.
|
||||
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
|
||||
} else { // Conditional branch.
|
||||
SmallVector<BasicBlock *, 2> Successors(successors(BB));
|
||||
|
||||
// Create a new transition block to hold the conditional branch.
|
||||
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
|
||||
|
||||
Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
|
||||
|
||||
// 'Successors' become successors of TransitionBB instead of BB,
|
||||
// and TransitionBB becomes a single successor of BB.
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
|
||||
for (BasicBlock *Successor : Successors) {
|
||||
Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
|
||||
Updates.emplace_back(DominatorTree::Delete, BB, Successor);
|
||||
}
|
||||
|
||||
// Create a branch that will always branch to the transition block and
|
||||
// references DummyReturnBB.
|
||||
BB->getTerminator()->eraseFromParent();
|
||||
BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
|
||||
BranchInst::Create(LoopHeaderBB, DummyReturnBB,
|
||||
ConstantInt::getTrue(F.getContext()), BB);
|
||||
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
|
||||
} else {
|
||||
handleNBranch(F, BB, BI, DummyReturnBB, Updates);
|
||||
}
|
||||
Changed = true;
|
||||
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
|
||||
if (!DummyReturnBB)
|
||||
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
|
||||
|
||||
handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
|
||||
Changed = true;
|
||||
} else {
|
||||
llvm_unreachable("unsupported block terminator");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -558,11 +558,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
|
||||
} else {
|
||||
// Test for successors as back edge
|
||||
BasicBlock *BB = N->getNodeAs<BasicBlock>();
|
||||
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
|
||||
|
||||
for (BasicBlock *Succ : Term->successors())
|
||||
if (Visited.count(Succ))
|
||||
Loops[Succ] = BB;
|
||||
if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()))
|
||||
for (BasicBlock *Succ : Term->successors())
|
||||
if (Visited.count(Succ))
|
||||
Loops[Succ] = BB;
|
||||
}
|
||||
}
|
||||
|
||||
@ -594,7 +593,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
|
||||
|
||||
for (BasicBlock *P : predecessors(BB)) {
|
||||
// Ignore it if it's a branch from outside into our region entry
|
||||
if (!ParentRegion->contains(P))
|
||||
if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
|
||||
continue;
|
||||
|
||||
Region *R = RI->getRegionFor(P);
|
||||
@ -1402,13 +1401,17 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
|
||||
/// Run the transformation for each region found
|
||||
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
|
||||
const TargetTransformInfo *TTI) {
|
||||
if (R->isTopLevelRegion())
|
||||
// CallBr and its corresponding direct target blocks are for now ignored by
|
||||
// this pass. This is not a limitation for the currently intended use cases
|
||||
// of callbr in the AMDGPU backend.
|
||||
// Parent and child regions are not affected by this (current) restriction.
|
||||
// See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
|
||||
if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
|
||||
return false;
|
||||
|
||||
this->DT = DT;
|
||||
this->TTI = TTI;
|
||||
Func = R->getEntry()->getParent();
|
||||
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
|
||||
|
||||
ParentRegion = R;
|
||||
|
||||
|
||||
@ -158,6 +158,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
|
||||
SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;
|
||||
// Redirect exiting edges through a control flow hub.
|
||||
ControlFlowHub CHub;
|
||||
bool Changed = false;
|
||||
|
||||
for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
|
||||
BasicBlock *BB = ExitingBlocks[I];
|
||||
@ -182,6 +183,10 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
|
||||
bool UpdatedLI = false;
|
||||
BasicBlock *NewSucc =
|
||||
SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI);
|
||||
// SplitCallBrEdge modifies the CFG because it creates an intermediate
|
||||
// block. So we need to set the changed flag no matter what the
|
||||
// ControlFlowHub is going to do later.
|
||||
Changed = true;
|
||||
// Even if CallBr and Succ do not have a common parent loop, we need to
|
||||
// add the new target block to the parent loop of the current loop.
|
||||
if (!UpdatedLI)
|
||||
@ -207,6 +212,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
|
||||
bool ChangedCFG;
|
||||
std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(
|
||||
&DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue());
|
||||
ChangedCFG |= Changed;
|
||||
if (!ChangedCFG)
|
||||
return false;
|
||||
|
||||
|
||||
54
llvm/test/CodeGen/AMDGPU/callbr.ll
Normal file
54
llvm/test/CodeGen/AMDGPU/callbr.ll
Normal file
@ -0,0 +1,54 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
|
||||
|
||||
define void @callbr_inline_asm(ptr %src, ptr %dst1, ptr %dst2, i32 %c) {
|
||||
; CHECK-LABEL: callbr_inline_asm:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: flat_load_dword v0, v[0:1]
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: v_cmp_gt_i32 vcc v6, 42; s_cbranch_vccnz .LBB0_2
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: ; %bb.1: ; %fallthrough
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: flat_store_dword v[2:3], v0
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
; CHECK-NEXT: .LBB0_2: ; Inline asm indirect target
|
||||
; CHECK-NEXT: ; %indirect
|
||||
; CHECK-NEXT: ; Label of block must be emitted
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: flat_store_dword v[4:5], v0
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
%a = load i32, ptr %src, align 4
|
||||
callbr void asm "v_cmp_gt_i32 vcc $0, 42; s_cbranch_vccnz ${1:l}", "r,!i"(i32 %c) to label %fallthrough [label %indirect]
|
||||
fallthrough:
|
||||
store i32 %a, ptr %dst1, align 4
|
||||
br label %ret
|
||||
indirect:
|
||||
store i32 %a, ptr %dst2, align 4
|
||||
br label %ret
|
||||
ret:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @callbr_self_loop(i1 %c) {
|
||||
; CHECK-LABEL: callbr_self_loop:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: .LBB1_1: ; %callbr
|
||||
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: s_branch .LBB1_1
|
||||
; CHECK-NEXT: .LBB1_2: ; Inline asm indirect target
|
||||
; CHECK-NEXT: ; %callbr.target.ret
|
||||
; CHECK-NEXT: ; Label of block must be emitted
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
br label %callbr
|
||||
callbr:
|
||||
callbr void asm "", "!i"() to label %callbr [label %ret]
|
||||
ret:
|
||||
ret void
|
||||
}
|
||||
@ -3,6 +3,7 @@
|
||||
|
||||
declare void @foo(ptr)
|
||||
declare i1 @bar(ptr)
|
||||
declare i32 @bar32(ptr)
|
||||
|
||||
define void @musttail_call_without_return_value(ptr %p) {
|
||||
; CHECK-LABEL: define void @musttail_call_without_return_value(
|
||||
@ -28,6 +29,31 @@ bb.1:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @musttail_call_without_return_value_callbr(ptr %p) {
|
||||
; CHECK-LABEL: define void @musttail_call_without_return_value_callbr(
|
||||
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
|
||||
; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
|
||||
; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
|
||||
; CHECK: [[BB_0]]:
|
||||
; CHECK-NEXT: musttail call void @foo(ptr [[P]])
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: [[BB_1:.*:]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%load = load i32, ptr %p, align 1
|
||||
callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]
|
||||
|
||||
bb.0:
|
||||
musttail call void @foo(ptr %p)
|
||||
ret void
|
||||
|
||||
bb.1:
|
||||
ret void
|
||||
}
|
||||
|
||||
define i1 @musttail_call_with_return_value(ptr %p) {
|
||||
; CHECK-LABEL: define i1 @musttail_call_with_return_value(
|
||||
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
|
||||
@ -51,3 +77,28 @@ bb.0:
|
||||
bb.1:
|
||||
ret i1 %load
|
||||
}
|
||||
|
||||
define i32 @musttail_call_with_return_value_callbr(ptr %p) {
|
||||
; CHECK-LABEL: define i32 @musttail_call_with_return_value_callbr(
|
||||
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[ENTRY:.*:]]
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
|
||||
; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
|
||||
; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
|
||||
; CHECK: [[BB_0]]:
|
||||
; CHECK-NEXT: [[RET:%.*]] = musttail call i32 @bar32(ptr [[P]])
|
||||
; CHECK-NEXT: ret i32 [[RET]]
|
||||
; CHECK: [[BB_1:.*:]]
|
||||
; CHECK-NEXT: ret i32 [[LOAD]]
|
||||
;
|
||||
entry:
|
||||
%load = load i32, ptr %p, align 1
|
||||
callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]
|
||||
|
||||
bb.0:
|
||||
%ret = musttail call i32 @bar32(ptr %p)
|
||||
ret i32 %ret
|
||||
|
||||
bb.1:
|
||||
ret i32 %load
|
||||
}
|
||||
|
||||
@ -36,26 +36,60 @@ loop:
|
||||
br label %loop
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @infinite_loop_callbr(ptr addrspace(1) %out) {
|
||||
; SI-LABEL: infinite_loop_callbr:
|
||||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
|
||||
; SI-NEXT: ;;#ASMSTART
|
||||
; SI-NEXT: ;;#ASMEND
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loop_callbr(
|
||||
; IR-NEXT: entry:
|
||||
; IR-NEXT: callbr void asm "", ""()
|
||||
; IR-NEXT: to label [[LOOP:%.*]] []
|
||||
; IR: loop:
|
||||
; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]]
|
||||
; IR: TransitionBlock:
|
||||
; IR-NEXT: callbr void asm "", ""()
|
||||
; IR-NEXT: to label [[LOOP]] []
|
||||
; IR: DummyReturnBlock:
|
||||
; IR-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
callbr void asm "", ""() to label %loop []
|
||||
|
||||
loop:
|
||||
store volatile i32 999, ptr addrspace(1) %out, align 4
|
||||
callbr void asm "", ""() to label %loop []
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @infinite_loop_ret(ptr addrspace(1) %out) {
|
||||
; SI-LABEL: infinite_loop_ret:
|
||||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; SI-NEXT: s_cbranch_execz .LBB1_3
|
||||
; SI-NEXT: s_cbranch_execz .LBB2_3
|
||||
; SI-NEXT: ; %bb.1: ; %loop.preheader
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: s_and_b64 vcc, exec, -1
|
||||
; SI-NEXT: .LBB1_2: ; %loop
|
||||
; SI-NEXT: .LBB2_2: ; %loop
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccnz .LBB1_2
|
||||
; SI-NEXT: .LBB1_3: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: s_cbranch_vccnz .LBB2_2
|
||||
; SI-NEXT: .LBB2_3: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loop_ret(
|
||||
; IR-NEXT: entry:
|
||||
@ -81,44 +115,93 @@ return:
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @infinite_loop_ret_callbr(ptr addrspace(1) %out) {
|
||||
; SI-LABEL: infinite_loop_ret_callbr:
|
||||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
||||
; SI-NEXT: ;;#ASMSTART
|
||||
; SI-NEXT: ;;#ASMEND
|
||||
; SI-NEXT: ; %bb.1: ; %loop.preheader
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: .LBB3_2: ; Inline asm indirect target
|
||||
; SI-NEXT: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: ; Label of block must be emitted
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loop_ret_callbr(
|
||||
; IR-NEXT: entry:
|
||||
; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
; IR-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP]], 1
|
||||
; IR-NEXT: [[COND32:%.*]] = zext i1 [[COND]] to i32
|
||||
; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND32]])
|
||||
; IR-NEXT: to label [[LOOP:%.*]] [label %UnifiedReturnBlock]
|
||||
; IR: loop:
|
||||
; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
|
||||
; IR: TransitionBlock:
|
||||
; IR-NEXT: callbr void asm "", ""()
|
||||
; IR-NEXT: to label [[LOOP]] []
|
||||
; IR: UnifiedReturnBlock:
|
||||
; IR-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%cond = icmp eq i32 %tmp, 1
|
||||
%cond32 = zext i1 %cond to i32
|
||||
callbr void asm "", "r,!i"(i32 %cond32) to label %loop [label %return]
|
||||
|
||||
loop:
|
||||
store volatile i32 999, ptr addrspace(1) %out, align 4
|
||||
callbr void asm "", ""() to label %loop []
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @infinite_loops(ptr addrspace(1) %out) {
|
||||
; SI-LABEL: infinite_loops:
|
||||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
|
||||
; SI-NEXT: s_mov_b64 s[2:3], -1
|
||||
; SI-NEXT: s_cbranch_scc1 .LBB2_4
|
||||
; SI-NEXT: s_cbranch_scc1 .LBB4_4
|
||||
; SI-NEXT: ; %bb.1:
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x378
|
||||
; SI-NEXT: s_and_b64 vcc, exec, -1
|
||||
; SI-NEXT: .LBB2_2: ; %loop2
|
||||
; SI-NEXT: .LBB4_2: ; %loop2
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccnz .LBB2_2
|
||||
; SI-NEXT: s_cbranch_vccnz .LBB4_2
|
||||
; SI-NEXT: ; %bb.3: ; %Flow
|
||||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: .LBB2_4: ; %Flow2
|
||||
; SI-NEXT: .LBB4_4: ; %Flow2
|
||||
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccz .LBB2_7
|
||||
; SI-NEXT: s_cbranch_vccz .LBB4_7
|
||||
; SI-NEXT: ; %bb.5:
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: s_waitcnt expcnt(0)
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: s_and_b64 vcc, exec, 0
|
||||
; SI-NEXT: .LBB2_6: ; %loop1
|
||||
; SI-NEXT: .LBB4_6: ; %loop1
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 vcc, vcc
|
||||
; SI-NEXT: s_cbranch_vccz .LBB2_6
|
||||
; SI-NEXT: .LBB2_7: ; %DummyReturnBlock
|
||||
; SI-NEXT: s_cbranch_vccz .LBB4_6
|
||||
; SI-NEXT: .LBB4_7: ; %DummyReturnBlock
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loops(
|
||||
; IR-NEXT: entry:
|
||||
@ -144,24 +227,78 @@ loop2:
|
||||
br label %loop2
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @infinite_loops_callbr(ptr addrspace(1) %out) {
|
||||
; SI-LABEL: infinite_loops_callbr:
|
||||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: ;;#ASMSTART
|
||||
; SI-NEXT: ;;#ASMEND
|
||||
; SI-NEXT: ; %bb.1: ; %loop1
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_endpgm
|
||||
; SI-NEXT: .LBB5_2: ; Inline asm indirect target
|
||||
; SI-NEXT: ; %loop2.preheader
|
||||
; SI-NEXT: ; Label of block must be emitted
|
||||
; SI-NEXT: s_mov_b32 s3, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s2, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x378
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loops_callbr(
|
||||
; IR-NEXT: entry:
|
||||
; IR-NEXT: callbr void asm "", "r,!i"(i32 poison)
|
||||
; IR-NEXT: to label [[LOOP1:%.*]] [label %loop2]
|
||||
; IR: loop1:
|
||||
; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]]
|
||||
; IR: TransitionBlock:
|
||||
; IR-NEXT: callbr void asm "", ""()
|
||||
; IR-NEXT: to label [[LOOP1]] []
|
||||
; IR: loop2:
|
||||
; IR-NEXT: store volatile i32 888, ptr addrspace(1) [[OUT]], align 4
|
||||
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK1:%.*]], label [[DUMMYRETURNBLOCK]]
|
||||
; IR: TransitionBlock1:
|
||||
; IR-NEXT: callbr void asm "", ""()
|
||||
; IR-NEXT: to label [[LOOP2:%.*]] []
|
||||
; IR: DummyReturnBlock:
|
||||
; IR-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
callbr void asm "", "r,!i"(i32 poison) to label %loop1 [label %loop2]
|
||||
|
||||
loop1:
|
||||
store volatile i32 999, ptr addrspace(1) %out, align 4
|
||||
callbr void asm "", ""() to label %loop1 []
|
||||
|
||||
loop2:
|
||||
store volatile i32 888, ptr addrspace(1) %out, align 4
|
||||
callbr void asm "", ""() to label %loop2 []
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
|
||||
; SI-LABEL: infinite_loop_nest_ret:
|
||||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
|
||||
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; SI-NEXT: s_cbranch_execz .LBB3_5
|
||||
; SI-NEXT: s_cbranch_execz .LBB6_5
|
||||
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
|
||||
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
|
||||
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
|
||||
; SI-NEXT: s_mov_b32 s7, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s6, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: .LBB3_2: ; %outer_loop
|
||||
; SI-NEXT: .LBB6_2: ; %outer_loop
|
||||
; SI-NEXT: ; =>This Loop Header: Depth=1
|
||||
; SI-NEXT: ; Child Loop BB3_3 Depth 2
|
||||
; SI-NEXT: ; Child Loop BB6_3 Depth 2
|
||||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: .LBB3_3: ; %inner_loop
|
||||
; SI-NEXT: ; Parent Loop BB3_2 Depth=1
|
||||
; SI-NEXT: .LBB6_3: ; %inner_loop
|
||||
; SI-NEXT: ; Parent Loop BB6_2 Depth=1
|
||||
; SI-NEXT: ; => This Inner Loop Header: Depth=2
|
||||
; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1]
|
||||
; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
|
||||
@ -169,13 +306,13 @@ define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_execnz .LBB3_3
|
||||
; SI-NEXT: s_cbranch_execnz .LBB6_3
|
||||
; SI-NEXT: ; %bb.4: ; %loop.exit.guard
|
||||
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
|
||||
; SI-NEXT: ; in Loop: Header=BB6_2 Depth=1
|
||||
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
|
||||
; SI-NEXT: s_mov_b64 vcc, 0
|
||||
; SI-NEXT: s_branch .LBB3_2
|
||||
; SI-NEXT: .LBB3_5: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: s_branch .LBB6_2
|
||||
; SI-NEXT: .LBB6_5: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loop_nest_ret(
|
||||
; IR-NEXT: entry:
|
||||
@ -212,4 +349,82 @@ return:
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @infinite_loop_nest_ret_callbr(ptr addrspace(1) %out) {
|
||||
; SI-LABEL: infinite_loop_nest_ret_callbr:
|
||||
; SI: ; %bb.0: ; %entry
|
||||
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
|
||||
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
|
||||
; SI-NEXT: ;;#ASMSTART
|
||||
; SI-NEXT: ;;#ASMEND
|
||||
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
|
||||
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
|
||||
; SI-NEXT: s_mov_b32 s7, 0xf000
|
||||
; SI-NEXT: s_mov_b32 s6, -1
|
||||
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
|
||||
; SI-NEXT: s_and_b64 s[0:1], exec, 0
|
||||
; SI-NEXT: s_branch .LBB7_3
|
||||
; SI-NEXT: .LBB7_2: ; %loop.exit.guard
|
||||
; SI-NEXT: ; in Loop: Header=BB7_3 Depth=1
|
||||
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
|
||||
; SI-NEXT: s_cbranch_vccnz .LBB7_5
|
||||
; SI-NEXT: .LBB7_3: ; %outer_loop
|
||||
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; SI-NEXT: ;;#ASMSTART
|
||||
; SI-NEXT: ;;#ASMEND
|
||||
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SI-NEXT: s_waitcnt vmcnt(0)
|
||||
; SI-NEXT: s_mov_b64 s[2:3], -1
|
||||
; SI-NEXT: s_mov_b64 vcc, s[0:1]
|
||||
; SI-NEXT: s_cbranch_vccz .LBB7_2
|
||||
; SI-NEXT: ; %bb.4: ; %TransitionBlock.target.outer_loop
|
||||
; SI-NEXT: ; in Loop: Header=BB7_3 Depth=1
|
||||
; SI-NEXT: s_mov_b64 s[2:3], 0
|
||||
; SI-NEXT: s_branch .LBB7_2
|
||||
; SI-NEXT: .LBB7_5: ; Inline asm indirect target
|
||||
; SI-NEXT: ; %UnifiedReturnBlock
|
||||
; SI-NEXT: ; Label of block must be emitted
|
||||
; SI-NEXT: s_endpgm
|
||||
; IR-LABEL: @infinite_loop_nest_ret_callbr(
|
||||
; IR-NEXT: entry:
|
||||
; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
; IR-NEXT: [[COND1:%.*]] = icmp ne i32 [[TMP]], 1
|
||||
; IR-NEXT: [[COND1_32:%.*]] = zext i1 [[COND1]] to i32
|
||||
; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND1_32]])
|
||||
; IR-NEXT: to label [[OUTER_LOOP:%.*]] [label %UnifiedReturnBlock]
|
||||
; IR: outer_loop:
|
||||
; IR-NEXT: callbr void asm "", ""()
|
||||
; IR-NEXT: to label [[INNER_LOOP:%.*]] []
|
||||
; IR: inner_loop:
|
||||
; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
|
||||
; IR-NEXT: [[COND3:%.*]] = icmp eq i32 [[TMP]], 3
|
||||
; IR-NEXT: [[COND3_32:%.*]] = zext i1 [[COND3]] to i32
|
||||
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
|
||||
; IR: TransitionBlock:
|
||||
; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND3_32]])
|
||||
; IR-NEXT: to label [[INNER_LOOP]] [label %outer_loop]
|
||||
; IR: UnifiedReturnBlock:
|
||||
; IR-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%cond1 = icmp ne i32 %tmp, 1 ; avoid following BB optimizing away through the domination
|
||||
%cond1_32 = zext i1 %cond1 to i32
|
||||
callbr void asm "", "r,!i"(i32 %cond1_32) to label %outer_loop [label %return]
|
||||
|
||||
outer_loop:
|
||||
; %cond2 = icmp eq i32 %tmp, 2
|
||||
; br i1 %cond2, label %outer_loop, label %inner_loop
|
||||
callbr void asm "", ""() to label %inner_loop []
|
||||
|
||||
inner_loop: ; preds = %LeafBlock, %LeafBlock1
|
||||
store volatile i32 999, ptr addrspace(1) %out, align 4
|
||||
%cond3 = icmp eq i32 %tmp, 3
|
||||
%cond3_32 = zext i1 %cond3 to i32
|
||||
callbr void asm "", "r,!i"(i32 %cond3_32) to label %inner_loop [label %outer_loop]
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
||||
@ -3,15 +3,16 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=ISA
|
||||
|
||||
define void @nested_inf_loop(i1 %0, i1 %1) {
|
||||
; OPT-LABEL: @nested_inf_loop(
|
||||
; OPT-NEXT: BB:
|
||||
; OPT-NEXT: br label [[BB1:%.*]]
|
||||
; OPT: BB1:
|
||||
; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0:%.*]], i1 true, i1 [[TMP1:%.*]]
|
||||
; OPT-NEXT: br i1 [[BRMERGE]], label [[BB1]], label [[INFLOOP:%.*]]
|
||||
; OPT: infloop:
|
||||
; OPT-NEXT: br i1 true, label [[INFLOOP]], label [[DUMMYRETURNBLOCK:%.*]]
|
||||
; OPT: DummyReturnBlock:
|
||||
; OPT-LABEL: define void @nested_inf_loop(
|
||||
; OPT-SAME: i1 [[TMP0:%.*]], i1 [[TMP1:%.*]]) {
|
||||
; OPT-NEXT: [[BB:.*:]]
|
||||
; OPT-NEXT: br label %[[BB1:.*]]
|
||||
; OPT: [[BB1]]:
|
||||
; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0]], i1 true, i1 [[TMP1]]
|
||||
; OPT-NEXT: br i1 [[BRMERGE]], label %[[BB1]], label %[[INFLOOP:.*]]
|
||||
; OPT: [[INFLOOP]]:
|
||||
; OPT-NEXT: br i1 true, label %[[INFLOOP]], label %[[DUMMYRETURNBLOCK:.*]]
|
||||
; OPT: [[DUMMYRETURNBLOCK]]:
|
||||
; OPT-NEXT: ret void
|
||||
;
|
||||
; ISA-LABEL: nested_inf_loop:
|
||||
@ -63,3 +64,84 @@ BB4:
|
||||
BB3:
|
||||
br label %BB1
|
||||
}
|
||||
|
||||
define void @nested_inf_loop_callbr(i32 %0, i32 %1) {
|
||||
; OPT-LABEL: define void @nested_inf_loop_callbr(
|
||||
; OPT-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
|
||||
; OPT-NEXT: [[BB:.*:]]
|
||||
; OPT-NEXT: callbr void asm "", ""()
|
||||
; OPT-NEXT: to label %[[BB1:.*]] []
|
||||
; OPT: [[BB1]]:
|
||||
; OPT-NEXT: callbr void asm "", "r,!i"(i32 [[TMP0]])
|
||||
; OPT-NEXT: to label %[[BB3:.*]] [label %BB2]
|
||||
; OPT: [[BB2:.*:]]
|
||||
; OPT-NEXT: callbr void asm "", ""()
|
||||
; OPT-NEXT: to label %[[BB4:.*]] []
|
||||
; OPT: [[BB4]]:
|
||||
; OPT-NEXT: br i1 true, label %[[TRANSITIONBLOCK:.*]], label %[[DUMMYRETURNBLOCK:.*]]
|
||||
; OPT: [[TRANSITIONBLOCK]]:
|
||||
; OPT-NEXT: callbr void asm "", "r,!i"(i32 [[TMP1]])
|
||||
; OPT-NEXT: to label %[[BB3]] [label %BB4]
|
||||
; OPT: [[BB3]]:
|
||||
; OPT-NEXT: callbr void asm "", ""()
|
||||
; OPT-NEXT: to label %[[BB1]] []
|
||||
; OPT: [[DUMMYRETURNBLOCK]]:
|
||||
; OPT-NEXT: ret void
|
||||
;
|
||||
; ISA-LABEL: nested_inf_loop_callbr:
|
||||
; ISA: ; %bb.0: ; %BB
|
||||
; ISA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; ISA-NEXT: ;;#ASMSTART
|
||||
; ISA-NEXT: ;;#ASMEND
|
||||
; ISA-NEXT: ; implicit-def: $sgpr6_sgpr7
|
||||
; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
|
||||
; ISA-NEXT: .LBB1_1: ; %BB1
|
||||
; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; ISA-NEXT: ;;#ASMSTART
|
||||
; ISA-NEXT: ;;#ASMEND
|
||||
; ISA-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
|
||||
; ISA-NEXT: s_and_b64 s[8:9], s[4:5], exec
|
||||
; ISA-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
|
||||
; ISA-NEXT: .LBB1_2: ; %BB3
|
||||
; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
|
||||
; ISA-NEXT: ;;#ASMSTART
|
||||
; ISA-NEXT: ;;#ASMEND
|
||||
; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
|
||||
; ISA-NEXT: s_and_b64 s[8:9], s[6:7], exec
|
||||
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
|
||||
; ISA-NEXT: s_branch .LBB1_1
|
||||
; ISA-NEXT: .LBB1_3: ; Inline asm indirect target
|
||||
; ISA-NEXT: ; %BB2
|
||||
; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
|
||||
; ISA-NEXT: ; Label of block must be emitted
|
||||
; ISA-NEXT: ;;#ASMSTART
|
||||
; ISA-NEXT: ;;#ASMEND
|
||||
; ISA-NEXT: s_mov_b64 s[6:7], -1
|
||||
; ISA-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
||||
; ISA-NEXT: s_cbranch_execz .LBB1_5
|
||||
; ISA-NEXT: ; %bb.4: ; %TransitionBlock.target.BB3
|
||||
; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
|
||||
; ISA-NEXT: s_xor_b64 s[6:7], exec, -1
|
||||
; ISA-NEXT: .LBB1_5: ; %loop.exit.guard
|
||||
; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
|
||||
; ISA-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; ISA-NEXT: s_and_b64 vcc, exec, s[6:7]
|
||||
; ISA-NEXT: s_mov_b64 s[6:7], 0
|
||||
; ISA-NEXT: s_cbranch_vccz .LBB1_2
|
||||
; ISA-NEXT: ; %bb.6: ; %DummyReturnBlock
|
||||
; ISA-NEXT: s_setpc_b64 s[30:31]
|
||||
BB:
|
||||
callbr void asm "", ""() to label %BB1 []
|
||||
|
||||
BB1:
|
||||
callbr void asm "", "r,!i"(i32 %0) to label %BB3 [label %BB2]
|
||||
|
||||
BB2:
|
||||
callbr void asm "", ""() to label %BB4 []
|
||||
|
||||
BB4:
|
||||
callbr void asm "", "r,!i"(i32 %1) to label %BB3 [label %BB4]
|
||||
|
||||
BB3:
|
||||
callbr void asm "", ""() to label %BB1 []
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt < %s -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
|
||||
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck %s
|
||||
|
||||
declare void @llvm.trap()
|
||||
@ -70,8 +70,33 @@ define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
|
||||
; CHECK-NEXT: s_mov_b64 s[2:3], -1
|
||||
; CHECK-NEXT: s_trap 2
|
||||
; CHECK-NEXT: s_branch .LBB0_4
|
||||
|
||||
|
||||
; UNIFY-LABEL: @kernel(
|
||||
; UNIFY-NEXT: entry:
|
||||
; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256
|
||||
; UNIFY-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
|
||||
; UNIFY: if.then:
|
||||
; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0
|
||||
; UNIFY-NEXT: br i1 [[CMP1]], label [[IF_END6_SINK_SPLIT:%.*]], label [[COND_FALSE:%.*]]
|
||||
; UNIFY: cond.false:
|
||||
; UNIFY-NEXT: call void @llvm.trap()
|
||||
; UNIFY-NEXT: unreachable
|
||||
; UNIFY: if.else:
|
||||
; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10
|
||||
; UNIFY-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END6:%.*]]
|
||||
; UNIFY: if.then3:
|
||||
; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0
|
||||
; UNIFY-NEXT: br i1 [[CMP1_I7]], label [[IF_END6_SINK_SPLIT]], label [[COND_FALSE_I8:%.*]]
|
||||
; UNIFY: cond.false.i8:
|
||||
; UNIFY-NEXT: call void @llvm.trap()
|
||||
; UNIFY-NEXT: unreachable
|
||||
; UNIFY: if.end6.sink.split:
|
||||
; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]]
|
||||
; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4
|
||||
; UNIFY-NEXT: br label [[IF_END6]]
|
||||
; UNIFY: if.end6:
|
||||
; UNIFY-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%cmp = icmp eq i32 %n, 256
|
||||
@ -105,5 +130,129 @@ if.end6.sink.split:
|
||||
if.end6:
|
||||
ret void
|
||||
}
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; UNIFY: {{.*}}
|
||||
|
||||
; Callbr counterpart of the control flow checked for @kernel: every edge is
; expressed as a callbr on empty inline asm. Each i1 condition is zero-extended
; to i32 and passed as the "r" input of a callbr with one indirect target;
; unconditional edges use a callbr with an empty indirect-target list.
; The UNIFY checks list the same blocks and callbr instructions as the input
; IR at the end of this function.
define amdgpu_kernel void @kernel_callbr(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
|
||||
; CHECK-LABEL: kernel_callbr:
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_load_dword s1, s[8:9], 0x10
|
||||
; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_cmpk_eq_i32 s1, 0x100
|
||||
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: ; %bb.1: ; %if.then
|
||||
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
|
||||
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: .LBB1_2: ; %if.end6.sink.split
|
||||
; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x8
|
||||
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: global_store_dword v0, v1, s[2:3]
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: .LBB1_3: ; Inline asm indirect target
|
||||
; CHECK-NEXT: ; %UnifiedReturnBlock
|
||||
; CHECK-NEXT: ; Label of block must be emitted
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; CHECK-NEXT: .LBB1_4: ; Inline asm indirect target
|
||||
; CHECK-NEXT: ; %if.else
|
||||
; CHECK-NEXT: ; Label of block must be emitted
|
||||
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: ; %bb.5: ; %if.then3
|
||||
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
|
||||
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
|
||||
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
|
||||
; CHECK-NEXT: ;;#ASMSTART
|
||||
; CHECK-NEXT: ;;#ASMEND
|
||||
; CHECK-NEXT: s_branch .LBB1_2
|
||||
; CHECK-NEXT: .LBB1_6: ; Inline asm indirect target
|
||||
; CHECK-NEXT: ; %cond.false.i8
|
||||
; CHECK-NEXT: ; Label of block must be emitted
|
||||
; CHECK-NEXT: .LBB1_7: ; Inline asm indirect target
|
||||
; CHECK-NEXT: ; %cond.false
|
||||
; CHECK-NEXT: ; Label of block must be emitted
|
||||
; CHECK-NEXT: s_trap 2
|
||||
; CHECK-NEXT: ; divergent unreachable
|
||||
; CHECK-NEXT: s_branch .LBB1_3
|
||||
; UNIFY-LABEL: @kernel_callbr(
|
||||
; UNIFY-NEXT: entry:
|
||||
; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256
|
||||
; UNIFY-NEXT: [[CMP32:%.*]] = zext i1 [[CMP]] to i32
|
||||
; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP32]])
|
||||
; UNIFY-NEXT: to label [[IF_THEN:%.*]] [label %if.else]
|
||||
; UNIFY: if.then:
|
||||
; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0
|
||||
; UNIFY-NEXT: [[CMP1_32:%.*]] = zext i1 [[CMP1]] to i32
|
||||
; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_32]])
|
||||
; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT:%.*]] [label %cond.false]
|
||||
; UNIFY: cond.false:
|
||||
; UNIFY-NEXT: call void @llvm.trap()
|
||||
; UNIFY-NEXT: unreachable
|
||||
; UNIFY: if.else:
|
||||
; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10
|
||||
; UNIFY-NEXT: [[CMP2_32:%.*]] = zext i1 [[CMP2]] to i32
|
||||
; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP2_32]])
|
||||
; UNIFY-NEXT: to label [[IF_THEN3:%.*]] [label %if.end6]
|
||||
; UNIFY: if.then3:
|
||||
; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0
|
||||
; UNIFY-NEXT: [[CMP1_I7_32:%.*]] = zext i1 [[CMP1_I7]] to i32
|
||||
; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_I7_32]])
|
||||
; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT]] [label %cond.false.i8]
|
||||
; UNIFY: cond.false.i8:
|
||||
; UNIFY-NEXT: call void @llvm.trap()
|
||||
; UNIFY-NEXT: unreachable
|
||||
; UNIFY: if.end6.sink.split:
|
||||
; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]]
|
||||
; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4
|
||||
; UNIFY-NEXT: callbr void asm "", ""()
|
||||
; UNIFY-NEXT: to label [[IF_END6:%.*]] []
|
||||
; UNIFY: if.end6:
|
||||
; UNIFY-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%cmp = icmp eq i32 %n, 256
|
||||
%cmp32 = zext i1 %cmp to i32
|
||||
callbr void asm "", "r,!i"(i32 %cmp32) to label %if.then [label %if.else]
|
||||
|
||||
if.then:
|
||||
%cmp1 = icmp eq i32 %a, 0
|
||||
%cmp1_32 = zext i1 %cmp1 to i32
|
||||
callbr void asm "", "r,!i"(i32 %cmp1_32) to label %if.end6.sink.split [label %cond.false]
|
||||
|
||||
cond.false:
|
||||
call void @llvm.trap()
|
||||
unreachable
|
||||
|
||||
if.else:
|
||||
%cmp2 = icmp ult i32 %tid, 10
|
||||
%cmp2_32 = zext i1 %cmp2 to i32
|
||||
callbr void asm "", "r,!i"(i32 %cmp2_32) to label %if.then3 [label %if.end6]
|
||||
|
||||
if.then3:
|
||||
%cmp1.i7 = icmp eq i32 %a, 0
|
||||
%cmp1.i7_32 = zext i1 %cmp1.i7 to i32
|
||||
callbr void asm "", "r,!i"(i32 %cmp1.i7_32) to label %if.end6.sink.split [label %cond.false.i8]
|
||||
|
||||
cond.false.i8:
|
||||
call void @llvm.trap()
|
||||
unreachable
|
||||
|
||||
if.end6.sink.split:
|
||||
%x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid
|
||||
store i32 %a, ptr addrspace(1) %x1, align 4
|
||||
callbr void asm "", ""() to label %if.end6 []
|
||||
|
||||
if.end6:
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -37,3 +37,42 @@ n28: ; preds = %.loopexit, %n28
|
||||
n31: ; preds =
|
||||
ret void
|
||||
}
|
||||
|
||||
; Callbr version of a loop with no exit: %.entry and %.loopexit each use a
; callbr without indirect targets, and %n28 ends in a callbr that goes either
; back to %.loopexit (fallthrough) or to itself (indirect target). %n31, the
; only block with a ret in the input, has no predecessors.
; The IR checks expect %n28 to end in `br i1 true` to a new TransitionBlock
; (which now holds the callbr) with a new DummyReturnBlock containing `ret void`
; as the other branch target.
define amdgpu_ps void @_amdgpu_ps_main_callbr() local_unnamed_addr #3 {
|
||||
; IR-LABEL: @_amdgpu_ps_main_callbr(
|
||||
; IR-NEXT: .entry:
|
||||
; IR-NEXT: callbr void asm "", ""()
|
||||
; IR-NEXT: to label [[DOTLOOPEXIT:%.*]] []
|
||||
; IR: .loopexit:
|
||||
; IR-NEXT: callbr void asm "", ""()
|
||||
; IR-NEXT: to label [[N28:%.*]] []
|
||||
; IR: n28:
|
||||
; IR-NEXT: [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]
|
||||
; IR-NEXT: [[N29]] = fadd float [[DOT01]], 1.000000e+00
|
||||
; IR-NEXT: [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00
|
||||
; IR-NEXT: [[N30_32:%.*]] = zext i1 [[N30]] to i32
|
||||
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[DUMMYRETURNBLOCK:%.*]]
|
||||
; IR: TransitionBlock:
|
||||
; IR-NEXT: callbr void asm "", "r,!i"(i32 [[N30_32]])
|
||||
; IR-NEXT: to label [[DOTLOOPEXIT]] [label %n28]
|
||||
; IR: n31:
|
||||
; IR-NEXT: ret void
|
||||
; IR: DummyReturnBlock:
|
||||
; IR-NEXT: ret void
|
||||
;
|
||||
.entry:
|
||||
callbr void asm "", ""() to label %.loopexit []
|
||||
|
||||
.loopexit: ; preds = %n28, %.entry
|
||||
callbr void asm "", ""() to label %n28 []
|
||||
|
||||
n28: ; preds = %.loopexit, %n28
|
||||
%.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ]
|
||||
%n29 = fadd float %.01, 1.0
|
||||
%n30 = fcmp ogt float %n29, 4.000000e+00
|
||||
%n30.32 = zext i1 %n30 to i32
|
||||
callbr void asm "", "r,!i"(i32 %n30.32) to label %.loopexit [label %n28]
|
||||
|
||||
n31: ; preds =
|
||||
ret void
|
||||
}
|
||||
|
||||
235
llvm/test/Transforms/StructurizeCFG/callbr.ll
Normal file
235
llvm/test/Transforms/StructurizeCFG/callbr.ll
Normal file
@ -0,0 +1,235 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s
|
||||
|
||||
; Structurize as usual, but don't tear callbr and its destination blocks apart.
|
||||
;
|
||||
; Note: currently, callbr blocks and their corresponding target blocks
|
||||
; themselves are not handled by the structurizer.* If the CFG turns out to be
|
||||
; unstructured at the end, the CFG lowering (si-annotate-control-flow) will
|
||||
; detect this. For the currently intended use cases of callbr in the context of
|
||||
; the AMDGPU backend, this is not a limitation (cf.
|
||||
; https://discourse.llvm.org/t/rfc-add-callbr-intrinsic-support/86087).
|
||||
;
|
||||
; Note 2: while callbr and its targets remain untouched, everything else is
|
||||
; handled as usual, even if it is nested in a callbr region.
|
||||
;
|
||||
; *FIXME: this will be fixed in the future. Callbr can be handled as follows:
|
||||
; Input IR:
|
||||
; ```
|
||||
; define void @foo_callbr() {
|
||||
; callbr void asm "", "!i"() to label %fallthrough [label %indirect, ...]
|
||||
; fallthrough:
|
||||
; br label %exit
|
||||
; indirect:
|
||||
; br label %exit
|
||||
; ...
|
||||
; exit:
|
||||
; ret void
|
||||
; }
|
||||
; ```
|
||||
;
|
||||
; Output IR:
|
||||
; ```
|
||||
; define void @foo_callbr() {
|
||||
; callbr void asm "", "!i"()
|
||||
; to label %fallthrough [label %fake.indirect, label %fake.indirect1, label %fake.indirect2, ...]
|
||||
; fake.indirect: ; preds = %0
|
||||
; br label %Flow
|
||||
; fake.indirect1: ; preds = %0
|
||||
; br label %Flow
|
||||
; fake.indirect2: ; preds = %0
|
||||
; br label %Flow
|
||||
; ...
|
||||
; Flow: ; preds = %fallthrough, %fake.indirect[0-N]
|
||||
; %1 = phi i1 [ false, %fallthrough ], [ true, %fake.indirect ], [ false, %fake.indirect[1-N] ]
|
||||
; br i1 %1, label %indirect, label %Flow1
|
||||
; Flow1: ; preds = %Flow, %indirect
|
||||
; %2 = phi i1 [ false, %Flow], [ true, %fake.indirect1 ], [ false, %indirect ]
|
||||
; br i1 %2, label %indirect1, label %Flow2
|
||||
; Flow2: ; preds = %Flow1, %indirect1
|
||||
; %3 = phi i1 [ false, %Flow1 ], [ true, %fake.indirect2 ], [ false, %indirect1 ]
|
||||
; br i1 %3, label %indirect2, label %Flow3
|
||||
; ...
|
||||
; fallthrough: ; preds = %0
|
||||
; br label %Flow
|
||||
; indirect: ; preds = %Flow
|
||||
; br label %Flow1
|
||||
; indirect1: ; preds = %Flow1
|
||||
; br label %Flow2
|
||||
; indirect2: ; preds = %Flow2
|
||||
; br label %Flow3
|
||||
; ...
|
||||
; exit: ; preds = %indirectN, %FlowN
|
||||
; ret void
|
||||
; }
|
||||
; ```
|
||||
;
|
||||
; Output IR as ASCII-art:
|
||||
;          %0
|
||||
; ---------------------
|
||||
; |     |     |     |
|
||||
; v     v     v     v
|
||||
; f    f.i   f.i1  f.i2
|
||||
; |     |     |     |
|
||||
; v     v     v     v
|
||||
; ---------------------
|
||||
;        %Flow
|
||||
;          |   \
|
||||
;          |    %indirect
|
||||
;          |   /
|
||||
;       %Flow1
|
||||
;          |   \
|
||||
;          |    %indirect1
|
||||
;          |   /
|
||||
;       %Flow2
|
||||
;          |   \
|
||||
;          |    %indirect2
|
||||
;          |   /
|
||||
;        %exit
|
||||
;
|
||||
|
||||
; Only callbr, nothing to do.
; The input is a single callbr with one fallthrough target and one indirect
; target, both of which branch to %exit. The checks expect the callbr and both
; target blocks to appear unchanged.
; Note: in the autogenerated checks the capture named INDIRECT binds the
; fallthrough target (the `to label` operand); the indirect target is matched
; literally as %indirect.
|
||||
define void @callbr_simple() {
|
||||
; CHECK-LABEL: define void @callbr_simple() {
|
||||
; CHECK-NEXT: [[CALLBR:.*:]]
|
||||
; CHECK-NEXT: callbr void asm "", "!i"()
|
||||
; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
|
||||
; CHECK: [[INDIRECT]]:
|
||||
; CHECK-NEXT: br label %[[EXIT:.*]]
|
||||
; CHECK: [[INDIRECT1:.*:]]
|
||||
; CHECK-NEXT: br label %[[EXIT]]
|
||||
; CHECK: [[EXIT]]:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
callbr:
|
||||
callbr void asm "", "!i"() to label %fallthrough [label %indirect]
|
||||
fallthrough:
|
||||
br label %exit
|
||||
indirect:
|
||||
br label %exit
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Callbr nested in non-callbr: non-callbr is transformed
; The entry block's `br i1 %c` selects between %callbr and %nocallbr. The
; checks expect that branch to be rewritten (condition inverted via xor, plus a
; Flow block whose phi selects %callbr or %exit), while the callbr and its two
; target blocks appear as written in the input.
|
||||
define void @callbr_in_non_callbr(i1 %c) {
|
||||
; CHECK-LABEL: define void @callbr_in_non_callbr(
|
||||
; CHECK-SAME: i1 [[C:%.*]]) {
|
||||
; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
|
||||
; CHECK-NEXT: br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW:.*]]
|
||||
; CHECK: [[FLOW]]:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[NOCALLBR]] ], [ true, [[TMP0:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[EXIT:.*]]
|
||||
; CHECK: [[CALLBR]]:
|
||||
; CHECK-NEXT: callbr void asm "", "!i"()
|
||||
; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
|
||||
; CHECK: [[INDIRECT]]:
|
||||
; CHECK-NEXT: br label %[[EXIT]]
|
||||
; CHECK: [[INDIRECT1:.*:]]
|
||||
; CHECK-NEXT: br label %[[EXIT]]
|
||||
; CHECK: [[NOCALLBR]]:
|
||||
; CHECK-NEXT: br label %[[FLOW]]
|
||||
; CHECK: [[EXIT]]:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
br i1 %c, label %callbr, label %nocallbr
|
||||
callbr:
|
||||
callbr void asm "", "!i"() to label %fallthrough [label %indirect]
|
||||
fallthrough:
|
||||
br label %exit
|
||||
indirect:
|
||||
br label %exit
|
||||
nocallbr:
|
||||
br label %exit
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Callbr parent of non-callbr: non-callbr is transformed
; The function starts with a callbr; its fallthrough block contains a regular
; `br i1 %c` to %fallthrough1 / %fallthrough2. The checks expect that inner
; branch to be rewritten (inverted condition computed in the entry block, Flow
; and Flow1 blocks), while the callbr itself is unchanged and the indirect
; target still branches straight to the exit block.
|
||||
define void @non_callbr_in_callbr(i1 %c) {
|
||||
; CHECK-LABEL: define void @non_callbr_in_callbr(
|
||||
; CHECK-SAME: i1 [[C:%.*]]) {
|
||||
; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
|
||||
; CHECK-NEXT: callbr void asm "", "!i"()
|
||||
; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
|
||||
; CHECK: [[INDIRECT]]:
|
||||
; CHECK-NEXT: br i1 [[C_INV]], label %[[FALLTHROUGH2:.*]], label %[[FLOW:.*]]
|
||||
; CHECK: [[FLOW]]:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FALLTHROUGH2]] ], [ true, %[[INDIRECT]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label %[[FALLTHROUGH1:.*]], label %[[FLOW1:.*]]
|
||||
; CHECK: [[FALLTHROUGH1]]:
|
||||
; CHECK-NEXT: br label %[[FLOW1]]
|
||||
; CHECK: [[FALLTHROUGH2]]:
|
||||
; CHECK-NEXT: br label %[[FLOW]]
|
||||
; CHECK: [[INDIRECT1:.*:]]
|
||||
; CHECK-NEXT: br label %[[EXIT:.*]]
|
||||
; CHECK: [[FLOW1]]:
|
||||
; CHECK-NEXT: br label %[[EXIT]]
|
||||
; CHECK: [[EXIT]]:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
callbr void asm "", "!i"() to label %fallthrough [label %indirect]
|
||||
fallthrough:
|
||||
br i1 %c, label %fallthrough1, label %fallthrough2
|
||||
fallthrough1:
|
||||
br label %exit
|
||||
fallthrough2:
|
||||
br label %exit
|
||||
indirect:
|
||||
br label %exit
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Callbr surrounded by non-callbr: all three regular branches are handled
|
||||
; correctly
; The outer `br i1 %c` selects between %callbr and %nocallbr. The three regular
; branches nested below it are the `br i1` on %d (in the callbr's fallthrough
; block), on %e (in its indirect block) and on %f (in %nocallbr). The checks
; expect each of them to be routed through its own Flow block, and the callbr
; instruction itself to appear as written in the input.
|
||||
define void @callbr_nested_in_non_callbr(i1 %c, i1 %d, i1 %e, i1 %f) {
|
||||
; CHECK-LABEL: define void @callbr_nested_in_non_callbr(
|
||||
; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]], i1 [[E:%.*]], i1 [[F:%.*]]) {
|
||||
; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
|
||||
; CHECK-NEXT: br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW3:.*]]
|
||||
; CHECK: [[FLOW3]]:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FLOW:.*]] ], [ true, [[TMP0:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[RET:.*]]
|
||||
; CHECK: [[CALLBR]]:
|
||||
; CHECK-NEXT: callbr void asm "", "!i"()
|
||||
; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
|
||||
; CHECK: [[INDIRECT]]:
|
||||
; CHECK-NEXT: br i1 [[D]], label %[[FALLTHROUGH1:.*]], label %[[FLOW2:.*]]
|
||||
; CHECK: [[FALLTHROUGH1]]:
|
||||
; CHECK-NEXT: br label %[[FLOW2]]
|
||||
; CHECK: [[INDIRECT2:.*:]]
|
||||
; CHECK-NEXT: br i1 [[E]], label %[[INDIRECT1:.*]], label %[[FLOW1:.*]]
|
||||
; CHECK: [[INDIRECT1]]:
|
||||
; CHECK-NEXT: br label %[[FLOW1]]
|
||||
; CHECK: [[NOCALLBR]]:
|
||||
; CHECK-NEXT: br i1 [[F]], label %[[NOCALLBR1:.*]], label %[[FLOW]]
|
||||
; CHECK: [[NOCALLBR1]]:
|
||||
; CHECK-NEXT: br label %[[FLOW]]
|
||||
; CHECK: [[FLOW]]:
|
||||
; CHECK-NEXT: br label %[[FLOW3]]
|
||||
; CHECK: [[FLOW1]]:
|
||||
; CHECK-NEXT: br label %[[RET]]
|
||||
; CHECK: [[FLOW2]]:
|
||||
; CHECK-NEXT: br label %[[RET]]
|
||||
; CHECK: [[RET]]:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
br i1 %c, label %callbr, label %nocallbr
|
||||
callbr:
|
||||
callbr void asm "", "!i"() to label %fallthrough [label %indirect]
|
||||
fallthrough:
|
||||
br i1 %d, label %fallthrough1, label %ret
|
||||
fallthrough1:
|
||||
br label %ret
|
||||
indirect:
|
||||
br i1 %e, label %indirect1, label %ret
|
||||
indirect1:
|
||||
br label %ret
|
||||
nocallbr:
|
||||
br i1 %f, label %nocallbr1, label %ret
|
||||
nocallbr1:
|
||||
br label %ret
|
||||
ret:
|
||||
ret void
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user