Reapply: [AMDGPU][UnifyDivergentExitNodes][StructurizeCFG] Add support for callbr instruction with inline-asm (#152161) (#166195)

Reapply #152161 with fixed 'changed' flags.
This commit is contained in:
Robert Imschweiler 2025-11-03 20:59:48 +01:00 committed by GitHub
parent dd45c060ff
commit a8ea7f4580
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 933 additions and 79 deletions

View File

@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
return NewRetBlock;
}
/// Create a new block named "DummyReturnBlock" in \p F that contains nothing
/// but a return instruction, and record it in \p ReturningBlocks.
///
/// When \p F returns void the return carries no operand; otherwise it returns
/// a poison value of the function's return type.
///
/// \param F               Function that receives the new block.
/// \param ReturningBlocks List the new block is appended to. Taken as
///                        SmallVectorImpl so the helper does not depend on the
///                        inline capacity chosen by the caller.
/// \returns the newly created block.
static BasicBlock *
createDummyReturnBlock(Function &F,
                       SmallVectorImpl<BasicBlock *> &ReturningBlocks) {
  BasicBlock *DummyReturnBB =
      BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
  Type *RetTy = F.getReturnType();
  // A void function needs a bare `ret`; any other type gets a poison operand.
  Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
  ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
  ReturningBlocks.push_back(DummyReturnBB);
  return DummyReturnBB;
}
/// Give a block that ends in a multi-target terminator an extra edge to the
/// dummy return block.
///
/// \p BI is the terminator of \p BB: a conditional branch (two targets) or a
/// callbr instruction (N targets). \p BB is split at \p BI so that a new
/// "TransitionBlock" holds the terminator, and \p BB is then terminated by a
/// conditional branch on constant true that targets the transition block and
/// references \p DummyReturnBB. The corresponding dominator tree updates are
/// appended to \p Updates.
static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
                          BasicBlock *DummyReturnBB,
                          std::vector<DominatorTree::UpdateType> &Updates) {
  // Remember the current targets of BB before the block is split.
  SmallVector<BasicBlock *, 2> OldTargets(successors(BB));

  // Split at the terminator; the new block holds the branch / callbr.
  BasicBlock *SplitBB = BB->splitBasicBlock(BI, "TransitionBlock");

  // Queued below: one edge BB -> SplitBB, an insert/delete pair per old
  // target, and one edge BB -> DummyReturnBB.
  Updates.reserve(Updates.size() + 2 * OldTargets.size() + 2);

  // The old targets now hang off SplitBB, and SplitBB is what BB leads to.
  Updates.emplace_back(DominatorTree::Insert, BB, SplitBB);
  for (BasicBlock *Target : OldTargets) {
    Updates.emplace_back(DominatorTree::Insert, SplitBB, Target);
    Updates.emplace_back(DominatorTree::Delete, BB, Target);
  }

  // Swap the terminator BB has after the split for a branch that is always
  // taken towards SplitBB and merely references DummyReturnBB.
  Instruction *SplitTerm = BB->getTerminator();
  SplitTerm->eraseFromParent();
  ConstantInt *AlwaysTrue = ConstantInt::getTrue(F.getContext());
  BranchInst::Create(SplitBB, DummyReturnBB, AlwaysTrue, BB);
  Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
}
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
const PostDominatorTree &PDT,
const UniformityInfo &UA) {
assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
!isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
return false;
// Loop over all of the blocks in a function, tracking all of the blocks that
@ -222,46 +260,28 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
if (HasDivergentExitBlock)
UnreachableBlocks.push_back(BB);
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
if (DummyReturnBB == nullptr) {
DummyReturnBB =
BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
Type *RetTy = F.getReturnType();
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
ReturningBlocks.push_back(DummyReturnBB);
}
if (!DummyReturnBB)
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
if (BI->isUnconditional()) {
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
BI->eraseFromParent(); // Delete the unconditional branch.
// Add a new conditional branch with a dummy edge to the return block.
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
} else { // Conditional branch.
SmallVector<BasicBlock *, 2> Successors(successors(BB));
// Create a new transition block to hold the conditional branch.
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
// 'Successors' become successors of TransitionBB instead of BB,
// and TransitionBB becomes a single successor of BB.
Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
for (BasicBlock *Successor : Successors) {
Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
Updates.emplace_back(DominatorTree::Delete, BB, Successor);
}
// Create a branch that will always branch to the transition block and
// references DummyReturnBB.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
BranchInst::Create(LoopHeaderBB, DummyReturnBB,
ConstantInt::getTrue(F.getContext()), BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
} else {
handleNBranch(F, BB, BI, DummyReturnBB, Updates);
}
Changed = true;
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
if (!DummyReturnBB)
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
Changed = true;
} else {
llvm_unreachable("unsupported block terminator");
}
}

View File

@ -558,11 +558,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
} else {
// Test for successors as back edge
BasicBlock *BB = N->getNodeAs<BasicBlock>();
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
for (BasicBlock *Succ : Term->successors())
if (Visited.count(Succ))
Loops[Succ] = BB;
if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()))
for (BasicBlock *Succ : Term->successors())
if (Visited.count(Succ))
Loops[Succ] = BB;
}
}
@ -594,7 +593,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
for (BasicBlock *P : predecessors(BB)) {
// Ignore it if it's a branch from outside into our region entry
if (!ParentRegion->contains(P))
if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
continue;
Region *R = RI->getRegionFor(P);
@ -1402,13 +1401,17 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
/// Run the transformation for each region found
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
const TargetTransformInfo *TTI) {
if (R->isTopLevelRegion())
// CallBr and its corresponding direct target blocks are for now ignored by
// this pass. This is not a limitation for the currently intended use cases
// of callbr in the AMDGPU backend.
// Parent and child regions are not affected by this (current) restriction.
// See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
return false;
this->DT = DT;
this->TTI = TTI;
Func = R->getEntry()->getParent();
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
ParentRegion = R;

View File

@ -158,6 +158,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;
// Redirect exiting edges through a control flow hub.
ControlFlowHub CHub;
bool Changed = false;
for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
BasicBlock *BB = ExitingBlocks[I];
@ -182,6 +183,10 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
bool UpdatedLI = false;
BasicBlock *NewSucc =
SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI);
// SplitCallBrEdge modifies the CFG because it creates an intermediate
// block. So we need to set the changed flag no matter what the
// ControlFlowHub is going to do later.
Changed = true;
// Even if CallBr and Succ do not have a common parent loop, we need to
// add the new target block to the parent loop of the current loop.
if (!UpdatedLI)
@ -207,6 +212,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
bool ChangedCFG;
std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(
&DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue());
ChangedCFG |= Changed;
if (!ChangedCFG)
return false;

View File

@ -0,0 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
; A callbr on inline asm with one fallthrough destination and one indirect
; destination. The value loaded in the entry block is stored to %dst1 on the
; fallthrough path and to %dst2 on the indirect path; both paths then reach
; the common return block.
define void @callbr_inline_asm(ptr %src, ptr %dst1, ptr %dst2, i32 %c) {
; CHECK-LABEL: callbr_inline_asm:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v0, v[0:1]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: v_cmp_gt_i32 vcc v6, 42; s_cbranch_vccnz .LBB0_2
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ; %bb.1: ; %fallthrough
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dword v[2:3], v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: .LBB0_2: ; Inline asm indirect target
; CHECK-NEXT: ; %indirect
; CHECK-NEXT: ; Label of block must be emitted
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dword v[4:5], v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
%a = load i32, ptr %src, align 4
callbr void asm "v_cmp_gt_i32 vcc $0, 42; s_cbranch_vccnz ${1:l}", "r,!i"(i32 %c) to label %fallthrough [label %indirect]
fallthrough:
store i32 %a, ptr %dst1, align 4
br label %ret
indirect:
store i32 %a, ptr %dst2, align 4
br label %ret
ret:
ret void
}
; A callbr whose fallthrough destination is the block that contains it, so
; the block loops on itself; its only indirect destination is %ret.
define void @callbr_self_loop(i1 %c) {
; CHECK-LABEL: callbr_self_loop:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: .LBB1_1: ; %callbr
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_branch .LBB1_1
; CHECK-NEXT: .LBB1_2: ; Inline asm indirect target
; CHECK-NEXT: ; %callbr.target.ret
; CHECK-NEXT: ; Label of block must be emitted
; CHECK-NEXT: s_setpc_b64 s[30:31]
br label %callbr
callbr:
callbr void asm "", "!i"() to label %callbr [label %ret]
ret:
ret void
}

View File

@ -3,6 +3,7 @@
declare void @foo(ptr)
declare i1 @bar(ptr)
declare i32 @bar32(ptr)
define void @musttail_call_without_return_value(ptr %p) {
; CHECK-LABEL: define void @musttail_call_without_return_value(
@ -28,6 +29,31 @@ bb.1:
ret void
}
; Void function whose entry block ends in a callbr. The fallthrough
; destination %bb.0 ends in a musttail call followed by ret, the indirect
; destination %bb.1 returns directly. The expected output still has the
; musttail call immediately followed by its own ret, and a separate ret in
; the second block.
define void @musttail_call_without_return_value_callbr(ptr %p) {
; CHECK-LABEL: define void @musttail_call_without_return_value_callbr(
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
; CHECK: [[BB_0]]:
; CHECK-NEXT: musttail call void @foo(ptr [[P]])
; CHECK-NEXT: ret void
; CHECK: [[BB_1:.*:]]
; CHECK-NEXT: ret void
;
entry:
%load = load i32, ptr %p, align 1
callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]
bb.0:
musttail call void @foo(ptr %p)
ret void
bb.1:
ret void
}
define i1 @musttail_call_with_return_value(ptr %p) {
; CHECK-LABEL: define i1 @musttail_call_with_return_value(
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
@ -51,3 +77,28 @@ bb.0:
bb.1:
ret i1 %load
}
; i32-returning function whose entry block ends in a callbr. The fallthrough
; destination %bb.0 returns the result of a musttail call to @bar32, the
; indirect destination %bb.1 returns the value loaded in the entry block. The
; expected output keeps both returns in their own blocks.
define i32 @musttail_call_with_return_value_callbr(ptr %p) {
; CHECK-LABEL: define i32 @musttail_call_with_return_value_callbr(
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
; CHECK: [[BB_0]]:
; CHECK-NEXT: [[RET:%.*]] = musttail call i32 @bar32(ptr [[P]])
; CHECK-NEXT: ret i32 [[RET]]
; CHECK: [[BB_1:.*:]]
; CHECK-NEXT: ret i32 [[LOAD]]
;
entry:
%load = load i32, ptr %p, align 1
callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]
bb.0:
%ret = musttail call i32 @bar32(ptr %p)
ret i32 %ret
bb.1:
ret i32 %load
}

View File

@ -36,26 +36,60 @@ loop:
br label %loop
}
; Infinite loop built only from callbr terminators: %entry reaches %loop
; through a callbr without indirect targets, and %loop jumps back to itself
; the same way. The IR assertions expect the callbr of %loop to be moved into
; a TransitionBlock, with %loop ending in a `br i1 true` that targets that
; block and references a DummyReturnBlock.
define amdgpu_kernel void @infinite_loop_callbr(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loop_callbr:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: ;;#ASMEND
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_callbr(
; IR-NEXT: entry:
; IR-NEXT: callbr void asm "", ""()
; IR-NEXT: to label [[LOOP:%.*]] []
; IR: loop:
; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]]
; IR: TransitionBlock:
; IR-NEXT: callbr void asm "", ""()
; IR-NEXT: to label [[LOOP]] []
; IR: DummyReturnBlock:
; IR-NEXT: ret void
;
entry:
callbr void asm "", ""() to label %loop []
loop:
store volatile i32 999, ptr addrspace(1) %out, align 4
callbr void asm "", ""() to label %loop []
}
define amdgpu_kernel void @infinite_loop_ret(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loop_ret:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
; SI-NEXT: s_cbranch_execz .LBB1_3
; SI-NEXT: s_cbranch_execz .LBB2_3
; SI-NEXT: ; %bb.1: ; %loop.preheader
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_and_b64 vcc, exec, -1
; SI-NEXT: .LBB1_2: ; %loop
; SI-NEXT: .LBB2_2: ; %loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccnz .LBB1_2
; SI-NEXT: .LBB1_3: ; %UnifiedReturnBlock
; SI-NEXT: s_cbranch_vccnz .LBB2_2
; SI-NEXT: .LBB2_3: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_ret(
; IR-NEXT: entry:
@ -81,44 +115,93 @@ return:
ret void
}
define amdgpu_kernel void @infinite_loop_ret_callbr(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loop_ret_callbr:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: ;;#ASMEND
; SI-NEXT: ; %bb.1: ; %loop.preheader
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: .LBB3_2: ; Inline asm indirect target
; SI-NEXT: ; %UnifiedReturnBlock
; SI-NEXT: ; Label of block must be emitted
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_ret_callbr(
; IR-NEXT: entry:
; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; IR-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP]], 1
; IR-NEXT: [[COND32:%.*]] = zext i1 [[COND]] to i32
; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND32]])
; IR-NEXT: to label [[LOOP:%.*]] [label %UnifiedReturnBlock]
; IR: loop:
; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
; IR: TransitionBlock:
; IR-NEXT: callbr void asm "", ""()
; IR-NEXT: to label [[LOOP]] []
; IR: UnifiedReturnBlock:
; IR-NEXT: ret void
;
entry:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%cond = icmp eq i32 %tmp, 1
%cond32 = zext i1 %cond to i32
callbr void asm "", "r,!i"(i32 %cond32) to label %loop [label %return]
loop:
store volatile i32 999, ptr addrspace(1) %out, align 4
callbr void asm "", ""() to label %loop []
return:
ret void
}
define amdgpu_kernel void @infinite_loops(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loops:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b64 s[2:3], -1
; SI-NEXT: s_cbranch_scc1 .LBB2_4
; SI-NEXT: s_cbranch_scc1 .LBB4_4
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x378
; SI-NEXT: s_and_b64 vcc, exec, -1
; SI-NEXT: .LBB2_2: ; %loop2
; SI-NEXT: .LBB4_2: ; %loop2
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccnz .LBB2_2
; SI-NEXT: s_cbranch_vccnz .LBB4_2
; SI-NEXT: ; %bb.3: ; %Flow
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: .LBB2_4: ; %Flow2
; SI-NEXT: .LBB4_4: ; %Flow2
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz .LBB2_7
; SI-NEXT: s_cbranch_vccz .LBB4_7
; SI-NEXT: ; %bb.5:
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: .LBB2_6: ; %loop1
; SI-NEXT: .LBB4_6: ; %loop1
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz .LBB2_6
; SI-NEXT: .LBB2_7: ; %DummyReturnBlock
; SI-NEXT: s_cbranch_vccz .LBB4_6
; SI-NEXT: .LBB4_7: ; %DummyReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loops(
; IR-NEXT: entry:
@ -144,24 +227,78 @@ loop2:
br label %loop2
}
define amdgpu_kernel void @infinite_loops_callbr(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loops_callbr:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: ;;#ASMEND
; SI-NEXT: ; %bb.1: ; %loop1
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB5_2: ; Inline asm indirect target
; SI-NEXT: ; %loop2.preheader
; SI-NEXT: ; Label of block must be emitted
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x378
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loops_callbr(
; IR-NEXT: entry:
; IR-NEXT: callbr void asm "", "r,!i"(i32 poison)
; IR-NEXT: to label [[LOOP1:%.*]] [label %loop2]
; IR: loop1:
; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]]
; IR: TransitionBlock:
; IR-NEXT: callbr void asm "", ""()
; IR-NEXT: to label [[LOOP1]] []
; IR: loop2:
; IR-NEXT: store volatile i32 888, ptr addrspace(1) [[OUT]], align 4
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK1:%.*]], label [[DUMMYRETURNBLOCK]]
; IR: TransitionBlock1:
; IR-NEXT: callbr void asm "", ""()
; IR-NEXT: to label [[LOOP2:%.*]] []
; IR: DummyReturnBlock:
; IR-NEXT: ret void
;
entry:
callbr void asm "", "r,!i"(i32 poison) to label %loop1 [label %loop2]
loop1:
store volatile i32 999, ptr addrspace(1) %out, align 4
callbr void asm "", ""() to label %loop1 []
loop2:
store volatile i32 888, ptr addrspace(1) %out, align 4
callbr void asm "", ""() to label %loop2 []
}
define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loop_nest_ret:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc
; SI-NEXT: s_cbranch_execz .LBB3_5
; SI-NEXT: s_cbranch_execz .LBB6_5
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: .LBB3_2: ; %outer_loop
; SI-NEXT: .LBB6_2: ; %outer_loop
; SI-NEXT: ; =>This Loop Header: Depth=1
; SI-NEXT: ; Child Loop BB3_3 Depth 2
; SI-NEXT: ; Child Loop BB6_3 Depth 2
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: .LBB3_3: ; %inner_loop
; SI-NEXT: ; Parent Loop BB3_2 Depth=1
; SI-NEXT: .LBB6_3: ; %inner_loop
; SI-NEXT: ; Parent Loop BB6_2 Depth=1
; SI-NEXT: ; => This Inner Loop Header: Depth=2
; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1]
; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
@ -169,13 +306,13 @@ define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) {
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT: s_cbranch_execnz .LBB3_3
; SI-NEXT: s_cbranch_execnz .LBB6_3
; SI-NEXT: ; %bb.4: ; %loop.exit.guard
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
; SI-NEXT: ; in Loop: Header=BB6_2 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_mov_b64 vcc, 0
; SI-NEXT: s_branch .LBB3_2
; SI-NEXT: .LBB3_5: ; %UnifiedReturnBlock
; SI-NEXT: s_branch .LBB6_2
; SI-NEXT: .LBB6_5: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_nest_ret(
; IR-NEXT: entry:
@ -212,4 +349,82 @@ return:
ret void
}
define amdgpu_kernel void @infinite_loop_nest_ret_callbr(ptr addrspace(1) %out) {
; SI-LABEL: infinite_loop_nest_ret_callbr:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: ;;#ASMEND
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_and_b64 s[0:1], exec, 0
; SI-NEXT: s_branch .LBB7_3
; SI-NEXT: .LBB7_2: ; %loop.exit.guard
; SI-NEXT: ; in Loop: Header=BB7_3 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccnz .LBB7_5
; SI-NEXT: .LBB7_3: ; %outer_loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: ;;#ASMSTART
; SI-NEXT: ;;#ASMEND
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 s[2:3], -1
; SI-NEXT: s_mov_b64 vcc, s[0:1]
; SI-NEXT: s_cbranch_vccz .LBB7_2
; SI-NEXT: ; %bb.4: ; %TransitionBlock.target.outer_loop
; SI-NEXT: ; in Loop: Header=BB7_3 Depth=1
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_branch .LBB7_2
; SI-NEXT: .LBB7_5: ; Inline asm indirect target
; SI-NEXT: ; %UnifiedReturnBlock
; SI-NEXT: ; Label of block must be emitted
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_nest_ret_callbr(
; IR-NEXT: entry:
; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; IR-NEXT: [[COND1:%.*]] = icmp ne i32 [[TMP]], 1
; IR-NEXT: [[COND1_32:%.*]] = zext i1 [[COND1]] to i32
; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND1_32]])
; IR-NEXT: to label [[OUTER_LOOP:%.*]] [label %UnifiedReturnBlock]
; IR: outer_loop:
; IR-NEXT: callbr void asm "", ""()
; IR-NEXT: to label [[INNER_LOOP:%.*]] []
; IR: inner_loop:
; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4
; IR-NEXT: [[COND3:%.*]] = icmp eq i32 [[TMP]], 3
; IR-NEXT: [[COND3_32:%.*]] = zext i1 [[COND3]] to i32
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
; IR: TransitionBlock:
; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND3_32]])
; IR-NEXT: to label [[INNER_LOOP]] [label %outer_loop]
; IR: UnifiedReturnBlock:
; IR-NEXT: ret void
;
entry:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%cond1 = icmp ne i32 %tmp, 1 ; avoid following BB optimizing away through the domination
%cond1_32 = zext i1 %cond1 to i32
callbr void asm "", "r,!i"(i32 %cond1_32) to label %outer_loop [label %return]
outer_loop:
; %cond2 = icmp eq i32 %tmp, 2
; br i1 %cond2, label %outer_loop, label %inner_loop
callbr void asm "", ""() to label %inner_loop []
inner_loop: ; preds = %LeafBlock, %LeafBlock1
store volatile i32 999, ptr addrspace(1) %out, align 4
%cond3 = icmp eq i32 %tmp, 3
%cond3_32 = zext i1 %cond3 to i32
callbr void asm "", "r,!i"(i32 %cond3_32) to label %inner_loop [label %outer_loop]
return:
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x()

View File

@ -3,15 +3,16 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=ISA
define void @nested_inf_loop(i1 %0, i1 %1) {
; OPT-LABEL: @nested_inf_loop(
; OPT-NEXT: BB:
; OPT-NEXT: br label [[BB1:%.*]]
; OPT: BB1:
; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0:%.*]], i1 true, i1 [[TMP1:%.*]]
; OPT-NEXT: br i1 [[BRMERGE]], label [[BB1]], label [[INFLOOP:%.*]]
; OPT: infloop:
; OPT-NEXT: br i1 true, label [[INFLOOP]], label [[DUMMYRETURNBLOCK:%.*]]
; OPT: DummyReturnBlock:
; OPT-LABEL: define void @nested_inf_loop(
; OPT-SAME: i1 [[TMP0:%.*]], i1 [[TMP1:%.*]]) {
; OPT-NEXT: [[BB:.*:]]
; OPT-NEXT: br label %[[BB1:.*]]
; OPT: [[BB1]]:
; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0]], i1 true, i1 [[TMP1]]
; OPT-NEXT: br i1 [[BRMERGE]], label %[[BB1]], label %[[INFLOOP:.*]]
; OPT: [[INFLOOP]]:
; OPT-NEXT: br i1 true, label %[[INFLOOP]], label %[[DUMMYRETURNBLOCK:.*]]
; OPT: [[DUMMYRETURNBLOCK]]:
; OPT-NEXT: ret void
;
; ISA-LABEL: nested_inf_loop:
@ -63,3 +64,84 @@ BB4:
BB3:
br label %BB1
}
define void @nested_inf_loop_callbr(i32 %0, i32 %1) {
; OPT-LABEL: define void @nested_inf_loop_callbr(
; OPT-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
; OPT-NEXT: [[BB:.*:]]
; OPT-NEXT: callbr void asm "", ""()
; OPT-NEXT: to label %[[BB1:.*]] []
; OPT: [[BB1]]:
; OPT-NEXT: callbr void asm "", "r,!i"(i32 [[TMP0]])
; OPT-NEXT: to label %[[BB3:.*]] [label %BB2]
; OPT: [[BB2:.*:]]
; OPT-NEXT: callbr void asm "", ""()
; OPT-NEXT: to label %[[BB4:.*]] []
; OPT: [[BB4]]:
; OPT-NEXT: br i1 true, label %[[TRANSITIONBLOCK:.*]], label %[[DUMMYRETURNBLOCK:.*]]
; OPT: [[TRANSITIONBLOCK]]:
; OPT-NEXT: callbr void asm "", "r,!i"(i32 [[TMP1]])
; OPT-NEXT: to label %[[BB3]] [label %BB4]
; OPT: [[BB3]]:
; OPT-NEXT: callbr void asm "", ""()
; OPT-NEXT: to label %[[BB1]] []
; OPT: [[DUMMYRETURNBLOCK]]:
; OPT-NEXT: ret void
;
; ISA-LABEL: nested_inf_loop_callbr:
; ISA: ; %bb.0: ; %BB
; ISA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ISA-NEXT: ;;#ASMSTART
; ISA-NEXT: ;;#ASMEND
; ISA-NEXT: ; implicit-def: $sgpr6_sgpr7
; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
; ISA-NEXT: .LBB1_1: ; %BB1
; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
; ISA-NEXT: ;;#ASMSTART
; ISA-NEXT: ;;#ASMEND
; ISA-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; ISA-NEXT: s_and_b64 s[8:9], s[4:5], exec
; ISA-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
; ISA-NEXT: .LBB1_2: ; %BB3
; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
; ISA-NEXT: ;;#ASMSTART
; ISA-NEXT: ;;#ASMEND
; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
; ISA-NEXT: s_and_b64 s[8:9], s[6:7], exec
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; ISA-NEXT: s_branch .LBB1_1
; ISA-NEXT: .LBB1_3: ; Inline asm indirect target
; ISA-NEXT: ; %BB2
; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
; ISA-NEXT: ; Label of block must be emitted
; ISA-NEXT: ;;#ASMSTART
; ISA-NEXT: ;;#ASMEND
; ISA-NEXT: s_mov_b64 s[6:7], -1
; ISA-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
; ISA-NEXT: s_cbranch_execz .LBB1_5
; ISA-NEXT: ; %bb.4: ; %TransitionBlock.target.BB3
; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
; ISA-NEXT: s_xor_b64 s[6:7], exec, -1
; ISA-NEXT: .LBB1_5: ; %loop.exit.guard
; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1
; ISA-NEXT: s_or_b64 exec, exec, s[8:9]
; ISA-NEXT: s_and_b64 vcc, exec, s[6:7]
; ISA-NEXT: s_mov_b64 s[6:7], 0
; ISA-NEXT: s_cbranch_vccz .LBB1_2
; ISA-NEXT: ; %bb.6: ; %DummyReturnBlock
; ISA-NEXT: s_setpc_b64 s[30:31]
BB:
callbr void asm "", ""() to label %BB1 []
BB1:
callbr void asm "", "r,!i"(i32 %0) to label %BB3 [label %BB2]
BB2:
callbr void asm "", ""() to label %BB4 []
BB4:
callbr void asm "", "r,!i"(i32 %1) to label %BB3 [label %BB4]
BB3:
callbr void asm "", ""() to label %BB1 []
}

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck %s
declare void @llvm.trap()
@ -70,8 +70,33 @@ define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
; CHECK-NEXT: s_mov_b64 s[2:3], -1
; CHECK-NEXT: s_trap 2
; CHECK-NEXT: s_branch .LBB0_4
; UNIFY-LABEL: @kernel(
; UNIFY-NEXT: entry:
; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256
; UNIFY-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; UNIFY: if.then:
; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0
; UNIFY-NEXT: br i1 [[CMP1]], label [[IF_END6_SINK_SPLIT:%.*]], label [[COND_FALSE:%.*]]
; UNIFY: cond.false:
; UNIFY-NEXT: call void @llvm.trap()
; UNIFY-NEXT: unreachable
; UNIFY: if.else:
; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10
; UNIFY-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END6:%.*]]
; UNIFY: if.then3:
; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0
; UNIFY-NEXT: br i1 [[CMP1_I7]], label [[IF_END6_SINK_SPLIT]], label [[COND_FALSE_I8:%.*]]
; UNIFY: cond.false.i8:
; UNIFY-NEXT: call void @llvm.trap()
; UNIFY-NEXT: unreachable
; UNIFY: if.end6.sink.split:
; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]]
; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4
; UNIFY-NEXT: br label [[IF_END6]]
; UNIFY: if.end6:
; UNIFY-NEXT: ret void
;
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cmp = icmp eq i32 %n, 256
@ -105,5 +130,129 @@ if.end6.sink.split:
if.end6:
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; UNIFY: {{.*}}
define amdgpu_kernel void @kernel_callbr(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
; CHECK-LABEL: kernel_callbr:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_load_dword s1, s[8:9], 0x10
; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmpk_eq_i32 s1, 0x100
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ; %bb.1: ; %if.then
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: .LBB1_2: ; %if.end6.sink.split
; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x8
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: v_mov_b32_e32 v1, s0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_store_dword v0, v1, s[2:3]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: .LBB1_3: ; Inline asm indirect target
; CHECK-NEXT: ; %UnifiedReturnBlock
; CHECK-NEXT: ; Label of block must be emitted
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: .LBB1_4: ; Inline asm indirect target
; CHECK-NEXT: ; %if.else
; CHECK-NEXT: ; Label of block must be emitted
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ; %bb.5: ; %if.then3
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_branch .LBB1_2
; CHECK-NEXT: .LBB1_6: ; Inline asm indirect target
; CHECK-NEXT: ; %cond.false.i8
; CHECK-NEXT: ; Label of block must be emitted
; CHECK-NEXT: .LBB1_7: ; Inline asm indirect target
; CHECK-NEXT: ; %cond.false
; CHECK-NEXT: ; Label of block must be emitted
; CHECK-NEXT: s_trap 2
; CHECK-NEXT: ; divergent unreachable
; CHECK-NEXT: s_branch .LBB1_3
; UNIFY-LABEL: @kernel_callbr(
; UNIFY-NEXT: entry:
; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256
; UNIFY-NEXT: [[CMP32:%.*]] = zext i1 [[CMP]] to i32
; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP32]])
; UNIFY-NEXT: to label [[IF_THEN:%.*]] [label %if.else]
; UNIFY: if.then:
; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0
; UNIFY-NEXT: [[CMP1_32:%.*]] = zext i1 [[CMP1]] to i32
; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_32]])
; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT:%.*]] [label %cond.false]
; UNIFY: cond.false:
; UNIFY-NEXT: call void @llvm.trap()
; UNIFY-NEXT: unreachable
; UNIFY: if.else:
; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10
; UNIFY-NEXT: [[CMP2_32:%.*]] = zext i1 [[CMP2]] to i32
; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP2_32]])
; UNIFY-NEXT: to label [[IF_THEN3:%.*]] [label %if.end6]
; UNIFY: if.then3:
; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0
; UNIFY-NEXT: [[CMP1_I7_32:%.*]] = zext i1 [[CMP1_I7]] to i32
; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_I7_32]])
; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT]] [label %cond.false.i8]
; UNIFY: cond.false.i8:
; UNIFY-NEXT: call void @llvm.trap()
; UNIFY-NEXT: unreachable
; UNIFY: if.end6.sink.split:
; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]]
; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4
; UNIFY-NEXT: callbr void asm "", ""()
; UNIFY-NEXT: to label [[IF_END6:%.*]] []
; UNIFY: if.end6:
; UNIFY-NEXT: ret void
;
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cmp = icmp eq i32 %n, 256
%cmp32 = zext i1 %cmp to i32
callbr void asm "", "r,!i"(i32 %cmp32) to label %if.then [label %if.else]
if.then:
%cmp1 = icmp eq i32 %a, 0
%cmp1_32 = zext i1 %cmp1 to i32
callbr void asm "", "r,!i"(i32 %cmp1_32) to label %if.end6.sink.split [label %cond.false]
cond.false:
call void @llvm.trap()
unreachable
if.else:
%cmp2 = icmp ult i32 %tid, 10
%cmp2_32 = zext i1 %cmp2 to i32
callbr void asm "", "r,!i"(i32 %cmp2_32) to label %if.then3 [label %if.end6]
if.then3:
%cmp1.i7 = icmp eq i32 %a, 0
%cmp1.i7_32 = zext i1 %cmp1.i7 to i32
callbr void asm "", "r,!i"(i32 %cmp1.i7_32) to label %if.end6.sink.split [label %cond.false.i8]
cond.false.i8:
call void @llvm.trap()
unreachable
if.end6.sink.split:
%x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid
store i32 %a, ptr addrspace(1) %x1, align 4
callbr void asm "", ""() to label %if.end6 []
if.end6:
ret void
}

View File

@ -37,3 +37,42 @@ n28: ; preds = %.loopexit, %n28
n31: ; preds =
ret void
}
; Loop without a reachable return, written with callbr terminators only:
; .entry -> .loopexit -> n28, and n28 branches back to .loopexit or to itself.
; n31 contains the only ret of the input but no block branches to it.
; According to the assertions below, the expected output
;   * keeps the two single-target callbrs in .entry and .loopexit unchanged,
;   * moves the two-target callbr of n28 into a new block named TransitionBlock,
;   * terminates n28 with `br i1 true` that targets TransitionBlock and also
;     references a new DummyReturnBlock containing `ret void`,
;   * rewrites the n28 phi so its back-edge value comes from TransitionBlock.
define amdgpu_ps void @_amdgpu_ps_main_callbr() local_unnamed_addr #3 {
; IR-LABEL: @_amdgpu_ps_main_callbr(
; IR-NEXT: .entry:
; IR-NEXT: callbr void asm "", ""()
; IR-NEXT: to label [[DOTLOOPEXIT:%.*]] []
; IR: .loopexit:
; IR-NEXT: callbr void asm "", ""()
; IR-NEXT: to label [[N28:%.*]] []
; IR: n28:
; IR-NEXT: [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]
; IR-NEXT: [[N29]] = fadd float [[DOT01]], 1.000000e+00
; IR-NEXT: [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00
; IR-NEXT: [[N30_32:%.*]] = zext i1 [[N30]] to i32
; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[DUMMYRETURNBLOCK:%.*]]
; IR: TransitionBlock:
; IR-NEXT: callbr void asm "", "r,!i"(i32 [[N30_32]])
; IR-NEXT: to label [[DOTLOOPEXIT]] [label %n28]
; IR: n31:
; IR-NEXT: ret void
; IR: DummyReturnBlock:
; IR-NEXT: ret void
;
.entry:
callbr void asm "", ""() to label %.loopexit []
.loopexit: ; preds = %n28, %.entry
callbr void asm "", ""() to label %n28 []
n28: ; preds = %.loopexit, %n28
%.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ]
%n29 = fadd float %.01, 1.0
%n30 = fcmp ogt float %n29, 4.000000e+00
; callbr takes an i32 operand here, so the i1 compare result is widened first.
%n30.32 = zext i1 %n30 to i32
; Both destinations stay inside the loop (.loopexit or n28 itself); this is the
; terminator the assertions expect to end up in TransitionBlock.
callbr void asm "", "r,!i"(i32 %n30.32) to label %.loopexit [label %n28]
; No block branches to n31.
n31: ; preds =
ret void
}

View File

@ -0,0 +1,235 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s
; Structurize as usual, but don't tear callbr and its destination blocks apart.
;
; Note: currently, callbr blocks and their corresponding target blocks
; themselves are not handled by the structurizer.* If the CFG turns out to be
; unstructured at the end, the CFG lowering (si-annotate-control-flow) will
; detect this. For the currently intended use cases of callbr in the context of
; the AMDGPU backend, this is not a limitation (cf.
; https://discourse.llvm.org/t/rfc-add-callbr-intrinsic-support/86087).
;
; Note 2: while callbr and its targets remain untouched, everything else is
; handled as usual, even if it is nested in a callbr region.
;
; *FIXME: this will be fixed in the future. Callbr can be handled as follows:
; Input IR:
; ```
; define void @foo_callbr() {
; callbr void asm "", "!i"() to label %fallthrough [label %indirect, ...]
; fallthrough:
; br label %exit
; indirect:
; br label %exit
; ...
; exit:
; ret void
; }
; ```
;
; Output IR:
; ```
; define void @foo_callbr() {
; callbr void asm "", "!i"()
; to label %fallthrough [label %fake.indirect, label %fake.indirect1, label %fake.indirect2, ...]
; fake.indirect: ; preds = %0
; br label %Flow
; fake.indirect1: ; preds = %0
; br label %Flow
; fake.indirect2: ; preds = %0
; br label %Flow
; ...
; Flow: ; preds = %fallthrough, %fake.indirect[0-N]
; %1 = phi i1 [ false, %fallthrough ], [ true, %fake.indirect ], [ false, %fake.indirect[1-N] ]
; br i1 %1, label %indirect, label %Flow1
; Flow1: ; preds = %Flow, %indirect
; %2 = phi i1 [ false, %Flow], [ true, %fake.indirect1 ], [ false, %indirect ]
; br i1 %2, label %indirect1, label %Flow2
; Flow2: ; preds = %Flow1, %indirect1
; %3 = phi i1 [ false, %Flow1 ], [ true, %fake.indirect2 ], [ false, %indirect1 ]
; br i1 %3, label %indirect2, label %Flow3
; ...
; fallthrough: ; preds = %0
; br label %Flow
; indirect: ; preds = %Flow
; br label %Flow1
; indirect1: ; preds = %Flow1
; br label %Flow2
; indirect2: ; preds = %Flow2
; br label %Flow3
; ...
; exit: ; preds = %indirectN, %FlowN
; ret void
; }
; ```
;
; Output IR as ASCII-art:
;          %0
; ---------------------
; |     |     |     |
; v     v     v     v
; f    f.i   f.i1  f.i2
; |     |     |     |
; v     v     v     v
; ---------------------
;        %Flow
;          |   \
;          |    %indirect
;          |   /
;        %Flow1
;          |   \
;          |    %indirect1
;          |   /
;        %Flow2
;          |   \
;          |    %indirect2
;          |   /
;        %exit
;
; Only callbr, nothing to do.
; The function is a single callbr whose fallthrough and indirect destinations
; both branch straight to %exit. The assertions below expect the callbr, its
; two destination blocks and %exit to appear unchanged; they list no Flow block.
; Note on the autogenerated capture names: [[INDIRECT]] binds the `to label`
; (fallthrough) destination, while the indirect destination is matched
; literally as %indirect and its block header via [[INDIRECT1]].
define void @callbr_simple() {
; CHECK-LABEL: define void @callbr_simple() {
; CHECK-NEXT: [[CALLBR:.*:]]
; CHECK-NEXT: callbr void asm "", "!i"()
; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
; CHECK: [[INDIRECT]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[INDIRECT1:.*:]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
callbr:
callbr void asm "", "!i"() to label %fallthrough [label %indirect]
fallthrough:
br label %exit
indirect:
br label %exit
exit:
ret void
}
; Callbr nested in non-callbr: non-callbr is transformed
; Input: the entry block branches on %c to either %callbr (a callbr with a
; fallthrough and an indirect destination) or %nocallbr; all paths end in %exit.
; Expected output, per the assertions below:
;   * the entry branch uses the inverted condition (xor %c, true) and goes to
;     %nocallbr or to a new Flow block,
;   * Flow holds an i1 phi (false when coming from %nocallbr, true when coming
;     from the entry block) that selects between %callbr and %exit,
;   * the callbr and its two destination blocks are emitted unchanged.
define void @callbr_in_non_callbr(i1 %c) {
; CHECK-LABEL: define void @callbr_in_non_callbr(
; CHECK-SAME: i1 [[C:%.*]]) {
; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
; CHECK-NEXT: br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW:.*]]
; CHECK: [[FLOW]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[NOCALLBR]] ], [ true, [[TMP0:%.*]] ]
; CHECK-NEXT: br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[EXIT:.*]]
; CHECK: [[CALLBR]]:
; CHECK-NEXT: callbr void asm "", "!i"()
; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
; CHECK: [[INDIRECT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[INDIRECT1:.*:]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[NOCALLBR]]:
; CHECK-NEXT: br label %[[FLOW]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; Regular two-way branch that encloses the callbr.
br i1 %c, label %callbr, label %nocallbr
callbr:
callbr void asm "", "!i"() to label %fallthrough [label %indirect]
fallthrough:
br label %exit
indirect:
br label %exit
nocallbr:
br label %exit
exit:
ret void
}
; Callbr parent of non-callbr: non-callbr is transformed
; Input: the entry block ends in a callbr; its fallthrough destination contains
; a regular conditional branch on %c to %fallthrough1 / %fallthrough2, and its
; indirect destination branches directly to %exit.
; Expected output, per the assertions below:
;   * the inverted condition (xor %c, true) is computed in the entry block,
;     ahead of the callbr, which itself is unchanged,
;   * the fallthrough destination branches on the inverted condition to
;     %fallthrough2 or to a new Flow block,
;   * Flow holds an i1 phi selecting between %fallthrough1 and a second new
;     block Flow1, and Flow1 branches to %exit,
;   * the indirect destination still branches directly to %exit.
; Note on the autogenerated capture names: [[INDIRECT]] binds the `to label`
; (fallthrough) destination of the callbr.
define void @non_callbr_in_callbr(i1 %c) {
; CHECK-LABEL: define void @non_callbr_in_callbr(
; CHECK-SAME: i1 [[C:%.*]]) {
; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
; CHECK-NEXT: callbr void asm "", "!i"()
; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
; CHECK: [[INDIRECT]]:
; CHECK-NEXT: br i1 [[C_INV]], label %[[FALLTHROUGH2:.*]], label %[[FLOW:.*]]
; CHECK: [[FLOW]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FALLTHROUGH2]] ], [ true, %[[INDIRECT]] ]
; CHECK-NEXT: br i1 [[TMP1]], label %[[FALLTHROUGH1:.*]], label %[[FLOW1:.*]]
; CHECK: [[FALLTHROUGH1]]:
; CHECK-NEXT: br label %[[FLOW1]]
; CHECK: [[FALLTHROUGH2]]:
; CHECK-NEXT: br label %[[FLOW]]
; CHECK: [[INDIRECT1:.*:]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FLOW1]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
callbr void asm "", "!i"() to label %fallthrough [label %indirect]
fallthrough:
; Regular two-way branch nested below the callbr's fallthrough destination.
br i1 %c, label %fallthrough1, label %fallthrough2
fallthrough1:
br label %exit
fallthrough2:
br label %exit
indirect:
br label %exit
exit:
ret void
}
; Callbr surrounded by non-callbr: all three regular branches are handled
; correctly
; Input: an outer conditional branch on %c selects between %callbr and
; %nocallbr. Below the callbr, the fallthrough destination branches on %d and
; the indirect destination branches on %e; %nocallbr branches on %f. Every path
; ends in %ret.
; Expected output, per the assertions below:
;   * the outer branch uses the inverted %c and goes to %nocallbr or to Flow3;
;     Flow3 holds an i1 phi selecting between %callbr and %ret,
;   * the callbr itself is unchanged,
;   * each of the three inner branches keeps its original condition (%d, %e,
;     %f) and gets its own join block (Flow2, Flow1 and Flow respectively)
;     in place of the direct edge to %ret,
;   * Flow branches to Flow3, while Flow1 and Flow2 branch to %ret.
define void @callbr_nested_in_non_callbr(i1 %c, i1 %d, i1 %e, i1 %f) {
; CHECK-LABEL: define void @callbr_nested_in_non_callbr(
; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]], i1 [[E:%.*]], i1 [[F:%.*]]) {
; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true
; CHECK-NEXT: br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW3:.*]]
; CHECK: [[FLOW3]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FLOW:.*]] ], [ true, [[TMP0:%.*]] ]
; CHECK-NEXT: br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[RET:.*]]
; CHECK: [[CALLBR]]:
; CHECK-NEXT: callbr void asm "", "!i"()
; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect]
; CHECK: [[INDIRECT]]:
; CHECK-NEXT: br i1 [[D]], label %[[FALLTHROUGH1:.*]], label %[[FLOW2:.*]]
; CHECK: [[FALLTHROUGH1]]:
; CHECK-NEXT: br label %[[FLOW2]]
; CHECK: [[INDIRECT2:.*:]]
; CHECK-NEXT: br i1 [[E]], label %[[INDIRECT1:.*]], label %[[FLOW1:.*]]
; CHECK: [[INDIRECT1]]:
; CHECK-NEXT: br label %[[FLOW1]]
; CHECK: [[NOCALLBR]]:
; CHECK-NEXT: br i1 [[F]], label %[[NOCALLBR1:.*]], label %[[FLOW]]
; CHECK: [[NOCALLBR1]]:
; CHECK-NEXT: br label %[[FLOW]]
; CHECK: [[FLOW]]:
; CHECK-NEXT: br label %[[FLOW3]]
; CHECK: [[FLOW1]]:
; CHECK-NEXT: br label %[[RET]]
; CHECK: [[FLOW2]]:
; CHECK-NEXT: br label %[[RET]]
; CHECK: [[RET]]:
; CHECK-NEXT: ret void
;
; Outer regular branch enclosing the callbr.
br i1 %c, label %callbr, label %nocallbr
callbr:
callbr void asm "", "!i"() to label %fallthrough [label %indirect]
fallthrough:
; Regular branch below the callbr's fallthrough destination.
br i1 %d, label %fallthrough1, label %ret
fallthrough1:
br label %ret
indirect:
; Regular branch below the callbr's indirect destination.
br i1 %e, label %indirect1, label %ret
indirect1:
br label %ret
nocallbr:
; Regular branch on the path that bypasses the callbr.
br i1 %f, label %nocallbr1, label %ret
nocallbr1:
br label %ret
ret:
ret void
}