reland "[StructurizeCFG] Hoist and simplify zero-cost incoming else p… (#149744)

…hi values (#139605)"

This relands commit b11523b494b with a fix for the llvm-buildbot failures
on "clang-hip-vega20" and "openmp-offload-amdgpu-runtime-2". The reland
no longer hoists PHI nodes, which fixes the issue.

Original PR description:

The order of if and else blocks can introduce unnecessary VGPR copies.
Consider the case of an if-else block where the incoming phi from the
'Else block' only contains zero-cost instructions, and the 'Then' block
modifies some value. There would be no interference when coalescing
because only one value is live at any point before structurization.
However, in the structurized CFG, the Then value is live at 'Else' block
due to the path if→flow→else, leading to additional VGPR copies.

This patch addresses the issue by:
- Identifying PHI nodes with zero-cost incoming values from the Else
block and hoisting those values to the nearest common dominator of the
Then and Else blocks.
- Updating Flow PHI nodes by replacing poison entries (on the if→flow
edge) with the correct hoisted values.
This commit is contained in:
Vigneshwar Jayakumar 2025-07-25 15:23:45 -05:00 committed by GitHub
parent 5ebdfe386e
commit 56ae79a6ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 466 additions and 11 deletions

View File

@ -19,6 +19,7 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@ -128,6 +129,7 @@ struct PredInfo {
using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
using Val2BBMap = DenseMap<Value *, BasicBlock *>;
// A traits type that is intended to be used in graph algorithms. The graph
// traits starts at an entry node, and traverses the RegionNodes that are in
@ -279,7 +281,7 @@ class StructurizeCFG {
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
Value *BoolPoison;
const TargetTransformInfo *TTI;
Function *Func;
Region *ParentRegion;
@ -301,8 +303,12 @@ class StructurizeCFG {
PredMap LoopPreds;
BranchVector LoopConds;
Val2BBMap HoistedValues;
RegionNode *PrevNode;
void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
void orderNodes();
void analyzeLoops(RegionNode *N);
@ -332,6 +338,8 @@ class StructurizeCFG {
void simplifyAffectedPhis();
void simplifyHoistedPhis();
DebugLoc killTerminator(BasicBlock *BB);
void changeExit(RegionNode *Node, BasicBlock *NewExit,
@ -359,7 +367,7 @@ class StructurizeCFG {
public:
void init(Region *R);
bool run(Region *R, DominatorTree *DT);
bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
bool makeUniformRegion(Region *R, UniformityInfo &UA);
};
@ -385,8 +393,11 @@ public:
if (SCFG.makeUniformRegion(R, UA))
return false;
}
Function *F = R->getEntry()->getParent();
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return SCFG.run(R, DT);
return SCFG.run(R, DT, TTI);
}
StringRef getPassName() const override { return "Structurize control flow"; }
@ -394,7 +405,9 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
AU.addRequired<UniformityInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
@ -403,6 +416,34 @@ public:
} // end anonymous namespace
/// Decides whether \p I is a candidate for hoisting out of \p BB.
///
/// \p I qualifies only when all of the following hold:
///  - it is a non-PHI instruction that currently lives in \p BB,
///  - the target reports a valid latency cost of exactly zero for it, and
///  - none of its operands is an instruction defined in \p BB.
///
/// \param I   Instruction under consideration.
/// \param BB  Block the instruction is expected to reside in.
/// \param TTI Target cost model used for the latency query.
/// \returns true if every condition above is met, false otherwise.
static bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
                                   const TargetTransformInfo *TTI) {
  // PHIs and instructions that live elsewhere are never candidates.
  if (isa<PHINode>(I) || I->getParent() != BB)
    return false;

  // An invalid cost is treated like an expensive one, so only a valid cost of
  // zero lets the instruction through.
  const auto Latency =
      TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
  if (!Latency.isValid() || Latency.getValue() != 0)
    return false;

  // Reject the instruction as soon as one operand is produced inside BB.
  for (const auto &Operand : I->operands()) {
    const auto *Def = dyn_cast<Instruction>(Operand.get());
    if (Def && Def->getParent() == BB)
      return false;
  }

  return true;
}
char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
@ -413,6 +454,39 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
/// Structurization can introduce unnecessary VGPR copies due to register
/// coalescing interference. For example, if the Else block has a zero-cost
/// instruction and the Then block modifies the VGPR value, only one value is
/// live at a time in the merge block before structurization. After
/// structurization, the coalescer may treat the Then value as live in the Else
/// block (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
///
/// This function examines the PHI nodes in the single successor of \p ElseBB.
/// For every PHI whose incoming value from \p ElseBB is accepted by
/// isHoistableInstruction(), the instruction is moved in front of the
/// terminator of the nearest common dominator of \p ElseBB and \p ThenBB and
/// recorded in HoistedValues together with that dominator block. A follow-up
/// step (simplifyHoistedPhis) later uses the recorded values to replace poison
/// PHI entries along the if→flow edge.
///
/// An instruction is only moved when each of its instruction operands
/// dominates the insertion point; otherwise the hoisted instruction would use
/// a value that is not yet defined there.
///
/// \param ElseBB Block whose zero-cost PHI inputs are candidates for hoisting.
/// \param ThenBB The other block; used only to compute the common dominator.
void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
                                                     BasicBlock *ThenBB) {
  BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
  BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
  if (!ElseSucc || !CommonDominator)
    return;

  Instruction *Term = CommonDominator->getTerminator();
  for (PHINode &Phi : ElseSucc->phis()) {
    Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
    auto *Inst = dyn_cast<Instruction>(ElseVal);
    if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI))
      continue;

    // isHoistableInstruction() only guarantees that the operands are defined
    // outside ElseBB. They may still be defined in a block that lies between
    // CommonDominator and ElseBB, in which case moving the instruction up
    // would place a use ahead of its definition. Skip such instructions.
    bool OperandsAvailable = true;
    for (const auto &Op : Inst->operands()) {
      const auto *OpI = dyn_cast<Instruction>(Op.get());
      if (OpI && !DT->dominates(OpI, Term)) {
        OperandsAvailable = false;
        break;
      }
    }
    if (!OperandsAvailable)
      continue;

    Inst->removeFromParent();
    Inst->insertInto(CommonDominator, Term->getIterator());
    HoistedValues[Inst] = CommonDominator;
  }
}
/// Build up the general order of nodes, by performing a topological sort of the
/// parent region's nodes, while ensuring that there is no outer cycle node
/// between any two inner cycle nodes.
@ -535,7 +609,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
BasicBlock *Other = Term->getSuccessor(!i);
if (Visited.count(Other) && !Loops.count(Other) &&
!Pred.count(Other) && !Pred.count(P)) {
hoistZeroCostElseBlockPhiValues(Succ, Other);
Pred[Other] = {BoolFalse, std::nullopt};
Pred[P] = {BoolTrue, std::nullopt};
continue;
@ -891,6 +965,44 @@ void StructurizeCFG::setPhiValues() {
AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
}
/// Rewires PHI nodes once zero-cost instructions have been hoisted.
///
/// Only two-input PHIs from AffectedPhis are considered. When one input is a
/// value recorded in HoistedValues and the other input is itself a PHI, the
/// first poison entry of that other PHI is located. If the block the value was
/// hoisted into dominates the incoming block of that poison entry, the poison
/// is replaced by the hoisted value, and the outer PHI's hoisted input is
/// replaced by the other PHI.
void StructurizeCFG::simplifyHoistedPhis() {
  for (WeakVH Handle : AffectedPhis) {
    auto *Phi = dyn_cast_or_null<PHINode>(Handle);
    if (!Phi || Phi->getNumIncomingValues() != 2)
      continue;

    for (unsigned Idx = 0; Idx != 2; ++Idx) {
      Value *Hoisted = Phi->getIncomingValue(Idx);
      auto HoistIt = HoistedValues.find(Hoisted);
      if (HoistIt == HoistedValues.end())
        continue;

      // The opposite input must be a PHI that can absorb the hoisted value.
      Value *OtherV = Phi->getIncomingValue(1 - Idx);
      auto *OtherPhi = dyn_cast<PHINode>(OtherV);
      if (!OtherPhi)
        continue;

      // Find the first poison input of the other PHI, if there is one.
      const unsigned NumOtherInputs = OtherPhi->getNumIncomingValues();
      unsigned PoisonIdx = 0;
      while (PoisonIdx != NumOtherInputs &&
             !isa<PoisonValue>(OtherPhi->getIncomingValue(PoisonIdx)))
        ++PoisonIdx;
      if (PoisonIdx == NumOtherInputs)
        continue;

      // The hoisted value is only usable on that edge if the block it was
      // hoisted into dominates the edge's source block.
      if (!DT->dominates(HoistIt->second,
                         OtherPhi->getIncomingBlock(PoisonIdx)))
        continue;

      OtherPhi->setIncomingValue(PoisonIdx, Hoisted);
      Phi->setIncomingValue(Idx, OtherV);
    }
  }
}
void StructurizeCFG::simplifyAffectedPhis() {
bool Changed;
do {
@ -1283,12 +1395,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
}
/// Run the transformation for each region found
bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
const TargetTransformInfo *TTI) {
if (R->isTopLevelRegion())
return false;
this->DT = DT;
this->TTI = TTI;
Func = R->getEntry()->getParent();
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
@ -1300,6 +1413,7 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
insertConditions(false);
insertConditions(true);
setPhiValues();
simplifyHoistedPhis();
simplifyConditions();
simplifyAffectedPhis();
rebuildSSA();
@ -1349,7 +1463,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
bool Changed = false;
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto &RI = AM.getResult<RegionInfoAnalysis>(F);
TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
UniformityInfo *UI = nullptr;
if (SkipUniformRegions)
UI = &AM.getResult<UniformityInfoAnalysis>(F);
@ -1368,7 +1482,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
continue;
}
Changed |= SCFG.run(R, DT);
Changed |= SCFG.run(R, DT, TTI);
}
if (!Changed)
return PreservedAnalyses::all();

View File

@ -9851,8 +9851,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s6
; CHECK-NEXT: s_cbranch_execz .LBB8_6
; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
; CHECK-NEXT: s_movk_i32 s4, 0xf800
; CHECK-NEXT: s_mov_b32 s5, -1
; CHECK-NEXT: .LBB8_5: ; %memmove_bwd_loop
@ -11167,8 +11167,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; ALIGNED-NEXT: s_andn2_saveexec_b32 s6, s6
; ALIGNED-NEXT: s_cbranch_execz .LBB8_6
; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
; ALIGNED-NEXT: s_movk_i32 s4, 0xf800
; ALIGNED-NEXT: s_mov_b32 s5, -1
; ALIGNED-NEXT: .LBB8_5: ; %memmove_bwd_loop
@ -12381,8 +12381,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024
; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020
; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016
; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0
; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0
; UNROLL3-NEXT: s_waitcnt vmcnt(3)
; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028
; UNROLL3-NEXT: s_waitcnt vmcnt(2)

View File

@ -0,0 +1,180 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
%pair = type { i32, i32 }
; If/else diamond: %if branches to %then when %cond is true and to %else
; otherwise. Both arms extract field 0 of the pair loaded in %if; %else
; additionally adds 1. The expected code keeps the value in v3 from the load,
; through the conditional add, to the store.
define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) {
; GFX900-LABEL: test_extractvalue_then_else:
; GFX900: ; %bb.0: ; %if
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: flat_load_dword v3, v[0:1]
; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX900-NEXT: s_cbranch_execz .LBB0_2
; GFX900-NEXT: ; %bb.1: ; %else
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
; GFX900-NEXT: .LBB0_2: ; %Flow
; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: flat_store_dword v[0:1], v3
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
if:
%load_then = load %pair, ptr %ptr
br i1 %cond, label %then, label %else
then:
%a_then = extractvalue %pair %load_then, 0
br label %merge
else:
%a_else = extractvalue %pair %load_then, 0
%sum_else = add i32 %a_else, 1
br label %merge
merge:
%phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
store i32 %phi, ptr %ptr
ret void
}
; Same diamond as above with the branch targets swapped: %if branches to %else
; when %cond is true and to %then otherwise. The expected code again keeps the
; value in v3 from the load to the store.
define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) {
; GFX900-LABEL: test_extractvalue_else_then:
; GFX900: ; %bb.0: ; %if
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: flat_load_dword v3, v[0:1]
; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX900-NEXT: s_cbranch_execz .LBB1_2
; GFX900-NEXT: ; %bb.1: ; %else
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
; GFX900-NEXT: .LBB1_2: ; %merge
; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: flat_store_dword v[0:1], v3
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
if:
%load_then = load %pair, ptr %ptr
br i1 %cond, label %else, label %then
else:
%a_else = extractvalue %pair %load_then, 0
%sum_else = add i32 %a_else, 1
br label %merge
then:
%a_then = extractvalue %pair %load_then, 0
br label %merge
merge:
%phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
store i32 %phi, ptr %ptr
ret void
}
; Loop whose body contains a nested condition: %loop branches on %cond to %if
; or %else, and %if branches on %cmp to %then or %else. %then extracts field 0
; of the loaded pair and stores the workitem id; %else adds fields 0 and 1. The
; %latch phi merges both results. The expected code loads the pair into
; v[3:4], performs the %else add into v3, and stores v3 in %latch.
define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 {
; GFX900-LABEL: test_loop_with_if:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: s_mov_b64 s[4:5], 0
; GFX900-NEXT: s_movk_i32 s10, 0xfe
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: s_bitcmp1_b32 s2, 0
; GFX900-NEXT: s_cselect_b64 s[2:3], -1, 0
; GFX900-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[2:3]
; GFX900-NEXT: v_mov_b32_e32 v2, s1
; GFX900-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GFX900-NEXT: v_mov_b32_e32 v1, s0
; GFX900-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v3
; GFX900-NEXT: s_branch .LBB2_2
; GFX900-NEXT: .LBB2_1: ; %latch
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_add_u32_e32 v5, 20, v3
; GFX900-NEXT: v_cmp_lt_i32_e32 vcc, s10, v5
; GFX900-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX900-NEXT: flat_store_dword v[1:2], v3
; GFX900-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GFX900-NEXT: s_cbranch_execz .LBB2_8
; GFX900-NEXT: .LBB2_2: ; %loop
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX900-NEXT: flat_load_dwordx2 v[3:4], v[1:2]
; GFX900-NEXT: s_and_b64 vcc, exec, s[0:1]
; GFX900-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX900-NEXT: s_mov_b64 s[6:7], 0
; GFX900-NEXT: s_cbranch_vccnz .LBB2_4
; GFX900-NEXT: ; %bb.3: ; %if
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
; GFX900-NEXT: v_cmp_gt_i32_e32 vcc, 11, v5
; GFX900-NEXT: s_andn2_b64 s[8:9], s[2:3], exec
; GFX900-NEXT: s_and_b64 s[12:13], vcc, exec
; GFX900-NEXT: s_mov_b64 s[6:7], -1
; GFX900-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
; GFX900-NEXT: .LBB2_4: ; %Flow
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
; GFX900-NEXT: s_and_saveexec_b64 s[12:13], s[8:9]
; GFX900-NEXT: s_xor_b64 s[8:9], exec, s[12:13]
; GFX900-NEXT: s_cbranch_execz .LBB2_6
; GFX900-NEXT: ; %bb.5: ; %else
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_add_u32_e32 v3, v3, v4
; GFX900-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; GFX900-NEXT: .LBB2_6: ; %Flow1
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX900-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
; GFX900-NEXT: s_cbranch_execz .LBB2_1
; GFX900-NEXT: ; %bb.7: ; %then
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
; GFX900-NEXT: flat_store_dword v[1:2], v0
; GFX900-NEXT: s_branch .LBB2_1
; GFX900-NEXT: .LBB2_8: ; %end
; GFX900-NEXT: s_endpgm
entry:
%a = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %loop
loop:
%entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ]
%load = load %pair, ptr %ptr
br i1 %cond, label %if, label %else
if:
%cmp = icmp sgt i32 %entry_phi, 10
br i1 %cmp, label %then, label %else
then:
%a_then = extractvalue %pair %load, 0
store i32 %a, ptr %ptr, align 4
br label %latch
else:
%a2 = extractvalue %pair %load, 1
%y = extractvalue %pair %load, 0
%a_else = add i32 %y, %a2
br label %latch
latch:
%a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ]
store i32 %a_test, ptr %ptr
%a15 = add nsw i32 %a_test, 20
%a16 = icmp slt i32 %a15, 255
br i1 %a16, label %loop, label %end
end:
ret void
}

View File

@ -0,0 +1,161 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=structurizecfg < %s | FileCheck %s
%pair = type { i32, i32 }
; If/else diamond where %entry branches to %then when %cond is true and to
; %else otherwise. The expected output places the extractvalue that was in
; %then into the entry block and uses it as the entry-edge input of the Flow
; phi, leaving %then with only its branch.
define void @test_if_then_else(ptr %ptr, i1 %cond) {
; CHECK-LABEL: define void @test_if_then_else(
; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
; CHECK: [[FLOW]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[SUM_ELSE:%.*]], %[[ELSE]] ], [ [[A_THEN]], %[[ENTRY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
; CHECK: [[THEN]]:
; CHECK-NEXT: br label %[[MERGE]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
; CHECK-NEXT: [[SUM_ELSE]] = add i32 [[A_ELSE]], 1
; CHECK-NEXT: br label %[[FLOW]]
; CHECK: [[MERGE]]:
; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4
; CHECK-NEXT: ret void
;
entry:
%load_then = load %pair, ptr %ptr
br i1 %cond, label %then, label %else
then:
%a_then = extractvalue %pair %load_then, 0
br label %merge
else:
%a_else = extractvalue %pair %load_then, 0
%sum_else = add i32 %a_else, 1
br label %merge
merge:
%phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
store i32 %phi, ptr %ptr
ret void
}
; Same diamond with the branch targets swapped: %entry branches to %else when
; %cond is true and to %then otherwise. The expected output keeps the
; extractvalue inside %then and leaves poison on the entry edge of the Flow
; phi, i.e. nothing is hoisted in this ordering.
define void @test_if_else_then(ptr %ptr, i1 %cond) {
; CHECK-LABEL: define void @test_if_else_then(
; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
; CHECK-NEXT: br i1 [[COND_INV]], label %[[THEN:.*]], label %[[FLOW:.*]]
; CHECK: [[THEN]]:
; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
; CHECK-NEXT: br label %[[FLOW]]
; CHECK: [[FLOW]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A_THEN]], %[[THEN]] ], [ poison, %[[ENTRY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[THEN]] ], [ true, %[[ENTRY]] ]
; CHECK-NEXT: br i1 [[TMP1]], label %[[ELSE:.*]], label %[[MERGE:.*]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
; CHECK-NEXT: [[SUM_ELSE:%.*]] = add i32 [[A_ELSE]], 1
; CHECK-NEXT: br label %[[MERGE]]
; CHECK: [[MERGE]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[SUM_ELSE]], %[[ELSE]] ]
; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
; CHECK-NEXT: ret void
;
entry:
%load_then = load %pair, ptr %ptr
br i1 %cond, label %else, label %then
then:
%a_then = extractvalue %pair %load_then, 0
br label %merge
else:
%a_else = extractvalue %pair %load_then, 0
%sum_else = add i32 %a_else, 1
br label %merge
merge:
%phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
store i32 %phi, ptr %ptr
ret void
}
; Loop with a nested condition: %loop branches on %cond to %if or %else, and
; %if branches on %cmp to %then or %else; %latch merges the values from %then
; and %else. The expected output places the field-0 extractvalue that was in
; %then into the loop header right after the load and feeds it into the Flow1
; phi on the edge coming from Flow, so %then only contains its store.
define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 {
; CHECK-LABEL: define amdgpu_kernel void @test_loop_with_if(
; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[I3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I15:%.*]], %[[LATCH:.*]] ]
; CHECK-NEXT: [[LOAD:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0
; CHECK-NEXT: br i1 [[COND]], label %[[IF:.*]], label %[[FLOW:.*]]
; CHECK: [[IF]]:
; CHECK-NEXT: [[I9:%.*]] = icmp sle i32 [[I3]], 10
; CHECK-NEXT: br label %[[FLOW]]
; CHECK: [[FLOW1:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[Y:%.*]], %[[ELSE:.*]] ], [ [[A_THEN]], %[[FLOW]] ]
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ [[TMP2:%.*]], %[[FLOW]] ]
; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[LATCH]]
; CHECK: [[THEN]]:
; CHECK-NEXT: store i32 [[I]], ptr [[PTR]], align 4
; CHECK-NEXT: br label %[[LATCH]]
; CHECK: [[FLOW]]:
; CHECK-NEXT: [[TMP2]] = phi i1 [ true, %[[IF]] ], [ false, %[[LOOP]] ]
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[I9]], %[[IF]] ], [ [[COND_INV]], %[[LOOP]] ]
; CHECK-NEXT: br i1 [[TMP3]], label %[[ELSE]], label %[[FLOW1]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[I2:%.*]] = extractvalue [[PAIR]] [[LOAD]], 1
; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0
; CHECK-NEXT: [[Y]] = add i32 [[A_ELSE]], [[I2]]
; CHECK-NEXT: br label %[[FLOW1]]
; CHECK: [[LATCH]]:
; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4
; CHECK-NEXT: [[I15]] = add nsw i32 [[TMP0]], 20
; CHECK-NEXT: [[I16:%.*]] = icmp sge i32 [[I15]], 255
; CHECK-NEXT: br i1 [[I16]], label %[[END:.*]], label %[[LOOP]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
entry:
%a = tail call i32 @llvm.amdgcn.workitem.id.x()
br label %loop
loop:
%entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ]
%load = load %pair, ptr %ptr
br i1 %cond, label %if, label %else
if:
%cmp = icmp sgt i32 %entry_phi, 10
br i1 %cmp, label %then, label %else
then:
%a_then = extractvalue %pair %load, 0
store i32 %a, ptr %ptr, align 4
br label %latch
else:
%a2 = extractvalue %pair %load, 1
%y = extractvalue %pair %load, 0
%a_else = add i32 %y, %a2
br label %latch
latch:
%a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ]
store i32 %a_test, ptr %ptr
%a15 = add nsw i32 %a_test, 20
%a16 = icmp slt i32 %a15, 255
br i1 %a16, label %loop, label %end
end:
ret void
}