Revert "[StructurizeCFG] Hoist and simplify zero-cost incoming else phi values" (#148016)
Reverting to fix buildbot failures.
This commit is contained in:
parent
ce571c90a0
commit
25c3f64105
@ -19,7 +19,6 @@
|
||||
#include "llvm/Analysis/RegionInfo.h"
|
||||
#include "llvm/Analysis/RegionIterator.h"
|
||||
#include "llvm/Analysis/RegionPass.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/Analysis/UniformityAnalysis.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/CFG.h"
|
||||
@ -129,7 +128,6 @@ struct PredInfo {
|
||||
using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
|
||||
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
|
||||
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
|
||||
using Val2BBMap = DenseMap<Value *, BasicBlock *>;
|
||||
|
||||
// A traits type that is intended to be used in graph algorithms. The graph
|
||||
// traits starts at an entry node, and traverses the RegionNodes that are in
|
||||
@ -281,7 +279,7 @@ class StructurizeCFG {
|
||||
ConstantInt *BoolTrue;
|
||||
ConstantInt *BoolFalse;
|
||||
Value *BoolPoison;
|
||||
const TargetTransformInfo *TTI;
|
||||
|
||||
Function *Func;
|
||||
Region *ParentRegion;
|
||||
|
||||
@ -303,12 +301,8 @@ class StructurizeCFG {
|
||||
PredMap LoopPreds;
|
||||
BranchVector LoopConds;
|
||||
|
||||
Val2BBMap HoistedValues;
|
||||
|
||||
RegionNode *PrevNode;
|
||||
|
||||
void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
|
||||
|
||||
void orderNodes();
|
||||
|
||||
void analyzeLoops(RegionNode *N);
|
||||
@ -338,8 +332,6 @@ class StructurizeCFG {
|
||||
|
||||
void simplifyAffectedPhis();
|
||||
|
||||
void simplifyHoistedPhis();
|
||||
|
||||
DebugLoc killTerminator(BasicBlock *BB);
|
||||
|
||||
void changeExit(RegionNode *Node, BasicBlock *NewExit,
|
||||
@ -367,7 +359,7 @@ class StructurizeCFG {
|
||||
|
||||
public:
|
||||
void init(Region *R);
|
||||
bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
|
||||
bool run(Region *R, DominatorTree *DT);
|
||||
bool makeUniformRegion(Region *R, UniformityInfo &UA);
|
||||
};
|
||||
|
||||
@ -393,11 +385,8 @@ public:
|
||||
if (SCFG.makeUniformRegion(R, UA))
|
||||
return false;
|
||||
}
|
||||
Function *F = R->getEntry()->getParent();
|
||||
const TargetTransformInfo *TTI =
|
||||
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
|
||||
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
return SCFG.run(R, DT, TTI);
|
||||
return SCFG.run(R, DT);
|
||||
}
|
||||
|
||||
StringRef getPassName() const override { return "Structurize control flow"; }
|
||||
@ -405,9 +394,7 @@ public:
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
if (SkipUniformRegions)
|
||||
AU.addRequired<UniformityInfoWrapperPass>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
AU.addRequired<DominatorTreeWrapperPass>();
|
||||
AU.addRequired<TargetTransformInfoWrapperPass>();
|
||||
|
||||
AU.addPreserved<DominatorTreeWrapperPass>();
|
||||
RegionPass::getAnalysisUsage(AU);
|
||||
@ -416,34 +403,6 @@ public:
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
/// Checks whether an instruction is zero cost instruction and checks if the
|
||||
/// operands are from different BB. If so, this instruction can be coalesced
|
||||
/// if its hoisted to predecessor block. So, this returns true.
|
||||
static bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
|
||||
const TargetTransformInfo *TTI) {
|
||||
if (I->getParent() != BB)
|
||||
return false;
|
||||
|
||||
// If the instruction is not a zero cost instruction, return false.
|
||||
auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
|
||||
InstructionCost::CostType CostVal =
|
||||
Cost.isValid()
|
||||
? Cost.getValue()
|
||||
: (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
|
||||
if (CostVal != 0)
|
||||
return false;
|
||||
|
||||
// Check if any operands are instructions defined in the same block.
|
||||
for (auto &Op : I->operands()) {
|
||||
if (auto *OpI = dyn_cast<Instruction>(Op)) {
|
||||
if (OpI->getParent() == BB)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
char StructurizeCFGLegacyPass::ID = 0;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
|
||||
@ -454,39 +413,6 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
|
||||
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
|
||||
"Structurize the CFG", false, false)
|
||||
|
||||
/// Structurization can introduce unnecessary VGPR copies due to register
|
||||
/// coalescing interference. For example, if the Else block has a zero-cost
|
||||
/// instruction and the Then block modifies the VGPR value, only one value is
|
||||
/// live at a time in merge block before structurization. After structurization,
|
||||
/// the coalescer may incorrectly treat the Then value as live in the Else block
|
||||
/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
|
||||
///
|
||||
/// This function examines phi nodes whose incoming values are zero-cost
|
||||
/// instructions in the Else block. It identifies such values that can be safely
|
||||
/// hoisted and moves them to the nearest common dominator of Then and Else
|
||||
/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
|
||||
/// value to poison phi nodes along the if→flow edge, aiding register coalescing
|
||||
/// and minimizing unnecessary live ranges.
|
||||
void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
|
||||
BasicBlock *ThenBB) {
|
||||
|
||||
BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
|
||||
BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
|
||||
|
||||
if (!ElseSucc || !CommonDominator)
|
||||
return;
|
||||
Instruction *Term = CommonDominator->getTerminator();
|
||||
for (PHINode &Phi : ElseSucc->phis()) {
|
||||
Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
|
||||
auto *Inst = dyn_cast<Instruction>(ElseVal);
|
||||
if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI))
|
||||
continue;
|
||||
Inst->removeFromParent();
|
||||
Inst->insertInto(CommonDominator, Term->getIterator());
|
||||
HoistedValues[Inst] = CommonDominator;
|
||||
}
|
||||
}
|
||||
|
||||
/// Build up the general order of nodes, by performing a topological sort of the
|
||||
/// parent region's nodes, while ensuring that there is no outer cycle node
|
||||
/// between any two inner cycle nodes.
|
||||
@ -609,7 +535,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
|
||||
BasicBlock *Other = Term->getSuccessor(!i);
|
||||
if (Visited.count(Other) && !Loops.count(Other) &&
|
||||
!Pred.count(Other) && !Pred.count(P)) {
|
||||
hoistZeroCostElseBlockPhiValues(Succ, Other);
|
||||
|
||||
Pred[Other] = {BoolFalse, std::nullopt};
|
||||
Pred[P] = {BoolTrue, std::nullopt};
|
||||
continue;
|
||||
@ -965,44 +891,6 @@ void StructurizeCFG::setPhiValues() {
|
||||
AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
|
||||
}
|
||||
|
||||
/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
|
||||
/// entries on Flow nodes with the appropriate hoisted values
|
||||
void StructurizeCFG::simplifyHoistedPhis() {
|
||||
for (WeakVH VH : AffectedPhis) {
|
||||
PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
|
||||
if (!Phi || Phi->getNumIncomingValues() != 2)
|
||||
continue;
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
Value *V = Phi->getIncomingValue(i);
|
||||
auto BBIt = HoistedValues.find(V);
|
||||
|
||||
if (BBIt == HoistedValues.end())
|
||||
continue;
|
||||
|
||||
Value *OtherV = Phi->getIncomingValue(!i);
|
||||
PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
|
||||
if (!OtherPhi)
|
||||
continue;
|
||||
|
||||
int PoisonValBBIdx = -1;
|
||||
for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
|
||||
if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
|
||||
continue;
|
||||
PoisonValBBIdx = i;
|
||||
break;
|
||||
}
|
||||
if (PoisonValBBIdx == -1 ||
|
||||
!DT->dominates(BBIt->second,
|
||||
OtherPhi->getIncomingBlock(PoisonValBBIdx)))
|
||||
continue;
|
||||
|
||||
OtherPhi->setIncomingValue(PoisonValBBIdx, V);
|
||||
Phi->setIncomingValue(i, OtherV);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void StructurizeCFG::simplifyAffectedPhis() {
|
||||
bool Changed;
|
||||
do {
|
||||
@ -1395,13 +1283,12 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
|
||||
}
|
||||
|
||||
/// Run the transformation for each region found
|
||||
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
|
||||
const TargetTransformInfo *TTI) {
|
||||
bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
|
||||
if (R->isTopLevelRegion())
|
||||
return false;
|
||||
|
||||
this->DT = DT;
|
||||
this->TTI = TTI;
|
||||
|
||||
Func = R->getEntry()->getParent();
|
||||
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
|
||||
|
||||
@ -1413,7 +1300,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
|
||||
insertConditions(false);
|
||||
insertConditions(true);
|
||||
setPhiValues();
|
||||
simplifyHoistedPhis();
|
||||
simplifyConditions();
|
||||
simplifyAffectedPhis();
|
||||
rebuildSSA();
|
||||
@ -1463,7 +1349,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
|
||||
bool Changed = false;
|
||||
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
|
||||
auto &RI = AM.getResult<RegionInfoAnalysis>(F);
|
||||
TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
|
||||
|
||||
UniformityInfo *UI = nullptr;
|
||||
if (SkipUniformRegions)
|
||||
UI = &AM.getResult<UniformityInfoAnalysis>(F);
|
||||
@ -1482,7 +1368,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
|
||||
continue;
|
||||
}
|
||||
|
||||
Changed |= SCFG.run(R, DT, TTI);
|
||||
Changed |= SCFG.run(R, DT);
|
||||
}
|
||||
if (!Changed)
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
@ -9851,8 +9851,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
|
||||
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s6
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB8_6
|
||||
; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
|
||||
; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
|
||||
; CHECK-NEXT: s_movk_i32 s4, 0xf800
|
||||
; CHECK-NEXT: s_mov_b32 s5, -1
|
||||
; CHECK-NEXT: .LBB8_5: ; %memmove_bwd_loop
|
||||
@ -11167,8 +11167,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
|
||||
; ALIGNED-NEXT: s_andn2_saveexec_b32 s6, s6
|
||||
; ALIGNED-NEXT: s_cbranch_execz .LBB8_6
|
||||
; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
|
||||
; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
|
||||
; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
|
||||
; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
|
||||
; ALIGNED-NEXT: s_movk_i32 s4, 0xf800
|
||||
; ALIGNED-NEXT: s_mov_b32 s5, -1
|
||||
; ALIGNED-NEXT: .LBB8_5: ; %memmove_bwd_loop
|
||||
@ -12381,8 +12381,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
|
||||
; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024
|
||||
; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020
|
||||
; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016
|
||||
; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
|
||||
; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0
|
||||
; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
|
||||
; UNROLL3-NEXT: s_waitcnt vmcnt(3)
|
||||
; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028
|
||||
; UNROLL3-NEXT: s_waitcnt vmcnt(2)
|
||||
|
||||
@ -1,180 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
|
||||
|
||||
|
||||
%pair = type { i32, i32 }
|
||||
|
||||
define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) {
|
||||
; GFX900-LABEL: test_extractvalue_then_else:
|
||||
; GFX900: ; %bb.0: ; %if
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: flat_load_dword v3, v[0:1]
|
||||
; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
|
||||
; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
|
||||
; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
|
||||
; GFX900-NEXT: s_cbranch_execz .LBB0_2
|
||||
; GFX900-NEXT: ; %bb.1: ; %else
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
|
||||
; GFX900-NEXT: .LBB0_2: ; %Flow
|
||||
; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
|
||||
; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: flat_store_dword v[0:1], v3
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
||||
if:
|
||||
%load_then = load %pair, ptr %ptr
|
||||
br i1 %cond, label %then, label %else
|
||||
|
||||
then:
|
||||
%a_then = extractvalue %pair %load_then, 0
|
||||
br label %merge
|
||||
|
||||
else:
|
||||
%a_else = extractvalue %pair %load_then, 0
|
||||
%sum_else = add i32 %a_else, 1
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
%phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
|
||||
store i32 %phi, ptr %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) {
|
||||
; GFX900-LABEL: test_extractvalue_else_then:
|
||||
; GFX900: ; %bb.0: ; %if
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: flat_load_dword v3, v[0:1]
|
||||
; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
|
||||
; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
|
||||
; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
|
||||
; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
|
||||
; GFX900-NEXT: s_cbranch_execz .LBB1_2
|
||||
; GFX900-NEXT: ; %bb.1: ; %else
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
|
||||
; GFX900-NEXT: .LBB1_2: ; %merge
|
||||
; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: flat_store_dword v[0:1], v3
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: s_setpc_b64 s[30:31]
|
||||
if:
|
||||
%load_then = load %pair, ptr %ptr
|
||||
br i1 %cond, label %else, label %then
|
||||
|
||||
else:
|
||||
%a_else = extractvalue %pair %load_then, 0
|
||||
%sum_else = add i32 %a_else, 1
|
||||
br label %merge
|
||||
|
||||
then:
|
||||
%a_then = extractvalue %pair %load_then, 0
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
%phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
|
||||
store i32 %phi, ptr %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 {
|
||||
; GFX900-LABEL: test_loop_with_if:
|
||||
; GFX900: ; %bb.0: ; %entry
|
||||
; GFX900-NEXT: s_load_dword s2, s[4:5], 0x2c
|
||||
; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
|
||||
; GFX900-NEXT: v_mov_b32_e32 v5, 0
|
||||
; GFX900-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX900-NEXT: s_movk_i32 s10, 0xfe
|
||||
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX900-NEXT: s_bitcmp1_b32 s2, 0
|
||||
; GFX900-NEXT: s_cselect_b64 s[2:3], -1, 0
|
||||
; GFX900-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[2:3]
|
||||
; GFX900-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX900-NEXT: s_xor_b64 s[2:3], s[2:3], -1
|
||||
; GFX900-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX900-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v3
|
||||
; GFX900-NEXT: s_branch .LBB2_2
|
||||
; GFX900-NEXT: .LBB2_1: ; %latch
|
||||
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: v_add_u32_e32 v5, 20, v3
|
||||
; GFX900-NEXT: v_cmp_lt_i32_e32 vcc, s10, v5
|
||||
; GFX900-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX900-NEXT: flat_store_dword v[1:2], v3
|
||||
; GFX900-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX900-NEXT: s_cbranch_execz .LBB2_8
|
||||
; GFX900-NEXT: .LBB2_2: ; %loop
|
||||
; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX900-NEXT: flat_load_dwordx2 v[3:4], v[1:2]
|
||||
; GFX900-NEXT: s_and_b64 vcc, exec, s[0:1]
|
||||
; GFX900-NEXT: s_mov_b64 s[8:9], s[2:3]
|
||||
; GFX900-NEXT: s_mov_b64 s[6:7], 0
|
||||
; GFX900-NEXT: s_cbranch_vccnz .LBB2_4
|
||||
; GFX900-NEXT: ; %bb.3: ; %if
|
||||
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX900-NEXT: v_cmp_gt_i32_e32 vcc, 11, v5
|
||||
; GFX900-NEXT: s_andn2_b64 s[8:9], s[2:3], exec
|
||||
; GFX900-NEXT: s_and_b64 s[12:13], vcc, exec
|
||||
; GFX900-NEXT: s_mov_b64 s[6:7], -1
|
||||
; GFX900-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
|
||||
; GFX900-NEXT: .LBB2_4: ; %Flow
|
||||
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX900-NEXT: s_and_saveexec_b64 s[12:13], s[8:9]
|
||||
; GFX900-NEXT: s_xor_b64 s[8:9], exec, s[12:13]
|
||||
; GFX900-NEXT: s_cbranch_execz .LBB2_6
|
||||
; GFX900-NEXT: ; %bb.5: ; %else
|
||||
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX900-NEXT: v_add_u32_e32 v3, v3, v4
|
||||
; GFX900-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
|
||||
; GFX900-NEXT: .LBB2_6: ; %Flow1
|
||||
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX900-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
|
||||
; GFX900-NEXT: s_cbranch_execz .LBB2_1
|
||||
; GFX900-NEXT: ; %bb.7: ; %then
|
||||
; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
|
||||
; GFX900-NEXT: flat_store_dword v[1:2], v0
|
||||
; GFX900-NEXT: s_branch .LBB2_1
|
||||
; GFX900-NEXT: .LBB2_8: ; %end
|
||||
; GFX900-NEXT: s_endpgm
|
||||
entry:
|
||||
%a = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ]
|
||||
%load = load %pair, ptr %ptr
|
||||
br i1 %cond, label %if, label %else
|
||||
|
||||
if:
|
||||
%cmp = icmp sgt i32 %entry_phi, 10
|
||||
br i1 %cmp, label %then, label %else
|
||||
|
||||
then:
|
||||
%a_then = extractvalue %pair %load, 0
|
||||
store i32 %a, ptr %ptr, align 4
|
||||
br label %latch
|
||||
|
||||
else:
|
||||
%a2 = extractvalue %pair %load, 1
|
||||
%y = extractvalue %pair %load, 0
|
||||
%a_else = add i32 %y, %a2
|
||||
br label %latch
|
||||
|
||||
latch:
|
||||
%a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ]
|
||||
store i32 %a_test, ptr %ptr
|
||||
%a15 = add nsw i32 %a_test, 20
|
||||
%a16 = icmp slt i32 %a15, 255
|
||||
br i1 %a16, label %loop, label %end
|
||||
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
@ -1,161 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -S -passes=structurizecfg < %s | FileCheck %s
|
||||
|
||||
|
||||
%pair = type { i32, i32 }
|
||||
define void @test_if_then_else(ptr %ptr, i1 %cond) {
|
||||
; CHECK-LABEL: define void @test_if_then_else(
|
||||
; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
|
||||
; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
|
||||
; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
|
||||
; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
|
||||
; CHECK: [[FLOW]]:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[SUM_ELSE:%.*]], %[[ELSE]] ], [ [[A_THEN]], %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
|
||||
; CHECK: [[THEN]]:
|
||||
; CHECK-NEXT: br label %[[MERGE]]
|
||||
; CHECK: [[ELSE]]:
|
||||
; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
|
||||
; CHECK-NEXT: [[SUM_ELSE]] = add i32 [[A_ELSE]], 1
|
||||
; CHECK-NEXT: br label %[[FLOW]]
|
||||
; CHECK: [[MERGE]]:
|
||||
; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%load_then = load %pair, ptr %ptr
|
||||
br i1 %cond, label %then, label %else
|
||||
|
||||
then:
|
||||
%a_then = extractvalue %pair %load_then, 0
|
||||
br label %merge
|
||||
|
||||
else:
|
||||
%a_else = extractvalue %pair %load_then, 0
|
||||
%sum_else = add i32 %a_else, 1
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
%phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
|
||||
store i32 %phi, ptr %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_if_else_then(ptr %ptr, i1 %cond) {
|
||||
; CHECK-LABEL: define void @test_if_else_then(
|
||||
; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
|
||||
; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
|
||||
; CHECK-NEXT: br i1 [[COND_INV]], label %[[THEN:.*]], label %[[FLOW:.*]]
|
||||
; CHECK: [[THEN]]:
|
||||
; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
|
||||
; CHECK-NEXT: br label %[[FLOW]]
|
||||
; CHECK: [[FLOW]]:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[A_THEN]], %[[THEN]] ], [ poison, %[[ENTRY]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[THEN]] ], [ true, %[[ENTRY]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label %[[ELSE:.*]], label %[[MERGE:.*]]
|
||||
; CHECK: [[ELSE]]:
|
||||
; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
|
||||
; CHECK-NEXT: [[SUM_ELSE:%.*]] = add i32 [[A_ELSE]], 1
|
||||
; CHECK-NEXT: br label %[[MERGE]]
|
||||
; CHECK: [[MERGE]]:
|
||||
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP0]], %[[FLOW]] ], [ [[SUM_ELSE]], %[[ELSE]] ]
|
||||
; CHECK-NEXT: store i32 [[PHI]], ptr [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%load_then = load %pair, ptr %ptr
|
||||
br i1 %cond, label %else, label %then
|
||||
|
||||
then:
|
||||
%a_then = extractvalue %pair %load_then, 0
|
||||
br label %merge
|
||||
|
||||
else:
|
||||
%a_else = extractvalue %pair %load_then, 0
|
||||
%sum_else = add i32 %a_else, 1
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
%phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
|
||||
store i32 %phi, ptr %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @test_loop_with_if(
|
||||
; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
|
||||
; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
; CHECK-NEXT: br label %[[LOOP:.*]]
|
||||
; CHECK: [[LOOP]]:
|
||||
; CHECK-NEXT: [[I3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I15:%.*]], %[[LATCH:.*]] ]
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
|
||||
; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0
|
||||
; CHECK-NEXT: br i1 [[COND]], label %[[IF:.*]], label %[[FLOW:.*]]
|
||||
; CHECK: [[IF]]:
|
||||
; CHECK-NEXT: [[I9:%.*]] = icmp sle i32 [[I3]], 10
|
||||
; CHECK-NEXT: br label %[[FLOW]]
|
||||
; CHECK: [[FLOW1:.*]]:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[Y:%.*]], %[[ELSE:.*]] ], [ [[A_THEN]], %[[FLOW]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ [[TMP2:%.*]], %[[FLOW]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[LATCH]]
|
||||
; CHECK: [[THEN]]:
|
||||
; CHECK-NEXT: store i32 [[I]], ptr [[PTR]], align 4
|
||||
; CHECK-NEXT: br label %[[LATCH]]
|
||||
; CHECK: [[FLOW]]:
|
||||
; CHECK-NEXT: [[TMP2]] = phi i1 [ true, %[[IF]] ], [ false, %[[LOOP]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[I9]], %[[IF]] ], [ [[COND_INV]], %[[LOOP]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label %[[ELSE]], label %[[FLOW1]]
|
||||
; CHECK: [[ELSE]]:
|
||||
; CHECK-NEXT: [[I2:%.*]] = extractvalue [[PAIR]] [[LOAD]], 1
|
||||
; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD]], 0
|
||||
; CHECK-NEXT: [[Y]] = add i32 [[A_ELSE]], [[I2]]
|
||||
; CHECK-NEXT: br label %[[FLOW1]]
|
||||
; CHECK: [[LATCH]]:
|
||||
; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4
|
||||
; CHECK-NEXT: [[I15]] = add nsw i32 [[TMP0]], 20
|
||||
; CHECK-NEXT: [[I16:%.*]] = icmp sge i32 [[I15]], 255
|
||||
; CHECK-NEXT: br i1 [[I16]], label %[[END:.*]], label %[[LOOP]]
|
||||
; CHECK: [[END]]:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
%a = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ]
|
||||
%load = load %pair, ptr %ptr
|
||||
br i1 %cond, label %if, label %else
|
||||
|
||||
if:
|
||||
%cmp = icmp sgt i32 %entry_phi, 10
|
||||
br i1 %cmp, label %then, label %else
|
||||
|
||||
then:
|
||||
%a_then = extractvalue %pair %load, 0
|
||||
store i32 %a, ptr %ptr, align 4
|
||||
br label %latch
|
||||
|
||||
else:
|
||||
%a2 = extractvalue %pair %load, 1
|
||||
%y = extractvalue %pair %load, 0
|
||||
%a_else = add i32 %y, %a2
|
||||
br label %latch
|
||||
|
||||
latch:
|
||||
%a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ]
|
||||
store i32 %a_test, ptr %ptr
|
||||
%a15 = add nsw i32 %a_test, 20
|
||||
%a16 = icmp slt i32 %a15, 255
|
||||
br i1 %a16, label %loop, label %end
|
||||
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user