[AMDGPU] Teach SILateBranchLowering pass to preserve MachineLoopInfo (#178276)

When splitting blocks inside loops due to SI_EARLY_TERMINATE_SCC0
handling, add the split block to the loop to keep MachineLoopInfo valid.
This commit is contained in:
Dark Steve 2026-01-29 22:23:00 +05:30 committed by GitHub
parent 7c07cb6542
commit fdb1623450
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 59 additions and 8 deletions

View File

@ -17,6 +17,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/InitializePasses.h"
@ -32,6 +33,7 @@ private:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineDominatorTree *MDT;
MachineLoopInfo *MLI;
const AMDGPU::LaneMaskConstants &LMC;
void expandChainCall(MachineInstr &MI, const GCNSubtarget &ST,
@ -39,9 +41,10 @@ private:
void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
public:
SILateBranchLowering(const GCNSubtarget &ST, MachineDominatorTree *MDT)
SILateBranchLowering(const GCNSubtarget &ST, MachineDominatorTree *MDT,
MachineLoopInfo *MLI)
: ST(ST), TII(ST.getInstrInfo()), TRI(&TII->getRegisterInfo()), MDT(MDT),
LMC(AMDGPU::LaneMaskConstants::get(ST)) {}
MLI(MLI), LMC(AMDGPU::LaneMaskConstants::get(ST)) {}
bool run(MachineFunction &MF);
};
@ -54,7 +57,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
return SILateBranchLowering(ST, MDT).run(MF);
auto *MLIWP = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
MachineLoopInfo *MLI = MLIWP ? &MLIWP->getLI() : nullptr;
return SILateBranchLowering(ST, MDT, MLI).run(MF);
}
StringRef getPassName() const override {
@ -64,6 +69,7 @@ public:
// Declares the analyses this legacy-pass-manager pass requires and preserves.
void getAnalysisUsage(AnalysisUsage &AU) const override {
// The machine dominator tree is both required and marked as preserved.
AU.addRequired<MachineDominatorTreeWrapperPass>();
AU.addPreserved<MachineDominatorTreeWrapperPass>();
// MachineLoopInfo is marked as preserved but is not required here;
// runOnMachineFunction fetches it with getAnalysisIfAvailable.
AU.addPreserved<MachineLoopInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@ -117,7 +123,7 @@ static void generateEndPgm(MachineBasicBlock &MBB,
}
static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
MachineDominatorTree *MDT) {
MachineDominatorTree *MDT, MachineLoopInfo *MLI) {
MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true);
// Update dominator tree
@ -129,6 +135,12 @@ static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
}
DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
MDT->applyUpdates(DTUpdates);
// Update loop info if available
if (MLI) {
if (MachineLoop *Loop = MLI->getLoopFor(&MBB))
Loop->addBasicBlockToLoop(SplitBB, *MLI);
}
}
static void copyOpWithoutRegFlags(MachineInstrBuilder &MIB,
@ -199,7 +211,7 @@ void SILateBranchLowering::earlyTerm(MachineInstr &MI,
auto Next = std::next(MI.getIterator());
if (Next != MBB.end() && !Next->isTerminator())
splitBlock(MBB, *BranchMI, MDT);
splitBlock(MBB, *BranchMI, MDT, MLI);
MBB.addSuccessor(EarlyExitBlock);
MDT->insertEdge(&MBB, EarlyExitBlock);
@ -210,11 +222,14 @@ llvm::SILateBranchLoweringPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
auto *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
if (!SILateBranchLowering(ST, MDT).run(MF))
auto *MLI = MFAM.getCachedResult<MachineLoopAnalysis>(MF);
if (!SILateBranchLowering(ST, MDT, MLI).run(MF))
return PreservedAnalyses::all();
return getMachineFunctionPassPreservedAnalyses()
.preserve<MachineDominatorTreeAnalysis>();
auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserve<MachineDominatorTreeAnalysis>();
PA.preserve<MachineLoopAnalysis>();
return PA;
}
bool SILateBranchLowering::run(MachineFunction &MF) {

View File

@ -0,0 +1,36 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<machine-loops>,si-late-branch-lowering,print<machine-loops>" -debug-pass-manager -filetype=null %s 2>&1 | FileCheck %s
# Test that MachineLoopInfo is preserved when splitting a block inside a loop
# due to early termination handling.
# CHECK: Running analysis: MachineLoopAnalysis on early_term_in_loop
# CHECK-NEXT: Running analysis: MachineDominatorTreeAnalysis on early_term_in_loop
# CHECK-NEXT: Running pass: SILateBranchLoweringPass on early_term_in_loop
# CHECK-NEXT: Running pass: MachineLoopPrinterPass on early_term_in_loop
# CHECK-NEXT: Machine loop info for machine function 'early_term_in_loop':
# CHECK-NOT: Running analysis: MachineLoopAnalysis on early_term_in_loop
# CHECK-NEXT: Loop at depth 1 containing: %bb.1<header><exiting>,%bb.4<latch><exiting>
---
name: early_term_in_loop
tracksRegLiveness: true
body: |
bb.0:
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
; The loop header (bb.1) contains SI_EARLY_TERMINATE_SCC0 followed by more
; instructions, which triggers block splitting. Both bb.1 and the newly created
; split block (printed as bb.4 in the output) must remain in the loop.
bb.1:
liveins: $vgpr0
successors: %bb.1, %bb.2
S_CMP_LG_U32 0, 1, implicit-def $scc
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
$vgpr1 = V_MOV_B32_e32 1, implicit $exec
S_CBRANCH_SCC1 %bb.1, implicit $scc
bb.2:
liveins: $vgpr0, $vgpr1
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
S_ENDPGM 0
...