From fdb16234505805a028f04397bca18b1bee4a6ab0 Mon Sep 17 00:00:00 2001 From: Dark Steve Date: Thu, 29 Jan 2026 22:23:00 +0530 Subject: [PATCH] [AMDGPU] Teach SILateBranchLowering pass to preserve MachineLoopInfo (#178276) When splitting blocks inside loops due to SI_EARLY_TERMINATE_SCC0 handling, add the split block to the loop to keep MachineLoopInfo valid. --- .../Target/AMDGPU/SILateBranchLowering.cpp | 31 +++++++++++----- ...ate-branch-lowering-preserve-loop-info.mir | 36 +++++++++++++++++++ 2 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp index d6f175e67ea4..83cf457dfac1 100644 --- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachinePassManager.h" #include "llvm/InitializePasses.h" @@ -32,6 +33,7 @@ private: const SIInstrInfo *TII; const SIRegisterInfo *TRI; MachineDominatorTree *MDT; + MachineLoopInfo *MLI; const AMDGPU::LaneMaskConstants &LMC; void expandChainCall(MachineInstr &MI, const GCNSubtarget &ST, @@ -39,9 +41,10 @@ private: void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock); public: - SILateBranchLowering(const GCNSubtarget &ST, MachineDominatorTree *MDT) + SILateBranchLowering(const GCNSubtarget &ST, MachineDominatorTree *MDT, + MachineLoopInfo *MLI) : ST(ST), TII(ST.getInstrInfo()), TRI(&TII->getRegisterInfo()), MDT(MDT), - LMC(AMDGPU::LaneMaskConstants::get(ST)) {} + MLI(MLI), LMC(AMDGPU::LaneMaskConstants::get(ST)) {} bool run(MachineFunction &MF); }; @@ -54,7 +57,9 @@ public: bool runOnMachineFunction(MachineFunction &MF) override { const GCNSubtarget &ST = MF.getSubtarget(); auto *MDT = &getAnalysis().getDomTree(); - return SILateBranchLowering(ST, MDT).run(MF); + auto *MLIWP = getAnalysisIfAvailable(); + MachineLoopInfo *MLI = MLIWP ? &MLIWP->getLI() : nullptr; + return SILateBranchLowering(ST, MDT, MLI).run(MF); } StringRef getPassName() const override { @@ -64,6 +69,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -117,7 +123,7 @@ static void generateEndPgm(MachineBasicBlock &MBB, } static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, - MachineDominatorTree *MDT) { + MachineDominatorTree *MDT, MachineLoopInfo *MLI) { MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true); // Update dominator tree @@ -129,6 +135,12 @@ static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, } DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB}); MDT->applyUpdates(DTUpdates); + + // Update loop info if available + if (MLI) { + if (MachineLoop *Loop = MLI->getLoopFor(&MBB)) + Loop->addBasicBlockToLoop(SplitBB, *MLI); + } } static void copyOpWithoutRegFlags(MachineInstrBuilder &MIB, @@ -199,7 +211,7 @@ void SILateBranchLowering::earlyTerm(MachineInstr &MI, auto Next = std::next(MI.getIterator()); if (Next != MBB.end() && !Next->isTerminator()) - splitBlock(MBB, *BranchMI, MDT); + splitBlock(MBB, *BranchMI, MDT, MLI); MBB.addSuccessor(EarlyExitBlock); MDT->insertEdge(&MBB, EarlyExitBlock); @@ -210,11 +222,14 @@ llvm::SILateBranchLoweringPass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { const GCNSubtarget &ST = MF.getSubtarget(); auto *MDT = &MFAM.getResult(MF); - if (!SILateBranchLowering(ST, MDT).run(MF)) + auto *MLI = MFAM.getCachedResult(MF); + if (!SILateBranchLowering(ST, MDT, MLI).run(MF)) return PreservedAnalyses::all(); - return getMachineFunctionPassPreservedAnalyses() - .preserve(); + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserve(); + PA.preserve(); + return PA; } bool SILateBranchLowering::run(MachineFunction &MF) { diff --git a/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir new file mode 100644 index 000000000000..b5c9319f809c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-late-branch-lowering-preserve-loop-info.mir @@ -0,0 +1,36 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require,si-late-branch-lowering,print" -debug-pass-manager -filetype=null %s 2>&1 | FileCheck %s + +# Test that MachineLoopInfo is preserved when splitting a block inside a loop +# due to early termination handling. + +# CHECK: Running analysis: MachineLoopAnalysis on early_term_in_loop +# CHECK-NEXT: Running analysis: MachineDominatorTreeAnalysis on early_term_in_loop +# CHECK-NEXT: Running pass: SILateBranchLoweringPass on early_term_in_loop +# CHECK-NEXT: Running pass: MachineLoopPrinterPass on early_term_in_loop +# CHECK-NEXT: Machine loop info for machine function 'early_term_in_loop': +# CHECK-NOT: Running analysis: MachineLoopAnalysis on early_term_in_loop +# CHECK-NEXT: Loop at depth 1 containing: %bb.1
,%bb.4 + +--- +name: early_term_in_loop +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1 + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + + ; Loop header contains SI_EARLY_TERMINATE_SCC0 followed by more instructions. + ; This triggers block splitting. Both bb.1 and bb.4 must remain in the loop. + bb.1: + liveins: $vgpr0 + successors: %bb.1, %bb.2 + S_CMP_LG_U32 0, 1, implicit-def $scc + SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec + $vgpr1 = V_MOV_B32_e32 1, implicit $exec + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + liveins: $vgpr0, $vgpr1 + EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec + S_ENDPGM 0 +...