From 20e32ececd08347a8f5d169487d824de30990bae Mon Sep 17 00:00:00 2001 From: Dark Steve Date: Tue, 17 Feb 2026 20:57:47 +0530 Subject: [PATCH] [AMDGPU] Teach SIPreEmitPeephole pass to preserve MachineLoopInfo (#178868) `optimizeVccBranch` in `SIPreEmitPeephole` performs CFG modifications (i.e., converting conditional branches to unconditional ones) which can invalidate `MachineLoopInfo`. This patch incrementally updates `MLI` before each `removeSuccessor()` call; if the edge being removed is the last back-edge to a loop header, the loop is removed from `MLI` and destroyed (its blocks and subloops are reparented to the parent loop, or become top-level when there is no parent loop). --- llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 79 +++++++++++++++++-- ...i-pre-emit-peephole-preserve-loop-info.mir | 52 ++++++++++++ 2 files changed, 125 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index 73aab4e721d1..411c14fef63b 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/BranchProbability.h" @@ -37,8 +38,11 @@ class SIPreEmitPeephole { private: const SIInstrInfo *TII = nullptr; const SIRegisterInfo *TRI = nullptr; + MachineLoopInfo *MLI = nullptr; bool optimizeVccBranch(MachineInstr &MI) const; + void updateMLIBeforeRemovingEdge(MachineBasicBlock *From, + MachineBasicBlock *To) const; bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const; bool getBlockDestinations(MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB, @@ -78,7 +82,7 @@ private: bool IsHiBits, const MachineOperand &SrcMO); public: - bool 
run(MachineFunction &MF); + bool run(MachineFunction &MF, MachineLoopInfo *MLI); }; class SIPreEmitPeepholeLegacy : public MachineFunctionPass { @@ -87,8 +91,16 @@ public: SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addUsedIfAvailable(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override { - return SIPreEmitPeephole().run(MF); + auto *MLIWrapper = getAnalysisIfAvailable(); + MachineLoopInfo *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr; + return SIPreEmitPeephole().run(MF, MLI); } }; @@ -101,6 +113,51 @@ char SIPreEmitPeepholeLegacy::ID = 0; char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID; +void SIPreEmitPeephole::updateMLIBeforeRemovingEdge( + MachineBasicBlock *From, MachineBasicBlock *To) const { + if (!MLI) + return; + + // Only handle back-edges: To must be a loop header with From inside the loop. + MachineLoop *Loop = MLI->getLoopFor(To); + if (!Loop || Loop->getHeader() != To || !Loop->contains(From)) + return; + + // Count back-edges + unsigned BackEdgeCount = 0; + for (MachineBasicBlock *Pred : To->predecessors()) { + if (Loop->contains(Pred)) + BackEdgeCount++; + } + + if (BackEdgeCount > 1) + return; + + MachineLoop *ParentLoop = Loop->getParentLoop(); + + // Re-map blocks directly owned by this loop to the parent. + for (MachineBasicBlock *BB : Loop->blocks()) { + if (MLI->getLoopFor(BB) == Loop) + MLI->changeLoopFor(BB, ParentLoop); + } + + // Reparent all child loops. 
+ while (!Loop->isInnermost()) { + MachineLoop *Child = Loop->removeChildLoop(std::prev(Loop->end())); + if (ParentLoop) + ParentLoop->addChildLoop(Child); + else + MLI->addTopLevelLoop(Child); + } + + if (ParentLoop) + ParentLoop->removeChildLoop(Loop); + else + MLI->removeLoop(llvm::find(*MLI, Loop)); + + MLI->destroy(Loop); +} + bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { // Match: // sreg = -1 or 0 @@ -250,11 +307,13 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { for (auto *BranchMI : ToRemove) { MachineOperand &Dst = BranchMI->getOperand(0); assert(Dst.isMBB() && "destination is not basic block"); + updateMLIBeforeRemovingEdge(Parent, Dst.getMBB()); Parent->removeSuccessor(Dst.getMBB()); BranchMI->eraseFromParent(); } if (MachineBasicBlock *Succ = Parent->getFallThrough()) { + updateMLIBeforeRemovingEdge(Parent, Succ); Parent->removeSuccessor(Succ); } @@ -264,7 +323,9 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { // Will never branch MachineOperand &Dst = MI.getOperand(0); assert(Dst.isMBB() && "destination is not basic block"); - MI.getParent()->removeSuccessor(Dst.getMBB()); + MachineBasicBlock *Parent = MI.getParent(); + updateMLIBeforeRemovingEdge(Parent, Dst.getMBB()); + Parent->removeSuccessor(Dst.getMBB()); MI.eraseFromParent(); return true; } else if (MaskValue == -1) { @@ -707,9 +768,14 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { auto *MDT = MFAM.getCachedResult(MF); auto *MPDT = MFAM.getCachedResult(MF); + auto *MLI = MFAM.getCachedResult(MF); + SIPreEmitPeephole Impl; - if (SIPreEmitPeephole().run(MF)) - return getMachineFunctionPassPreservedAnalyses(); + if (Impl.run(MF, MLI)) { + auto PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserve(); + return PA; + } if (MDT) MDT->updateBlockNumbers(); @@ -718,10 +784,11 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF, return PreservedAnalyses::all(); } -bool 
SIPreEmitPeephole::run(MachineFunction &MF) { +bool SIPreEmitPeephole::run(MachineFunction &MF, MachineLoopInfo *LoopInfo) { const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); + MLI = LoopInfo; bool Changed = false; MF.RenumberBlocks(); diff --git a/llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir b/llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir new file mode 100644 index 000000000000..e3ac9f70a50a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-pre-emit-peephole-preserve-loop-info.mir @@ -0,0 +1,52 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require,si-pre-emit-peephole,print" -debug-pass-manager -filetype=null %s 2>&1 | FileCheck %s + +# CHECK: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli +# CHECK-NEXT: Running analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli +# CHECK-NEXT: Running pass: SIPreEmitPeepholePass on vcc_and_removal_preserves_mli +# CHECK-NEXT: Invalidating analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli +# CHECK-NEXT: Running pass: MachineLoopPrinterPass on vcc_and_removal_preserves_mli +# CHECK-NEXT: Machine loop info for machine function 'vcc_and_removal_preserves_mli': +# CHECK-NOT: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli +# CHECK-NEXT: Loop at depth 1 containing: %bb.1
+ +--- +name: vcc_and_removal_preserves_mli +body: | + bb.0: + S_BRANCH %bb.1 + + ; S_AND gets removed + bb.1: + V_CMP_EQ_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.1, implicit $vcc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... + +# CHECK-LABEL: Running pass: SIPreEmitPeepholePass on vcc_branch_destroys_loop +# CHECK-NOT: Running analysis: MachineLoopAnalysis on vcc_branch_destroys_loop +# CHECK: Machine loop info for machine function 'vcc_branch_destroys_loop': +# CHECK-NOT: Loop at depth + +--- +name: vcc_branch_destroys_loop +body: | + bb.0: + S_BRANCH %bb.1 + + ; After opt, S_CBRANCH_VCCZ becomes S_BRANCH %bb.3, S_BRANCH %bb.1 removed. + bb.1: + $vcc = S_MOV_B64 0 + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.3, implicit $vcc + S_BRANCH %bb.1 + + bb.2: + S_ENDPGM 0 + + bb.3: + S_ENDPGM 0 +...