[AMDGPU] Teach SIPreEmitPeephole pass to preserve MachineLoopInfo (#178868)

`optimizeVccBranch` in `SIPreEmitPeephole` performs CFG modifications
(i.e converting conditional branches to unconditional) which can
invalidate `MachineLoopInfo`. This patch incrementally updates `MLI`
before each `removeSuccessor()` call; if the edge being removed is the
last back-edge to a loop header, the loop is destroyed from `MLI`
(blocks and subloops are reparented to the parent loop).
This commit is contained in:
Dark Steve 2026-02-17 20:57:47 +05:30 committed by GitHub
parent b849b64151
commit 20e32ececd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 125 additions and 6 deletions

View File

@ -24,6 +24,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/BranchProbability.h"
@ -37,8 +38,11 @@ class SIPreEmitPeephole {
private:
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
MachineLoopInfo *MLI = nullptr;
bool optimizeVccBranch(MachineInstr &MI) const;
void updateMLIBeforeRemovingEdge(MachineBasicBlock *From,
MachineBasicBlock *To) const;
bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;
bool getBlockDestinations(MachineBasicBlock &SrcMBB,
MachineBasicBlock *&TrueMBB,
@ -78,7 +82,7 @@ private:
bool IsHiBits, const MachineOperand &SrcMO);
public:
bool run(MachineFunction &MF);
bool run(MachineFunction &MF, MachineLoopInfo *MLI);
};
class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
@ -87,8 +91,16 @@ public:
SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addUsedIfAvailable<MachineLoopInfoWrapperPass>();
AU.addPreserved<MachineLoopInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool runOnMachineFunction(MachineFunction &MF) override {
return SIPreEmitPeephole().run(MF);
auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
MachineLoopInfo *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
return SIPreEmitPeephole().run(MF, MLI);
}
};
@ -101,6 +113,51 @@ char SIPreEmitPeepholeLegacy::ID = 0;
char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;
void SIPreEmitPeephole::updateMLIBeforeRemovingEdge(
MachineBasicBlock *From, MachineBasicBlock *To) const {
if (!MLI)
return;
// Only handle back-edges: To must be a loop header with From inside the loop.
MachineLoop *Loop = MLI->getLoopFor(To);
if (!Loop || Loop->getHeader() != To || !Loop->contains(From))
return;
// Count back-edges
unsigned BackEdgeCount = 0;
for (MachineBasicBlock *Pred : To->predecessors()) {
if (Loop->contains(Pred))
BackEdgeCount++;
}
if (BackEdgeCount > 1)
return;
MachineLoop *ParentLoop = Loop->getParentLoop();
// Re-map blocks directly owned by this loop to the parent.
for (MachineBasicBlock *BB : Loop->blocks()) {
if (MLI->getLoopFor(BB) == Loop)
MLI->changeLoopFor(BB, ParentLoop);
}
// Reparent all child loops.
while (!Loop->isInnermost()) {
MachineLoop *Child = Loop->removeChildLoop(std::prev(Loop->end()));
if (ParentLoop)
ParentLoop->addChildLoop(Child);
else
MLI->addTopLevelLoop(Child);
}
if (ParentLoop)
ParentLoop->removeChildLoop(Loop);
else
MLI->removeLoop(llvm::find(*MLI, Loop));
MLI->destroy(Loop);
}
bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
// Match:
// sreg = -1 or 0
@ -250,11 +307,13 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
for (auto *BranchMI : ToRemove) {
MachineOperand &Dst = BranchMI->getOperand(0);
assert(Dst.isMBB() && "destination is not basic block");
updateMLIBeforeRemovingEdge(Parent, Dst.getMBB());
Parent->removeSuccessor(Dst.getMBB());
BranchMI->eraseFromParent();
}
if (MachineBasicBlock *Succ = Parent->getFallThrough()) {
updateMLIBeforeRemovingEdge(Parent, Succ);
Parent->removeSuccessor(Succ);
}
@ -264,7 +323,9 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
// Will never branch
MachineOperand &Dst = MI.getOperand(0);
assert(Dst.isMBB() && "destination is not basic block");
MI.getParent()->removeSuccessor(Dst.getMBB());
MachineBasicBlock *Parent = MI.getParent();
updateMLIBeforeRemovingEdge(Parent, Dst.getMBB());
Parent->removeSuccessor(Dst.getMBB());
MI.eraseFromParent();
return true;
} else if (MaskValue == -1) {
@ -707,9 +768,14 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
auto *MDT = MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
auto *MPDT = MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
auto *MLI = MFAM.getCachedResult<MachineLoopAnalysis>(MF);
SIPreEmitPeephole Impl;
if (SIPreEmitPeephole().run(MF))
return getMachineFunctionPassPreservedAnalyses();
if (Impl.run(MF, MLI)) {
auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserve<MachineLoopAnalysis>();
return PA;
}
if (MDT)
MDT->updateBlockNumbers();
@ -718,10 +784,11 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
return PreservedAnalyses::all();
}
bool SIPreEmitPeephole::run(MachineFunction &MF) {
bool SIPreEmitPeephole::run(MachineFunction &MF, MachineLoopInfo *LoopInfo) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
MLI = LoopInfo;
bool Changed = false;
MF.RenumberBlocks();

View File

@ -0,0 +1,52 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<machine-loops>,si-pre-emit-peephole,print<machine-loops>" -debug-pass-manager -filetype=null %s 2>&1 | FileCheck %s
# CHECK: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli
# CHECK-NEXT: Running analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli
# CHECK-NEXT: Running pass: SIPreEmitPeepholePass on vcc_and_removal_preserves_mli
# CHECK-NEXT: Invalidating analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli
# CHECK-NEXT: Running pass: MachineLoopPrinterPass on vcc_and_removal_preserves_mli
# CHECK-NEXT: Machine loop info for machine function 'vcc_and_removal_preserves_mli':
# CHECK-NOT: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli
# CHECK-NEXT: Loop at depth 1 containing: %bb.1<header><latch><exiting>
---
name: vcc_and_removal_preserves_mli
body: |
bb.0:
S_BRANCH %bb.1
; S_AND gets removed
bb.1:
V_CMP_EQ_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
S_BRANCH %bb.2
bb.2:
S_ENDPGM 0
...
# CHECK-LABEL: Running pass: SIPreEmitPeepholePass on vcc_branch_destroys_loop
# CHECK-NOT: Running analysis: MachineLoopAnalysis on vcc_branch_destroys_loop
# CHECK: Machine loop info for machine function 'vcc_branch_destroys_loop':
# CHECK-NOT: Loop at depth
---
name: vcc_branch_destroys_loop
body: |
bb.0:
S_BRANCH %bb.1
; After opt, S_CBRANCH_VCCZ becomes S_BRANCH %bb.3, S_BRANCH %bb.1 removed.
bb.1:
$vcc = S_MOV_B64 0
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
S_CBRANCH_VCCZ %bb.3, implicit $vcc
S_BRANCH %bb.1
bb.2:
S_ENDPGM 0
bb.3:
S_ENDPGM 0
...