[AMDGPU] Teach SIPreEmitPeephole pass to preserve MachineLoopInfo (#178868)
`optimizeVccBranch` in `SIPreEmitPeephole` performs CFG modifications (i.e., converting conditional branches to unconditional ones) which can invalidate `MachineLoopInfo`. This patch incrementally updates `MLI` before each `removeSuccessor()` call: if the edge being removed is the last back-edge to a loop header, the loop is removed from `MLI` and destroyed (its blocks and subloops are reparented to the parent loop).
This commit is contained in:
parent
b849b64151
commit
20e32ececd
@ -24,6 +24,7 @@
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachinePostDominators.h"
|
||||
#include "llvm/CodeGen/TargetSchedule.h"
|
||||
#include "llvm/Support/BranchProbability.h"
|
||||
@ -37,8 +38,11 @@ class SIPreEmitPeephole {
|
||||
private:
|
||||
const SIInstrInfo *TII = nullptr;
|
||||
const SIRegisterInfo *TRI = nullptr;
|
||||
MachineLoopInfo *MLI = nullptr;
|
||||
|
||||
bool optimizeVccBranch(MachineInstr &MI) const;
|
||||
void updateMLIBeforeRemovingEdge(MachineBasicBlock *From,
|
||||
MachineBasicBlock *To) const;
|
||||
bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;
|
||||
bool getBlockDestinations(MachineBasicBlock &SrcMBB,
|
||||
MachineBasicBlock *&TrueMBB,
|
||||
@ -78,7 +82,7 @@ private:
|
||||
bool IsHiBits, const MachineOperand &SrcMO);
|
||||
|
||||
public:
|
||||
bool run(MachineFunction &MF);
|
||||
bool run(MachineFunction &MF, MachineLoopInfo *MLI);
|
||||
};
|
||||
|
||||
class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
|
||||
@ -87,8 +91,16 @@ public:
|
||||
|
||||
SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {}
|
||||
|
||||
/// Registers this legacy pass's analysis relationships with the pass manager.
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
// MachineLoopInfo is optional here: it is requested only if already available.
AU.addUsedIfAvailable<MachineLoopInfoWrapperPass>();
|
||||
// MachineLoopInfo is also declared preserved across this pass.
AU.addPreserved<MachineLoopInfoWrapperPass>();
|
||||
// Let the MachineFunctionPass base class add its own entries.
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override {
|
||||
return SIPreEmitPeephole().run(MF);
|
||||
auto *MLIWrapper = getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
|
||||
MachineLoopInfo *MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
|
||||
return SIPreEmitPeephole().run(MF, MLI);
|
||||
}
|
||||
};
|
||||
|
||||
@ -101,6 +113,51 @@ char SIPreEmitPeepholeLegacy::ID = 0;
|
||||
|
||||
char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;
|
||||
|
||||
/// Bring MachineLoopInfo up to date for the imminent removal of the CFG edge
/// \p From -> \p To.
///
/// The predecessor list of \p To is inspected while the edge is still present,
/// so this has to run before the matching removeSuccessor() call.
///
/// Nothing is done when no MachineLoopInfo was supplied, when \p To is not the
/// header of the loop it belongs to, or when \p From lies outside that loop
/// (i.e. the edge is not a back-edge). If the header has more than one
/// predecessor inside the loop, the loop is left untouched. Otherwise the loop
/// is dissolved: blocks it directly owns are mapped to the parent loop (or to
/// no loop when there is none), its sub-loops are handed to the parent loop (or
/// become top-level loops), and the loop object itself is unlinked and
/// destroyed.
///
/// NOTE(review): when the loop is kept because other back-edges remain, its
/// block set is not re-examined here; presumably it cannot shrink for the
/// branches this pass rewrites -- confirm.
void SIPreEmitPeephole::updateMLIBeforeRemovingEdge(
    MachineBasicBlock *From, MachineBasicBlock *To) const {
  if (MLI == nullptr)
    return;

  // A back-edge targets the header of a loop that also contains its source.
  MachineLoop *const Doomed = MLI->getLoopFor(To);
  if (Doomed == nullptr)
    return;
  const bool IsBackEdge = Doomed->getHeader() == To && Doomed->contains(From);
  if (!IsBackEdge)
    return;

  // Tally the header's predecessors that live inside the loop; the edge about
  // to be removed is still part of this count.
  unsigned InLoopPreds = 0;
  for (MachineBasicBlock *Pred : To->predecessors())
    InLoopPreds += Doomed->contains(Pred) ? 1u : 0u;
  if (InLoopPreds >= 2)
    return;

  MachineLoop *const Outer = Doomed->getParentLoop();

  // Blocks whose innermost loop is the dissolving one now belong to the
  // enclosing loop (nullptr when there is no enclosing loop).
  for (MachineBasicBlock *MBB : Doomed->blocks()) {
    if (MLI->getLoopFor(MBB) != Doomed)
      continue;
    MLI->changeLoopFor(MBB, Outer);
  }

  // Sub-loops move up one level, taken from the back of the child list.
  auto Adopt = [&](MachineLoop *Child) {
    if (Outer)
      Outer->addChildLoop(Child);
    else
      MLI->addTopLevelLoop(Child);
  };
  while (!Doomed->isInnermost())
    Adopt(Doomed->removeChildLoop(std::prev(Doomed->end())));

  // Unlink the now-empty loop from wherever it was registered, then free it.
  if (Outer)
    Outer->removeChildLoop(Doomed);
  else
    MLI->removeLoop(llvm::find(*MLI, Doomed));
  MLI->destroy(Doomed);
}
|
||||
|
||||
bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
|
||||
// Match:
|
||||
// sreg = -1 or 0
|
||||
@ -250,11 +307,13 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
|
||||
for (auto *BranchMI : ToRemove) {
|
||||
MachineOperand &Dst = BranchMI->getOperand(0);
|
||||
assert(Dst.isMBB() && "destination is not basic block");
|
||||
updateMLIBeforeRemovingEdge(Parent, Dst.getMBB());
|
||||
Parent->removeSuccessor(Dst.getMBB());
|
||||
BranchMI->eraseFromParent();
|
||||
}
|
||||
|
||||
if (MachineBasicBlock *Succ = Parent->getFallThrough()) {
|
||||
updateMLIBeforeRemovingEdge(Parent, Succ);
|
||||
Parent->removeSuccessor(Succ);
|
||||
}
|
||||
|
||||
@ -264,7 +323,9 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
|
||||
// Will never branch
|
||||
MachineOperand &Dst = MI.getOperand(0);
|
||||
assert(Dst.isMBB() && "destination is not basic block");
|
||||
MI.getParent()->removeSuccessor(Dst.getMBB());
|
||||
MachineBasicBlock *Parent = MI.getParent();
|
||||
updateMLIBeforeRemovingEdge(Parent, Dst.getMBB());
|
||||
Parent->removeSuccessor(Dst.getMBB());
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
} else if (MaskValue == -1) {
|
||||
@ -707,9 +768,14 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
|
||||
MachineFunctionAnalysisManager &MFAM) {
|
||||
auto *MDT = MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
|
||||
auto *MPDT = MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
|
||||
auto *MLI = MFAM.getCachedResult<MachineLoopAnalysis>(MF);
|
||||
SIPreEmitPeephole Impl;
|
||||
|
||||
if (SIPreEmitPeephole().run(MF))
|
||||
return getMachineFunctionPassPreservedAnalyses();
|
||||
if (Impl.run(MF, MLI)) {
|
||||
auto PA = getMachineFunctionPassPreservedAnalyses();
|
||||
PA.preserve<MachineLoopAnalysis>();
|
||||
return PA;
|
||||
}
|
||||
|
||||
if (MDT)
|
||||
MDT->updateBlockNumbers();
|
||||
@ -718,10 +784,11 @@ llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
|
||||
return PreservedAnalyses::all();
|
||||
}
|
||||
|
||||
bool SIPreEmitPeephole::run(MachineFunction &MF) {
|
||||
bool SIPreEmitPeephole::run(MachineFunction &MF, MachineLoopInfo *LoopInfo) {
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
TII = ST.getInstrInfo();
|
||||
TRI = &TII->getRegisterInfo();
|
||||
MLI = LoopInfo;
|
||||
bool Changed = false;
|
||||
|
||||
MF.RenumberBlocks();
|
||||
|
||||
@ -0,0 +1,52 @@
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<machine-loops>,si-pre-emit-peephole,print<machine-loops>" -debug-pass-manager -filetype=null %s 2>&1 | FileCheck %s
|
||||
|
||||
# CHECK: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli
|
||||
# CHECK-NEXT: Running analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli
|
||||
# CHECK-NEXT: Running pass: SIPreEmitPeepholePass on vcc_and_removal_preserves_mli
|
||||
# CHECK-NEXT: Invalidating analysis: MachineDominatorTreeAnalysis on vcc_and_removal_preserves_mli
|
||||
# CHECK-NEXT: Running pass: MachineLoopPrinterPass on vcc_and_removal_preserves_mli
|
||||
# CHECK-NEXT: Machine loop info for machine function 'vcc_and_removal_preserves_mli':
|
||||
# CHECK-NOT: Running analysis: MachineLoopAnalysis on vcc_and_removal_preserves_mli
|
||||
# CHECK-NEXT: Loop at depth 1 containing: %bb.1<header><latch><exiting>
|
||||
|
||||
---
|
||||
name: vcc_and_removal_preserves_mli
|
||||
body: |
|
||||
bb.0:
|
||||
S_BRANCH %bb.1
|
||||
|
||||
; S_AND gets removed
|
||||
bb.1:
|
||||
V_CMP_EQ_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $exec
|
||||
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
|
||||
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# CHECK-LABEL: Running pass: SIPreEmitPeepholePass on vcc_branch_destroys_loop
|
||||
# CHECK-NOT: Running analysis: MachineLoopAnalysis on vcc_branch_destroys_loop
|
||||
# CHECK: Machine loop info for machine function 'vcc_branch_destroys_loop':
|
||||
# CHECK-NOT: Loop at depth
|
||||
|
||||
---
|
||||
name: vcc_branch_destroys_loop
|
||||
body: |
|
||||
bb.0:
|
||||
S_BRANCH %bb.1
|
||||
|
||||
; After opt, S_CBRANCH_VCCZ becomes S_BRANCH %bb.3, S_BRANCH %bb.1 removed.
|
||||
bb.1:
|
||||
$vcc = S_MOV_B64 0
|
||||
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.3, implicit $vcc
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
|
||||
bb.3:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
Loading…
x
Reference in New Issue
Block a user