[AMDGPU][NPM] Port SILateBranchLowering to NPM (#130063)

This commit is contained in:
Akshat Oke 2025-03-26 19:28:19 +05:30 committed by GitHub
parent 75f810e025
commit 719b029c16
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 46 additions and 14 deletions

View File

@ -213,7 +213,7 @@ extern char &SILowerControlFlowLegacyID;
void initializeSIPreEmitPeepholePass(PassRegistry &); void initializeSIPreEmitPeepholePass(PassRegistry &);
extern char &SIPreEmitPeepholeID; extern char &SIPreEmitPeepholeID;
void initializeSILateBranchLoweringPass(PassRegistry &); void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
extern char &SILateBranchLoweringPassID; extern char &SILateBranchLoweringPassID;
void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &); void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &);
@ -391,6 +391,14 @@ public:
MachineFunctionAnalysisManager &MFAM); MachineFunctionAnalysisManager &MFAM);
}; };
/// Pass wrapper for SI late branch lowering, built on PassInfoMixin. This is
/// the class registered under the name "si-late-branch-lowering" as a
/// MACHINE_FUNCTION_PASS.
class SILateBranchLoweringPass
: public PassInfoMixin<SILateBranchLoweringPass> {
public:
/// Runs the pass on \p MF, using \p MFAM to obtain the analyses it needs.
/// \returns the set of analyses that are still valid after the run.
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
/// Always reports true. NOTE(review): presumably this keeps the pass manager
/// from skipping the pass (e.g. for optnone functions) -- confirm.
static bool isRequired() { return true; }
};
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
ModulePass *createAMDGPUPrintfRuntimeBinding(); ModulePass *createAMDGPUPrintfRuntimeBinding();

View File

@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass()) MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass()) MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass())
MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass()) MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass()) MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass()) MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass()) MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
@ -134,7 +135,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizations
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists. // already exists.

View File

@ -541,7 +541,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIWholeQuadModeLegacyPass(*PR); initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR); initializeSILowerControlFlowLegacyPass(*PR);
initializeSIPreEmitPeepholePass(*PR); initializeSIPreEmitPeepholePass(*PR);
initializeSILateBranchLoweringPass(*PR); initializeSILateBranchLoweringLegacyPass(*PR);
initializeSIMemoryLegalizerLegacyPass(*PR); initializeSIMemoryLegalizerLegacyPass(*PR);
initializeSIOptimizeExecMaskingLegacyPass(*PR); initializeSIOptimizeExecMaskingLegacyPass(*PR);
initializeSIPreAllocateWWMRegsLegacyPass(*PR); initializeSIPreAllocateWWMRegsLegacyPass(*PR);
@ -2166,7 +2166,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
// TODO: addPass(SIInsertHardClausesPass()); // TODO: addPass(SIInsertHardClausesPass());
} }
// addPass(SILateBranchLoweringPass()); addPass(SILateBranchLoweringPass());
if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) { if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) {
// TODO: addPass(AMDGPUSetWavePriorityPass()); // TODO: addPass(AMDGPUSetWavePriorityPass());
} }

View File

@ -16,6 +16,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h" #include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachinePassManager.h"
using namespace llvm; using namespace llvm;
@ -23,7 +24,7 @@ using namespace llvm;
namespace { namespace {
class SILateBranchLowering : public MachineFunctionPass { class SILateBranchLowering {
private: private:
const SIRegisterInfo *TRI = nullptr; const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr; const SIInstrInfo *TII = nullptr;
@ -34,14 +35,23 @@ private:
void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock); void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
public: public:
static char ID; SILateBranchLowering(MachineDominatorTree *MDT) : MDT(MDT) {}
bool run(MachineFunction &MF);
unsigned MovOpc; unsigned MovOpc;
Register ExecReg; Register ExecReg;
};
SILateBranchLowering() : MachineFunctionPass(ID) {} class SILateBranchLoweringLegacy : public MachineFunctionPass {
public:
static char ID;
SILateBranchLoweringLegacy() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override; bool runOnMachineFunction(MachineFunction &MF) override {
auto *MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
return SILateBranchLowering(MDT).run(MF);
}
StringRef getPassName() const override { StringRef getPassName() const override {
return "SI Final Branch Preparation"; return "SI Final Branch Preparation";
@ -56,15 +66,15 @@ public:
} // end anonymous namespace } // end anonymous namespace
char SILateBranchLowering::ID = 0; char SILateBranchLoweringLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE, INITIALIZE_PASS_BEGIN(SILateBranchLoweringLegacy, DEBUG_TYPE,
"SI insert s_cbranch_execz instructions", false, false) "SI insert s_cbranch_execz instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE, INITIALIZE_PASS_END(SILateBranchLoweringLegacy, DEBUG_TYPE,
"SI insert s_cbranch_execz instructions", false, false) "SI insert s_cbranch_execz instructions", false, false)
char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID; char &llvm::SILateBranchLoweringPassID = SILateBranchLoweringLegacy::ID;
static void generateEndPgm(MachineBasicBlock &MBB, static void generateEndPgm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL, MachineBasicBlock::iterator I, DebugLoc DL,
@ -192,11 +202,21 @@ void SILateBranchLowering::earlyTerm(MachineInstr &MI,
MDT->insertEdge(&MBB, EarlyExitBlock); MDT->insertEdge(&MBB, EarlyExitBlock);
} }
bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) { PreservedAnalyses
llvm::SILateBranchLoweringPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
// Fetch the machine dominator tree for MF from the analysis manager and hand
// it to the SILateBranchLowering implementation object.
auto *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
// run() returned false: report every analysis as preserved.
if (!SILateBranchLowering(MDT).run(MF))
return PreservedAnalyses::all();
// Otherwise start from the set returned by
// getMachineFunctionPassPreservedAnalyses() and additionally keep the
// dominator tree. NOTE(review): this relies on the implementation keeping
// MDT up to date (earlyTerm calls MDT->insertEdge) -- confirm that covers
// every CFG change the pass makes.
return getMachineFunctionPassPreservedAnalyses()
.preserve<MachineDominatorTreeAnalysis>();
}
bool SILateBranchLowering::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo(); TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo(); TRI = &TII->getRegisterInfo();
MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC; ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

View File

@ -2,6 +2,8 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -passes=si-late-branch-lowering %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s
--- | --- |
define amdgpu_ps void @early_term_scc0_end_block() { define amdgpu_ps void @early_term_scc0_end_block() {
ret void ret void

View File

@ -1,4 +1,5 @@
# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -run-pass=si-late-branch-lowering -verify-machineinstrs | FileCheck -check-prefix=GCN %s # RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -run-pass=si-late-branch-lowering -verify-machineinstrs | FileCheck -check-prefix=GCN %s
# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -passes=si-late-branch-lowering | FileCheck -check-prefix=GCN %s
# GCN-LABEL: readlane_exec0 # GCN-LABEL: readlane_exec0
# GCN: bb.0 # GCN: bb.0