[AMDGPU][NPM] Port SIPreEmitPeephole to NPM (#130065)

This commit is contained in:
Akshat Oke 2025-04-08 17:58:48 +05:30 committed by GitHub
parent 79cb6f05da
commit fcaefc2c19
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 39 additions and 14 deletions

View File

@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowLegacyPass(PassRegistry &); void initializeSILowerControlFlowLegacyPass(PassRegistry &);
extern char &SILowerControlFlowLegacyID; extern char &SILowerControlFlowLegacyID;
void initializeSIPreEmitPeepholePass(PassRegistry &); void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &);
extern char &SIPreEmitPeepholeID; extern char &SIPreEmitPeepholeID;
void initializeSILateBranchLoweringLegacyPass(PassRegistry &); void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
@ -399,6 +399,13 @@ public:
static bool isRequired() { return true; } static bool isRequired() { return true; }
}; };
class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> {
public:
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
static bool isRequired() { return true; }
};
class AMDGPUSetWavePriorityPass class AMDGPUSetWavePriorityPass
: public PassInfoMixin<AMDGPUSetWavePriorityPass> { : public PassInfoMixin<AMDGPUSetWavePriorityPass> {
public: public:

View File

@ -127,6 +127,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPr
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass()) MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass()) MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass()) MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass()) MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef MACHINE_FUNCTION_PASS #undef MACHINE_FUNCTION_PASS
@ -135,7 +136,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists. // already exists.
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass())

View File

@ -542,7 +542,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIModeRegisterLegacyPass(*PR); initializeSIModeRegisterLegacyPass(*PR);
initializeSIWholeQuadModeLegacyPass(*PR); initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR); initializeSILowerControlFlowLegacyPass(*PR);
initializeSIPreEmitPeepholePass(*PR); initializeSIPreEmitPeepholeLegacyPass(*PR);
initializeSILateBranchLoweringLegacyPass(*PR); initializeSILateBranchLoweringLegacyPass(*PR);
initializeSIMemoryLegalizerLegacyPass(*PR); initializeSIMemoryLegalizerLegacyPass(*PR);
initializeSIOptimizeExecMaskingLegacyPass(*PR); initializeSIOptimizeExecMaskingLegacyPass(*PR);
@ -2173,9 +2173,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
addPass(AMDGPUSetWavePriorityPass()); addPass(AMDGPUSetWavePriorityPass());
if (TM.getOptLevel() > CodeGenOptLevel::None) { if (TM.getOptLevel() > CodeGenOptLevel::None)
// TODO: addPass(SIPreEmitPeepholePass()); addPass(SIPreEmitPeepholePass());
}
// The hazard recognizer that runs as part of the post-ra scheduler does not // The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able handle all hazards correctly. This is because if there // guarantee to be able handle all hazards correctly. This is because if there

View File

@ -24,7 +24,7 @@ using namespace llvm;
namespace { namespace {
class SIPreEmitPeephole : public MachineFunctionPass { class SIPreEmitPeephole {
private: private:
const SIInstrInfo *TII = nullptr; const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr; const SIRegisterInfo *TRI = nullptr;
@ -40,24 +40,31 @@ private:
const MachineBasicBlock &To) const; const MachineBasicBlock &To) const;
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB); bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
public:
bool run(MachineFunction &MF);
};
class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
public: public:
static char ID; static char ID;
SIPreEmitPeephole() : MachineFunctionPass(ID) { SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {
initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry()); initializeSIPreEmitPeepholeLegacyPass(*PassRegistry::getPassRegistry());
} }
bool runOnMachineFunction(MachineFunction &MF) override; bool runOnMachineFunction(MachineFunction &MF) override {
return SIPreEmitPeephole().run(MF);
}
}; };
} // End anonymous namespace. } // End anonymous namespace.
INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE, INITIALIZE_PASS(SIPreEmitPeepholeLegacy, DEBUG_TYPE,
"SI peephole optimizations", false, false) "SI peephole optimizations", false, false)
char SIPreEmitPeephole::ID = 0; char SIPreEmitPeepholeLegacy::ID = 0;
char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID; char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;
bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
// Match: // Match:
@ -410,7 +417,16 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
return true; return true;
} }
bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) { PreservedAnalyses
llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
if (!SIPreEmitPeephole().run(MF))
return PreservedAnalyses::all();
return getMachineFunctionPassPreservedAnalyses();
}
bool SIPreEmitPeephole::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo(); TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo(); TRI = &TII->getRegisterInfo();

View File

@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole %s -o - | FileCheck %s
--- ---

View File

@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole %s -o - | FileCheck %s
# Make sure mandatory skips are not removed around mode defs. # Make sure mandatory skips are not removed around mode defs.
--- ---

View File

@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
--- ---
name: simple name: simple