[X86][NewPM] Port X86FixupLEAs to NPM

This patch ports X86FixupLEAs to the new pass manager and adds some test
coverage.

Reviewers: paperchalice, arsenm, RKSimon, phoebewang

Pull Request: https://github.com/llvm/llvm-project/pull/173744
This commit is contained in:
Aiden Grossman 2025-12-31 10:40:46 -08:00 committed by GitHub
parent 93248b5327
commit e6206d567b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 115 additions and 64 deletions

View File

@ -67,7 +67,13 @@ FunctionPass *createX86PadShortFunctions();
/// Return a pass that selectively replaces certain instructions (like add,
/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA
/// instructions, in order to eliminate execution delays in some processors.
FunctionPass *createX86FixupLEAs();
/// New pass manager form of the X86 LEA fixup pass. The legacy pass manager
/// form is obtained through createX86FixupLEAsLegacyPass().
class X86FixupLEAsPass : public PassInfoMixin<X86FixupLEAsPass> {
public:
/// Run the fixup over \p MF, using \p MFAM to obtain analysis results, and
/// return the set of analyses that remain valid afterwards.
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
};
FunctionPass *createX86FixupLEAsLegacyPass();
/// Return a pass that replaces equivalent slower instructions with faster
/// ones.
@ -249,7 +255,7 @@ FunctionPass *createX86SuppressAPXForRelocationPass();
void initializeCompressEVEXPassPass(PassRegistry &);
void initializeFixupBWInstPassPass(PassRegistry &);
void initializeFixupLEAPassPass(PassRegistry &);
void initializeFixupLEAsLegacyPass(PassRegistry &);
void initializeX86ArgumentStackSlotPassPass(PassRegistry &);
void initializeX86AsmPrinterPass(PassRegistry &);
void initializeX86FixupInstTuningPassPass(PassRegistry &);

View File

@ -28,14 +28,14 @@
using namespace llvm;
#define FIXUPLEA_DESC "X86 LEA Fixup"
#define FIXUPLEA_NAME "x86-fixup-LEAs"
#define FIXUPLEA_NAME "x86-fixup-leas"
#define DEBUG_TYPE FIXUPLEA_NAME
STATISTIC(NumLEAs, "Number of LEA instructions created");
namespace {
class FixupLEAPass : public MachineFunctionPass {
class FixupLEAsImpl {
enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
/// Given a machine register, look for the instruction
@ -121,15 +121,30 @@ class FixupLEAPass : public MachineFunctionPass {
MachineBasicBlock::iterator &MBBI) const;
public:
static char ID;
StringRef getPassName() const override { return FIXUPLEA_DESC; }
FixupLEAPass() : MachineFunctionPass(ID) { }
FixupLEAsImpl(ProfileSummaryInfo *PSI, MachineBlockFrequencyInfo *MBFI)
: PSI(PSI), MBFI(MBFI) {}
/// Loop over all of the basic blocks,
/// replacing instructions by equivalent LEA instructions
/// if needed and when possible.
bool runOnMachineFunction(MachineFunction &MF);
private:
TargetSchedModel TSM;
const X86InstrInfo *TII = nullptr;
const X86RegisterInfo *TRI = nullptr;
ProfileSummaryInfo *PSI;
MachineBlockFrequencyInfo *MBFI;
};
class FixupLEAsLegacy : public MachineFunctionPass {
public:
static char ID;
StringRef getPassName() const override { return FIXUPLEA_DESC; }
FixupLEAsLegacy() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
// This pass runs after regalloc and doesn't support VReg operands.
@ -142,21 +157,16 @@ public:
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
private:
TargetSchedModel TSM;
const X86InstrInfo *TII = nullptr;
const X86RegisterInfo *TRI = nullptr;
};
}
char FixupLEAPass::ID = 0;
char FixupLEAsLegacy::ID = 0;
INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
INITIALIZE_PASS(FixupLEAsLegacy, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) const {
FixupLEAsImpl::postRAConvertToLEA(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) const {
MachineInstr &MI = *MBBI;
switch (MI.getOpcode()) {
case X86::MOV32rr:
@ -210,17 +220,16 @@ FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
return TII->convertToThreeAddress(MI, nullptr, nullptr);
}
FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
/// Factory for the legacy pass manager wrapper of the LEA fixup pass.
/// The returned object is heap-allocated; the caller receives the raw pointer.
FunctionPass *llvm::createX86FixupLEAsLegacyPass() {
  FunctionPass *LegacyPass = new FixupLEAsLegacy();
  return LegacyPass;
}
/// Return true if \p Opcode is one of the LEA opcodes this file handles
/// (LEA32r, LEA64r or LEA64_32r).
static bool isLEA(unsigned Opcode) {
  switch (Opcode) {
  case X86::LEA32r:
  case X86::LEA64r:
  case X86::LEA64_32r:
    return true;
  default:
    return false;
  }
}
bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
bool FixupLEAsImpl::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
bool IsSlowLEA = ST.slowLEA();
bool IsSlow3OpsLEA = ST.slow3OpsLEA();
@ -232,10 +241,6 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
TSM.init(&ST);
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto *MBFI = (PSI && PSI->hasProfileSummary())
? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
: nullptr;
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
for (MachineBasicBlock &MBB : MF) {
@ -268,8 +273,8 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
return true;
}
FixupLEAPass::RegUsageState
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
FixupLEAsImpl::RegUsageState
FixupLEAsImpl::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
RegUsageState RegUsage = RU_NotUsed;
MachineInstr &MI = *I;
@ -300,9 +305,8 @@ static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
return true;
}
MachineBasicBlock::iterator
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) {
MachineBasicBlock::iterator FixupLEAsImpl::searchBackwards(
MachineOperand &p, MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) {
int InstrDistance = 1;
MachineBasicBlock::iterator CurInst;
static const int INSTR_DISTANCE_THRESHOLD = 5;
@ -394,8 +398,8 @@ static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
}
MachineBasicBlock::iterator
FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) const {
FixupLEAsImpl::searchALUInst(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) const {
const int InstrDistanceThreshold = 5;
int InstrDistance = 1;
MachineBasicBlock::iterator CurInst = std::next(I);
@ -447,11 +451,11 @@ FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
return MachineBasicBlock::iterator();
}
void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
MachineBasicBlock::iterator &AluI,
bool &BaseIndexDef, bool &AluDestRef,
MachineOperand **KilledBase,
MachineOperand **KilledIndex) const {
void FixupLEAsImpl::checkRegUsage(MachineBasicBlock::iterator &LeaI,
MachineBasicBlock::iterator &AluI,
bool &BaseIndexDef, bool &AluDestRef,
MachineOperand **KilledBase,
MachineOperand **KilledIndex) const {
BaseIndexDef = AluDestRef = false;
*KilledBase = *KilledIndex = nullptr;
Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();
@ -481,8 +485,8 @@ void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
}
}
bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) const {
bool FixupLEAsImpl::optLEAALU(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) const {
// Look for an add/sub instruction which uses the result of lea.
MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
if (AluI == MachineBasicBlock::iterator())
@ -545,9 +549,9 @@ bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
return true;
}
bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB, bool OptIncDec,
bool UseLEAForSP) const {
bool FixupLEAsImpl::optTwoAddrLEA(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB, bool OptIncDec,
bool UseLEAForSP) const {
MachineInstr &MI = *I;
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
@ -645,8 +649,8 @@ bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
return true;
}
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) {
void FixupLEAsImpl::processInstruction(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) {
// Process a load, store, or LEA instruction.
MachineInstr &MI = *I;
const MCInstrDesc &Desc = MI.getDesc();
@ -664,9 +668,9 @@ void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
}
}
void FixupLEAPass::seekLEAFixup(MachineOperand &p,
MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) {
void FixupLEAsImpl::seekLEAFixup(MachineOperand &p,
MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
if (MBI != MachineBasicBlock::iterator()) {
MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
@ -684,8 +688,8 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p,
}
}
void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) {
void FixupLEAsImpl::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB) {
MachineInstr &MI = *I;
const unsigned Opcode = MI.getOpcode();
@ -735,9 +739,9 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
}
}
void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB,
bool OptIncDec) {
void FixupLEAsImpl::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
MachineBasicBlock &MBB,
bool OptIncDec) {
MachineInstr &MI = *I;
const unsigned LEAOpcode = MI.getOpcode();
@ -911,3 +915,35 @@ void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
MBB.erase(I);
I = NewMI;
}
/// Legacy pass manager entry point: collects the analyses the shared
/// implementation needs and forwards to FixupLEAsImpl.
bool FixupLEAsLegacy::runOnMachineFunction(MachineFunction &MF) {
  // Respect the standard skip check before doing any work.
  if (skipFunction(MF.getFunction()))
    return false;

  ProfileSummaryInfo *PSI =
      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();

  // Block frequency info is only requested when a profile summary is
  // present; otherwise the implementation receives a null pointer.
  MachineBlockFrequencyInfo *MBFI = nullptr;
  if (PSI && PSI->hasProfileSummary())
    MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();

  return FixupLEAsImpl(PSI, MBFI).runOnMachineFunction(MF);
}
/// New pass manager entry point for the X86 LEA fixup.
///
/// A ProfileSummaryAnalysis result must already be cached for the module that
/// contains \p MF (for example via `require<profile-summary>` earlier in the
/// pipeline); if it is not, a fatal error is reported.
///
/// \returns PreservedAnalyses::all() when the function was left untouched,
/// otherwise the default machine-function preserved set plus CFG analyses.
PreservedAnalyses X86FixupLEAsPass::run(MachineFunction &MF,
                                        MachineFunctionAnalysisManager &MFAM) {
  // Only a cached module-level result may be queried from a machine function
  // pass, hence getCachedResult rather than getResult.
  ProfileSummaryInfo *PSI =
      MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
          .getCachedResult<ProfileSummaryAnalysis>(
              *MF.getFunction().getParent());
  if (!PSI)
    report_fatal_error("x86-fixup-leas requires ProfileSummaryAnalysis", false);

  // Mirror the legacy pass: block frequency info is only requested when a
  // profile summary exists, so the analysis is not computed needlessly for
  // functions compiled without profile data.
  MachineBlockFrequencyInfo *MBFI =
      PSI->hasProfileSummary()
          ? &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF)
          : nullptr;

  FixupLEAsImpl PassImpl(PSI, MBFI);
  if (!PassImpl.runOnMachineFunction(MF))
    return PreservedAnalyses::all();

  // Register the CFG analyses as preserved on top of the default set.
  PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

View File

@ -32,6 +32,7 @@ DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
MACHINE_FUNCTION_PASS("x86-avoid-trailing-call", X86AvoidTrailingCallPass())
MACHINE_FUNCTION_PASS("x86-dyn-alloca-expander", X86DynAllocaExpanderPass())
MACHINE_FUNCTION_PASS("x86-expand-pseudo", X86ExpandPseudoPass())
MACHINE_FUNCTION_PASS("x86-fixup-leas", X86FixupLEAsPass())
MACHINE_FUNCTION_PASS("x86-flags-copy-lowering", X86FlagsCopyLoweringPass())
MACHINE_FUNCTION_PASS("x86-fp-stackifier", X86FPStackifierPass())
MACHINE_FUNCTION_PASS("x86-isel", X86ISelDAGToDAGPass(*this))
@ -49,7 +50,6 @@ DUMMY_MACHINE_FUNCTION_PASS("x86-domain-reassignment", X86DomainReassignment())
DUMMY_MACHINE_FUNCTION_PASS("x86-execution-domain-fix", X86ExecutionDomainFix())
DUMMY_MACHINE_FUNCTION_PASS("fastpretileconfig", X86FastPreTileConfig())
DUMMY_MACHINE_FUNCTION_PASS("fasttileconfig", X86FastTileConfig())
DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-LEAs", FixupLEAPass())
DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-bw-inst", FixupBWInstPass())
DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-inst-tuning", X86FixupInstTuningPass())
DUMMY_MACHINE_FUNCTION_PASS("x86-fixup-setcc", X86FixupSetCCPass())

View File

@ -75,7 +75,7 @@ extern "C" LLVM_C_ABI void LLVMInitializeX86Target() {
initializeWinEHStatePassPass(PR);
initializeFixupBWInstPassPass(PR);
initializeCompressEVEXPassPass(PR);
initializeFixupLEAPassPass(PR);
initializeFixupLEAsLegacyPass(PR);
initializeX86FPStackifierLegacyPass(PR);
initializeX86FixupSetCCPassPass(PR);
initializeX86CallFrameOptimizationPass(PR);
@ -566,7 +566,7 @@ void X86PassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOptLevel::None) {
addPass(createX86FixupBWInsts());
addPass(createX86PadShortFunctions());
addPass(createX86FixupLEAs());
addPass(createX86FixupLEAsLegacyPass());
addPass(createX86FixupInstTuning());
addPass(createX86FixupVectorConstants());
}

View File

@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=slow-3ops-lea -run-pass x86-fixup-LEAs -o - %s | FileCheck %s
# RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=slow-3ops-lea -run-pass x86-fixup-leas -o - %s | FileCheck %s
# RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=slow-3ops-lea -passes="require<profile-summary>,function(machine-function(x86-fixup-leas))" -o - %s | FileCheck %s
--- |
define i32 @square(i32 %0) local_unnamed_addr {

View File

@ -1,5 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass x86-fixup-LEAs -mtriple=i386 -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s
# RUN: llc -run-pass x86-fixup-leas -mtriple=i386 -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s
# RUN: llc -passes="require<profile-summary>,function(machine-function(x86-fixup-leas))" -mtriple=i386 -mcpu=corei7-avx -o - %s | FileCheck %s
--- |
; ModuleID = 'test/CodeGen/X86/fixup-lea.ll'
source_filename = "test/CodeGen/X86/fixup-lea.ll"

View File

@ -1,5 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass x86-fixup-LEAs -mtriple=x86_64-gnu-unknown -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s
# RUN: llc -run-pass x86-fixup-leas -mtriple=x86_64-gnu-unknown -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s
# RUN: llc -passes="require<profile-summary>,function(machine-function(x86-fixup-leas))" -mtriple=x86_64-gnu-unknown -mcpu=corei7-avx -o - %s | FileCheck %s
--- |
; ModuleID = 'lea-2.ll'
source_filename = "lea-2.ll"

View File

@ -1,4 +1,5 @@
# RUN: llc -run-pass x86-fixup-LEAs -mtriple=i386 -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s
# RUN: llc -run-pass x86-fixup-leas -mtriple=i386 -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s
# RUN: llc -passes="require<profile-summary>,function(machine-function(x86-fixup-leas))" -mtriple=i386 -mcpu=corei7-avx -o - %s | FileCheck %s
#
# Test that several LEA => ADD transforms get substitutions applied to them,
# for corner cases that we can only hit with -mtriple=i386.

View File

@ -1,6 +1,9 @@
# RUN: llc -run-pass x86-fixup-LEAs -mtriple=x86_64-gnu-unknown -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s --check-prefixes=COREI7,CHECK
# RUN: llc -run-pass x86-fixup-LEAs -mtriple=x86_64-gnu-unknown -verify-machineinstrs -mcpu=haswell -o - %s | FileCheck %s --check-prefixes=HASWELL,CHECK
# RUN: llc -run-pass x86-fixup-LEAs -mtriple=x86_64-unknown-unknown -verify-machineinstrs -mcpu=atom -o - %s | FileCheck %s --check-prefixes=ATOM,CHECK
# RUN: llc -run-pass x86-fixup-leas -mtriple=x86_64-gnu-unknown -verify-machineinstrs -mcpu=corei7-avx -o - %s | FileCheck %s --check-prefixes=COREI7,CHECK
# RUN: llc -passes="require<profile-summary>,function(machine-function(x86-fixup-leas))" -mtriple=x86_64-gnu-unknown -mcpu=corei7-avx -o - %s | FileCheck %s --check-prefixes=COREI7,CHECK
# RUN: llc -run-pass x86-fixup-leas -mtriple=x86_64-gnu-unknown -verify-machineinstrs -mcpu=haswell -o - %s | FileCheck %s --check-prefixes=HASWELL,CHECK
# RUN: llc -passes="require<profile-summary>,function(machine-function(x86-fixup-leas))" -mtriple=x86_64-gnu-unknown -mcpu=haswell -o - %s | FileCheck %s --check-prefixes=HASWELL,CHECK
# RUN: llc -run-pass x86-fixup-leas -mtriple=x86_64-unknown-unknown -verify-machineinstrs -mcpu=atom -o - %s | FileCheck %s --check-prefixes=ATOM,CHECK
# RUN: llc -passes="require<profile-summary>,function(machine-function(x86-fixup-leas))" -mtriple=x86_64-unknown-unknown -mcpu=atom -o - %s | FileCheck %s --check-prefixes=ATOM,CHECK
#
# Test several LEA <=> ADD transformations that the fixup-leas pass performs,
# and check that any debug-instr-number attached to the original instruction