[CodeGen][NewPM] Port SIWholeQuadMode to NPM. (#125833)

This commit is contained in:
Christudasan Devadasan 2025-02-05 18:44:57 +05:30 committed by GitHub
parent e78be31639
commit b83c960bad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 94 additions and 30 deletions

View File

@ -41,7 +41,7 @@ FunctionPass *createSIPeepholeSDWALegacyPass();
FunctionPass *createSILowerI1CopiesLegacyPass();
FunctionPass *createSIShrinkInstructionsLegacyPass();
FunctionPass *createSILoadStoreOptimizerLegacyPass();
FunctionPass *createSIWholeQuadModePass();
FunctionPass *createSIWholeQuadModeLegacyPass();
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass();
@ -204,7 +204,7 @@ extern char &SILowerSGPRSpillsLegacyID;
void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &);
extern char &SILoadStoreOptimizerLegacyID;
void initializeSIWholeQuadModePass(PassRegistry &);
void initializeSIWholeQuadModeLegacyPass(PassRegistry &);
extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowLegacyPass(PassRegistry &);

View File

@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef MACHINE_FUNCTION_PASS
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
@ -140,7 +141,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-global-isel-divergence-lowering", AMDGPUGlob
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbankselect", AMDGPURegBankSelectPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbanklegalize", AMDGPURegBankLegalizePass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbank-combiner", AMDGPURegBankCombinerPass())
DUMMY_MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef DUMMY_MACHINE_FUNCTION_PASS

View File

@ -50,6 +50,7 @@
#include "SIPeepholeSDWA.h"
#include "SIPreAllocateWWMRegs.h"
#include "SIShrinkInstructions.h"
#include "SIWholeQuadMode.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
@ -529,7 +530,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR);
initializeSIPreEmitPeepholePass(*PR);
initializeSILateBranchLoweringPass(*PR);

View File

@ -67,6 +67,7 @@
///
//===----------------------------------------------------------------------===//
#include "SIWholeQuadMode.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@ -148,11 +149,19 @@ struct WorkItem {
WorkItem(MachineInstr *MI) : MI(MI) {}
};
class SIWholeQuadMode : public MachineFunctionPass {
class SIWholeQuadMode {
public:
SIWholeQuadMode(MachineFunction &MF, LiveIntervals *LIS,
MachineDominatorTree *MDT, MachinePostDominatorTree *PDT)
: ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()),
TRI(&TII->getRegisterInfo()), MRI(&MF.getRegInfo()), LIS(LIS), MDT(MDT),
PDT(PDT) {}
bool run(MachineFunction &MF);
private:
const GCNSubtarget *ST;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
const GCNSubtarget *ST;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
MachineDominatorTree *MDT;
@ -225,12 +234,13 @@ private:
void lowerInitExec(MachineInstr &MI);
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
bool &Changed);
};
class SIWholeQuadModeLegacy : public MachineFunctionPass {
public:
static char ID;
SIWholeQuadMode() :
MachineFunctionPass(ID) { }
SIWholeQuadModeLegacy() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@ -250,23 +260,22 @@ public:
MachineFunctionProperties::Property::IsSSA);
}
};
} // end anonymous namespace
char SIWholeQuadMode::ID = 0;
char SIWholeQuadModeLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
false)
INITIALIZE_PASS_BEGIN(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
false)
INITIALIZE_PASS_END(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
false, false)
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
char &llvm::SIWholeQuadModeID = SIWholeQuadModeLegacy::ID;
FunctionPass *llvm::createSIWholeQuadModePass() {
return new SIWholeQuadMode;
FunctionPass *llvm::createSIWholeQuadModeLegacyPass() {
return new SIWholeQuadModeLegacy;
}
#ifndef NDEBUG
@ -1689,7 +1698,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
return InsertPt;
}
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
bool SIWholeQuadMode::run(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
<< " ------------- \n");
LLVM_DEBUG(MF.dump(););
@ -1704,18 +1713,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
SetInactiveInstrs.clear();
StateTransition.clear();
ST = &MF.getSubtarget<GCNSubtarget>();
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
auto *PDTWrapper =
getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
if (ST->isWave32()) {
AndOpc = AMDGPU::S_AND_B32;
AndTermOpc = AMDGPU::S_AND_B32_term;
@ -1816,3 +1813,38 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
/// Legacy pass-manager entry point.
///
/// Collects the analyses the shared SIWholeQuadMode implementation takes in
/// its constructor and then delegates to SIWholeQuadMode::run.
///
/// \param MF the machine function to process.
/// \returns whatever SIWholeQuadMode::run reports for \p MF.
bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) {
  // Live intervals are fetched unconditionally.
  LiveIntervals &Intervals = getAnalysis<LiveIntervalsWrapperPass>().getLIS();

  // The dominator tree is only picked up if it is already available; otherwise
  // a null pointer is handed to the implementation.
  MachineDominatorTree *DomTree = nullptr;
  if (auto *DomWrapper =
          getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>())
    DomTree = &DomWrapper->getDomTree();

  // Same treatment for the post-dominator tree.
  MachinePostDominatorTree *PostDomTree = nullptr;
  if (auto *PostDomWrapper =
          getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>())
    PostDomTree = &PostDomWrapper->getPostDomTree();

  SIWholeQuadMode WQM(MF, &Intervals, DomTree, PostDomTree);
  const bool Modified = WQM.run(MF);
  return Modified;
}
/// New pass-manager entry point.
///
/// Requests LiveIntervals from \p MFAM, picks up cached (possibly null)
/// dominator and post-dominator trees, and runs the shared SIWholeQuadMode
/// implementation on \p MF.
///
/// \returns PreservedAnalyses::all() when the implementation reports no
/// change; otherwise the default machine-function-pass set extended with
/// slot indexes, live intervals and both dominator tree analyses.
PreservedAnalyses
SIWholeQuadModePass::run(MachineFunction &MF,
                         MachineFunctionAnalysisManager &MFAM) {
  // Scoped helper built from this pass and the function; it must stay alive
  // for the whole body.
  MFPropsModifier PropsScope(*this, MF);

  LiveIntervals &Intervals = MFAM.getResult<LiveIntervalsAnalysis>(MF);
  // getCachedResult does not compute anything: these are null unless the
  // analyses were already available.
  MachineDominatorTree *DomTree =
      MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
  MachinePostDominatorTree *PostDomTree =
      MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);

  SIWholeQuadMode WQM(MF, &Intervals, DomTree, PostDomTree);
  if (const bool Modified = WQM.run(MF); !Modified)
    return PreservedAnalyses::all();

  PreservedAnalyses Preserved = getMachineFunctionPassPreservedAnalyses();
  Preserved.preserve<SlotIndexesAnalysis>();
  Preserved.preserve<LiveIntervalsAnalysis>();
  Preserved.preserve<MachineDominatorTreeAnalysis>();
  Preserved.preserve<MachinePostDominatorTreeAnalysis>();
  return Preserved;
}

View File

@ -0,0 +1,27 @@
//===- SIWholeQuadMode.h ----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
#define LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
#include "llvm/CodeGen/MachinePassManager.h"
namespace llvm {
/// New pass-manager wrapper for the SI whole quad mode pass.
class SIWholeQuadModePass : public PassInfoMixin<SIWholeQuadModePass> {
public:
  /// Runs the pass on \p MF, using \p MFAM to obtain analyses.
  /// \returns the set of analyses that remain valid afterwards.
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM);

  /// Machine function properties this pass clears: only IsSSA.
  MachineFunctionProperties getClearedProperties() const {
    MachineFunctionProperties Cleared;
    Cleared.set(MachineFunctionProperties::Property::IsSSA);
    return Cleared;
  }
};
} // namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H

View File

@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
# Machine LICM may hoist an instruction from a WWM region, which will force the SI-WQM pass
# to create a second WWM region. This is an unwanted hoisting.

View File

@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o - %s | FileCheck %s
---
# Test that we don't do silly things when there is no whole wave mode in the

View File

@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-wqm -o - %s | FileCheck %s
--- |
define amdgpu_ps void @exit_to_exact() {

View File

@ -1,4 +1,5 @@
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o - %s | FileCheck %s
--- |
define amdgpu_ps void @test_strict_wwm_scc() {