[CodeGen][NewPM] Port SIWholeQuadMode to NPM. (#125833)
This commit is contained in:
parent
e78be31639
commit
b83c960bad
@ -41,7 +41,7 @@ FunctionPass *createSIPeepholeSDWALegacyPass();
|
||||
FunctionPass *createSILowerI1CopiesLegacyPass();
|
||||
FunctionPass *createSIShrinkInstructionsLegacyPass();
|
||||
FunctionPass *createSILoadStoreOptimizerLegacyPass();
|
||||
FunctionPass *createSIWholeQuadModePass();
|
||||
FunctionPass *createSIWholeQuadModeLegacyPass();
|
||||
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
|
||||
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
|
||||
FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass();
|
||||
@ -204,7 +204,7 @@ extern char &SILowerSGPRSpillsLegacyID;
|
||||
void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &);
|
||||
extern char &SILoadStoreOptimizerLegacyID;
|
||||
|
||||
void initializeSIWholeQuadModePass(PassRegistry &);
|
||||
void initializeSIWholeQuadModeLegacyPass(PassRegistry &);
|
||||
extern char &SIWholeQuadModeID;
|
||||
|
||||
void initializeSILowerControlFlowLegacyPass(PassRegistry &);
|
||||
|
||||
@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
|
||||
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
|
||||
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
|
||||
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
|
||||
MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
|
||||
#undef MACHINE_FUNCTION_PASS
|
||||
|
||||
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
|
||||
@ -140,7 +141,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-global-isel-divergence-lowering", AMDGPUGlob
|
||||
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbankselect", AMDGPURegBankSelectPass())
|
||||
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbanklegalize", AMDGPURegBankLegalizePass())
|
||||
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-regbank-combiner", AMDGPURegBankCombinerPass())
|
||||
DUMMY_MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
|
||||
|
||||
#undef DUMMY_MACHINE_FUNCTION_PASS
|
||||
|
||||
|
||||
@ -50,6 +50,7 @@
|
||||
#include "SIPeepholeSDWA.h"
|
||||
#include "SIPreAllocateWWMRegs.h"
|
||||
#include "SIShrinkInstructions.h"
|
||||
#include "SIWholeQuadMode.h"
|
||||
#include "TargetInfo/AMDGPUTargetInfo.h"
|
||||
#include "Utils/AMDGPUBaseInfo.h"
|
||||
#include "llvm/Analysis/CGSCCPassManager.h"
|
||||
@ -529,7 +530,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
||||
initializeSIInsertHardClausesPass(*PR);
|
||||
initializeSIInsertWaitcntsPass(*PR);
|
||||
initializeSIModeRegisterPass(*PR);
|
||||
initializeSIWholeQuadModePass(*PR);
|
||||
initializeSIWholeQuadModeLegacyPass(*PR);
|
||||
initializeSILowerControlFlowLegacyPass(*PR);
|
||||
initializeSIPreEmitPeepholePass(*PR);
|
||||
initializeSILateBranchLoweringPass(*PR);
|
||||
|
||||
@ -67,6 +67,7 @@
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "SIWholeQuadMode.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "GCNSubtarget.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
@ -148,11 +149,19 @@ struct WorkItem {
|
||||
WorkItem(MachineInstr *MI) : MI(MI) {}
|
||||
};
|
||||
|
||||
class SIWholeQuadMode : public MachineFunctionPass {
|
||||
class SIWholeQuadMode {
|
||||
public:
|
||||
SIWholeQuadMode(MachineFunction &MF, LiveIntervals *LIS,
|
||||
MachineDominatorTree *MDT, MachinePostDominatorTree *PDT)
|
||||
: ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()),
|
||||
TRI(&TII->getRegisterInfo()), MRI(&MF.getRegInfo()), LIS(LIS), MDT(MDT),
|
||||
PDT(PDT) {}
|
||||
bool run(MachineFunction &MF);
|
||||
|
||||
private:
|
||||
const GCNSubtarget *ST;
|
||||
const SIInstrInfo *TII;
|
||||
const SIRegisterInfo *TRI;
|
||||
const GCNSubtarget *ST;
|
||||
MachineRegisterInfo *MRI;
|
||||
LiveIntervals *LIS;
|
||||
MachineDominatorTree *MDT;
|
||||
@ -225,12 +234,13 @@ private:
|
||||
void lowerInitExec(MachineInstr &MI);
|
||||
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
|
||||
bool &Changed);
|
||||
};
|
||||
|
||||
class SIWholeQuadModeLegacy : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
SIWholeQuadMode() :
|
||||
MachineFunctionPass(ID) { }
|
||||
SIWholeQuadModeLegacy() : MachineFunctionPass(ID) {}
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
@ -250,23 +260,22 @@ public:
|
||||
MachineFunctionProperties::Property::IsSSA);
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char SIWholeQuadMode::ID = 0;
|
||||
char SIWholeQuadModeLegacy::ID = 0;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
|
||||
false)
|
||||
INITIALIZE_PASS_BEGIN(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
|
||||
false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
|
||||
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
|
||||
false)
|
||||
INITIALIZE_PASS_END(SIWholeQuadModeLegacy, DEBUG_TYPE, "SI Whole Quad Mode",
|
||||
false, false)
|
||||
|
||||
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
|
||||
char &llvm::SIWholeQuadModeID = SIWholeQuadModeLegacy::ID;
|
||||
|
||||
FunctionPass *llvm::createSIWholeQuadModePass() {
|
||||
return new SIWholeQuadMode;
|
||||
FunctionPass *llvm::createSIWholeQuadModeLegacyPass() {
|
||||
return new SIWholeQuadModeLegacy;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
@ -1689,7 +1698,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
|
||||
return InsertPt;
|
||||
}
|
||||
|
||||
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
|
||||
bool SIWholeQuadMode::run(MachineFunction &MF) {
|
||||
LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
|
||||
<< " ------------- \n");
|
||||
LLVM_DEBUG(MF.dump(););
|
||||
@ -1704,18 +1713,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
|
||||
SetInactiveInstrs.clear();
|
||||
StateTransition.clear();
|
||||
|
||||
ST = &MF.getSubtarget<GCNSubtarget>();
|
||||
|
||||
TII = ST->getInstrInfo();
|
||||
TRI = &TII->getRegisterInfo();
|
||||
MRI = &MF.getRegInfo();
|
||||
LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
|
||||
auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
|
||||
MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
|
||||
auto *PDTWrapper =
|
||||
getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
|
||||
PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
|
||||
|
||||
if (ST->isWave32()) {
|
||||
AndOpc = AMDGPU::S_AND_B32;
|
||||
AndTermOpc = AMDGPU::S_AND_B32_term;
|
||||
@ -1816,3 +1813,38 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// Legacy pass-manager entry point. Gathers the analyses from the legacy
// analysis wrappers and forwards the actual work to a SIWholeQuadMode
// instance; returns whatever that instance's run() reports.
bool SIWholeQuadModeLegacy::runOnMachineFunction(MachineFunction &MF) {
|
||||
// LiveIntervals is fetched unconditionally via getAnalysis.
LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
|
||||
// The dominator tree is optional: it is only picked up if a wrapper pass is
// already available, otherwise a null pointer is handed to the implementation.
auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
|
||||
MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
|
||||
// Same optional handling for the post-dominator tree.
auto *PDTWrapper =
|
||||
getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
|
||||
MachinePostDominatorTree *PDT =
|
||||
PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
|
||||
// The pass logic lives in SIWholeQuadMode; this wrapper holds no state itself.
SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
|
||||
return Impl.run(MF);
|
||||
}
|
||||
|
||||
// New pass-manager entry point. Obtains the analyses from MFAM, runs the
// shared SIWholeQuadMode implementation on MF, and reports which analyses are
// still valid afterwards.
PreservedAnalyses
|
||||
SIWholeQuadModePass::run(MachineFunction &MF,
|
||||
MachineFunctionAnalysisManager &MFAM) {
|
||||
// NOTE(review): presumably applies this pass's declared property changes
// (e.g. getClearedProperties) to MF -- confirm against MachinePassManager.h.
MFPropsModifier _(*this, MF);
|
||||
|
||||
// LiveIntervals is requested with getResult.
LiveIntervals *LIS = &MFAM.getResult<LiveIntervalsAnalysis>(MF);
|
||||
// The (post-)dominator trees are only queried with getCachedResult, which
// yields a pointer; presumably null when the analysis has not been computed
// yet -- this matches the legacy wrapper, which passes nullptr when its
// wrapper passes are unavailable.
MachineDominatorTree *MDT =
|
||||
MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
|
||||
MachinePostDominatorTree *PDT =
|
||||
MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
|
||||
SIWholeQuadMode Impl(MF, LIS, MDT, PDT);
|
||||
bool Changed = Impl.run(MF);
|
||||
// Nothing was modified, so every analysis stays valid.
if (!Changed)
|
||||
return PreservedAnalyses::all();
|
||||
|
||||
// Start from the default machine-function-pass set and additionally mark
// SlotIndexes, LiveIntervals and both dominator trees as preserved.
PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
|
||||
PA.preserve<SlotIndexesAnalysis>();
|
||||
PA.preserve<LiveIntervalsAnalysis>();
|
||||
PA.preserve<MachineDominatorTreeAnalysis>();
|
||||
PA.preserve<MachinePostDominatorTreeAnalysis>();
|
||||
return PA;
|
||||
}
|
||||
|
||||
27
llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
Normal file
27
llvm/lib/Target/AMDGPU/SIWholeQuadMode.h
Normal file
@ -0,0 +1,27 @@
|
||||
//===- SIWholeQuadMode.h ----------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
|
||||
|
||||
#include "llvm/CodeGen/MachinePassManager.h"
|
||||
|
||||
namespace llvm {
|
||||
/// New pass-manager wrapper for the SI Whole Quad Mode machine-function pass.
/// Only the interface is declared here; run() is defined out of line.
class SIWholeQuadModePass : public PassInfoMixin<SIWholeQuadModePass> {
|
||||
public:
|
||||
/// Runs the pass on \p MF, taking analyses from \p MFAM, and returns the set
/// of analyses that remain valid.
PreservedAnalyses run(MachineFunction &MF,
|
||||
MachineFunctionAnalysisManager &MFAM);
|
||||
|
||||
/// Reports the machine-function properties this pass clears: the returned
/// set contains exactly the IsSSA property.
MachineFunctionProperties getClearedProperties() const {
|
||||
return MachineFunctionProperties().set(
|
||||
MachineFunctionProperties::Property::IsSSA);
|
||||
}
|
||||
};
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_SIWHOLEQUADMODE_H
|
||||
@ -1,5 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=early-machinelicm,si-wqm -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# Machine LICM may hoist an instruction from a WWM region, which will force SI-WQM pass
|
||||
# to create a second WWM region. This is an unwanted hoisting.
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o - %s | FileCheck %s
|
||||
|
||||
---
|
||||
# Test that we don't do silly things when there is no whole wave mode in the
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-wqm -o - %s | FileCheck %s
|
||||
|
||||
--- |
|
||||
define amdgpu_ps void @exit_to_exact() {
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s
|
||||
# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-wqm -o - %s | FileCheck %s
|
||||
|
||||
--- |
|
||||
define amdgpu_ps void @test_strict_wwm_scc() {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user