//===-- AMDGPURegBankSelect.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Assign register banks to all register operands of G_ instructions using
/// machine uniformity analysis.
/// Sgpr - uniform values and some lane masks
/// Vgpr - divergent, non S1, values
/// Vcc - divergent S1 values (lane masks)
/// However, in some cases G_ instructions with this register bank assignment
/// can't be inst-selected. This is solved in AMDGPURegBankLegalize.
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPU.h"
|
|
#include "AMDGPUGlobalISelUtils.h"
|
|
#include "GCNSubtarget.h"
|
|
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
|
|
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
|
|
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
|
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
|
#include "llvm/InitializePasses.h"
|
|
|
|
#define DEBUG_TYPE "amdgpu-regbankselect"
|
|
|
|
using namespace llvm;
|
|
using namespace AMDGPU;
|
|
|
|
namespace {
|
|
|
|
class AMDGPURegBankSelect : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
|
|
AMDGPURegBankSelect() : MachineFunctionPass(ID) {}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
StringRef getPassName() const override {
|
|
return "AMDGPU Register Bank Select";
|
|
}
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.addRequired<TargetPassConfig>();
|
|
AU.addRequired<GISelCSEAnalysisWrapperPass>();
|
|
AU.addRequired<MachineUniformityAnalysisPass>();
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
}
|
|
|
|
// This pass assigns register banks to all virtual registers, and we maintain
|
|
// this property in subsequent passes
|
|
MachineFunctionProperties getSetProperties() const override {
|
|
return MachineFunctionProperties().setRegBankSelected();
|
|
}
|
|
};
|
|
|
|
} // End anonymous namespace.
|
|
|
|
INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE,
|
|
"AMDGPU Register Bank Select", false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
|
|
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
|
|
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
|
|
INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
|
|
"AMDGPU Register Bank Select", false, false)
|
|
|
|
char AMDGPURegBankSelect::ID = 0;
|
|
|
|
char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID;
|
|
|
|
/// Factory for the AMDGPU register bank select pass. Returns a newly
/// heap-allocated pass object; the caller is responsible for its lifetime
/// (presumably it is handed to a pass manager - confirm at the call site).
FunctionPass *llvm::createAMDGPURegBankSelectPass() {
  FunctionPass *Pass = new AMDGPURegBankSelect();
  return Pass;
}
|
|
|
|
class RegBankSelectHelper {
|
|
MachineIRBuilder &B;
|
|
MachineRegisterInfo &MRI;
|
|
AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
|
|
const MachineUniformityInfo &MUI;
|
|
const SIRegisterInfo &TRI;
|
|
const RegisterBank *SgprRB;
|
|
const RegisterBank *VgprRB;
|
|
const RegisterBank *VccRB;
|
|
|
|
public:
|
|
RegBankSelectHelper(MachineIRBuilder &B,
|
|
AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
|
|
const MachineUniformityInfo &MUI,
|
|
const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
|
|
: B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI),
|
|
SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
|
|
VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
|
|
VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
|
|
|
|
// Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
|
|
// the cycle
|
|
// Note: uniformity analysis does not consider that registers with vgpr def
|
|
// are divergent (you can have uniform value in vgpr).
|
|
// - TODO: implicit use of $exec could be implemented as indicator that
|
|
// instruction is divergent
|
|
bool isTemporalDivergenceCopy(Register Reg) {
|
|
MachineInstr *MI = MRI.getVRegDef(Reg);
|
|
if (!MI->isCopy() || MI->getNumImplicitOperands() != 1)
|
|
return false;
|
|
|
|
return MI->implicit_operands().begin()->getReg() == TRI.getExec();
|
|
}
|
|
|
|
const RegisterBank *getRegBankToAssign(Register Reg) {
|
|
if (!isTemporalDivergenceCopy(Reg) &&
|
|
(MUI.isUniform(Reg) || ILMA.isS32S64LaneMask(Reg)))
|
|
return SgprRB;
|
|
if (MRI.getType(Reg) == LLT::scalar(1))
|
|
return VccRB;
|
|
return VgprRB;
|
|
}
|
|
|
|
// %rc:RegClass(s32) = G_ ...
|
|
// ...
|
|
// %a = G_ ..., %rc
|
|
// ->
|
|
// %rb:RegBank(s32) = G_ ...
|
|
// %rc:RegClass(s32) = COPY %rb
|
|
// ...
|
|
// %a = G_ ..., %rb
|
|
void reAssignRegBankOnDef(MachineInstr &MI, MachineOperand &DefOP,
|
|
const RegisterBank *RB) {
|
|
// Register that already has Register class got it during pre-inst selection
|
|
// of another instruction. Maybe cross bank copy was required so we insert a
|
|
// copy that can be removed later. This simplifies post regbanklegalize
|
|
// combiner and avoids need to special case some patterns.
|
|
Register Reg = DefOP.getReg();
|
|
LLT Ty = MRI.getType(Reg);
|
|
Register NewReg = MRI.createVirtualRegister({RB, Ty});
|
|
DefOP.setReg(NewReg);
|
|
|
|
auto &MBB = *MI.getParent();
|
|
B.setInsertPt(MBB, MBB.SkipPHIsAndLabels(std::next(MI.getIterator())));
|
|
B.buildCopy(Reg, NewReg);
|
|
|
|
// The problem was discovered for uniform S1 that was used as both
|
|
// lane mask(vcc) and regular sgpr S1.
|
|
// - lane-mask(vcc) use was by si_if, this use is divergent and requires
|
|
// non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
|
|
// sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
|
|
// - the regular sgpr S1(uniform) instruction is now broken since
|
|
// it uses sreg_64_xexec(S1) which is divergent.
|
|
|
|
// Replace virtual registers with register class on generic instructions
|
|
// uses with virtual registers with register bank.
|
|
for (auto &UseMI : make_early_inc_range(MRI.use_instructions(Reg))) {
|
|
if (UseMI.isPreISelOpcode()) {
|
|
for (MachineOperand &Op : UseMI.operands()) {
|
|
if (Op.isReg() && Op.getReg() == Reg)
|
|
Op.setReg(NewReg);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// %a = G_ ..., %rc
|
|
// ->
|
|
// %rb:RegBank(s32) = COPY %rc
|
|
// %a = G_ ..., %rb
|
|
void constrainRegBankUse(MachineInstr &MI, MachineOperand &UseOP,
|
|
const RegisterBank *RB) {
|
|
Register Reg = UseOP.getReg();
|
|
|
|
LLT Ty = MRI.getType(Reg);
|
|
Register NewReg = MRI.createVirtualRegister({RB, Ty});
|
|
UseOP.setReg(NewReg);
|
|
|
|
if (MI.isPHI()) {
|
|
auto DefMI = MRI.getVRegDef(Reg)->getIterator();
|
|
MachineBasicBlock *DefMBB = DefMI->getParent();
|
|
B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
|
|
} else {
|
|
B.setInstr(MI);
|
|
}
|
|
|
|
B.buildCopy(NewReg, Reg);
|
|
}
|
|
};
|
|
|
|
/// Returns the register held by \p Op if \p Op is a register operand holding
/// a virtual register; otherwise returns a default-constructed (invalid)
/// Register.
static Register getVReg(MachineOperand &Op) {
  // Operands of COPY and G_SI_CALL can be physical registers, so non-virtual
  // registers are filtered out together with non-register operands.
  const bool IsVirtualRegOperand = Op.isReg() && Op.getReg().isVirtual();
  return IsVirtualRegOperand ? Op.getReg() : Register();
}
|
|
|
|
/// Assigns register banks to the virtual registers in def and use operands of
/// all generic (G_) instructions and of COPYs whose destination has neither a
/// register class nor a register bank. Returns false without touching
/// anything if instruction selection has already failed for \p MF; returns
/// true otherwise.
bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasFailedISel())
    return false;

  // Set up a CSE-aware instruction builder and hook the CSE info up as an
  // observer of changes made to the function.
  const TargetPassConfig &PassConfig = getAnalysis<TargetPassConfig>();
  GISelCSEAnalysisWrapper &CSEWrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  GISelCSEInfo &CSE = CSEWrapper.get(PassConfig.getCSEConfig());
  GISelObserverWrapper ChangeObserver;
  ChangeObserver.addObserver(&CSE);

  CSEMIRBuilder Builder(MF);
  Builder.setCSEInfo(&CSE);
  Builder.setChangeObserver(ChangeObserver);

  RAIIDelegateInstaller DelegateGuard(MF, &ChangeObserver);
  RAIIMFObserverInstaller MFObserverGuard(MF, ChangeObserver);

  IntrinsicLaneMaskAnalyzer LaneMasks(MF);
  MachineUniformityInfo &Uniformity =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
  MachineRegisterInfo &MRI = *Builder.getMRI();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  RegBankSelectHelper Helper(Builder, LaneMasks, Uniformity,
                             *Subtarget.getRegisterInfo(),
                             *Subtarget.getRegBankInfo());

  // At this point no virtual register has a register bank yet. Virtual
  // registers in def and use operands of already inst-selected instructions
  // have a register class.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // The destination of a COPY may have a register class, a register bank,
      // or neither. Only the "neither" case needs a bank assigned here.
      if (MI.isCopy()) {
        Register CopyDst = getVReg(MI.getOperand(0));
        if (CopyDst.isValid() && !MRI.getRegClassOrNull(CopyDst)) {
          assert(!MRI.getRegBankOrNull(CopyDst));
          MRI.setRegBank(CopyDst, *Helper.getRegBankToAssign(CopyDst));
        }
        continue;
      }

      // Everything else that is not a generic instruction is left alone.
      if (!MI.isPreISelOpcode())
        continue;

      // Every vreg in a def or use operand of a G_ instruction must end up
      // with a register bank. Possible situations before this point:
      // - (1) vreg without register class or bank in def or use operand
      // - (2) vreg with register class in def operand
      // - (3) vreg, defined by G_ instruction, in use operand
      // - (4) vreg, defined by pre-inst-selected instruction, in use operand
      //
      // The def loop below covers (1), (2) and (3): (1) is a plain
      // setRegBank, while (2) and (3) go through reAssignRegBankOnDef.
      for (MachineOperand &DefOP : MI.defs()) {
        Register DefReg = getVReg(DefOP);
        if (!DefReg.isValid())
          continue;

        const RegisterBank *Bank = Helper.getRegBankToAssign(DefReg);
        if (!MRI.getRegClassOrNull(DefReg)) {
          assert(!MRI.getRegBankOrNull(DefReg));
          MRI.setRegBank(DefReg, *Bank);
          continue;
        }

        Helper.reAssignRegBankOnDef(MI, DefOP, Bank);
      }

      // Pre-inst-selected instructions are never modified by this pass, so
      // case (4) is solved on the use side by inserting a copy through
      // constrainRegBankUse.
      for (MachineOperand &UseOP : MI.uses()) {
        Register UseReg = getVReg(UseOP);
        if (!UseReg.isValid())
          continue;

        // Only uses that carry a register class and whose definition is not a
        // generic instruction need a copy; the rest is covered by the def
        // loop.
        const bool DefinedByInstSelected =
            MRI.getRegClassOrNull(UseReg) &&
            !MRI.getVRegDef(UseReg)->isPreISelOpcode();
        if (!DefinedByInstSelected)
          continue;

        const RegisterBank *Bank = Helper.getRegBankToAssign(UseReg);
        Helper.constrainRegBankUse(MI, UseOP, Bank);
      }
    }
  }

  return true;
}
|