
If we have a v_mov_b32 or v_accvgpr_write_b32 with an inline immediate, replace it with a pseudo which writes to the combined AV_* class. This relaxes the operand constraints, which will allow the allocator to inflate the register class to AV_* to potentially avoid spilling. The allocator does not know how to replace an instruction to enable the change of register class. I originally tried to do this by changing all of the places we introduce v_mov_b32 with immediate, but it's along tail of niche cases that require manual updating. Plus we can restrict this to only run on functions where we know we will be allocating AGPRs.
109 lines
3.5 KiB
C++
109 lines
3.5 KiB
C++
//===-- AMDGPUPrepareAGPRAlloc.cpp ----------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Make simple transformations to relax register constraints for cases which can
|
|
// allocate to AGPRs or VGPRs. Replace materialize of inline immediates into
|
|
// AGPR or VGPR with a pseudo with an AV_* class register constraint. This
|
|
// allows later passes to inflate the register class if necessary. The register
|
|
// allocator does not know to replace instructions to relax constraints.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPUPrepareAGPRAlloc.h"
|
|
#include "AMDGPU.h"
|
|
#include "GCNSubtarget.h"
|
|
#include "SIMachineFunctionInfo.h"
|
|
#include "SIRegisterInfo.h"
|
|
#include "llvm/CodeGen/LiveIntervals.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/InitializePasses.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "amdgpu-prepare-agpr-alloc"
|
|
|
|
namespace {
|
|
|
|
class AMDGPUPrepareAGPRAllocImpl {
|
|
private:
|
|
const SIInstrInfo &TII;
|
|
MachineRegisterInfo &MRI;
|
|
|
|
public:
|
|
AMDGPUPrepareAGPRAllocImpl(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
|
|
: TII(*ST.getInstrInfo()), MRI(MRI) {}
|
|
bool run(MachineFunction &MF);
|
|
};
|
|
|
|
class AMDGPUPrepareAGPRAllocLegacy : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
|
|
AMDGPUPrepareAGPRAllocLegacy() : MachineFunctionPass(ID) {
|
|
initializeAMDGPUPrepareAGPRAllocLegacyPass(
|
|
*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
StringRef getPassName() const override { return "AMDGPU Prepare AGPR Alloc"; }
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.setPreservesAll();
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
}
|
|
};
|
|
} // End anonymous namespace.
|
|
|
|
INITIALIZE_PASS_BEGIN(AMDGPUPrepareAGPRAllocLegacy, DEBUG_TYPE,
|
|
"AMDGPU Prepare AGPR Alloc", false, false)
|
|
INITIALIZE_PASS_END(AMDGPUPrepareAGPRAllocLegacy, DEBUG_TYPE,
|
|
"AMDGPU Prepare AGPR Alloc", false, false)
|
|
|
|
char AMDGPUPrepareAGPRAllocLegacy::ID = 0;
|
|
|
|
char &llvm::AMDGPUPrepareAGPRAllocLegacyID = AMDGPUPrepareAGPRAllocLegacy::ID;
|
|
|
|
bool AMDGPUPrepareAGPRAllocLegacy::runOnMachineFunction(MachineFunction &MF) {
|
|
if (skipFunction(MF.getFunction()))
|
|
return false;
|
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
return AMDGPUPrepareAGPRAllocImpl(ST, MF.getRegInfo()).run(MF);
|
|
}
|
|
|
|
PreservedAnalyses
|
|
AMDGPUPrepareAGPRAllocPass::run(MachineFunction &MF,
|
|
MachineFunctionAnalysisManager &MFAM) {
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
AMDGPUPrepareAGPRAllocImpl(ST, MF.getRegInfo()).run(MF);
|
|
return PreservedAnalyses::all();
|
|
}
|
|
|
|
bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
|
|
if (MRI.isReserved(AMDGPU::AGPR0))
|
|
return false;
|
|
|
|
const MCInstrDesc &AVImmPseudo = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
|
|
|
|
bool Changed = false;
|
|
for (MachineBasicBlock &MBB : MF) {
|
|
for (MachineInstr &MI : MBB) {
|
|
if ((MI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
|
|
TII.isInlineConstant(MI, 1)) ||
|
|
(MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
|
|
MI.getOperand(1).isImm())) {
|
|
MI.setDesc(AVImmPseudo);
|
|
Changed = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return Changed;
|
|
}
|