llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Scott Linder 60b1967c39 [AMDGPU] Add Scratch Wave Offset to Scratch Buffer Descriptor in entry functions
Add the scratch wave offset to the scratch buffer descriptor (SRSrc) in
the entry function prologue. This allows us to removes the scratch wave
offset register from the calling convention ABI.

As part of this change, allow the use of an inline constant zero for the
SOffset of MUBUF instructions accessing the stack in entry functions
when a frame pointer is not requested/required. Entry functions with
calls still need to set up the calling convention ABI stack pointer
register, and reference it in order to address arguments of called
functions. The ABI stack pointer register remains unswizzled, but is now
wave-relative instead of queue-relative.

Non-entry functions also use an inline constant zero SOffset for
wave-relative scratch access, but continue to use the stack and frame
pointers as before. When the stack or frame pointer is converted to a
swizzled offset it is now scaled directly, as the scratch wave offset no
longer needs to be subtracted first.

Update llvm/docs/AMDGPUUsage.rst to reflect these changes to the calling
convention.

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75138
2020-03-19 15:35:16 -04:00

922 lines
26 KiB
C++

//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <utility>
#include <vector>
namespace llvm {
class MachineFrameInfo;
class MachineFunction;
class TargetRegisterClass;
class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
enum AMDGPUPSVKind : unsigned {
PSVBuffer = PseudoSourceValue::TargetCustom,
PSVImage,
GWSResource
};
protected:
AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
: PseudoSourceValue(Kind, TII) {}
public:
bool isConstant(const MachineFrameInfo *) const override {
// This should probably be true for most images, but we will start by being
// conservative.
return false;
}
bool isAliased(const MachineFrameInfo *) const override {
return true;
}
bool mayAlias(const MachineFrameInfo *) const override {
return true;
}
};
class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
: AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
static bool classof(const PseudoSourceValue *V) {
return V->kind() == PSVBuffer;
}
};
class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
// TODO: Is the img rsrc useful?
explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
: AMDGPUPseudoSourceValue(PSVImage, TII) {}
static bool classof(const PseudoSourceValue *V) {
return V->kind() == PSVImage;
}
};
class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
public:
explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
: AMDGPUPseudoSourceValue(GWSResource, TII) {}
static bool classof(const PseudoSourceValue *V) {
return V->kind() == GWSResource;
}
// These are inaccessible memory from IR.
bool isAliased(const MachineFrameInfo *) const override {
return false;
}
// These are inaccessible memory from IR.
bool mayAlias(const MachineFrameInfo *) const override {
return false;
}
void printCustom(raw_ostream &OS) const override {
OS << "GWSResource";
}
};
namespace yaml {
struct SIArgument {
bool IsRegister;
union {
StringValue RegisterName;
unsigned StackOffset;
};
Optional<unsigned> Mask;
// Default constructor, which creates a stack argument.
SIArgument() : IsRegister(false), StackOffset(0) {}
SIArgument(const SIArgument &Other) {
IsRegister = Other.IsRegister;
if (IsRegister) {
::new ((void *)std::addressof(RegisterName))
StringValue(Other.RegisterName);
} else
StackOffset = Other.StackOffset;
Mask = Other.Mask;
}
SIArgument &operator=(const SIArgument &Other) {
IsRegister = Other.IsRegister;
if (IsRegister) {
::new ((void *)std::addressof(RegisterName))
StringValue(Other.RegisterName);
} else
StackOffset = Other.StackOffset;
Mask = Other.Mask;
return *this;
}
~SIArgument() {
if (IsRegister)
RegisterName.~StringValue();
}
// Helper to create a register or stack argument.
static inline SIArgument createArgument(bool IsReg) {
if (IsReg)
return SIArgument(IsReg);
return SIArgument();
}
private:
// Construct a register argument.
SIArgument(bool) : IsRegister(true), RegisterName() {}
};
template <> struct MappingTraits<SIArgument> {
static void mapping(IO &YamlIO, SIArgument &A) {
if (YamlIO.outputting()) {
if (A.IsRegister)
YamlIO.mapRequired("reg", A.RegisterName);
else
YamlIO.mapRequired("offset", A.StackOffset);
} else {
auto Keys = YamlIO.keys();
if (is_contained(Keys, "reg")) {
A = SIArgument::createArgument(true);
YamlIO.mapRequired("reg", A.RegisterName);
} else if (is_contained(Keys, "offset"))
YamlIO.mapRequired("offset", A.StackOffset);
else
YamlIO.setError("missing required key 'reg' or 'offset'");
}
YamlIO.mapOptional("mask", A.Mask);
}
static const bool flow = true;
};
struct SIArgumentInfo {
Optional<SIArgument> PrivateSegmentBuffer;
Optional<SIArgument> DispatchPtr;
Optional<SIArgument> QueuePtr;
Optional<SIArgument> KernargSegmentPtr;
Optional<SIArgument> DispatchID;
Optional<SIArgument> FlatScratchInit;
Optional<SIArgument> PrivateSegmentSize;
Optional<SIArgument> WorkGroupIDX;
Optional<SIArgument> WorkGroupIDY;
Optional<SIArgument> WorkGroupIDZ;
Optional<SIArgument> WorkGroupInfo;
Optional<SIArgument> PrivateSegmentWaveByteOffset;
Optional<SIArgument> ImplicitArgPtr;
Optional<SIArgument> ImplicitBufferPtr;
Optional<SIArgument> WorkItemIDX;
Optional<SIArgument> WorkItemIDY;
Optional<SIArgument> WorkItemIDZ;
};
template <> struct MappingTraits<SIArgumentInfo> {
static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
YamlIO.mapOptional("queuePtr", AI.QueuePtr);
YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
YamlIO.mapOptional("dispatchID", AI.DispatchID);
YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
YamlIO.mapOptional("privateSegmentWaveByteOffset",
AI.PrivateSegmentWaveByteOffset);
YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
}
};
// Default to default mode for default calling convention.
struct SIMode {
bool IEEE = true;
bool DX10Clamp = true;
bool FP32InputDenormals = true;
bool FP32OutputDenormals = true;
bool FP64FP16InputDenormals = true;
bool FP64FP16OutputDenormals = true;
SIMode() = default;
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
IEEE = Mode.IEEE;
DX10Clamp = Mode.DX10Clamp;
FP32InputDenormals = Mode.FP32InputDenormals;
FP32OutputDenormals = Mode.FP32OutputDenormals;
FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
}
bool operator ==(const SIMode Other) const {
return IEEE == Other.IEEE &&
DX10Clamp == Other.DX10Clamp &&
FP32InputDenormals == Other.FP32InputDenormals &&
FP32OutputDenormals == Other.FP32OutputDenormals &&
FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
}
};
template <> struct MappingTraits<SIMode> {
static void mapping(IO &YamlIO, SIMode &Mode) {
YamlIO.mapOptional("ieee", Mode.IEEE, true);
YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
}
};
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
uint64_t ExplicitKernArgSize = 0;
unsigned MaxKernArgAlign = 0;
unsigned LDSSize = 0;
bool IsEntryFunction = false;
bool NoSignedZerosFPMath = false;
bool MemoryBound = false;
bool WaveLimiter = false;
uint32_t HighBitsOf32BitAddress = 0;
StringValue ScratchRSrcReg = "$private_rsrc_reg";
StringValue FrameOffsetReg = "$fp_reg";
StringValue StackPtrOffsetReg = "$sp_reg";
Optional<SIArgumentInfo> ArgInfo;
SIMode Mode;
SIMachineFunctionInfo() = default;
SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
const TargetRegisterInfo &TRI);
void mappingImpl(yaml::IO &YamlIO) override;
~SIMachineFunctionInfo() = default;
};
template <> struct MappingTraits<SIMachineFunctionInfo> {
static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
UINT64_C(0));
YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
StringValue("$private_rsrc_reg"));
YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
StringValue("$fp_reg"));
YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
StringValue("$sp_reg"));
YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
YamlIO.mapOptional("mode", MFI.Mode, SIMode());
YamlIO.mapOptional("highBitsOf32BitAddress",
MFI.HighBitsOf32BitAddress, 0u);
}
};
} // end namespace yaml
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
friend class GCNTargetMachine;
Register TIDReg = AMDGPU::NoRegister;
// Registers that may be reserved for spilling purposes. These may be the same
// as the input registers.
Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
// This is the the unswizzled offset from the current dispatch's scratch wave
// base to the beginning of the current function's frame.
Register FrameOffsetReg = AMDGPU::FP_REG;
// This is an ABI register used in the non-entry calling convention to
// communicate the unswizzled offset from the current dispatch's scratch wave
// base to the beginning of the new function's frame.
Register StackPtrOffsetReg = AMDGPU::SP_REG;
AMDGPUFunctionArgInfo ArgInfo;
// Graphics info.
unsigned PSInputAddr = 0;
unsigned PSInputEnable = 0;
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack this should be a multiple of 16,
/// all usable during a tail call.
///
/// The alternative would forbid tail call optimisation in some cases: if we
/// want to transfer control from a function with 8-bytes of stack-argument
/// space to a function with 16-bytes then misalignment of this value would
/// make a stack adjustment necessary, which could not be undone by the
/// callee.
unsigned BytesInStackArgArea = 0;
bool ReturnsVoid = true;
// A pair of default/requested minimum/maximum flat work group sizes.
// Minimum - first, maximum - second.
std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
// A pair of default/requested minimum/maximum number of waves per execution
// unit. Minimum - first, maximum - second.
std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
DenseMap<const Value *,
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
DenseMap<const Value *,
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
private:
unsigned LDSWaveSpillSize = 0;
unsigned NumUserSGPRs = 0;
unsigned NumSystemSGPRs = 0;
bool HasSpilledSGPRs = false;
bool HasSpilledVGPRs = false;
bool HasNonSpillStackObjects = false;
bool IsStackRealigned = false;
unsigned NumSpilledSGPRs = 0;
unsigned NumSpilledVGPRs = 0;
// Feature bits required for inputs passed in user SGPRs.
bool PrivateSegmentBuffer : 1;
bool DispatchPtr : 1;
bool QueuePtr : 1;
bool KernargSegmentPtr : 1;
bool DispatchID : 1;
bool FlatScratchInit : 1;
// Feature bits required for inputs passed in system SGPRs.
bool WorkGroupIDX : 1; // Always initialized.
bool WorkGroupIDY : 1;
bool WorkGroupIDZ : 1;
bool WorkGroupInfo : 1;
bool PrivateSegmentWaveByteOffset : 1;
bool WorkItemIDX : 1; // Always initialized.
bool WorkItemIDY : 1;
bool WorkItemIDZ : 1;
// Private memory buffer
// Compute directly in sgpr[0:1]
// Other shaders indirect 64-bits at sgpr[0:1]
bool ImplicitBufferPtr : 1;
// Pointer to where the ABI inserts special kernel arguments separate from the
// user arguments. This is an offset from the KernargSegmentPtr.
bool ImplicitArgPtr : 1;
// The hard-wired high half of the address of the global information table
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
// current hardware only allows a 16 bit value.
unsigned GITPtrHigh;
unsigned HighBitsOf32BitAddress;
unsigned GDSSize;
// Current recorded maximum possible occupancy.
unsigned Occupancy;
MCPhysReg getNextUserSGPR() const;
MCPhysReg getNextSystemSGPR() const;
public:
struct SpilledReg {
Register VGPR;
int Lane = -1;
SpilledReg() = default;
SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}
bool hasLane() { return Lane != -1;}
bool hasReg() { return VGPR != 0;}
};
struct SGPRSpillVGPRCSR {
// VGPR used for SGPR spills
Register VGPR;
// If the VGPR is a CSR, the stack slot used to save/restore it in the
// prolog/epilog.
Optional<int> FI;
SGPRSpillVGPRCSR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
};
struct VGPRSpillToAGPR {
SmallVector<MCPhysReg, 32> Lanes;
bool FullyAllocated = false;
};
SparseBitVector<> WWMReservedRegs;
void ReserveWWMRegister(Register Reg) { WWMReservedRegs.set(Reg); }
private:
// Track VGPR + wave index for each subregister of the SGPR spilled to
// frameindex key.
DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
unsigned NumVGPRSpillLanes = 0;
SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
// AGPRs used for VGPR spills.
SmallVector<MCPhysReg, 32> SpillAGPR;
// VGPRs used for AGPR spills.
SmallVector<MCPhysReg, 32> SpillVGPR;
public: // FIXME
/// If this is set, an SGPR used for save/restore of the register used for the
/// frame pointer.
Register SGPRForFPSaveRestoreCopy;
Optional<int> FramePointerSaveIndex;
public:
SIMachineFunctionInfo(const MachineFunction &MF);
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
auto I = SGPRToVGPRSpills.find(FrameIndex);
return (I == SGPRToVGPRSpills.end()) ?
ArrayRef<SpilledReg>() : makeArrayRef(I->second);
}
ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
return SpillVGPRs;
}
ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
return SpillAGPR;
}
ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
return SpillVGPR;
}
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
auto I = VGPRToAGPRSpills.find(FrameIndex);
return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
: I->second.Lanes[Lane];
}
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
void removeDeadFrameIndices(MachineFrameInfo &MFI);
bool hasCalculatedTID() const { return TIDReg != 0; };
Register getTIDReg() const { return TIDReg; };
void setTIDReg(Register Reg) { TIDReg = Reg; }
unsigned getBytesInStackArgArea() const {
return BytesInStackArgArea;
}
void setBytesInStackArgArea(unsigned Bytes) {
BytesInStackArgArea = Bytes;
}
// Add user SGPRs.
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
Register addDispatchPtr(const SIRegisterInfo &TRI);
Register addQueuePtr(const SIRegisterInfo &TRI);
Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
Register addDispatchID(const SIRegisterInfo &TRI);
Register addFlatScratchInit(const SIRegisterInfo &TRI);
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
// Add system SGPRs.
Register addWorkGroupIDX() {
ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDX.getRegister();
}
Register addWorkGroupIDY() {
ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDY.getRegister();
}
Register addWorkGroupIDZ() {
ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupIDZ.getRegister();
}
Register addWorkGroupInfo() {
ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.WorkGroupInfo.getRegister();
}
// Add special VGPR inputs
void setWorkItemIDX(ArgDescriptor Arg) {
ArgInfo.WorkItemIDX = Arg;
}
void setWorkItemIDY(ArgDescriptor Arg) {
ArgInfo.WorkItemIDY = Arg;
}
void setWorkItemIDZ(ArgDescriptor Arg) {
ArgInfo.WorkItemIDZ = Arg;
}
Register addPrivateSegmentWaveByteOffset() {
ArgInfo.PrivateSegmentWaveByteOffset
= ArgDescriptor::createRegister(getNextSystemSGPR());
NumSystemSGPRs += 1;
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
void setPrivateSegmentWaveByteOffset(Register Reg) {
ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
}
bool hasPrivateSegmentBuffer() const {
return PrivateSegmentBuffer;
}
bool hasDispatchPtr() const {
return DispatchPtr;
}
bool hasQueuePtr() const {
return QueuePtr;
}
bool hasKernargSegmentPtr() const {
return KernargSegmentPtr;
}
bool hasDispatchID() const {
return DispatchID;
}
bool hasFlatScratchInit() const {
return FlatScratchInit;
}
bool hasWorkGroupIDX() const {
return WorkGroupIDX;
}
bool hasWorkGroupIDY() const {
return WorkGroupIDY;
}
bool hasWorkGroupIDZ() const {
return WorkGroupIDZ;
}
bool hasWorkGroupInfo() const {
return WorkGroupInfo;
}
bool hasPrivateSegmentWaveByteOffset() const {
return PrivateSegmentWaveByteOffset;
}
bool hasWorkItemIDX() const {
return WorkItemIDX;
}
bool hasWorkItemIDY() const {
return WorkItemIDY;
}
bool hasWorkItemIDZ() const {
return WorkItemIDZ;
}
bool hasImplicitArgPtr() const {
return ImplicitArgPtr;
}
bool hasImplicitBufferPtr() const {
return ImplicitBufferPtr;
}
AMDGPUFunctionArgInfo &getArgInfo() {
return ArgInfo;
}
const AMDGPUFunctionArgInfo &getArgInfo() const {
return ArgInfo;
}
std::pair<const ArgDescriptor *, const TargetRegisterClass *>
getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
return ArgInfo.getPreloadedValue(Value);
}
Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
auto Arg = ArgInfo.getPreloadedValue(Value).first;
return Arg ? Arg->getRegister() : Register();
}
unsigned getGITPtrHigh() const {
return GITPtrHigh;
}
uint32_t get32BitAddressHighBits() const {
return HighBitsOf32BitAddress;
}
unsigned getGDSSize() const {
return GDSSize;
}
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
unsigned getNumPreloadedSGPRs() const {
return NumUserSGPRs + NumSystemSGPRs;
}
Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
}
/// Returns the physical register reserved for use as the resource
/// descriptor for scratch accesses.
Register getScratchRSrcReg() const {
return ScratchRSrcReg;
}
void setScratchRSrcReg(Register Reg) {
assert(Reg != 0 && "Should never be unset");
ScratchRSrcReg = Reg;
}
Register getFrameOffsetReg() const {
return FrameOffsetReg;
}
void setFrameOffsetReg(Register Reg) {
assert(Reg != 0 && "Should never be unset");
FrameOffsetReg = Reg;
}
void setStackPtrOffsetReg(Register Reg) {
assert(Reg != 0 && "Should never be unset");
StackPtrOffsetReg = Reg;
}
// Note the unset value for this is AMDGPU::SP_REG rather than
// NoRegister. This is mostly a workaround for MIR tests where state that
// can't be directly computed from the function is not preserved in serialized
// MIR.
Register getStackPtrOffsetReg() const {
return StackPtrOffsetReg;
}
Register getQueuePtrUserSGPR() const {
return ArgInfo.QueuePtr.getRegister();
}
Register getImplicitBufferPtrUserSGPR() const {
return ArgInfo.ImplicitBufferPtr.getRegister();
}
bool hasSpilledSGPRs() const {
return HasSpilledSGPRs;
}
void setHasSpilledSGPRs(bool Spill = true) {
HasSpilledSGPRs = Spill;
}
bool hasSpilledVGPRs() const {
return HasSpilledVGPRs;
}
void setHasSpilledVGPRs(bool Spill = true) {
HasSpilledVGPRs = Spill;
}
bool hasNonSpillStackObjects() const {
return HasNonSpillStackObjects;
}
void setHasNonSpillStackObjects(bool StackObject = true) {
HasNonSpillStackObjects = StackObject;
}
bool isStackRealigned() const {
return IsStackRealigned;
}
void setIsStackRealigned(bool Realigned = true) {
IsStackRealigned = Realigned;
}
unsigned getNumSpilledSGPRs() const {
return NumSpilledSGPRs;
}
unsigned getNumSpilledVGPRs() const {
return NumSpilledVGPRs;
}
void addToSpilledSGPRs(unsigned num) {
NumSpilledSGPRs += num;
}
void addToSpilledVGPRs(unsigned num) {
NumSpilledVGPRs += num;
}
unsigned getPSInputAddr() const {
return PSInputAddr;
}
unsigned getPSInputEnable() const {
return PSInputEnable;
}
bool isPSInputAllocated(unsigned Index) const {
return PSInputAddr & (1 << Index);
}
void markPSInputAllocated(unsigned Index) {
PSInputAddr |= 1 << Index;
}
void markPSInputEnabled(unsigned Index) {
PSInputEnable |= 1 << Index;
}
bool returnsVoid() const {
return ReturnsVoid;
}
void setIfReturnsVoid(bool Value) {
ReturnsVoid = Value;
}
/// \returns A pair of default/requested minimum/maximum flat work group sizes
/// for this function.
std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
return FlatWorkGroupSizes;
}
/// \returns Default/requested minimum flat work group size for this function.
unsigned getMinFlatWorkGroupSize() const {
return FlatWorkGroupSizes.first;
}
/// \returns Default/requested maximum flat work group size for this function.
unsigned getMaxFlatWorkGroupSize() const {
return FlatWorkGroupSizes.second;
}
/// \returns A pair of default/requested minimum/maximum number of waves per
/// execution unit.
std::pair<unsigned, unsigned> getWavesPerEU() const {
return WavesPerEU;
}
/// \returns Default/requested minimum number of waves per execution unit.
unsigned getMinWavesPerEU() const {
return WavesPerEU.first;
}
/// \returns Default/requested maximum number of waves per execution unit.
unsigned getMaxWavesPerEU() const {
return WavesPerEU.second;
}
/// \returns SGPR used for \p Dim's work group ID.
Register getWorkGroupIDSGPR(unsigned Dim) const {
switch (Dim) {
case 0:
assert(hasWorkGroupIDX());
return ArgInfo.WorkGroupIDX.getRegister();
case 1:
assert(hasWorkGroupIDY());
return ArgInfo.WorkGroupIDY.getRegister();
case 2:
assert(hasWorkGroupIDZ());
return ArgInfo.WorkGroupIDZ.getRegister();
}
llvm_unreachable("unexpected dimension");
}
unsigned getLDSWaveSpillSize() const {
return LDSWaveSpillSize;
}
const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
const Value *BufferRsrc) {
assert(BufferRsrc);
auto PSV = BufferPSVs.try_emplace(
BufferRsrc,
std::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
return PSV.first->second.get();
}
const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
const Value *ImgRsrc) {
assert(ImgRsrc);
auto PSV = ImagePSVs.try_emplace(
ImgRsrc,
std::make_unique<AMDGPUImagePseudoSourceValue>(TII));
return PSV.first->second.get();
}
const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
if (!GWSResourcePSV) {
GWSResourcePSV =
std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
}
return GWSResourcePSV.get();
}
unsigned getOccupancy() const {
return Occupancy;
}
unsigned getMinAllowedOccupancy() const {
if (!isMemoryBound() && !needsWaveLimiter())
return Occupancy;
return (Occupancy < 4) ? Occupancy : 4;
}
void limitOccupancy(const MachineFunction &MF);
void limitOccupancy(unsigned Limit) {
if (Occupancy > Limit)
Occupancy = Limit;
}
void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
if (Occupancy < Limit)
Occupancy = Limit;
limitOccupancy(MF);
}
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H