
Add support for using the existing SCRATCH_STORE_BLOCK and SCRATCH_LOAD_BLOCK instructions for saving and restoring callee-saved VGPRs. This is controlled by a new subtarget feature, block-vgpr-csr. It does not include WWM registers - those will be saved and restored individually, just like before. This patch does not change the ABI. Use of this feature may lead to slightly increased stack usage, because the memory is not compacted if certain registers don't have to be transferred (this will happen in practice for calling conventions where the callee and caller saved registers are interleaved in groups of 8). However, if the registers at the end of the block of 32 don't have to be transferred, we don't need to use a whole 128-byte stack slot - we can trim some space off the end of the range. In order to implement this feature, we need to rely less on the target-independent code in the PrologEpilogInserter, so we override several new methods in SIFrameLowering. We also add new pseudos, SI_BLOCK_SPILL_V1024_SAVE/RESTORE. One peculiarity is that both the SI_BLOCK_SPILL_V1024_RESTORE pseudo and the SCRATCH_LOAD_BLOCK instructions will have all the registers that are not transferred added as implicit uses. This is done in order to inform LiveRegUnits that those registers are not available before the restore (since we're not really restoring them - so we can't afford to scavenge them). Unfortunately, this trick doesn't work with the save, so before the save all the registers in the block will be unavailable (see the unit test). This change was previously reverted due to failures in builds with expensive checks enabled; that is now fixed by always updating LiveIntervals and SlotIndexes in SILowerSGPRSpills.
115 lines
5.0 KiB
C++
115 lines
5.0 KiB
C++
//===--------------------- SIFrameLowering.h --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H
|
|
#define LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H
|
|
|
|
#include "AMDGPUFrameLowering.h"
|
|
#include "SIRegisterInfo.h"
|
|
|
|
namespace llvm {
|
|
|
|
class SIFrameLowering final : public AMDGPUFrameLowering {
|
|
public:
|
|
SIFrameLowering(StackDirection D, Align StackAl, int LAO,
|
|
Align TransAl = Align(1))
|
|
: AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
|
|
~SIFrameLowering() override = default;
|
|
|
|
void emitEntryFunctionPrologue(MachineFunction &MF,
|
|
MachineBasicBlock &MBB) const;
|
|
void emitPrologue(MachineFunction &MF,
|
|
MachineBasicBlock &MBB) const override;
|
|
void emitEpilogue(MachineFunction &MF,
|
|
MachineBasicBlock &MBB) const override;
|
|
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
|
|
Register &FrameReg) const override;
|
|
|
|
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
|
|
RegScavenger *RS = nullptr) const override;
|
|
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs,
|
|
RegScavenger *RS = nullptr) const;
|
|
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs,
|
|
bool NeedExecCopyReservedReg) const;
|
|
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
|
|
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
|
|
LiveRegUnits &LiveUnits, Register FrameReg,
|
|
Register FramePtrRegScratchCopy) const;
|
|
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB,
|
|
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
|
|
LiveRegUnits &LiveUnits, Register FrameReg,
|
|
Register FramePtrRegScratchCopy) const;
|
|
bool
|
|
assignCalleeSavedSpillSlots(MachineFunction &MF,
|
|
const TargetRegisterInfo *TRI,
|
|
std::vector<CalleeSavedInfo> &CSI) const override;
|
|
|
|
bool assignCalleeSavedSpillSlots(MachineFunction &MF,
|
|
const TargetRegisterInfo *TRI,
|
|
std::vector<CalleeSavedInfo> &CSI,
|
|
unsigned &MinCSFrameIndex,
|
|
unsigned &MaxCSFrameIndex) const override;
|
|
|
|
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
|
|
MachineBasicBlock::iterator MI,
|
|
ArrayRef<CalleeSavedInfo> CSI,
|
|
const TargetRegisterInfo *TRI) const override;
|
|
|
|
bool
|
|
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
|
|
MachineBasicBlock::iterator MI,
|
|
MutableArrayRef<CalleeSavedInfo> CSI,
|
|
const TargetRegisterInfo *TRI) const override;
|
|
|
|
bool allocateScavengingFrameIndexesNearIncomingSP(
|
|
const MachineFunction &MF) const override;
|
|
|
|
bool isSupportedStackID(TargetStackID::Value ID) const override;
|
|
|
|
void processFunctionBeforeFrameFinalized(
|
|
MachineFunction &MF,
|
|
RegScavenger *RS = nullptr) const override;
|
|
|
|
void processFunctionBeforeFrameIndicesReplaced(
|
|
MachineFunction &MF, RegScavenger *RS = nullptr) const override;
|
|
|
|
MachineBasicBlock::iterator
|
|
eliminateCallFramePseudoInstr(MachineFunction &MF,
|
|
MachineBasicBlock &MBB,
|
|
MachineBasicBlock::iterator MI) const override;
|
|
|
|
protected:
|
|
bool hasFPImpl(const MachineFunction &MF) const override;
|
|
|
|
private:
|
|
void emitEntryFunctionFlatScratchInit(MachineFunction &MF,
|
|
MachineBasicBlock &MBB,
|
|
MachineBasicBlock::iterator I,
|
|
const DebugLoc &DL,
|
|
Register ScratchWaveOffsetReg) const;
|
|
|
|
Register getEntryFunctionReservedScratchRsrcReg(MachineFunction &MF) const;
|
|
|
|
void emitEntryFunctionScratchRsrcRegSetup(
|
|
MachineFunction &MF, MachineBasicBlock &MBB,
|
|
MachineBasicBlock::iterator I, const DebugLoc &DL,
|
|
Register PreloadedPrivateBufferReg, Register ScratchRsrcReg,
|
|
Register ScratchWaveOffsetReg) const;
|
|
|
|
public:
|
|
bool requiresStackPointerReference(const MachineFunction &MF) const;
|
|
|
|
// Returns true if the function may need to reserve space on the stack for the
|
|
// CWSR trap handler.
|
|
bool mayReserveScratchForCWSR(const MachineFunction &MF) const;
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif // LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H
|