//===- GCNRegPressure.h -----------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file defines the GCNRegPressure class, which tracks registry pressure /// by bookkeeping number of SGPR/VGPRs used, weights for large SGPR/VGPRs. It /// also implements a compare function, which compares different register /// pressures, and declares one with max occupancy as winner. /// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H #define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H #include "GCNSubtarget.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/RegisterPressure.h" #include namespace llvm { class MachineRegisterInfo; class raw_ostream; class SlotIndex; struct GCNRegPressure { enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS }; GCNRegPressure() { clear(); } bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR]; } void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); } /// \returns the SGPR32 pressure unsigned getSGPRNum() const { return Value[SGPR]; } /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure /// dependent upon \p UnifiedVGPRFile unsigned getVGPRNum(bool UnifiedVGPRFile) const { if (UnifiedVGPRFile) { return Value[AGPR] ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR]) : Value[VGPR] + Value[AVGPR]; } // AVGPR assignment priority is based on the width of the register. Account // AVGPR pressure as VGPR. return std::max(Value[VGPR] + Value[AVGPR], Value[AGPR]); } /// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs /// \p NumAGPRs AGPRS, and \p NumAVGPRs AVGPRs for a target with a unified /// VGPR file. inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs, unsigned NumAGPRs, unsigned NumAVGPRs) { // Assume AVGPRs will be assigned as VGPRs. return alignTo(NumArchVGPRs + NumAVGPRs, AMDGPU::IsaInfo::getArchVGPRAllocGranule()) + NumAGPRs; } /// \returns the ArchVGPR32 pressure, plus the AVGPRS which we assume will be /// allocated as VGPR unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; } /// \returns the AccVGPR32 pressure unsigned getAGPRNum() const { return Value[AGPR]; } /// \returns the AVGPR32 pressure unsigned getAVGPRNum() const { return Value[AVGPR]; } unsigned getVGPRTuplesWeight() const { return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR], Value[TOTAL_KINDS + AGPR]); } unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; } unsigned getOccupancy(const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize) const { return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()), ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()), DynamicVGPRBlockSize)); } void inc(unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask, const MachineRegisterInfo &MRI); bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O, unsigned DynamicVGPRBlockSize) const { return getOccupancy(ST, DynamicVGPRBlockSize) > O.getOccupancy(ST, DynamicVGPRBlockSize); } /// Compares \p this GCNRegpressure to \p O, returning true if \p this is /// less. Since GCNRegpressure contains different types of pressures, and due /// to target-specific pecularities (e.g. we care about occupancy rather than /// raw register usage), we determine if \p this GCNRegPressure is less than /// \p O based on the following tiered comparisons (in order order of /// precedence): /// 1. Better occupancy /// 2. Less spilling (first preference to VGPR spills, then to SGPR spills) /// 3. Less tuple register pressure (first preference to VGPR tuples if we /// determine that SGPR pressure is not important) /// 4. Less raw register pressure (first preference to VGPR tuples if we /// determine that SGPR pressure is not important) bool less(const MachineFunction &MF, const GCNRegPressure &O, unsigned MaxOccupancy = std::numeric_limits::max()) const; bool operator==(const GCNRegPressure &O) const { return std::equal(&Value[0], &Value[ValueArraySize], O.Value); } bool operator!=(const GCNRegPressure &O) const { return !(*this == O); } GCNRegPressure &operator+=(const GCNRegPressure &RHS) { for (unsigned I = 0; I < ValueArraySize; ++I) Value[I] += RHS.Value[I]; return *this; } GCNRegPressure &operator-=(const GCNRegPressure &RHS) { for (unsigned I = 0; I < ValueArraySize; ++I) Value[I] -= RHS.Value[I]; return *this; } void dump() const; private: static constexpr unsigned ValueArraySize = TOTAL_KINDS * 2; /// Pressure for all register kinds (first all regular registers kinds, then /// all tuple register kinds). unsigned Value[ValueArraySize]; static unsigned getRegKind(const TargetRegisterClass *RC, const SIRegisterInfo *STI); friend GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2); friend Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST, unsigned DynamicVGPRBlockSize); }; inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) { GCNRegPressure Res; for (unsigned I = 0; I < GCNRegPressure::ValueArraySize; ++I) Res.Value[I] = std::max(P1.Value[I], P2.Value[I]); return Res; } inline GCNRegPressure operator+(const GCNRegPressure &P1, const GCNRegPressure &P2) { GCNRegPressure Sum = P1; Sum += P2; return Sum; } inline GCNRegPressure operator-(const GCNRegPressure &P1, const GCNRegPressure &P2) { GCNRegPressure Diff = P1; Diff -= P2; return Diff; } //////////////////////////////////////////////////////////////////////////////// // GCNRPTarget /// Models a register pressure target, allowing to evaluate and track register /// savings against that target from a starting \ref GCNRegPressure. class GCNRPTarget { public: /// Sets up the target such that the register pressure starting at \p RP does /// not show register spilling on function \p MF (w.r.t. the function's /// mininum target occupancy). GCNRPTarget(const MachineFunction &MF, const GCNRegPressure &RP); /// Sets up the target such that the register pressure starting at \p RP does /// not use more than \p NumSGPRs SGPRs and \p NumVGPRs VGPRs on function \p /// MF. GCNRPTarget(unsigned NumSGPRs, unsigned NumVGPRs, const MachineFunction &MF, const GCNRegPressure &RP); /// Sets up the target such that the register pressure starting at \p RP does /// not prevent achieving an occupancy of at least \p Occupancy on function /// \p MF. GCNRPTarget(unsigned Occupancy, const MachineFunction &MF, const GCNRegPressure &RP); /// Changes the target (same semantics as constructor). void setTarget(unsigned NumSGPRs, unsigned NumVGPRs); const GCNRegPressure &getCurrentRP() const { return RP; } void setRP(const GCNRegPressure &NewRP) { RP = NewRP; } /// Determines whether saving virtual register \p Reg will be beneficial /// towards achieving the RP target. bool isSaveBeneficial(Register Reg) const; /// Saves virtual register \p Reg with lanemask \p Mask. void saveReg(Register Reg, LaneBitmask Mask, const MachineRegisterInfo &MRI) { RP.inc(Reg, Mask, LaneBitmask::getNone(), MRI); } /// Whether the current RP is at or below the defined pressure target. bool satisfied() const; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) { OS << "Actual/Target: " << Target.RP.getSGPRNum() << '/' << Target.MaxSGPRs << " SGPRs, " << Target.RP.getArchVGPRNum() << '/' << Target.MaxVGPRs << " ArchVGPRs, " << Target.RP.getAGPRNum() << '/' << Target.MaxVGPRs << " AGPRs"; if (Target.MaxUnifiedVGPRs) { OS << ", " << Target.RP.getVGPRNum(true) << '/' << Target.MaxUnifiedVGPRs << " VGPRs (unified)"; } return OS; } #endif private: const MachineFunction &MF; const bool UnifiedRF; /// Current register pressure. GCNRegPressure RP; /// Target number of SGPRs. unsigned MaxSGPRs; /// Target number of ArchVGPRs and AGPRs. unsigned MaxVGPRs; /// Target number of overall VGPRs for subtargets with unified RFs. Always 0 /// for subtargets with non-unified RFs. unsigned MaxUnifiedVGPRs; GCNRPTarget(const GCNRegPressure &RP, const MachineFunction &MF) : MF(MF), UnifiedRF(MF.getSubtarget().hasGFX90AInsts()), RP(RP) {} }; /////////////////////////////////////////////////////////////////////////////// // GCNRPTracker class GCNRPTracker { public: using LiveRegSet = DenseMap; protected: const LiveIntervals &LIS; LiveRegSet LiveRegs; GCNRegPressure CurPressure, MaxPressure; const MachineInstr *LastTrackedMI = nullptr; mutable const MachineRegisterInfo *MRI = nullptr; GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {} void reset(const MachineInstr &MI, const LiveRegSet *LiveRegsCopy, bool After); /// Mostly copy/paste from CodeGen/RegisterPressure.cpp void bumpDeadDefs(ArrayRef DeadDefs); LaneBitmask getLastUsedLanes(Register RegUnit, SlotIndex Pos) const; public: // reset tracker and set live register set to the specified value. void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_); // live regs for the current state const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; } const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; } void clearMaxPressure() { MaxPressure.clear(); } GCNRegPressure getPressure() const { return CurPressure; } decltype(LiveRegs) moveLiveRegs() { return std::move(LiveRegs); } }; GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI); //////////////////////////////////////////////////////////////////////////////// // GCNUpwardRPTracker class GCNUpwardRPTracker : public GCNRPTracker { public: GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} using GCNRPTracker::reset; /// reset tracker at the specified slot index \p SI. void reset(const MachineRegisterInfo &MRI, SlotIndex SI) { GCNRPTracker::reset(MRI, llvm::getLiveRegs(SI, LIS, MRI)); } /// reset tracker to the end of the \p MBB. void reset(const MachineBasicBlock &MBB) { reset(MBB.getParent()->getRegInfo(), LIS.getSlotIndexes()->getMBBEndIdx(&MBB)); } /// reset tracker to the point just after \p MI (in program order). void reset(const MachineInstr &MI) { reset(MI.getMF()->getRegInfo(), LIS.getInstructionIndex(MI).getDeadSlot()); } /// Move to the state of RP just before the \p MI . If \p UseInternalIterator /// is set, also update the internal iterators. Setting \p UseInternalIterator /// to false allows for an externally managed iterator / program order. void recede(const MachineInstr &MI); /// \p returns whether the tracker's state after receding MI corresponds /// to reported by LIS. bool isValid() const; const GCNRegPressure &getMaxPressure() const { return MaxPressure; } void resetMaxPressure() { MaxPressure = CurPressure; } GCNRegPressure getMaxPressureAndReset() { GCNRegPressure RP = MaxPressure; resetMaxPressure(); return RP; } }; //////////////////////////////////////////////////////////////////////////////// // GCNDownwardRPTracker class GCNDownwardRPTracker : public GCNRPTracker { // Last position of reset or advanceBeforeNext MachineBasicBlock::const_iterator NextMI; MachineBasicBlock::const_iterator MBBEnd; public: GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} using GCNRPTracker::reset; MachineBasicBlock::const_iterator getNext() const { return NextMI; } /// \p return MaxPressure and clear it. GCNRegPressure moveMaxPressure() { auto Res = MaxPressure; MaxPressure.clear(); return Res; } /// Reset tracker to the point before the \p MI /// filling \p LiveRegs upon this point using LIS. /// \p returns false if block is empty except debug values. bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); /// Move to the state right before the next MI or after the end of MBB. /// \p returns false if reached end of the block. /// If \p UseInternalIterator is true, then internal iterators are used and /// set to process in program order. If \p UseInternalIterator is false, then /// it is assumed that the tracker is using an externally managed iterator, /// and advance* calls will not update the state of the iterator. In such /// cases, the tracker will move to the state right before the provided \p MI /// and use LIS for RP calculations. bool advanceBeforeNext(MachineInstr *MI = nullptr, bool UseInternalIterator = true); /// Move to the state at the MI, advanceBeforeNext has to be called first. /// If \p UseInternalIterator is true, then internal iterators are used and /// set to process in program order. If \p UseInternalIterator is false, then /// it is assumed that the tracker is using an externally managed iterator, /// and advance* calls will not update the state of the iterator. In such /// cases, the tracker will move to the state at the provided \p MI . void advanceToNext(MachineInstr *MI = nullptr, bool UseInternalIterator = true); /// Move to the state at the next MI. \p returns false if reached end of /// block. If \p UseInternalIterator is true, then internal iterators are used /// and set to process in program order. If \p UseInternalIterator is false, /// then it is assumed that the tracker is using an externally managed /// iterator, and advance* calls will not update the state of the iterator. In /// such cases, the tracker will move to the state right before the provided /// \p MI and use LIS for RP calculations. bool advance(MachineInstr *MI = nullptr, bool UseInternalIterator = true); /// Advance instructions until before \p End. bool advance(MachineBasicBlock::const_iterator End); /// Reset to \p Begin and advance to \p End. bool advance(MachineBasicBlock::const_iterator Begin, MachineBasicBlock::const_iterator End, const LiveRegSet *LiveRegsCopy = nullptr); /// Mostly copy/paste from CodeGen/RegisterPressure.cpp /// Calculate the impact \p MI will have on CurPressure and \return the /// speculated pressure. In order to support RP Speculation, this does not /// rely on the implicit program ordering in the LiveIntervals. GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const; }; /// \returns the LaneMask of live lanes of \p Reg at position \p SI. Only the /// active lanes of \p LaneMaskFilter will be set in the return value. This is /// used, for example, to limit the live lanes to a specific subreg when /// calculating use masks. LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, LaneBitmask LaneMaskFilter = LaneBitmask::getAll()); LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI, const MachineRegisterInfo &MRI, LaneBitmask LaneMaskFilter = LaneBitmask::getAll()); GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI); /// creates a map MachineInstr -> LiveRegSet /// R - range of iterators on instructions /// After - upon entry or exit of every instruction /// Note: there is no entry in the map for instructions with empty live reg set /// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R)) template DenseMap getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) { std::vector Indexes; Indexes.reserve(std::distance(R.begin(), R.end())); auto &SII = *LIS.getSlotIndexes(); for (MachineInstr *I : R) { auto SI = SII.getInstructionIndex(*I); Indexes.push_back(After ? SI.getDeadSlot() : SI.getBaseIndex()); } llvm::sort(Indexes); auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo(); DenseMap LiveRegMap; SmallVector LiveIdxs, SRLiveIdxs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { auto Reg = Register::index2VirtReg(I); if (!LIS.hasInterval(Reg)) continue; auto &LI = LIS.getInterval(Reg); LiveIdxs.clear(); if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs))) continue; if (!LI.hasSubRanges()) { for (auto SI : LiveIdxs) LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] = MRI.getMaxLaneMaskForVReg(Reg); } else for (const auto &S : LI.subranges()) { // constrain search for subranges by indexes live at main range SRLiveIdxs.clear(); S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs)); for (auto SI : SRLiveIdxs) LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] |= S.LaneMask; } } return LiveRegMap; } inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI, const LiveIntervals &LIS) { return getLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS, MI.getParent()->getParent()->getRegInfo()); } inline GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS) { return getLiveRegs(LIS.getInstructionIndex(MI).getBaseIndex(), LIS, MI.getParent()->getParent()->getRegInfo()); } template GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs) { GCNRegPressure Res; for (const auto &RM : LiveRegs) Res.inc(RM.first, LaneBitmask::getNone(), RM.second, MRI); return Res; } bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2); Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr, unsigned DynamicVGPRBlockSize = 0); Printable print(const GCNRPTracker::LiveRegSet &LiveRegs, const MachineRegisterInfo &MRI); Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, const GCNRPTracker::LiveRegSet &TrackedL, const TargetRegisterInfo *TRI, StringRef Pfx = " "); struct GCNRegPressurePrinter : public MachineFunctionPass { static char ID; public: GCNRegPressurePrinter() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } }; } // end namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H