From 1843a7fe9f2d89cac9b4994adacf4edef5c36a4d Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 21 Jan 2026 15:29:09 -0500 Subject: [PATCH] [NFCI][AMDGPU] Use X-macro to reduce boilerplate in `GCNSubtarget.h` (#176844) `GCNSubtarget.h` contained a large amount of repetitive code following the pattern `bool HasXXX = false;` for member declarations and `bool hasXXX() const { return HasXXX; }` for getters. This boilerplate made the file unnecessarily long and harder to maintain. This patch introduces an X-macro pattern `GCN_SUBTARGET_HAS_FEATURE` that consolidates 149 simple subtarget features into a single list. The macro is expanded twice: once in the protected section to generate member variable declarations, and once in the public section to generate the corresponding getter methods. This reduces the file by approximately 600 lines while preserving the existing functionality. The API is unchanged except for the few getters whose names did not follow the `hasXXX` convention (e.g., `supportsAgentScopeFineGrainedRemoteMemoryAtomics()`), which are now spelled `hasXXX()`; their call sites are updated accordingly. Features with complex getter logic or inconsistent naming conventions are left as manual implementations for future improvement. Ideally, these could be generated by TableGen using `GET_SUBTARGETINFO_MACRO`, similar to the X86 backend. However, `AMDGPU.td` has several issues that prevent direct adoption: duplicate field names (e.g., `DumpCode` is set by both `FeatureDumpCode` and `FeatureDumpCodeLower`), and inconsistent naming conventions where many features don't have the `Has` prefix (e.g., `FlatAddressSpace`, `GFX10Insts`, `FP64`). Fixing these issues would require renaming fields in `AMDGPU.td` and updating all references, which is left for future work. 
--- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 1072 +++++-------------- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 4 +- 4 files changed, 262 insertions(+), 822 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 4f2036d34242..a2bf057c5e11 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -26,6 +26,176 @@ #define GET_SUBTARGETINFO_HEADER #include "AMDGPUGenSubtargetInfo.inc" +//===----------------------------------------------------------------------===// +// X-Macro for simple subtarget features. +// +// This macro defines features that follow the simple pattern: +// bool HasXXX = false; // member declaration +// bool hasXXX() const { return HasXXX; } // getter +// +// To add a new simple feature: +// 1. Add X(FeatureName) to this list +// 2. Remove the manual bool HasFeatureName declaration from protected section +// 3. Remove the manual hasFeatureName() getter from public section +// +// The macro will generate both automatically. +// +// Note: The features are ordered alphabetically for convenience. Preferably +// this would be generated automatically by TableGen, but there are some cases +// where the features were not defined in a way that was compatible with the +// auto-generation. 
+//===----------------------------------------------------------------------===// +#define GCN_SUBTARGET_HAS_FEATURE(X) \ + X(1_5xVGPRs) \ + X(1024AddressableVGPRs) \ + X(45BitNumRecordsBufferResource) \ + X(64BitLiterals) \ + X(A16) \ + X(AddMinMaxInsts) \ + X(AddSubU64Insts) \ + X(AgentScopeFineGrainedRemoteMemoryAtomics) \ + X(ApertureRegs) \ + X(ArchitectedFlatScratch) \ + X(ArchitectedSGPRs) \ + X(AshrPkInsts) \ + X(AtomicBufferGlobalPkAddF16Insts) \ + X(AtomicBufferGlobalPkAddF16NoRtnInsts) \ + X(AtomicBufferPkAddBF16Inst) \ + X(AtomicCSubNoRtnInsts) \ + X(AtomicDsPkAdd16Insts) \ + X(AtomicFaddNoRtnInsts) \ + X(AtomicFaddRtnInsts) \ + X(AtomicFlatPkAdd16Insts) \ + X(AtomicFMinFMaxF32FlatInsts) \ + X(AtomicFMinFMaxF32GlobalInsts) \ + X(AtomicFMinFMaxF64FlatInsts) \ + X(AtomicFMinFMaxF64GlobalInsts) \ + X(AtomicGlobalPkAddBF16Inst) \ + X(BitOp3Insts) \ + X(BVHDualAndBVH8Insts) \ + X(Clusters) \ + X(CubeInsts) \ + X(CvtFP8Vop1Bug) \ + X(CvtNormInsts) \ + X(CvtPkNormVOP2Insts) \ + X(CvtPkNormVOP3Insts) \ + X(DefaultComponentBroadcast) \ + X(DefaultComponentZero) \ + X(DLInsts) \ + X(Dot10Insts) \ + X(Dot11Insts) \ + X(Dot12Insts) \ + X(Dot13Insts) \ + X(Dot1Insts) \ + X(Dot2Insts) \ + X(Dot3Insts) \ + X(Dot4Insts) \ + X(Dot5Insts) \ + X(Dot6Insts) \ + X(Dot7Insts) \ + X(Dot8Insts) \ + X(Dot9Insts) \ + X(DPALU_DPP) \ + X(DPP) \ + X(DPP8) \ + X(DPPSrc1SGPR) \ + X(EmulatedSystemScopeAtomics) \ + X(ExtendedImageInsts) \ + X(FlatAtomicFaddF32Inst) \ + X(FlatBufferGlobalAtomicFaddF64Inst) \ + X(FlatSegmentOffsetBug) \ + X(FmacF64Inst) \ + X(FmaMixBF16Insts) \ + X(FmaMixInsts) \ + X(FP8ConversionInsts) \ + X(FP8E5M3Insts) \ + X(FP8Insts) \ + X(G16) \ + X(GDS) \ + X(GetWaveIdInst) \ + X(GloballyAddressableScratch) \ + X(GWS) \ + X(IEEEMinimumMaximumInsts) \ + X(ImageGather4D16Bug) \ + X(ImageInsts) \ + X(ImageStoreD16Bug) \ + X(InstFwdPrefetchBug) \ + X(IntClamp) \ + X(LdsBarrierArriveAtomic) \ + X(LdsBranchVmemWARHazard) \ + X(LerpInst) \ + X(LshlAddU64Inst) \ + 
X(MADIntraFwdBug) \ + X(MadU32Inst) \ + X(MAIInsts) \ + X(McastLoadInsts) \ + X(MemoryAtomicFaddF32DenormalSupport) \ + X(MFMAInlineLiteralBug) \ + X(Min3Max3PKF16) \ + X(Minimum3Maximum3F16) \ + X(Minimum3Maximum3F32) \ + X(Minimum3Maximum3PKF16) \ + X(Movrel) \ + X(MSAALoadDstSelBug) \ + X(NoDataDepHazard) \ + X(NoSdstCMPX) \ + X(NSAClauseBug) \ + X(NSAEncoding) \ + X(NSAtoVMEMBug) \ + X(Offset3fBug) \ + X(PackedFP32Ops) \ + X(PackedTID) \ + X(PartialNSAEncoding) \ + X(Permlane16Swap) \ + X(Permlane32Swap) \ + X(PkAddMinMaxInsts) \ + X(PkFmacF16Inst) \ + X(PointSampleAccel) \ + X(PrivEnabledTrap2NopBug) \ + X(PrngInst) \ + X(PseudoScalarTrans) \ + X(QsadInsts) \ + X(R128A16) \ + X(RequiredExportPriority) \ + X(RestrictedSOffset) \ + X(SadInsts) \ + X(SafeCUPrefetch) \ + X(SafeSmemPrefetch) \ + X(SALUFloatInsts) \ + X(ScalarAtomics) \ + X(ScalarDwordx3Loads) \ + X(ScalarStores) \ + X(SDWAMac) \ + X(SDWAOmod) \ + X(SDWAOutModsVOPC) \ + X(SDWAScalar) \ + X(SDWASdst) \ + X(SetPrioIncWgInst) \ + X(SetregVGPRMSBFixup) \ + X(ShaderCyclesHiLoRegisters) \ + X(ShaderCyclesRegister) \ + X(SMemRealTime) \ + X(SMemTimeInst) \ + X(SMEMtoVectorWriteHazard) \ + X(SWakeupBarrier) \ + X(TanhInsts) \ + X(TensorCvtLutInsts) \ + X(TransposeLoadF4F6Insts) \ + X(UnpackedD16VMem) \ + X(VALUTransUseHazard) \ + X(VcmpxExecWARHazard) \ + X(VcmpxPermlaneHazard) \ + X(VGPRIndexMode) \ + X(VmemPrefInsts) \ + X(VMemToLDSLoad) \ + X(VMEMtoScalarWriteHazard) \ + X(VmemWriteVgprInOrder) \ + X(VOP3Literal) \ + X(VOPDInsts) \ + X(Vscnt) \ + X(WaitXcnt) \ + X(XF32Insts) + namespace llvm { class GCNTargetMachine; @@ -78,7 +248,6 @@ protected: bool UnalignedScratchAccess = false; bool UnalignedAccessMode = false; bool RelaxedBufferOOBMode = false; - bool HasApertureRegs = false; bool SupportsXNACK = false; bool KernargPreload = false; @@ -120,84 +289,8 @@ protected: bool UserSGPRInit16Bug = false; bool NegativeScratchOffsetBug = false; bool NegativeUnalignedScratchOffsetBug = false; - bool 
HasSMemRealTime = false; - bool HasIntClamp = false; - bool HasFmaMixInsts = false; - bool HasFmaMixBF16Insts = false; - bool HasMovrel = false; - bool HasVGPRIndexMode = false; - bool HasScalarDwordx3Loads = false; - bool HasScalarStores = false; - bool HasScalarAtomics = false; - bool HasSDWAOmod = false; - bool HasSDWAScalar = false; - bool HasSDWASdst = false; - bool HasSDWAMac = false; - bool HasSDWAOutModsVOPC = false; - bool HasDPP = false; - bool HasDPP8 = false; - bool HasDPALU_DPP = false; - bool HasDPPSrc1SGPR = false; - bool HasPackedFP32Ops = false; - bool HasImageInsts = false; - bool HasExtendedImageInsts = false; - bool HasR128A16 = false; - bool HasA16 = false; - bool HasG16 = false; - bool HasNSAEncoding = false; - bool HasPartialNSAEncoding = false; bool GFX10_AEncoding = false; bool GFX10_BEncoding = false; - bool HasDLInsts = false; - bool HasFmacF64Inst = false; - bool HasDot1Insts = false; - bool HasDot2Insts = false; - bool HasDot3Insts = false; - bool HasDot4Insts = false; - bool HasDot5Insts = false; - bool HasDot6Insts = false; - bool HasDot7Insts = false; - bool HasDot8Insts = false; - bool HasDot9Insts = false; - bool HasDot10Insts = false; - bool HasDot11Insts = false; - bool HasDot12Insts = false; - bool HasDot13Insts = false; - bool HasMAIInsts = false; - bool HasFP8Insts = false; - bool HasFP8ConversionInsts = false; - bool HasMcastLoadInsts = false; - bool HasCubeInsts = false; - bool HasLerpInst = false; - bool HasSadInsts = false; - bool HasQsadInsts = false; - bool HasCvtNormInsts = false; - bool HasCvtPkNormVOP2Insts = false; - bool HasCvtPkNormVOP3Insts = false; - bool HasFP8E5M3Insts = false; - bool HasCvtFP8Vop1Bug = false; - bool HasPkFmacF16Inst = false; - bool HasAtomicFMinFMaxF32GlobalInsts = false; - bool HasAtomicFMinFMaxF64GlobalInsts = false; - bool HasAtomicFMinFMaxF32FlatInsts = false; - bool HasAtomicFMinFMaxF64FlatInsts = false; - bool HasAtomicDsPkAdd16Insts = false; - bool HasAtomicFlatPkAdd16Insts = false; - 
bool HasAtomicFaddRtnInsts = false; - bool HasAtomicFaddNoRtnInsts = false; - bool HasMemoryAtomicFaddF32DenormalSupport = false; - bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false; - bool HasAtomicBufferGlobalPkAddF16Insts = false; - bool HasAtomicCSubNoRtnInsts = false; - bool HasAtomicGlobalPkAddBF16Inst = false; - bool HasAtomicBufferPkAddBF16Inst = false; - bool HasFlatAtomicFaddF32Inst = false; - bool HasFlatBufferGlobalAtomicFaddF64Inst = false; - bool HasDefaultComponentZero = false; - bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; - bool HasEmulatedSystemScopeAtomics = false; - bool HasDefaultComponentBroadcast = false; - bool HasXF32Insts = false; /// The maximum number of instructions that may be placed within an S_CLAUSE, /// which is one greater than the maximum argument to S_CLAUSE. A value of 0 /// indicates a lack of S_CLAUSE support. @@ -205,107 +298,38 @@ protected: bool SupportsSRAMECC = false; bool DynamicVGPR = false; bool DynamicVGPRBlockSize32 = false; - bool HasVMemToLDSLoad = false; bool RequiresAlignVGPR = false; // This should not be used directly. 'TargetID' tracks the dynamic settings // for SRAMECC. 
bool EnableSRAMECC = false; - bool HasNoSdstCMPX = false; - bool HasVscnt = false; - bool HasWaitXcnt = false; - bool HasGetWaveIdInst = false; - bool HasSMemTimeInst = false; - bool HasShaderCyclesRegister = false; - bool HasShaderCyclesHiLoRegisters = false; - bool HasVOP3Literal = false; - bool HasNoDataDepHazard = false; bool FlatAddressSpace = false; bool FlatInstOffsets = false; bool FlatGlobalInsts = false; bool FlatScratchInsts = false; bool FlatGVSMode = false; bool ScalarFlatScratchInsts = false; - bool HasArchitectedFlatScratch = false; bool EnableFlatScratch = false; - bool HasArchitectedSGPRs = false; - bool HasGDS = false; - bool HasGWS = false; bool AddNoCarryInsts = false; - bool HasUnpackedD16VMem = false; bool LDSMisalignedBug = false; - bool HasMFMAInlineLiteralBug = false; bool UnalignedBufferAccess = false; bool UnalignedDSAccess = false; - bool HasPackedTID = false; bool ScalarizeGlobal = false; - bool HasSALUFloatInsts = false; - bool HasPseudoScalarTrans = false; - bool HasRestrictedSOffset = false; - bool Has64BitLiterals = false; - bool Has1024AddressableVGPRs = false; - bool HasSetregVGPRMSBFixup = false; - bool HasBitOp3Insts = false; - bool HasTanhInsts = false; - bool HasTensorCvtLutInsts = false; - bool HasTransposeLoadF4F6Insts = false; - bool HasPrngInst = false; - bool HasBVHDualAndBVH8Insts = false; - bool HasPermlane16Swap = false; - bool HasPermlane32Swap = false; - bool HasVcmpxPermlaneHazard = false; - bool HasVMEMtoScalarWriteHazard = false; - bool HasSMEMtoVectorWriteHazard = false; - bool HasInstFwdPrefetchBug = false; - bool HasVmemPrefInsts = false; - bool HasSafeSmemPrefetch = false; - bool HasSafeCUPrefetch = false; - bool HasVcmpxExecWARHazard = false; - bool HasLdsBranchVmemWARHazard = false; - bool HasNSAtoVMEMBug = false; - bool HasNSAClauseBug = false; - bool HasOffset3fBug = false; - bool HasFlatSegmentOffsetBug = false; - bool HasImageStoreD16Bug = false; - bool HasImageGather4D16Bug = false; - bool 
HasMSAALoadDstSelBug = false; - bool HasPrivEnabledTrap2NopBug = false; - bool Has1_5xVGPRs = false; - bool HasMADIntraFwdBug = false; - bool HasVOPDInsts = false; - bool HasVALUTransUseHazard = false; - bool HasRequiredExportPriority = false; - bool HasVmemWriteVgprInOrder = false; - bool HasAshrPkInsts = false; - bool HasIEEEMinimumMaximumInsts = false; - bool HasMinimum3Maximum3F32 = false; - bool HasMinimum3Maximum3F16 = false; - bool HasMin3Max3PKF16 = false; - bool HasMinimum3Maximum3PKF16 = false; - bool HasLshlAddU64Inst = false; - bool HasAddSubU64Insts = false; - bool HasMadU32Inst = false; - bool HasAddMinMaxInsts = false; - bool HasPkAddMinMaxInsts = false; - bool HasPointSampleAccel = false; - bool HasLdsBarrierArriveAtomic = false; - bool HasSetPrioIncWgInst = false; - bool HasSWakeupBarrier = false; - bool RequiresCOV6 = false; bool UseBlockVGPROpsForCSR = false; - bool HasGloballyAddressableScratch = false; - bool Has45BitNumRecordsBufferResource = false; - - bool HasClusters = false; bool RequiresWaitsBeforeSystemScopeStores = false; bool UseAddPC64Inst = false; // Dummy feature to use for assembler in tablegen. bool FeatureDisable = false; + // Simple subtarget features - auto-generated from X-macro. +#define DECL_HAS_MEMBER(Name) bool Has##Name = false; + GCN_SUBTARGET_HAS_FEATURE(DECL_HAS_MEMBER) +#undef DECL_HAS_MEMBER + private: SIInstrInfo InstrInfo; SITargetLowering TLInfo; @@ -316,24 +340,20 @@ public: const GCNTargetMachine &TM); ~GCNSubtarget() override; - GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, - StringRef GPU, StringRef FS); + GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU, + StringRef FS); /// Diagnose inconsistent subtarget features before attempting to codegen /// function \p F. 
void checkSubtargetFeatures(const Function &F) const; - const SIInstrInfo *getInstrInfo() const override { - return &InstrInfo; - } + const SIInstrInfo *getInstrInfo() const override { return &InstrInfo; } const SIFrameLowering *getFrameLowering() const override { return &FrameLowering; } - const SITargetLowering *getTargetLowering() const override { - return &TLInfo; - } + const SITargetLowering *getTargetLowering() const override { return &TLInfo; } const SIRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); @@ -371,9 +391,14 @@ public: void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); - Generation getGeneration() const { - return (Generation)Gen; - } + // Simple subtarget feature getters - auto-generated from X-macro. +#define DECL_HAS_GETTER(Name) \ + bool has##Name() const { return Has##Name; } + GCN_SUBTARGET_HAS_FEATURE(DECL_HAS_GETTER) +#undef DECL_HAS_GETTER +#undef GCN_SUBTARGET_HAS_FEATURE + + Generation getGeneration() const { return (Generation)Gen; } bool isGFX11Plus() const { return getGeneration() >= GFX11; } @@ -396,9 +421,7 @@ public: return llvm::countl_zero(getMaxWaveScratchSize()) + getWavefrontSizeLog2(); } - int getLDSBankCount() const { - return LDSBankCount; - } + int getLDSBankCount() const { return LDSBankCount; } unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { return (ForBufferRSrc || !enableFlatScratch()) ? 
MaxPrivateElementSize : 16; @@ -417,29 +440,15 @@ public: return getGeneration() >= GFX10; } - bool hasIntClamp() const { - return HasIntClamp; - } + bool hasFP64() const { return FP64; } - bool hasFP64() const { - return FP64; - } + bool hasMIMG_R128() const { return MIMG_R128; } - bool hasMIMG_R128() const { - return MIMG_R128; - } + bool hasHWFP64() const { return FP64; } - bool hasHWFP64() const { - return FP64; - } + bool hasHalfRate64Ops() const { return HalfRate64Ops; } - bool hasHalfRate64Ops() const { - return HalfRate64Ops; - } - - bool hasFullRate64Ops() const { - return FullRate64Ops; - } + bool hasFullRate64Ops() const { return FullRate64Ops; } bool hasAddr64() const { return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); @@ -455,39 +464,21 @@ public: return getGeneration() >= VOLCANIC_ISLANDS; } - bool hasFractBug() const { - return getGeneration() == SOUTHERN_ISLANDS; - } + bool hasFractBug() const { return getGeneration() == SOUTHERN_ISLANDS; } - bool hasMed3_16() const { - return getGeneration() >= AMDGPUSubtarget::GFX9; - } + bool hasMed3_16() const { return getGeneration() >= AMDGPUSubtarget::GFX9; } bool hasMin3Max3_16() const { return getGeneration() >= AMDGPUSubtarget::GFX9; } - bool hasFmaMixInsts() const { - return HasFmaMixInsts; - } + bool hasFMA() const { return FMA; } - bool hasFmaMixBF16Insts() const { return HasFmaMixBF16Insts; } + bool hasSwap() const { return GFX9Insts; } - bool hasFMA() const { - return FMA; - } + bool hasScalarPackInsts() const { return GFX9Insts; } - bool hasSwap() const { - return GFX9Insts; - } - - bool hasScalarPackInsts() const { - return GFX9Insts; - } - - bool hasScalarMulHiInsts() const { - return GFX9Insts; - } + bool hasScalarMulHiInsts() const { return GFX9Insts; } bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; } @@ -502,9 +493,7 @@ public: /// True if the offset field of DS instructions works as expected. 
On SI, the /// offset uses a 16-bit adder and does not always wrap properly. - bool hasUsableDSOffset() const { - return getGeneration() >= SEA_ISLANDS; - } + bool hasUsableDSOffset() const { return getGeneration() >= SEA_ISLANDS; } bool unsafeDSOffsetFoldingEnabled() const { return EnableUnsafeDSOffsetFolding; @@ -517,14 +506,10 @@ public: /// Extra wait hazard is needed in some cases before /// s_cbranch_vccnz/s_cbranch_vccz. - bool hasReadVCCZBug() const { - return getGeneration() <= SEA_ISLANDS; - } + bool hasReadVCCZBug() const { return getGeneration() <= SEA_ISLANDS; } /// Writes to VCC_LO/VCC_HI update the VCCZ flag. - bool partialVCCWritesUpdateVCCZ() const { - return getGeneration() >= GFX10; - } + bool partialVCCWritesUpdateVCCZ() const { return getGeneration() >= GFX10; } /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR /// was written by a VALU instruction. @@ -538,18 +523,14 @@ public: return getGeneration() >= VOLCANIC_ISLANDS; } - bool hasRFEHazards() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } + bool hasRFEHazards() const { return getGeneration() >= VOLCANIC_ISLANDS; } /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. unsigned getSetRegWaitStates() const { return getGeneration() <= SEA_ISLANDS ? 1 : 2; } - bool dumpCode() const { - return DumpCode; - } + bool dumpCode() const { return DumpCode; } /// Return the amount of LDS that can be used that will not restrict the /// occupancy lower than WaveCount. @@ -565,25 +546,17 @@ public: return getGeneration() >= AMDGPUSubtarget::GFX10; } - bool useFlatForGlobal() const { - return FlatForGlobal; - } + bool useFlatForGlobal() const { return FlatForGlobal; } /// \returns If target supports ds_read/write_b128 and user enables generation /// of ds_read/write_b128. - bool useDS128() const { - return CIInsts && EnableDS128; - } + bool useDS128() const { return CIInsts && EnableDS128; } /// \return If target supports ds_read/write_b96/128. 
- bool hasDS96AndDS128() const { - return CIInsts; - } + bool hasDS96AndDS128() const { return CIInsts; } /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 - bool haveRoundOpsF64() const { - return CIInsts; - } + bool haveRoundOpsF64() const { return CIInsts; } /// \returns If MUBUF instructions always perform range checking, even for /// buffer resources used for private memory access. @@ -593,91 +566,55 @@ public: /// \returns If target requires PRT Struct NULL support (zero result registers /// for sparse texture support). - bool usePRTStrictNull() const { - return EnablePRTStrictNull; - } + bool usePRTStrictNull() const { return EnablePRTStrictNull; } - bool hasAutoWaitcntBeforeBarrier() const { - return AutoWaitcntBeforeBarrier; - } + bool hasAutoWaitcntBeforeBarrier() const { return AutoWaitcntBeforeBarrier; } /// \returns true if the target supports backing off of s_barrier instructions /// when an exception is raised. - bool supportsBackOffBarrier() const { - return BackOffBarrier; - } + bool supportsBackOffBarrier() const { return BackOffBarrier; } - bool hasUnalignedBufferAccess() const { - return UnalignedBufferAccess; - } + bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; } bool hasUnalignedBufferAccessEnabled() const { return UnalignedBufferAccess && UnalignedAccessMode; } - bool hasUnalignedDSAccess() const { - return UnalignedDSAccess; - } + bool hasUnalignedDSAccess() const { return UnalignedDSAccess; } bool hasUnalignedDSAccessEnabled() const { return UnalignedDSAccess && UnalignedAccessMode; } - bool hasUnalignedScratchAccess() const { - return UnalignedScratchAccess; - } + bool hasUnalignedScratchAccess() const { return UnalignedScratchAccess; } bool hasUnalignedScratchAccessEnabled() const { return UnalignedScratchAccess && UnalignedAccessMode; } - bool hasUnalignedAccessMode() const { - return UnalignedAccessMode; - } + bool hasUnalignedAccessMode() const { return UnalignedAccessMode; } bool hasRelaxedBufferOOBMode() const { 
return RelaxedBufferOOBMode; } - bool hasApertureRegs() const { - return HasApertureRegs; - } + bool isTrapHandlerEnabled() const { return TrapHandler; } - bool isTrapHandlerEnabled() const { - return TrapHandler; - } + bool isXNACKEnabled() const { return TargetID.isXnackOnOrAny(); } - bool isXNACKEnabled() const { - return TargetID.isXnackOnOrAny(); - } + bool isTgSplitEnabled() const { return EnableTgSplit; } - bool isTgSplitEnabled() const { - return EnableTgSplit; - } - - bool isCuModeEnabled() const { - return EnableCuMode; - } + bool isCuModeEnabled() const { return EnableCuMode; } bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; } - bool hasFlatAddressSpace() const { - return FlatAddressSpace; - } + bool hasFlatAddressSpace() const { return FlatAddressSpace; } - bool hasFlatScrRegister() const { - return hasFlatAddressSpace(); - } + bool hasFlatScrRegister() const { return hasFlatAddressSpace(); } - bool hasFlatInstOffsets() const { - return FlatInstOffsets; - } + bool hasFlatInstOffsets() const { return FlatInstOffsets; } - bool hasFlatGlobalInsts() const { - return FlatGlobalInsts; - } + bool hasFlatGlobalInsts() const { return FlatGlobalInsts; } - bool hasFlatScratchInsts() const { - return FlatScratchInsts; - } + bool hasFlatScratchInsts() const { return FlatScratchInsts; } // Check if target supports ST addressing mode with FLAT scratch instructions. 
// The ST addressing mode means no registers are used, either VGPR or SGPR, @@ -688,22 +625,16 @@ public: bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; } - bool hasScalarFlatScratchInsts() const { - return ScalarFlatScratchInsts; - } + bool hasScalarFlatScratchInsts() const { return ScalarFlatScratchInsts; } bool enableFlatScratch() const { return flatScratchIsArchitected() || (EnableFlatScratch && hasFlatScratchInsts()); } - bool hasGlobalAddTidInsts() const { - return GFX10_BEncoding; - } + bool hasGlobalAddTidInsts() const { return GFX10_BEncoding; } - bool hasAtomicCSub() const { - return GFX10_BEncoding; - } + bool hasAtomicCSub() const { return GFX10_BEncoding; } bool hasMTBUFInsts() const { return !hasGFX1250Insts(); } @@ -724,290 +655,51 @@ public: return getGeneration() >= GFX9; } - bool hasFlatSegmentOffsetBug() const { - return HasFlatSegmentOffsetBug; - } + bool hasFlatLgkmVMemCountInOrder() const { return getGeneration() > GFX9; } - bool hasFlatLgkmVMemCountInOrder() const { - return getGeneration() > GFX9; - } - - bool hasD16LoadStore() const { - return getGeneration() >= GFX9; - } + bool hasD16LoadStore() const { return getGeneration() >= GFX9; } bool d16PreservesUnusedBits() const { return hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); } - bool hasD16Images() const { - return getGeneration() >= VOLCANIC_ISLANDS; - } + bool hasD16Images() const { return getGeneration() >= VOLCANIC_ISLANDS; } /// Return if most LDS instructions have an m0 use that require m0 to be /// initialized. - bool ldsRequiresM0Init() const { - return getGeneration() < GFX9; - } + bool ldsRequiresM0Init() const { return getGeneration() < GFX9; } // True if the hardware rewinds and replays GWS operations if a wave is // preempted. // // If this is false, a GWS operation requires testing if a nack set the // MEM_VIOL bit, and repeating if so. 
- bool hasGWSAutoReplay() const { - return getGeneration() >= GFX9; - } + bool hasGWSAutoReplay() const { return getGeneration() >= GFX9; } /// \returns if target has ds_gws_sema_release_all instruction. - bool hasGWSSemaReleaseAll() const { - return CIInsts; - } + bool hasGWSSemaReleaseAll() const { return CIInsts; } /// \returns true if the target has integer add/sub instructions that do not /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier /// for saturation. - bool hasAddNoCarry() const { - return AddNoCarryInsts; - } + bool hasAddNoCarry() const { return AddNoCarryInsts; } bool hasScalarAddSub64() const { return getGeneration() >= GFX12; } bool hasScalarSMulU64() const { return getGeneration() >= GFX12; } - bool hasUnpackedD16VMem() const { - return HasUnpackedD16VMem; - } - // Covers VS/PS/CS graphics shaders bool isMesaGfxShader(const Function &F) const { return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); } - bool hasMad64_32() const { - return getGeneration() >= SEA_ISLANDS; - } - - bool hasSDWAOmod() const { - return HasSDWAOmod; - } - - bool hasSDWAScalar() const { - return HasSDWAScalar; - } - - bool hasSDWASdst() const { - return HasSDWASdst; - } - - bool hasSDWAMac() const { - return HasSDWAMac; - } - - bool hasSDWAOutModsVOPC() const { - return HasSDWAOutModsVOPC; - } - - bool hasDLInsts() const { - return HasDLInsts; - } - - bool hasFmacF64Inst() const { return HasFmacF64Inst; } - - bool hasDot1Insts() const { - return HasDot1Insts; - } - - bool hasDot2Insts() const { - return HasDot2Insts; - } - - bool hasDot3Insts() const { - return HasDot3Insts; - } - - bool hasDot4Insts() const { - return HasDot4Insts; - } - - bool hasDot5Insts() const { - return HasDot5Insts; - } - - bool hasDot6Insts() const { - return HasDot6Insts; - } - - bool hasDot7Insts() const { - return HasDot7Insts; - } - - bool hasDot8Insts() const { - return HasDot8Insts; - } - - 
bool hasDot9Insts() const { - return HasDot9Insts; - } - - bool hasDot10Insts() const { - return HasDot10Insts; - } - - bool hasDot11Insts() const { - return HasDot11Insts; - } - - bool hasDot12Insts() const { - return HasDot12Insts; - } - - bool hasDot13Insts() const { - return HasDot13Insts; - } - - bool hasMAIInsts() const { - return HasMAIInsts; - } - - bool hasFP8Insts() const { - return HasFP8Insts; - } - - bool hasFP8ConversionInsts() const { return HasFP8ConversionInsts; } - - bool hasMcastLoadInsts() const { return HasMcastLoadInsts; } - - bool hasCubeInsts() const { return HasCubeInsts; } - - bool hasLerpInst() const { return HasLerpInst; } - - bool hasSadInsts() const { return HasSadInsts; } - - bool hasQsadInsts() const { return HasQsadInsts; } - - bool hasCvtNormInsts() const { return HasCvtNormInsts; } - - bool hasCvtPkNormVOP2Insts() const { return HasCvtPkNormVOP2Insts; } - - bool hasCvtPkNormVOP3Insts() const { return HasCvtPkNormVOP3Insts; } - - bool hasFP8E5M3Insts() const { return HasFP8E5M3Insts; } - - bool hasPkFmacF16Inst() const { - return HasPkFmacF16Inst; - } - - bool hasAtomicFMinFMaxF32GlobalInsts() const { - return HasAtomicFMinFMaxF32GlobalInsts; - } - - bool hasAtomicFMinFMaxF64GlobalInsts() const { - return HasAtomicFMinFMaxF64GlobalInsts; - } - - bool hasAtomicFMinFMaxF32FlatInsts() const { - return HasAtomicFMinFMaxF32FlatInsts; - } - - bool hasAtomicFMinFMaxF64FlatInsts() const { - return HasAtomicFMinFMaxF64FlatInsts; - } - - bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; } - - bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; } + bool hasMad64_32() const { return getGeneration() >= SEA_ISLANDS; } bool hasAtomicFaddInsts() const { return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; } - bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; } - - bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; } - - bool 
hasAtomicBufferGlobalPkAddF16NoRtnInsts() const { - return HasAtomicBufferGlobalPkAddF16NoRtnInsts; - } - - bool hasAtomicBufferGlobalPkAddF16Insts() const { - return HasAtomicBufferGlobalPkAddF16Insts; - } - - bool hasAtomicGlobalPkAddBF16Inst() const { - return HasAtomicGlobalPkAddBF16Inst; - } - - bool hasAtomicBufferPkAddBF16Inst() const { - return HasAtomicBufferPkAddBF16Inst; - } - - bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } - - /// \return true if the target has flat, global, and buffer atomic fadd for - /// double. - bool hasFlatBufferGlobalAtomicFaddF64Inst() const { - return HasFlatBufferGlobalAtomicFaddF64Inst; - } - - /// \return true if the target's flat, global, and buffer atomic fadd for - /// float supports denormal handling. - bool hasMemoryAtomicFaddF32DenormalSupport() const { - return HasMemoryAtomicFaddF32DenormalSupport; - } - - /// \return true if atomic operations targeting fine-grained memory work - /// correctly at device scope, in allocations in host or peer PCIe device - /// memory. - bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const { - return HasAgentScopeFineGrainedRemoteMemoryAtomics; - } - - /// \return true is HW emulates system scope atomics unsupported by the PCI-e - /// via CAS loop. 
- bool hasEmulatedSystemScopeAtomics() const { - return HasEmulatedSystemScopeAtomics; - } - - bool hasDefaultComponentZero() const { return HasDefaultComponentZero; } - - bool hasDefaultComponentBroadcast() const { - return HasDefaultComponentBroadcast; - } - - bool hasNoSdstCMPX() const { - return HasNoSdstCMPX; - } - - bool hasVscnt() const { - return HasVscnt; - } - - bool hasGetWaveIdInst() const { - return HasGetWaveIdInst; - } - - bool hasSMemTimeInst() const { - return HasSMemTimeInst; - } - - bool hasShaderCyclesRegister() const { - return HasShaderCyclesRegister; - } - - bool hasShaderCyclesHiLoRegisters() const { - return HasShaderCyclesHiLoRegisters; - } - - bool hasVOP3Literal() const { - return HasVOP3Literal; - } - - bool hasNoDataDepHazard() const { - return HasNoDataDepHazard; - } - bool vmemWriteNeedsExpWaitcnt() const { return getGeneration() < SEA_ISLANDS; } @@ -1018,12 +710,6 @@ public: bool hasPrefetch() const { return GFX12Insts; } - bool hasVmemPrefInsts() const { return HasVmemPrefInsts; } - - bool hasSafeSmemPrefetch() const { return HasSafeSmemPrefetch; } - - bool hasSafeCUPrefetch() const { return HasSafeCUPrefetch; } - // Has s_cmpk_* instructions. bool hasSCmpK() const { return getGeneration() < GFX12; } @@ -1037,15 +723,11 @@ public: // dynamic realignment in common cases. Align getStackAlignment() const { return Align(16); } - bool enableMachineScheduler() const override { - return true; - } + bool enableMachineScheduler() const override { return true; } bool useAA() const override; - bool enableSubRegLiveness() const override { - return true; - } + bool enableSubRegLiveness() const override { return true; } void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } @@ -1054,9 +736,7 @@ public: static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); // XXX - Why is this here if it isn't in the default pass set? 
- bool enableEarlyIfConversion() const override { - return true; - } + bool enableEarlyIfConversion() const override { return true; } void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override; @@ -1070,34 +750,12 @@ public: return AMDGPU::getMaxNumUserSGPRs(*this); } - bool hasSMemRealTime() const { - return HasSMemRealTime; - } - - bool hasMovrel() const { - return HasMovrel; - } - - bool hasVGPRIndexMode() const { - return HasVGPRIndexMode; - } - bool useVGPRIndexMode() const; bool hasScalarCompareEq64() const { return getGeneration() >= VOLCANIC_ISLANDS; } - bool hasScalarDwordx3Loads() const { return HasScalarDwordx3Loads; } - - bool hasScalarStores() const { - return HasScalarStores; - } - - bool hasScalarAtomics() const { - return HasScalarAtomics; - } - bool hasLDSFPAtomicAddF32() const { return GFX8Insts; } bool hasLDSFPAtomicAddF64() const { return GFX90AInsts || GFX1250Insts; } @@ -1107,36 +765,14 @@ public: /// \returns true if the subtarget has the v_permlane64_b32 instruction. 
bool hasPermLane64() const { return getGeneration() >= GFX11; } - bool hasDPP() const { - return HasDPP; - } - - bool hasDPPBroadcasts() const { - return HasDPP && getGeneration() < GFX10; - } + bool hasDPPBroadcasts() const { return HasDPP && getGeneration() < GFX10; } bool hasDPPWavefrontShifts() const { return HasDPP && getGeneration() < GFX10; } - bool hasDPP8() const { - return HasDPP8; - } - - bool hasDPALU_DPP() const { - return HasDPALU_DPP; - } - - bool hasDPPSrc1SGPR() const { return HasDPPSrc1SGPR; } - - bool hasPackedFP32Ops() const { - return HasPackedFP32Ops; - } - // Has V_PK_MOV_B32 opcode - bool hasPkMovB32() const { - return GFX90AInsts; - } + bool hasPkMovB32() const { return GFX90AInsts; } bool hasFmaakFmamkF32Insts() const { return getGeneration() >= GFX10 || hasGFX940Insts(); @@ -1144,64 +780,22 @@ public: bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); } - bool hasImageInsts() const { - return HasImageInsts; - } - - bool hasExtendedImageInsts() const { - return HasExtendedImageInsts; - } - - bool hasR128A16() const { - return HasR128A16; - } - - bool hasA16() const { return HasA16; } - - bool hasG16() const { return HasG16; } - - bool hasOffset3fBug() const { - return HasOffset3fBug; - } - - bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } - - bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } - - bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; } - - bool hasMSAALoadDstSelBug() const { return HasMSAALoadDstSelBug; } - - bool hasPrivEnabledTrap2NopBug() const { return HasPrivEnabledTrap2NopBug; } - - bool hasNSAEncoding() const { return HasNSAEncoding; } - bool hasNonNSAEncoding() const { return getGeneration() < GFX12; } - bool hasPartialNSAEncoding() const { return HasPartialNSAEncoding; } - unsigned getNSAMaxSize(bool HasSampler = false) const { return AMDGPU::getNSAMaxSize(*this, HasSampler); } - bool hasGFX10_AEncoding() const { - return GFX10_AEncoding; - } + bool 
hasGFX10_AEncoding() const { return GFX10_AEncoding; } - bool hasGFX10_BEncoding() const { - return GFX10_BEncoding; - } + bool hasGFX10_BEncoding() const { return GFX10_BEncoding; } - bool hasGFX10_3Insts() const { - return GFX10_3Insts; - } + bool hasGFX10_3Insts() const { return GFX10_3Insts; } bool hasMadF16() const; bool hasMovB64() const { return GFX940Insts || GFX1250Insts; } - bool hasLshlAddU64Inst() const { return HasLshlAddU64Inst; } - // Scalar and global loads support scale_offset bit. bool hasScaleOffset() const { return GFX1250Insts; } @@ -1210,21 +804,13 @@ public: // FLAT GLOBAL VOffset is signed bool hasSignedGVSOffset() const { return GFX1250Insts; } - bool enableSIScheduler() const { - return EnableSIScheduler; - } + bool enableSIScheduler() const { return EnableSIScheduler; } - bool loadStoreOptEnabled() const { - return EnableLoadStoreOpt; - } + bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; } - bool hasSGPRInitBug() const { - return SGPRInitBug; - } + bool hasSGPRInitBug() const { return SGPRInitBug; } - bool hasUserSGPRInit16Bug() const { - return UserSGPRInit16Bug && isWave32(); - } + bool hasUserSGPRInit16Bug() const { return UserSGPRInit16Bug && isWave32(); } bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } @@ -1232,18 +818,12 @@ public: return NegativeUnalignedScratchOffsetBug; } - bool hasMFMAInlineLiteralBug() const { - return HasMFMAInlineLiteralBug; - } - bool has12DWordStoreHazard() const { return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; } // \returns true if the subtarget supports DWORDX3 load/store instructions. 
- bool hasDwordx3LoadStores() const { - return CIInsts; - } + bool hasDwordx3LoadStores() const { return CIInsts; } bool hasReadM0MovRelInterpHazard() const { return getGeneration() == AMDGPUSubtarget::GFX9; @@ -1262,39 +842,11 @@ public: return getGeneration() == AMDGPUSubtarget::GFX9; } - bool hasVcmpxPermlaneHazard() const { - return HasVcmpxPermlaneHazard; - } - - bool hasVMEMtoScalarWriteHazard() const { - return HasVMEMtoScalarWriteHazard; - } - - bool hasSMEMtoVectorWriteHazard() const { - return HasSMEMtoVectorWriteHazard; - } - - bool hasLDSMisalignedBug() const { - return LDSMisalignedBug && !EnableCuMode; - } - - bool hasInstFwdPrefetchBug() const { - return HasInstFwdPrefetchBug; - } - - bool hasVcmpxExecWARHazard() const { - return HasVcmpxExecWARHazard; - } - - bool hasLdsBranchVmemWARHazard() const { - return HasLdsBranchVmemWARHazard; - } + bool hasLDSMisalignedBug() const { return LDSMisalignedBug && !EnableCuMode; } // Shift amount of a 64 bit shift cannot be a highest allocated register // if also at the end of the allocation block. - bool hasShift64HighRegBug() const { - return GFX90AInsts && !GFX940Insts; - } + bool hasShift64HighRegBug() const { return GFX90AInsts && !GFX940Insts; } // Has one cycle hazard on transcendental instruction feeding a // non transcendental VALU. @@ -1308,15 +860,7 @@ public: bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; } // Does not have HW interlocs for VALU writing and then reading SGPRs. 
- bool hasVDecCoExecHazard() const { - return GFX940Insts; - } - - bool hasNSAtoVMEMBug() const { - return HasNSAtoVMEMBug; - } - - bool hasNSAClauseBug() const { return HasNSAClauseBug; } + bool hasVDecCoExecHazard() const { return GFX940Insts; } bool hasHardClauses() const { return MaxHardClauseLength > 0; } @@ -1336,18 +880,12 @@ public: return getGeneration() == GFX11; } - bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; } - bool hasCvtScaleForwardingHazard() const { return GFX950Insts; } bool requiresCodeObjectV6() const { return RequiresCOV6; } bool useVGPRBlockOpsForCSR() const { return UseBlockVGPROpsForCSR; } - bool hasGloballyAddressableScratch() const { - return HasGloballyAddressableScratch; - } - bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; } bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; } @@ -1370,17 +908,11 @@ public: /// target. bool hasNullExportTarget() const { return !GFX11Insts; } - bool has1_5xVGPRs() const { return Has1_5xVGPRs; } - - bool hasVOPDInsts() const { return HasVOPDInsts; } - bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; } /// Return true if the target has the S_DELAY_ALU instruction. bool hasDelayAlu() const { return GFX11Insts; } - bool hasPackedTID() const { return HasPackedTID; } - // GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that // hasGFX90AInsts is also true. bool hasGFX940Insts() const { return GFX940Insts; } @@ -1392,21 +924,7 @@ public: /// Returns true if the target supports /// global_load_lds_dwordx3/global_load_lds_dwordx4 or /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit. 
- bool hasLDSLoadB96_B128() const { - return hasGFX950Insts(); - } - - bool hasVMemToLDSLoad() const { return HasVMemToLDSLoad; } - - bool hasSALUFloatInsts() const { return HasSALUFloatInsts; } - - bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; } - - bool hasRestrictedSOffset() const { return HasRestrictedSOffset; } - - bool hasRequiredExportPriority() const { return HasRequiredExportPriority; } - - bool hasVmemWriteVgprInOrder() const { return HasVmemWriteVgprInOrder; } + bool hasLDSLoadB96_B128() const { return hasGFX950Insts(); } /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively. @@ -1418,9 +936,6 @@ public: return getGeneration() == GFX12; } - /// \returns true if the target has instructions with xf32 format support. - bool hasXF32Insts() const { return HasXF32Insts; } - /// \returns true if the target has packed f32 instructions that only read 32 /// bits from a scalar operand (SGPR or literal) and replicates the bits to /// both channels. 
@@ -1428,65 +943,20 @@ public: return getGeneration() == GFX12 && GFX1250Insts; } - bool hasBitOp3Insts() const { return HasBitOp3Insts; } - - bool hasPermlane16Swap() const { return HasPermlane16Swap; } - bool hasPermlane32Swap() const { return HasPermlane32Swap; } - bool hasAshrPkInsts() const { return HasAshrPkInsts; } - - bool hasMinimum3Maximum3F32() const { - return HasMinimum3Maximum3F32; - } - - bool hasMinimum3Maximum3F16() const { - return HasMinimum3Maximum3F16; - } - - bool hasMin3Max3PKF16() const { return HasMin3Max3PKF16; } - - bool hasTanhInsts() const { return HasTanhInsts; } - - bool hasTensorCvtLutInsts() const { return HasTensorCvtLutInsts; } - bool hasAddPC64Inst() const { return GFX1250Insts; } bool useAddPC64Inst() const { return UseAddPC64Inst; } - bool has1024AddressableVGPRs() const { return Has1024AddressableVGPRs; } - - bool hasSetregVGPRMSBFixup() const { return HasSetregVGPRMSBFixup; } - - bool hasMinimum3Maximum3PKF16() const { - return HasMinimum3Maximum3PKF16; - } - - bool hasTransposeLoadF4F6Insts() const { return HasTransposeLoadF4F6Insts; } - /// \returns true if the target supports expert scheduling mode 2 which relies /// on the compiler to insert waits to avoid hazards between VMEM and VALU /// instructions in some instances. bool hasExpertSchedulingMode() const { return getGeneration() >= GFX12; } - /// \returns true if the target has s_wait_xcnt insertion. Supported for - /// GFX1250. - bool hasWaitXCnt() const { return HasWaitXcnt; } - - // A single DWORD instructions can use a 64-bit literal. - bool has64BitLiterals() const { return Has64BitLiterals; } - - bool hasPointSampleAccel() const { return HasPointSampleAccel; } - - bool hasLdsBarrierArriveAtomic() const { return HasLdsBarrierArriveAtomic; } - /// \returns The maximum number of instructions that can be enclosed in an /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that /// instruction. 
unsigned maxHardClauseLength() const { return MaxHardClauseLength; } - bool hasPrngInst() const { return HasPrngInst; } - - bool hasBVHDualAndBVH8Insts() const { return HasBVHDualAndBVH8Insts; } - /// Return the maximum number of waves per SIMD for kernels using \p SGPRs /// SGPRs unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; @@ -1520,19 +990,14 @@ public: bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; } /// \returns true if the architected SGPRs are enabled. - bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; } /// \returns true if Global Data Share is supported. - bool hasGDS() const { return HasGDS; } /// \returns true if Global Wave Sync is supported. - bool hasGWS() const { return HasGWS; } /// \returns true if the machine has merged shaders in which s0-s7 are /// reserved by the hardware and user SGPRs start at s8 - bool hasMergedShaders() const { - return getGeneration() >= GFX9; - } + bool hasMergedShaders() const { return getGeneration() >= GFX9; } // \returns true if the target supports the pre-NGG legacy geometry path. bool hasLegacyGeometry() const { return getGeneration() < GFX11; } @@ -1548,7 +1013,6 @@ public: // \returns true if CSUB (a.k.a. SUB_CLAMP on GFX12) atomics support a // no-return form. 
- bool hasAtomicCSubNoRtnInsts() const { return HasAtomicCSubNoRtnInsts; } // \returns true if the target has DX10_CLAMP kernel descriptor mode bit bool hasDX10ClampMode() const { return getGeneration() < GFX12; } @@ -1557,7 +1021,6 @@ public: bool hasIEEEMode() const { return getGeneration() < GFX12; } // \returns true if the target has IEEE fminimum/fmaximum instructions - bool hasIEEEMinimumMaximumInsts() const { return HasIEEEMinimumMaximumInsts; } // \returns true if the target has WG_RR_MODE kernel descriptor mode bit bool hasRrWGMode() const { return getGeneration() >= GFX12; } @@ -1573,10 +1036,8 @@ public: bool hasVOPD3() const { return GFX1250Insts; } // \returns true if the target has V_ADD_U64/V_SUB_U64 instructions. - bool hasAddSubU64Insts() const { return HasAddSubU64Insts; } // \returns true if the target has V_MAD_U32 instruction. - bool hasMadU32Inst() const { return HasMadU32Inst; } // \returns true if the target has V_MUL_U64/V_MUL_I64 instructions. bool hasVectorMulU64() const { return GFX1250Insts; } @@ -1589,10 +1050,8 @@ public: bool hasIntMinMax64() const { return GFX1250Insts; } // \returns true if the target has V_ADD_{MIN|MAX}_{I|U}32 instructions. - bool hasAddMinMaxInsts() const { return HasAddMinMaxInsts; } // \returns true if the target has V_PK_ADD_{MIN|MAX}_{I|U}16 instructions. - bool hasPkAddMinMaxInsts() const { return HasPkAddMinMaxInsts; } // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions. bool hasPkMinMax3Insts() const { return GFX1250Insts; } @@ -1600,12 +1059,6 @@ public: // \returns ture if target has S_GET_SHADER_CYCLES_U64 instruction. bool hasSGetShaderCyclesInst() const { return GFX1250Insts; } - // \returns true if target has S_SETPRIO_INC_WG instruction. - bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; } - - // \returns true if target has S_WAKEUP_BARRIER instruction. 
- bool hasSWakeupBarrier() const { return HasSWakeupBarrier; } - // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead // of sign-extending. Note that GFX1250 has not only fixed the bug but also // extended VA to 57 bits. @@ -1752,9 +1205,7 @@ public: /// unit requirement. unsigned getMaxNumVGPRs(const Function &F) const; - unsigned getMaxNumAGPRs(const Function &F) const { - return getMaxNumVGPRs(F); - } + unsigned getMaxNumAGPRs(const Function &F) const { return getMaxNumVGPRs(F); } /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number /// of waves per execution unit required for the function \p MF. @@ -1774,13 +1225,9 @@ public: bool supportsWave64() const { return !hasGFX1250Insts(); } - bool isWave32() const { - return getWavefrontSize() == 32; - } + bool isWave32() const { return getWavefrontSize() == 32; } - bool isWave64() const { - return getWavefrontSize() == 64; - } + bool isWave64() const { return getWavefrontSize() == 64; } /// Returns if the wavesize of this subtarget is known reliable. This is false /// only for the a default target-cpu that does not have an explicit @@ -1872,9 +1319,6 @@ public: return GFX1250Insts && getGeneration() == GFX12; } - /// \returns true if the subtarget supports clusters of workgroups. - bool hasClusters() const { return HasClusters; } - /// \returns true if the subtarget requires a wait for xcnt before VMEM /// accesses that must never be repeated in the event of a page fault/re-try. /// Atomic stores/rmw and all volatile accesses fall under this criteria. @@ -1894,10 +1338,6 @@ public: /// \returns true if the sub-target supports buffer resource (V#) with 45-bit /// num_records. 
- bool has45BitNumRecordsBufferResource() const { - return Has45BitNumRecordsBufferResource; - } - bool requiresWaitsBeforeSystemScopeStores() const { return RequiresWaitsBeforeSystemScopeStores; } diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index ec3e720ef888..117ea136b2fe 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -773,7 +773,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, ScratchRsrcReg, ScratchWaveOffsetReg); } - if (ST.hasWaitXCnt()) { + if (ST.hasWaitXcnt()) { // Set REPLAY_MODE (bit 25) in MODE register to enable multi-group XNACK // replay. This aligns hardware behavior with the compiler's s_wait_xcnt // insertion logic, which assumes multi-group mode by default. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ae5e9fa2cb69..07c26a7404c6 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -18660,12 +18660,12 @@ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget, // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local // allocations work. if (HasSystemScope) { - if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() && + if (Subtarget.hasAgentScopeFineGrainedRemoteMemoryAtomics() && RMW->hasMetadata("amdgpu.no.remote.memory")) return true; if (Subtarget.hasEmulatedSystemScopeAtomics()) return true; - } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics()) + } else if (Subtarget.hasAgentScopeFineGrainedRemoteMemoryAtomics()) return true; return RMW->hasMetadata("amdgpu.no.fine.grained.memory"); @@ -18775,7 +18775,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const { // If fine-grained remote memory works at device scope, we don't need to // do anything. 
if (!HasSystemScope && - Subtarget->supportsAgentScopeFineGrainedRemoteMemoryAtomics()) + Subtarget->hasAgentScopeFineGrainedRemoteMemoryAtomics()) return atomicSupportedIfLegalIntType(RMW); // If we are targeting a remote allocated address, it depends what kind of diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 0a20b03819c1..f0c2b9f8dde2 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -2407,7 +2407,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( ScoreBrackets.determineWaitForPhysReg(SmemAccessCounter, Reg, Wait); } - if (ST->hasWaitXCnt() && Op.isDef()) + if (ST->hasWaitXcnt() && Op.isDef()) ScoreBrackets.determineWaitForPhysReg(X_CNT, Reg, Wait); } } @@ -2744,7 +2744,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, } } - if (!ST->hasWaitXCnt()) + if (!ST->hasWaitXcnt()) return; if (IsVMEMAccess)