[NFCI][AMDGPU] Remove more redundant code from GCNSubtarget.h (#177297)
With this cleanup, we are getting pretty close to using `GET_SUBTARGETINFO_MACRO` in the header.
This commit is contained in:
parent
a81d2bf933
commit
02d34a76f7
@ -70,10 +70,9 @@ multiclass AMDGPUSubtargetFeature<string FeatureString,
|
||||
def Has#NAME : Predicate<"Subtarget->has"#NAME#"()">;
|
||||
}
|
||||
|
||||
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
|
||||
"FastFMAF32",
|
||||
"true",
|
||||
"Assuming f32 fma is at least as fast as mul + add"
|
||||
defm FastFMAF32 : AMDGPUSubtargetFeature<"fast-fmaf",
|
||||
"Assuming f32 fma is at least as fast as mul + add",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
defm FastDenormalF32 : AMDGPUSubtargetFeature<"fast-denormal-f32",
|
||||
@ -135,9 +134,7 @@ defm FlatGVSMode : AMDGPUSubtargetFeature<"flat-gvs-mode",
|
||||
[FeatureFlatAddressSpace]
|
||||
>;
|
||||
|
||||
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
|
||||
"AddNoCarryInsts",
|
||||
"true",
|
||||
defm AddNoCarryInsts : AMDGPUSubtargetFeature<"add-no-carry-insts",
|
||||
"Have VALU add/sub instructions without carry out"
|
||||
>;
|
||||
|
||||
@ -145,10 +142,9 @@ defm UnalignedBufferAccess : AMDGPUSubtargetFeature<"unaligned-buffer-access",
|
||||
"Hardware supports unaligned global loads and stores"
|
||||
>;
|
||||
|
||||
def FeatureTrapHandler: SubtargetFeature<"trap-handler",
|
||||
"TrapHandler",
|
||||
"true",
|
||||
"Trap handler support"
|
||||
defm TrapHandler: AMDGPUSubtargetFeature<"trap-handler",
|
||||
"Trap handler support",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
defm UnalignedScratchAccess : AMDGPUSubtargetFeature<"unaligned-scratch-access",
|
||||
@ -241,16 +237,14 @@ defm SGPRInitBug : AMDGPUSubtargetFeature<"sgpr-init-bug",
|
||||
"VI SGPR initialization bug requiring a fixed SGPR allocation size"
|
||||
>;
|
||||
|
||||
def FeatureUserSGPRInit16Bug : SubtargetFeature<"user-sgpr-init16-bug",
|
||||
"UserSGPRInit16Bug",
|
||||
"true",
|
||||
"Bug requiring at least 16 user+system SGPRs to be enabled"
|
||||
defm UserSGPRInit16Bug : AMDGPUSubtargetFeature<"user-sgpr-init16-bug",
|
||||
"Bug requiring at least 16 user+system SGPRs to be enabled",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
|
||||
"LDSMisalignedBug",
|
||||
"true",
|
||||
"Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode"
|
||||
defm LDSMisalignedBug : AMDGPUSubtargetFeature<"lds-misaligned-bug",
|
||||
"Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
defm MFMAInlineLiteralBug : AMDGPUSubtargetFeature<"mfma-inline-literal-bug",
|
||||
@ -336,11 +330,10 @@ defm NegativeScratchOffsetBug : AMDGPUSubtargetFeature<"negative-scratch-offset-
|
||||
"Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9"
|
||||
>;
|
||||
|
||||
def FeatureNegativeUnalignedScratchOffsetBug : SubtargetFeature<"negative-unaligned-scratch-offset-bug",
|
||||
"NegativeUnalignedScratchOffsetBug",
|
||||
"true",
|
||||
defm NegativeUnalignedScratchOffsetBug : AMDGPUSubtargetFeature<"negative-unaligned-scratch-offset-bug",
|
||||
"Scratch instructions with a VGPR offset and a negative immediate offset that"
|
||||
"is not a multiple of 4 read wrong memory on GFX10"
|
||||
"is not a multiple of 4 read wrong memory on GFX10",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
defm Offset3fBug : AMDGPUSubtargetFeature<"offset-3f-bug",
|
||||
@ -1085,11 +1078,9 @@ defm SWakeupBarrier : AMDGPUSubtargetFeature<"s-wakeup-barrier-inst",
|
||||
// wave32 and wave64. Instead what users do is assemble with both
|
||||
// wavesizes enabled. We translate this into this special mode so this
|
||||
// only influences assembler behavior and nothing else.
|
||||
def FeatureAssemblerPermissiveWavesize : SubtargetFeature<
|
||||
"assembler-permissive-wavesize",
|
||||
"AssemblerPermissiveWavesize",
|
||||
"true",
|
||||
"allow parsing wave32 and wave64 variants of instructions"
|
||||
defm AssemblerPermissiveWavesize : AMDGPUSubtargetFeature<"assembler-permissive-wavesize",
|
||||
"Allow parsing wave32 and wave64 variants of instructions",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
|
||||
@ -1168,17 +1159,14 @@ def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
|
||||
"Force to generate flat instruction for global"
|
||||
>;
|
||||
|
||||
def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
|
||||
"auto-waitcnt-before-barrier",
|
||||
"AutoWaitcntBeforeBarrier",
|
||||
"true",
|
||||
"Hardware automatically inserts waitcnt before barrier"
|
||||
defm AutoWaitcntBeforeBarrier : AMDGPUSubtargetFeature <"auto-waitcnt-before-barrier",
|
||||
"Hardware automatically inserts waitcnt before barrier",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
def FeatureBackOffBarrier : SubtargetFeature <"back-off-barrier",
|
||||
"BackOffBarrier",
|
||||
"true",
|
||||
"Hardware supports backing off s_barrier if an exception occurs"
|
||||
defm BackOffBarrier : AMDGPUSubtargetFeature <"back-off-barrier",
|
||||
"Hardware supports backing off s_barrier if an exception occurs",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
defm TrigReducedRange : AMDGPUSubtargetFeature<"trig-reduced-range",
|
||||
@ -1186,10 +1174,9 @@ defm TrigReducedRange : AMDGPUSubtargetFeature<"trig-reduced-range",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
def FeatureKernargPreload : SubtargetFeature <"kernarg-preload",
|
||||
"KernargPreload",
|
||||
"true",
|
||||
"Hardware supports preloading of kernel arguments in user SGPRs."
|
||||
defm KernargPreload : AMDGPUSubtargetFeature <"kernarg-preload",
|
||||
"Hardware supports preloading of kernel arguments in user SGPRs.",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
// Alignment enforcement is controlled by a configuration register:
|
||||
@ -1722,7 +1709,7 @@ def FeatureISAVersion10_1_Common : FeatureSet<
|
||||
FeatureGetWaveIdInst,
|
||||
FeatureMadMacF32Insts,
|
||||
FeatureDsSrc2Insts,
|
||||
FeatureLdsMisalignedBug,
|
||||
FeatureLDSMisalignedBug,
|
||||
FeatureSupportsXNACK,
|
||||
// gfx101x bugs
|
||||
FeatureVcmpxPermlaneHazard,
|
||||
@ -2385,10 +2372,7 @@ def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9
|
||||
def HasLDSFPAtomicAddF32 : Predicate<"Subtarget->hasLDSFPAtomicAddF32()">,
|
||||
AssemblerPredicate<(all_of FeatureGFX8Insts)>;
|
||||
|
||||
def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
|
||||
AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>;
|
||||
|
||||
def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
|
||||
def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarryInsts()">;
|
||||
|
||||
def HasXNACKEnabled : Predicate<"Subtarget->isXNACKEnabled()">;
|
||||
|
||||
|
||||
@ -1255,8 +1255,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
|
||||
ProgInfo.UserSGPR = MFI->getNumUserSGPRs();
|
||||
// For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
|
||||
ProgInfo.TrapHandlerEnable =
|
||||
STM.isAmdHsaOS() ? 0 : STM.isTrapHandlerEnabled();
|
||||
ProgInfo.TrapHandlerEnable = STM.isAmdHsaOS() ? 0 : STM.hasTrapHandler();
|
||||
ProgInfo.TGIdXEnable = MFI->hasWorkGroupIDX();
|
||||
ProgInfo.TGIdYEnable = MFI->hasWorkGroupIDY();
|
||||
ProgInfo.TGIdZEnable = MFI->hasWorkGroupIDZ();
|
||||
|
||||
@ -1306,7 +1306,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
|
||||
|
||||
// FIXME: Select to VOP3 version for with-carry.
|
||||
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
|
||||
if (Subtarget->hasAddNoCarry()) {
|
||||
if (Subtarget->hasAddNoCarryInsts()) {
|
||||
SubOp = AMDGPU::V_SUB_U32_e64;
|
||||
Opnds.push_back(
|
||||
CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
|
||||
@ -1491,7 +1491,7 @@ bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
|
||||
Opnds.push_back(Zero);
|
||||
Opnds.push_back(Addr.getOperand(1));
|
||||
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
|
||||
if (Subtarget->hasAddNoCarry()) {
|
||||
if (Subtarget->hasAddNoCarryInsts()) {
|
||||
SubOp = AMDGPU::V_SUB_U32_e64;
|
||||
Opnds.push_back(
|
||||
CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
|
||||
@ -1886,7 +1886,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
|
||||
Opnds.push_back(N0);
|
||||
Opnds.push_back(AddOffsetLo);
|
||||
unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
|
||||
if (Subtarget->hasAddNoCarry()) {
|
||||
if (Subtarget->hasAddNoCarryInsts()) {
|
||||
AddOp = AMDGPU::V_ADD_U32_e64;
|
||||
Opnds.push_back(Clamp);
|
||||
}
|
||||
|
||||
@ -447,7 +447,7 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
|
||||
return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
|
||||
}
|
||||
|
||||
if (STI.hasAddNoCarry()) {
|
||||
if (STI.hasAddNoCarryInsts()) {
|
||||
const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
|
||||
I.setDesc(TII.get(Opc));
|
||||
I.addOperand(*MF, MachineOperand::CreateImm(0));
|
||||
|
||||
@ -751,7 +751,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
|
||||
.scalarize(0);
|
||||
|
||||
if (ST.hasVOP3PInsts() && ST.hasAddNoCarry() && ST.hasIntClamp()) {
|
||||
if (ST.hasVOP3PInsts() && ST.hasAddNoCarryInsts() && ST.hasIntClamp()) {
|
||||
// Full set of gfx9 features.
|
||||
if (ST.hasScalarAddSub64()) {
|
||||
getActionDefinitionsBuilder({G_ADD, G_SUB})
|
||||
@ -7313,7 +7313,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferPrefetch(LegalizerHelper &Helper,
|
||||
bool AMDGPULegalizerInfo::legalizeTrap(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
MachineIRBuilder &B) const {
|
||||
if (!ST.isTrapHandlerEnabled() ||
|
||||
if (!ST.hasTrapHandler() ||
|
||||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
|
||||
return legalizeTrapEndpgm(MI, MRI, B);
|
||||
|
||||
@ -7433,7 +7433,7 @@ bool AMDGPULegalizerInfo::legalizeDebugTrap(MachineInstr &MI,
|
||||
MachineIRBuilder &B) const {
|
||||
// Is non-HSA path or trap-handler disabled? Then, report a warning
|
||||
// accordingly
|
||||
if (!ST.isTrapHandlerEnabled() ||
|
||||
if (!ST.hasTrapHandler() ||
|
||||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
|
||||
Function &Fn = B.getMF().getFunction();
|
||||
Fn.getContext().diagnose(DiagnosticInfoUnsupported(
|
||||
|
||||
@ -75,7 +75,7 @@ protected:
|
||||
bool HasFminFmaxLegacy = true;
|
||||
bool EnablePromoteAlloca = false;
|
||||
bool HasTrigReducedRange = false;
|
||||
bool FastFMAF32 = false;
|
||||
bool HasFastFMAF32 = false;
|
||||
unsigned EUsPerCU = 4;
|
||||
unsigned MaxWavesPerEU = 10;
|
||||
unsigned LocalMemorySize = 0;
|
||||
@ -299,9 +299,7 @@ public:
|
||||
return HasTrigReducedRange;
|
||||
}
|
||||
|
||||
bool hasFastFMAF32() const {
|
||||
return FastFMAF32;
|
||||
}
|
||||
bool hasFastFMAF32() const { return HasFastFMAF32; }
|
||||
|
||||
bool isPromoteAllocaEnabled() const {
|
||||
return EnablePromoteAlloca;
|
||||
|
||||
@ -51,33 +51,31 @@
|
||||
// These features either have custom getters or code accesses the member
|
||||
// directly.
|
||||
#define GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY(X) \
|
||||
X(AssemblerPermissiveWavesize) \
|
||||
X(CIInsts) \
|
||||
X(FastDenormalF32) \
|
||||
X(GCN3Encoding) \
|
||||
X(GFX10_3Insts) \
|
||||
X(GFX10_AEncoding) \
|
||||
X(GFX10_BEncoding) \
|
||||
X(GFX10Insts) \
|
||||
X(GFX11Insts) \
|
||||
X(GFX1250Insts) \
|
||||
X(GFX12Insts) \
|
||||
X(GFX7GFX8GFX9Insts) \
|
||||
X(GFX8Insts) \
|
||||
X(GFX90AInsts) \
|
||||
X(GFX940Insts) \
|
||||
X(GFX950Insts) \
|
||||
X(GFX9Insts) \
|
||||
X(LDSMisalignedBug) \
|
||||
X(UnalignedBufferAccess) \
|
||||
X(UnalignedScratchAccess)
|
||||
X(UnalignedScratchAccess) \
|
||||
X(UserSGPRInit16Bug)
|
||||
|
||||
// Features with both member and getter.
|
||||
#define GCN_SUBTARGET_HAS_FEATURE(X) \
|
||||
X(1_5xVGPRs) \
|
||||
X(1024AddressableVGPRs) \
|
||||
X(45BitNumRecordsBufferResource) \
|
||||
X(AutoWaitcntBeforeBarrier) \
|
||||
X(64BitLiterals) \
|
||||
X(A16) \
|
||||
X(AddMinMaxInsts) \
|
||||
X(AddNoCarryInsts) \
|
||||
X(AddSubU64Insts) \
|
||||
X(AgentScopeFineGrainedRemoteMemoryAtomics) \
|
||||
X(ApertureRegs) \
|
||||
@ -97,6 +95,7 @@
|
||||
X(AtomicFMinFMaxF64FlatInsts) \
|
||||
X(AtomicFMinFMaxF64GlobalInsts) \
|
||||
X(AtomicGlobalPkAddBF16Inst) \
|
||||
X(BackOffBarrier) \
|
||||
X(BitOp3Insts) \
|
||||
X(BVHDualAndBVH8Insts) \
|
||||
X(Clusters) \
|
||||
@ -147,6 +146,13 @@
|
||||
X(G16) \
|
||||
X(GDS) \
|
||||
X(GetWaveIdInst) \
|
||||
X(GFX10_3Insts) \
|
||||
X(GFX10_AEncoding) \
|
||||
X(GFX10_BEncoding) \
|
||||
X(GFX1250Insts) \
|
||||
X(GFX90AInsts) \
|
||||
X(GFX940Insts) \
|
||||
X(GFX950Insts) \
|
||||
X(GloballyAddressableScratch) \
|
||||
X(GWS) \
|
||||
X(HalfRate64Ops) \
|
||||
@ -156,6 +162,7 @@
|
||||
X(ImageStoreD16Bug) \
|
||||
X(InstFwdPrefetchBug) \
|
||||
X(IntClamp) \
|
||||
X(KernargPreload) \
|
||||
X(LdsBarrierArriveAtomic) \
|
||||
X(LdsBranchVmemWARHazard) \
|
||||
X(LerpInst) \
|
||||
@ -174,6 +181,7 @@
|
||||
X(Movrel) \
|
||||
X(MSAALoadDstSelBug) \
|
||||
X(NegativeScratchOffsetBug) \
|
||||
X(NegativeUnalignedScratchOffsetBug) \
|
||||
X(NoDataDepHazard) \
|
||||
X(NoSdstCMPX) \
|
||||
X(NSAClauseBug) \
|
||||
@ -221,6 +229,7 @@
|
||||
X(TanhInsts) \
|
||||
X(TensorCvtLutInsts) \
|
||||
X(TransposeLoadF4F6Insts) \
|
||||
X(TrapHandler) \
|
||||
X(UnalignedAccessMode) \
|
||||
X(UnalignedDSAccess) \
|
||||
X(UnpackedD16VMem) \
|
||||
@ -280,10 +289,7 @@ protected:
|
||||
|
||||
// Dynamically set bits that enable features.
|
||||
bool FlatForGlobal = false;
|
||||
bool AutoWaitcntBeforeBarrier = false;
|
||||
bool BackOffBarrier = false;
|
||||
bool SupportsXNACK = false;
|
||||
bool KernargPreload = false;
|
||||
|
||||
// This should not be used directly. 'TargetID' tracks the dynamic settings
|
||||
// for XNACK.
|
||||
@ -291,7 +297,6 @@ protected:
|
||||
|
||||
bool EnableTgSplit = false;
|
||||
bool EnableCuMode = false;
|
||||
bool TrapHandler = false;
|
||||
bool EnablePreciseMemory = false;
|
||||
|
||||
// Used as options.
|
||||
@ -301,11 +306,7 @@ protected:
|
||||
bool EnableDS128 = false;
|
||||
bool EnablePRTStrictNull = false;
|
||||
bool DumpCode = false;
|
||||
bool AssemblerPermissiveWavesize = false;
|
||||
|
||||
// Subtarget properties statically set by tablegen
|
||||
bool UserSGPRInit16Bug = false;
|
||||
bool NegativeUnalignedScratchOffsetBug = false;
|
||||
/// The maximum number of instructions that may be placed within an S_CLAUSE,
|
||||
/// which is one greater than the maximum argument to S_CLAUSE. A value of 0
|
||||
/// indicates a lack of S_CLAUSE support.
|
||||
@ -320,8 +321,6 @@ protected:
|
||||
bool EnableSRAMECC = false;
|
||||
|
||||
bool EnableFlatScratch = false;
|
||||
bool AddNoCarryInsts = false;
|
||||
bool LDSMisalignedBug = false;
|
||||
bool ScalarizeGlobal = false;
|
||||
bool RequiresCOV6 = false;
|
||||
bool UseBlockVGPROpsForCSR = false;
|
||||
@ -567,12 +566,6 @@ public:
|
||||
/// for sparse texture support).
|
||||
bool usePRTStrictNull() const { return EnablePRTStrictNull; }
|
||||
|
||||
bool hasAutoWaitcntBeforeBarrier() const { return AutoWaitcntBeforeBarrier; }
|
||||
|
||||
/// \returns true if the target supports backing off of s_barrier instructions
|
||||
/// when an exception is raised.
|
||||
bool supportsBackOffBarrier() const { return BackOffBarrier; }
|
||||
|
||||
bool hasUnalignedBufferAccessEnabled() const {
|
||||
return HasUnalignedBufferAccess && HasUnalignedAccessMode;
|
||||
}
|
||||
@ -585,8 +578,6 @@ public:
|
||||
return HasUnalignedScratchAccess && HasUnalignedAccessMode;
|
||||
}
|
||||
|
||||
bool isTrapHandlerEnabled() const { return TrapHandler; }
|
||||
|
||||
bool isXNACKEnabled() const { return TargetID.isXnackOnOrAny(); }
|
||||
|
||||
bool isTgSplitEnabled() const { return EnableTgSplit; }
|
||||
@ -660,12 +651,6 @@ public:
|
||||
/// \returns if target has ds_gws_sema_release_all instruction.
|
||||
bool hasGWSSemaReleaseAll() const { return HasCIInsts; }
|
||||
|
||||
/// \returns true if the target has integer add/sub instructions that do not
|
||||
/// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
|
||||
/// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
|
||||
/// for saturation.
|
||||
bool hasAddNoCarry() const { return AddNoCarryInsts; }
|
||||
|
||||
bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
|
||||
|
||||
bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
|
||||
@ -769,12 +754,6 @@ public:
|
||||
return AMDGPU::getNSAMaxSize(*this, HasSampler);
|
||||
}
|
||||
|
||||
bool hasGFX10_AEncoding() const { return HasGFX10_AEncoding; }
|
||||
|
||||
bool hasGFX10_BEncoding() const { return HasGFX10_BEncoding; }
|
||||
|
||||
bool hasGFX10_3Insts() const { return HasGFX10_3Insts; }
|
||||
|
||||
bool hasMadF16() const;
|
||||
|
||||
bool hasMovB64() const { return HasGFX940Insts || HasGFX1250Insts; }
|
||||
@ -789,10 +768,8 @@ public:
|
||||
|
||||
bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; }
|
||||
|
||||
bool hasUserSGPRInit16Bug() const { return UserSGPRInit16Bug && isWave32(); }
|
||||
|
||||
bool hasNegativeUnalignedScratchOffsetBug() const {
|
||||
return NegativeUnalignedScratchOffsetBug;
|
||||
bool hasUserSGPRInit16Bug() const {
|
||||
return HasUserSGPRInit16Bug && isWave32();
|
||||
}
|
||||
|
||||
bool has12DWordStoreHazard() const {
|
||||
@ -819,7 +796,9 @@ public:
|
||||
return getGeneration() == AMDGPUSubtarget::GFX9;
|
||||
}
|
||||
|
||||
bool hasLDSMisalignedBug() const { return LDSMisalignedBug && !EnableCuMode; }
|
||||
bool hasLDSMisalignedBug() const {
|
||||
return HasLDSMisalignedBug && !EnableCuMode;
|
||||
}
|
||||
|
||||
// Shift amount of a 64 bit shift cannot be a highest allocated register
|
||||
// if also at the end of the allocation block.
|
||||
@ -843,8 +822,6 @@ public:
|
||||
|
||||
bool hasHardClauses() const { return MaxHardClauseLength > 0; }
|
||||
|
||||
bool hasGFX90AInsts() const { return HasGFX90AInsts; }
|
||||
|
||||
bool hasFPAtomicToDenormModeHazard() const {
|
||||
return getGeneration() == GFX10;
|
||||
}
|
||||
@ -894,14 +871,6 @@ public:
|
||||
/// Return true if the target has the S_DELAY_ALU instruction.
|
||||
bool hasDelayAlu() const { return HasGFX11Insts; }
|
||||
|
||||
// GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that
|
||||
// hasGFX90AInsts is also true.
|
||||
bool hasGFX940Insts() const { return HasGFX940Insts; }
|
||||
|
||||
// GFX950 is a derivation to GFX94*. hasGFX950Insts() implies that
|
||||
// hasGFX940Insts and hasGFX90AInsts are also true.
|
||||
bool hasGFX950Insts() const { return HasGFX950Insts; }
|
||||
|
||||
/// Returns true if the target supports
|
||||
/// global_load_lds_dwordx3/global_load_lds_dwordx4 or
|
||||
/// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
|
||||
@ -977,9 +946,6 @@ public:
|
||||
// \returns true if the target supports the pre-NGG legacy geometry path.
|
||||
bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
|
||||
|
||||
// \returns true if preloading kernel arguments is supported.
|
||||
bool hasKernargPreload() const { return KernargPreload; }
|
||||
|
||||
// \returns true if the target has split barriers feature
|
||||
bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
|
||||
|
||||
@ -996,8 +962,6 @@ public:
|
||||
/// values.
|
||||
bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
|
||||
|
||||
bool hasGFX1250Insts() const { return HasGFX1250Insts; }
|
||||
|
||||
bool hasINVWBL2WaitCntRequirement() const { return HasGFX1250Insts; }
|
||||
|
||||
bool hasVOPD3() const { return HasGFX1250Insts; }
|
||||
|
||||
@ -187,7 +187,7 @@ public:
|
||||
unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
|
||||
switch (Opc) {
|
||||
case AMDGPU::S_ADD_I32: {
|
||||
if (ST->hasAddNoCarry())
|
||||
if (ST->hasAddNoCarryInsts())
|
||||
return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
|
||||
return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
|
||||
}
|
||||
|
||||
@ -528,7 +528,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
if (Subtarget->hasIntClamp())
|
||||
setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Legal);
|
||||
|
||||
if (Subtarget->hasAddNoCarry())
|
||||
if (Subtarget->hasAddNoCarryInsts())
|
||||
setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32},
|
||||
Legal);
|
||||
|
||||
@ -8217,7 +8217,7 @@ SDValue SITargetLowering::lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (!Subtarget->isTrapHandlerEnabled() ||
|
||||
if (!Subtarget->hasTrapHandler() ||
|
||||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
|
||||
return lowerTrapEndpgm(Op, DAG);
|
||||
|
||||
@ -8300,7 +8300,7 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
if (!Subtarget->isTrapHandlerEnabled() ||
|
||||
if (!Subtarget->hasTrapHandler() ||
|
||||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
|
||||
LLVMContext &Ctx = MF.getFunction().getContext();
|
||||
Ctx.diagnose(DiagnosticInfoUnsupported(MF.getFunction(),
|
||||
|
||||
@ -2426,7 +2426,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
|
||||
// In all other cases, ensure safety by ensuring that there are no outstanding
|
||||
// memory operations.
|
||||
if (Opc == AMDGPU::S_BARRIER && !ST->hasAutoWaitcntBeforeBarrier() &&
|
||||
!ST->supportsBackOffBarrier()) {
|
||||
!ST->hasBackOffBarrier()) {
|
||||
Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
|
||||
}
|
||||
|
||||
|
||||
@ -5907,11 +5907,11 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
|
||||
AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
|
||||
}
|
||||
case AMDGPU::S_ADD_I32:
|
||||
return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
|
||||
return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
|
||||
case AMDGPU::S_ADDC_U32:
|
||||
return AMDGPU::V_ADDC_U32_e32;
|
||||
case AMDGPU::S_SUB_I32:
|
||||
return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
|
||||
return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
|
||||
// FIXME: These are not consistently handled, and selected when the carry is
|
||||
// used.
|
||||
case AMDGPU::S_ADD_U32:
|
||||
@ -8451,7 +8451,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
|
||||
std::pair<bool, MachineBasicBlock *>
|
||||
SIInstrInfo::moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
|
||||
MachineDominatorTree *MDT) const {
|
||||
if (ST.hasAddNoCarry()) {
|
||||
if (ST.hasAddNoCarryInsts()) {
|
||||
// Assume there is no user of scc since we don't select this in that case.
|
||||
// Since scc isn't used, it doesn't really matter if the i32 or u32 variant
|
||||
// is used.
|
||||
@ -8578,8 +8578,8 @@ void SIInstrInfo::lowerScalarAbs(SIInstrWorklist &Worklist,
|
||||
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
|
||||
unsigned SubOp = ST.hasAddNoCarry() ?
|
||||
AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
|
||||
unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
|
||||
: AMDGPU::V_SUB_CO_U32_e32;
|
||||
|
||||
BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
|
||||
.addImm(0)
|
||||
@ -8607,8 +8607,8 @@ void SIInstrInfo::lowerScalarAbsDiff(SIInstrWorklist &Worklist,
|
||||
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
|
||||
unsigned SubOp =
|
||||
ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
|
||||
unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
|
||||
: AMDGPU::V_SUB_CO_U32_e32;
|
||||
|
||||
BuildMI(MBB, MII, DL, get(SubOp), SubResultReg)
|
||||
.addReg(Src1.getReg())
|
||||
@ -9207,8 +9207,8 @@ void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist,
|
||||
const MCInstrDesc &InstDesc = get(Opcode);
|
||||
|
||||
bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
|
||||
unsigned OpcodeAdd =
|
||||
ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
|
||||
unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
|
||||
: AMDGPU::V_ADD_CO_U32_e32;
|
||||
|
||||
const TargetRegisterClass *SrcRC =
|
||||
Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
|
||||
@ -9968,7 +9968,7 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
const DebugLoc &DL,
|
||||
Register DestReg) const {
|
||||
if (ST.hasAddNoCarry())
|
||||
if (ST.hasAddNoCarryInsts())
|
||||
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
|
||||
|
||||
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
@ -9984,7 +9984,7 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
|
||||
const DebugLoc &DL,
|
||||
Register DestReg,
|
||||
RegScavenger &RS) const {
|
||||
if (ST.hasAddNoCarry())
|
||||
if (ST.hasAddNoCarryInsts())
|
||||
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg);
|
||||
|
||||
// If available, prefer to use vcc.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user