[NFCI][AMDGPU] Remove more redundant code from GCNSubtarget.h (#177297)

With this cleanup, we are getting pretty close to using
`GET_SUBTARGETINFO_MACRO` in the header.
This commit is contained in:
Shilei Tian 2026-01-22 09:07:15 -05:00 committed by GitHub
parent a81d2bf933
commit 02d34a76f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 78 additions and 133 deletions

View File

@ -70,10 +70,9 @@ multiclass AMDGPUSubtargetFeature<string FeatureString,
def Has#NAME : Predicate<"Subtarget->has"#NAME#"()">;
}
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
"FastFMAF32",
"true",
"Assuming f32 fma is at least as fast as mul + add"
defm FastFMAF32 : AMDGPUSubtargetFeature<"fast-fmaf",
"Assuming f32 fma is at least as fast as mul + add",
/*GenPredicate=*/0
>;
defm FastDenormalF32 : AMDGPUSubtargetFeature<"fast-denormal-f32",
@ -135,9 +134,7 @@ defm FlatGVSMode : AMDGPUSubtargetFeature<"flat-gvs-mode",
[FeatureFlatAddressSpace]
>;
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
"AddNoCarryInsts",
"true",
defm AddNoCarryInsts : AMDGPUSubtargetFeature<"add-no-carry-insts",
"Have VALU add/sub instructions without carry out"
>;
@ -145,10 +142,9 @@ defm UnalignedBufferAccess : AMDGPUSubtargetFeature<"unaligned-buffer-access",
"Hardware supports unaligned global loads and stores"
>;
def FeatureTrapHandler: SubtargetFeature<"trap-handler",
"TrapHandler",
"true",
"Trap handler support"
defm TrapHandler: AMDGPUSubtargetFeature<"trap-handler",
"Trap handler support",
/*GenPredicate=*/0
>;
defm UnalignedScratchAccess : AMDGPUSubtargetFeature<"unaligned-scratch-access",
@ -241,16 +237,14 @@ defm SGPRInitBug : AMDGPUSubtargetFeature<"sgpr-init-bug",
"VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;
def FeatureUserSGPRInit16Bug : SubtargetFeature<"user-sgpr-init16-bug",
"UserSGPRInit16Bug",
"true",
"Bug requiring at least 16 user+system SGPRs to be enabled"
defm UserSGPRInit16Bug : AMDGPUSubtargetFeature<"user-sgpr-init16-bug",
"Bug requiring at least 16 user+system SGPRs to be enabled",
/*GenPredicate=*/0
>;
def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
"LDSMisalignedBug",
"true",
"Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode"
defm LDSMisalignedBug : AMDGPUSubtargetFeature<"lds-misaligned-bug",
"Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode",
/*GenPredicate=*/0
>;
defm MFMAInlineLiteralBug : AMDGPUSubtargetFeature<"mfma-inline-literal-bug",
@ -336,11 +330,10 @@ defm NegativeScratchOffsetBug : AMDGPUSubtargetFeature<"negative-scratch-offset-
"Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9"
>;
def FeatureNegativeUnalignedScratchOffsetBug : SubtargetFeature<"negative-unaligned-scratch-offset-bug",
"NegativeUnalignedScratchOffsetBug",
"true",
defm NegativeUnalignedScratchOffsetBug : AMDGPUSubtargetFeature<"negative-unaligned-scratch-offset-bug",
"Scratch instructions with a VGPR offset and a negative immediate offset that"
"is not a multiple of 4 read wrong memory on GFX10"
"is not a multiple of 4 read wrong memory on GFX10",
/*GenPredicate=*/0
>;
defm Offset3fBug : AMDGPUSubtargetFeature<"offset-3f-bug",
@ -1085,11 +1078,9 @@ defm SWakeupBarrier : AMDGPUSubtargetFeature<"s-wakeup-barrier-inst",
// wave32 and wave64. Instead what users do is assemble with both
// wavesizes enabled. We translate this into this special mode so this
// only influences assembler behavior and nothing else.
def FeatureAssemblerPermissiveWavesize : SubtargetFeature<
"assembler-permissive-wavesize",
"AssemblerPermissiveWavesize",
"true",
"allow parsing wave32 and wave64 variants of instructions"
defm AssemblerPermissiveWavesize : AMDGPUSubtargetFeature<"assembler-permissive-wavesize",
"Allow parsing wave32 and wave64 variants of instructions",
/*GenPredicate=*/0
>;
class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
@ -1168,17 +1159,14 @@ def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"Force to generate flat instruction for global"
>;
def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
"auto-waitcnt-before-barrier",
"AutoWaitcntBeforeBarrier",
"true",
"Hardware automatically inserts waitcnt before barrier"
defm AutoWaitcntBeforeBarrier : AMDGPUSubtargetFeature <"auto-waitcnt-before-barrier",
"Hardware automatically inserts waitcnt before barrier",
/*GenPredicate=*/0
>;
def FeatureBackOffBarrier : SubtargetFeature <"back-off-barrier",
"BackOffBarrier",
"true",
"Hardware supports backing off s_barrier if an exception occurs"
defm BackOffBarrier : AMDGPUSubtargetFeature <"back-off-barrier",
"Hardware supports backing off s_barrier if an exception occurs",
/*GenPredicate=*/0
>;
defm TrigReducedRange : AMDGPUSubtargetFeature<"trig-reduced-range",
@ -1186,10 +1174,9 @@ defm TrigReducedRange : AMDGPUSubtargetFeature<"trig-reduced-range",
/*GenPredicate=*/0
>;
def FeatureKernargPreload : SubtargetFeature <"kernarg-preload",
"KernargPreload",
"true",
"Hardware supports preloading of kernel arguments in user SGPRs."
defm KernargPreload : AMDGPUSubtargetFeature <"kernarg-preload",
"Hardware supports preloading of kernel arguments in user SGPRs.",
/*GenPredicate=*/0
>;
// Alignment enforcement is controlled by a configuration register:
@ -1722,7 +1709,7 @@ def FeatureISAVersion10_1_Common : FeatureSet<
FeatureGetWaveIdInst,
FeatureMadMacF32Insts,
FeatureDsSrc2Insts,
FeatureLdsMisalignedBug,
FeatureLDSMisalignedBug,
FeatureSupportsXNACK,
// gfx101x bugs
FeatureVcmpxPermlaneHazard,
@ -2385,10 +2372,7 @@ def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9
def HasLDSFPAtomicAddF32 : Predicate<"Subtarget->hasLDSFPAtomicAddF32()">,
AssemblerPredicate<(all_of FeatureGFX8Insts)>;
def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
AssemblerPredicate<(all_of FeatureAddNoCarryInsts)>;
def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">;
def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarryInsts()">;
def HasXNACKEnabled : Predicate<"Subtarget->isXNACKEnabled()">;

View File

@ -1255,8 +1255,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.UserSGPR = MFI->getNumUserSGPRs();
// For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
ProgInfo.TrapHandlerEnable =
STM.isAmdHsaOS() ? 0 : STM.isTrapHandlerEnabled();
ProgInfo.TrapHandlerEnable = STM.isAmdHsaOS() ? 0 : STM.hasTrapHandler();
ProgInfo.TGIdXEnable = MFI->hasWorkGroupIDX();
ProgInfo.TGIdYEnable = MFI->hasWorkGroupIDY();
ProgInfo.TGIdZEnable = MFI->hasWorkGroupIDZ();

View File

@ -1306,7 +1306,7 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
// FIXME: Select to VOP3 version for with-carry.
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
if (Subtarget->hasAddNoCarryInsts()) {
SubOp = AMDGPU::V_SUB_U32_e64;
Opnds.push_back(
CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
@ -1491,7 +1491,7 @@ bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
Opnds.push_back(Zero);
Opnds.push_back(Addr.getOperand(1));
unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
if (Subtarget->hasAddNoCarryInsts()) {
SubOp = AMDGPU::V_SUB_U32_e64;
Opnds.push_back(
CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
@ -1886,7 +1886,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
Opnds.push_back(N0);
Opnds.push_back(AddOffsetLo);
unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
if (Subtarget->hasAddNoCarry()) {
if (Subtarget->hasAddNoCarryInsts()) {
AddOp = AMDGPU::V_ADD_U32_e64;
Opnds.push_back(Clamp);
}

View File

@ -447,7 +447,7 @@ bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
}
if (STI.hasAddNoCarry()) {
if (STI.hasAddNoCarryInsts()) {
const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
I.setDesc(TII.get(Opc));
I.addOperand(*MF, MachineOperand::CreateImm(0));

View File

@ -751,7 +751,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.scalarize(0);
if (ST.hasVOP3PInsts() && ST.hasAddNoCarry() && ST.hasIntClamp()) {
if (ST.hasVOP3PInsts() && ST.hasAddNoCarryInsts() && ST.hasIntClamp()) {
// Full set of gfx9 features.
if (ST.hasScalarAddSub64()) {
getActionDefinitionsBuilder({G_ADD, G_SUB})
@ -7313,7 +7313,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferPrefetch(LegalizerHelper &Helper,
bool AMDGPULegalizerInfo::legalizeTrap(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
if (!ST.isTrapHandlerEnabled() ||
if (!ST.hasTrapHandler() ||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return legalizeTrapEndpgm(MI, MRI, B);
@ -7433,7 +7433,7 @@ bool AMDGPULegalizerInfo::legalizeDebugTrap(MachineInstr &MI,
MachineIRBuilder &B) const {
// Is non-HSA path or trap-handler disabled? Then, report a warning
// accordingly
if (!ST.isTrapHandlerEnabled() ||
if (!ST.hasTrapHandler() ||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
Function &Fn = B.getMF().getFunction();
Fn.getContext().diagnose(DiagnosticInfoUnsupported(

View File

@ -75,7 +75,7 @@ protected:
bool HasFminFmaxLegacy = true;
bool EnablePromoteAlloca = false;
bool HasTrigReducedRange = false;
bool FastFMAF32 = false;
bool HasFastFMAF32 = false;
unsigned EUsPerCU = 4;
unsigned MaxWavesPerEU = 10;
unsigned LocalMemorySize = 0;
@ -299,9 +299,7 @@ public:
return HasTrigReducedRange;
}
bool hasFastFMAF32() const {
return FastFMAF32;
}
bool hasFastFMAF32() const { return HasFastFMAF32; }
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;

View File

@ -51,33 +51,31 @@
// These features either have custom getters or code accesses the member
// directly.
#define GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY(X) \
X(AssemblerPermissiveWavesize) \
X(CIInsts) \
X(FastDenormalF32) \
X(GCN3Encoding) \
X(GFX10_3Insts) \
X(GFX10_AEncoding) \
X(GFX10_BEncoding) \
X(GFX10Insts) \
X(GFX11Insts) \
X(GFX1250Insts) \
X(GFX12Insts) \
X(GFX7GFX8GFX9Insts) \
X(GFX8Insts) \
X(GFX90AInsts) \
X(GFX940Insts) \
X(GFX950Insts) \
X(GFX9Insts) \
X(LDSMisalignedBug) \
X(UnalignedBufferAccess) \
X(UnalignedScratchAccess)
X(UnalignedScratchAccess) \
X(UserSGPRInit16Bug)
// Features with both member and getter.
#define GCN_SUBTARGET_HAS_FEATURE(X) \
X(1_5xVGPRs) \
X(1024AddressableVGPRs) \
X(45BitNumRecordsBufferResource) \
X(AutoWaitcntBeforeBarrier) \
X(64BitLiterals) \
X(A16) \
X(AddMinMaxInsts) \
X(AddNoCarryInsts) \
X(AddSubU64Insts) \
X(AgentScopeFineGrainedRemoteMemoryAtomics) \
X(ApertureRegs) \
@ -97,6 +95,7 @@
X(AtomicFMinFMaxF64FlatInsts) \
X(AtomicFMinFMaxF64GlobalInsts) \
X(AtomicGlobalPkAddBF16Inst) \
X(BackOffBarrier) \
X(BitOp3Insts) \
X(BVHDualAndBVH8Insts) \
X(Clusters) \
@ -147,6 +146,13 @@
X(G16) \
X(GDS) \
X(GetWaveIdInst) \
X(GFX10_3Insts) \
X(GFX10_AEncoding) \
X(GFX10_BEncoding) \
X(GFX1250Insts) \
X(GFX90AInsts) \
X(GFX940Insts) \
X(GFX950Insts) \
X(GloballyAddressableScratch) \
X(GWS) \
X(HalfRate64Ops) \
@ -156,6 +162,7 @@
X(ImageStoreD16Bug) \
X(InstFwdPrefetchBug) \
X(IntClamp) \
X(KernargPreload) \
X(LdsBarrierArriveAtomic) \
X(LdsBranchVmemWARHazard) \
X(LerpInst) \
@ -174,6 +181,7 @@
X(Movrel) \
X(MSAALoadDstSelBug) \
X(NegativeScratchOffsetBug) \
X(NegativeUnalignedScratchOffsetBug) \
X(NoDataDepHazard) \
X(NoSdstCMPX) \
X(NSAClauseBug) \
@ -221,6 +229,7 @@
X(TanhInsts) \
X(TensorCvtLutInsts) \
X(TransposeLoadF4F6Insts) \
X(TrapHandler) \
X(UnalignedAccessMode) \
X(UnalignedDSAccess) \
X(UnpackedD16VMem) \
@ -280,10 +289,7 @@ protected:
// Dynamically set bits that enable features.
bool FlatForGlobal = false;
bool AutoWaitcntBeforeBarrier = false;
bool BackOffBarrier = false;
bool SupportsXNACK = false;
bool KernargPreload = false;
// This should not be used directly. 'TargetID' tracks the dynamic settings
// for XNACK.
@ -291,7 +297,6 @@ protected:
bool EnableTgSplit = false;
bool EnableCuMode = false;
bool TrapHandler = false;
bool EnablePreciseMemory = false;
// Used as options.
@ -301,11 +306,7 @@ protected:
bool EnableDS128 = false;
bool EnablePRTStrictNull = false;
bool DumpCode = false;
bool AssemblerPermissiveWavesize = false;
// Subtarget statically properties set by tablegen
bool UserSGPRInit16Bug = false;
bool NegativeUnalignedScratchOffsetBug = false;
/// The maximum number of instructions that may be placed within an S_CLAUSE,
/// which is one greater than the maximum argument to S_CLAUSE. A value of 0
/// indicates a lack of S_CLAUSE support.
@ -320,8 +321,6 @@ protected:
bool EnableSRAMECC = false;
bool EnableFlatScratch = false;
bool AddNoCarryInsts = false;
bool LDSMisalignedBug = false;
bool ScalarizeGlobal = false;
bool RequiresCOV6 = false;
bool UseBlockVGPROpsForCSR = false;
@ -567,12 +566,6 @@ public:
/// for sparse texture support).
bool usePRTStrictNull() const { return EnablePRTStrictNull; }
bool hasAutoWaitcntBeforeBarrier() const { return AutoWaitcntBeforeBarrier; }
/// \returns true if the target supports backing off of s_barrier instructions
/// when an exception is raised.
bool supportsBackOffBarrier() const { return BackOffBarrier; }
bool hasUnalignedBufferAccessEnabled() const {
return HasUnalignedBufferAccess && HasUnalignedAccessMode;
}
@ -585,8 +578,6 @@ public:
return HasUnalignedScratchAccess && HasUnalignedAccessMode;
}
bool isTrapHandlerEnabled() const { return TrapHandler; }
bool isXNACKEnabled() const { return TargetID.isXnackOnOrAny(); }
bool isTgSplitEnabled() const { return EnableTgSplit; }
@ -660,12 +651,6 @@ public:
/// \returns if target has ds_gws_sema_release_all instruction.
bool hasGWSSemaReleaseAll() const { return HasCIInsts; }
/// \returns true if the target has integer add/sub instructions that do not
/// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
/// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
/// for saturation.
bool hasAddNoCarry() const { return AddNoCarryInsts; }
bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }
bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }
@ -769,12 +754,6 @@ public:
return AMDGPU::getNSAMaxSize(*this, HasSampler);
}
bool hasGFX10_AEncoding() const { return HasGFX10_AEncoding; }
bool hasGFX10_BEncoding() const { return HasGFX10_BEncoding; }
bool hasGFX10_3Insts() const { return HasGFX10_3Insts; }
bool hasMadF16() const;
bool hasMovB64() const { return HasGFX940Insts || HasGFX1250Insts; }
@ -789,10 +768,8 @@ public:
bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; }
bool hasUserSGPRInit16Bug() const { return UserSGPRInit16Bug && isWave32(); }
bool hasNegativeUnalignedScratchOffsetBug() const {
return NegativeUnalignedScratchOffsetBug;
bool hasUserSGPRInit16Bug() const {
return HasUserSGPRInit16Bug && isWave32();
}
bool has12DWordStoreHazard() const {
@ -819,7 +796,9 @@ public:
return getGeneration() == AMDGPUSubtarget::GFX9;
}
bool hasLDSMisalignedBug() const { return LDSMisalignedBug && !EnableCuMode; }
bool hasLDSMisalignedBug() const {
return HasLDSMisalignedBug && !EnableCuMode;
}
// Shift amount of a 64 bit shift cannot be a highest allocated register
// if also at the end of the allocation block.
@ -843,8 +822,6 @@ public:
bool hasHardClauses() const { return MaxHardClauseLength > 0; }
bool hasGFX90AInsts() const { return HasGFX90AInsts; }
bool hasFPAtomicToDenormModeHazard() const {
return getGeneration() == GFX10;
}
@ -894,14 +871,6 @@ public:
/// Return true if the target has the S_DELAY_ALU instruction.
bool hasDelayAlu() const { return HasGFX11Insts; }
// GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that
// hasGFX90AInsts is also true.
bool hasGFX940Insts() const { return HasGFX940Insts; }
// GFX950 is a derivation to GFX94*. hasGFX950Insts() implies that
// hasGFX940Insts and hasGFX90AInsts are also true.
bool hasGFX950Insts() const { return HasGFX950Insts; }
/// Returns true if the target supports
/// global_load_lds_dwordx3/global_load_lds_dwordx4 or
/// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.
@ -977,9 +946,6 @@ public:
// \returns true if the target supports the pre-NGG legacy geometry path.
bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
// \returns true if preloading kernel arguments is supported.
bool hasKernargPreload() const { return KernargPreload; }
// \returns true if the target has split barriers feature
bool hasSplitBarriers() const { return getGeneration() >= GFX12; }
@ -996,8 +962,6 @@ public:
/// values.
bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
bool hasGFX1250Insts() const { return HasGFX1250Insts; }
bool hasINVWBL2WaitCntRequirement() const { return HasGFX1250Insts; }
bool hasVOPD3() const { return HasGFX1250Insts; }

View File

@ -187,7 +187,7 @@ public:
unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
switch (Opc) {
case AMDGPU::S_ADD_I32: {
if (ST->hasAddNoCarry())
if (ST->hasAddNoCarryInsts())
return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
}

View File

@ -528,7 +528,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (Subtarget->hasIntClamp())
setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Legal);
if (Subtarget->hasAddNoCarry())
if (Subtarget->hasAddNoCarryInsts())
setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32},
Legal);
@ -8217,7 +8217,7 @@ SDValue SITargetLowering::lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const {
}
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->isTrapHandlerEnabled() ||
if (!Subtarget->hasTrapHandler() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return lowerTrapEndpgm(Op, DAG);
@ -8300,7 +8300,7 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
MachineFunction &MF = DAG.getMachineFunction();
if (!Subtarget->isTrapHandlerEnabled() ||
if (!Subtarget->hasTrapHandler() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
LLVMContext &Ctx = MF.getFunction().getContext();
Ctx.diagnose(DiagnosticInfoUnsupported(MF.getFunction(),

View File

@ -2426,7 +2426,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// In all other cases, ensure safety by ensuring that there are no outstanding
// memory operations.
if (Opc == AMDGPU::S_BARRIER && !ST->hasAutoWaitcntBeforeBarrier() &&
!ST->supportsBackOffBarrier()) {
!ST->hasBackOffBarrier()) {
Wait = Wait.combined(WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/true));
}

View File

@ -5907,11 +5907,11 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
}
case AMDGPU::S_ADD_I32:
return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
case AMDGPU::S_ADDC_U32:
return AMDGPU::V_ADDC_U32_e32;
case AMDGPU::S_SUB_I32:
return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
// FIXME: These are not consistently handled, and selected when the carry is
// used.
case AMDGPU::S_ADD_U32:
@ -8451,7 +8451,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
std::pair<bool, MachineBasicBlock *>
SIInstrInfo::moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT) const {
if (ST.hasAddNoCarry()) {
if (ST.hasAddNoCarryInsts()) {
// Assume there is no user of scc since we don't select this in that case.
// Since scc isn't used, it doesn't really matter if the i32 or u32 variant
// is used.
@ -8578,8 +8578,8 @@ void SIInstrInfo::lowerScalarAbs(SIInstrWorklist &Worklist,
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned SubOp = ST.hasAddNoCarry() ?
AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
: AMDGPU::V_SUB_CO_U32_e32;
BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
.addImm(0)
@ -8607,8 +8607,8 @@ void SIInstrInfo::lowerScalarAbsDiff(SIInstrWorklist &Worklist,
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned SubOp =
ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
: AMDGPU::V_SUB_CO_U32_e32;
BuildMI(MBB, MII, DL, get(SubOp), SubResultReg)
.addReg(Src1.getReg())
@ -9207,8 +9207,8 @@ void SIInstrInfo::splitScalar64BitCountOp(SIInstrWorklist &Worklist,
const MCInstrDesc &InstDesc = get(Opcode);
bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
unsigned OpcodeAdd =
ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
: AMDGPU::V_ADD_CO_U32_e32;
const TargetRegisterClass *SrcRC =
Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
@ -9968,7 +9968,7 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL,
Register DestReg) const {
if (ST.hasAddNoCarry())
if (ST.hasAddNoCarryInsts())
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@ -9984,7 +9984,7 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
const DebugLoc &DL,
Register DestReg,
RegScavenger &RS) const {
if (ST.hasAddNoCarry())
if (ST.hasAddNoCarryInsts())
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg);
// If available, prefer to use vcc.