[AMDGPU] Add FeatureGFX13 and SMEM encoding for gfx13 (#177567)
For now, the list of features is based on gfx12 and gfx1250.
Co-authored-by: Jay Foad <jay.foad@amd.com>
This commit is contained in:
parent
7cd5b2bffb
commit
3c0f5045e1
@ -496,6 +496,11 @@ defm GFX1250Insts : AMDGPUSubtargetFeature<"gfx1250-insts",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
// Declares the "gfx13-insts" subtarget feature, described as the additional
// instructions available on GFX13 and later. GenPredicate is passed as 0,
// matching the neighbouring GFX1250Insts / GFX10_3Insts declarations.
defm GFX13Insts : AMDGPUSubtargetFeature<"gfx13-insts",
|
||||
"Additional instructions for GFX13+",
|
||||
/*GenPredicate=*/0
|
||||
>;
|
||||
|
||||
defm GFX10_3Insts : AMDGPUSubtargetFeature<"gfx10-3-insts",
|
||||
"Additional instructions for GFX10.3",
|
||||
/*GenPredicate=*/0
|
||||
@ -1431,6 +1436,29 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
|
||||
]
|
||||
>;
|
||||
|
||||
// Generation feature for GFX13 ("gfx13"). The implied feature list carries the
// instruction-set features of the earlier generations (CI, GFX8, GFX9, GFX10,
// GFX10.3, GFX11, GFX12) plus FeatureGFX13Insts. Per the commit description,
// the list is provisional and currently derived from gfx12 and gfx1250.
def FeatureGFX13 : GCNSubtargetFeatureGeneration<"GFX13",
|
||||
"gfx13",
|
||||
[FeatureFP64, FeatureMIMG_R128,
|
||||
FeatureFlatAddressSpace, Feature16BitInsts,
|
||||
FeatureInv2PiInlineImm, FeatureApertureRegs,
|
||||
FeatureCIInsts, FeatureGFX8Insts, FeatureGFX9Insts, FeatureGFX10Insts,
|
||||
FeatureGFX10_AEncoding, FeatureGFX10_BEncoding, FeatureGFX10_3Insts,
|
||||
FeatureGFX11Insts, FeatureGFX12Insts, FeatureGFX13Insts, FeatureVOP3PInsts,
|
||||
FeatureVOPDInsts, FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
|
||||
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
|
||||
FeatureAddNoCarryInsts, FeatureFmaMixInsts,
|
||||
FeatureNoSdstCMPX, FeatureVscnt,
|
||||
FeatureVOP3Literal, FeatureDPP8,
|
||||
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
|
||||
FeatureA16, FeatureFastDenormalF32, FeatureG16,
|
||||
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
|
||||
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
|
||||
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
|
||||
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
|
||||
FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
|
||||
FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics
|
||||
]
|
||||
>;
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class FeatureSet<list<SubtargetFeature> Features_> {
|
||||
@ -1989,6 +2017,65 @@ def FeatureISAVersion12_Generic: FeatureSet<
|
||||
!listconcat(FeatureISAVersion12.Features,
|
||||
[FeatureRequiresCOV6])>;
|
||||
|
||||
// Feature set for ISA version 13: the GFX13 generation feature, the
// GFX1250 instruction feature, and the individual capabilities listed below.
// Because FeatureGFX1250Insts is included here, checks written against that
// feature bit also hold for targets built from this set.
def FeatureISAVersion13 : FeatureSet<
|
||||
[FeatureGFX13,
|
||||
FeatureGFX1250Insts,
|
||||
FeatureAddressableLocalMemorySize65536,
|
||||
Feature64BitLiterals,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts,
|
||||
FeatureFmacF64Inst,
|
||||
FeatureDot7Insts,
|
||||
FeatureDot8Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeaturePartialNSAEncoding,
|
||||
FeatureShaderCyclesRegister,
|
||||
FeatureArchitectedFlatScratch,
|
||||
FeatureArchitectedSGPRs,
|
||||
FeatureAtomicFaddRtnInsts,
|
||||
FeatureAtomicFaddNoRtnInsts,
|
||||
FeatureAtomicDsPkAdd16Insts,
|
||||
FeatureAtomicFlatPkAdd16Insts,
|
||||
FeatureAtomicBufferGlobalPkAddF16Insts,
|
||||
FeatureAtomicGlobalPkAddBF16Inst,
|
||||
FeatureAtomicBufferPkAddBF16Inst,
|
||||
FeatureFlatAtomicFaddF32Inst,
|
||||
FeatureFP8ConversionInsts,
|
||||
FeaturePackedTID,
|
||||
FeatureVcmpxPermlaneHazard,
|
||||
FeatureSALUFloatInsts,
|
||||
FeaturePseudoScalarTrans,
|
||||
FeatureRestrictedSOffset,
|
||||
FeatureScalarDwordx3Loads,
|
||||
FeatureDPPSrc1SGPR,
|
||||
FeatureBitOp3Insts,
|
||||
FeatureTanhInsts,
|
||||
FeatureTensorCvtLutInsts,
|
||||
FeatureTransposeLoadF4F6Insts,
|
||||
Feature1_5xVGPRs,
|
||||
FeatureBF16TransInsts,
|
||||
FeatureBF16ConversionInsts,
|
||||
FeatureBF16PackedInsts,
|
||||
FeaturePrngInst,
|
||||
FeaturePermlane16Swap,
|
||||
FeatureAshrPkInsts,
|
||||
FeatureAtomicFMinFMaxF64GlobalInsts,
|
||||
FeatureAtomicFMinFMaxF64FlatInsts,
|
||||
FeatureFmaMixBF16Insts,
|
||||
FeatureGloballyAddressableScratch,
|
||||
FeatureCvtPkF16F32Inst,
|
||||
FeatureF16BF16ToFP6BF6ConversionScaleInsts,
|
||||
// NOTE(review): FeatureGFX13 already lists FeatureIEEEMinimumMaximumInsts, so
// this entry looks redundant - confirm whether it is kept on purpose.
FeatureIEEEMinimumMaximumInsts,
|
||||
FeatureClusters,
|
||||
FeatureCubeInsts,
|
||||
FeatureLerpInst,
|
||||
FeatureSadInsts,
|
||||
FeatureQsadInsts,
|
||||
FeatureCvtNormInsts,
|
||||
FeatureCvtPkNormVOP2Insts,
|
||||
FeatureCvtPkNormVOP3Insts,
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUInstrInfo : InstrInfo {
|
||||
@ -2273,7 +2360,7 @@ def isGFX11Plus :
|
||||
|
||||
def isGFX12Only :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12">,
|
||||
AssemblerPredicate<(all_of FeatureGFX12Insts)>;
|
||||
AssemblerPredicate<(all_of FeatureGFX12Insts, (not FeatureGFX13Insts))>;
|
||||
|
||||
def isGFX12Not12_50 :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12 && !Subtarget->hasGFX1250Insts()">,
|
||||
@ -2284,12 +2371,13 @@ def isGFX12Plus :
|
||||
AssemblerPredicate<(all_of FeatureGFX12Insts)>;
|
||||
|
||||
def isGFX12PlusNot12_50 :
|
||||
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12 && !Subtarget->hasGFX1250Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureGFX12Insts, (not FeatureGFX1250Insts))>;
|
||||
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12 &&"
|
||||
"(Subtarget->getGeneration() >= AMDGPUSubtarget::GFX13 || !Subtarget->hasGFX1250Insts())">,
|
||||
AssemblerPredicate<(all_of FeatureGFX12Insts, (any_of FeatureGFX13Insts, (not FeatureGFX1250Insts)))>;
|
||||
|
||||
def isGFX125xOnly :
|
||||
Predicate<"Subtarget->hasGFX1250Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureGFX1250Insts)>;
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12 && Subtarget->hasGFX1250Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureGFX1250Insts, (not FeatureGFX13Insts))>;
|
||||
|
||||
def isGFX1250Plus :
|
||||
Predicate<"Subtarget->hasGFX1250Insts()">,
|
||||
@ -2300,8 +2388,18 @@ def isNotGFX1250Plus :
|
||||
AssemblerPredicate<(all_of (not FeatureGFX1250Insts))>;
|
||||
|
||||
def isGFX940orGFX1250 :
|
||||
Predicate<"Subtarget->hasGFX940Insts() || Subtarget->hasGFX1250Insts()">,
|
||||
AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX1250Insts)>;
|
||||
Predicate<"Subtarget->hasGFX940Insts() ||"
|
||||
"(Subtarget->hasGFX1250Insts() && !Subtarget->hasGFX13Insts())">,
|
||||
AssemblerPredicate<(any_of FeatureGFX940Insts,
|
||||
(all_of FeatureGFX1250Insts, (not FeatureGFX13Insts)))>;
|
||||
|
||||
// Matches exactly the GFX13 generation in codegen (generation == GFX13).
// The assembler side only checks FeatureGFX13Insts, which is the same
// condition isGFX13Plus uses below.
def isGFX13Only :
|
||||
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX13">,
|
||||
AssemblerPredicate<(all_of FeatureGFX13Insts)>;
|
||||
|
||||
// Matches GFX13 and any later generation in codegen (generation >= GFX13);
// the assembler predicate requires FeatureGFX13Insts.
def isGFX13Plus :
|
||||
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX13">,
|
||||
AssemblerPredicate<(all_of FeatureGFX13Insts)>;
|
||||
|
||||
def HasAtomicCondSubClampFlatInsts :
|
||||
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
|
||||
|
||||
@ -660,7 +660,7 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
|
||||
(void)PGRM_Rsrc3;
|
||||
(void)EvaluatableRsrc3;
|
||||
assert(STM.getGeneration() >= AMDGPUSubtarget::GFX10 ||
|
||||
STM.hasGFX90AInsts() || AMDGPU::isGFX1250(STM) || !EvaluatableRsrc3 ||
|
||||
STM.hasGFX90AInsts() || STM.hasGFX1250Insts() || !EvaluatableRsrc3 ||
|
||||
static_cast<uint64_t>(PGRM_Rsrc3) == 0);
|
||||
KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3;
|
||||
|
||||
@ -831,7 +831,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
" AccumOffset: " + getMCExprStr(AdjustedAccum), false);
|
||||
}
|
||||
|
||||
if (AMDGPU::isGFX1250(STM))
|
||||
if (STM.hasGFX1250Insts())
|
||||
OutStreamer->emitRawComment(
|
||||
" NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),
|
||||
false);
|
||||
@ -867,7 +867,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
[[maybe_unused]] int64_t PGMRSrc3;
|
||||
assert(STM.getGeneration() >= AMDGPUSubtarget::GFX10 ||
|
||||
STM.hasGFX90AInsts() || AMDGPU::isGFX1250(STM) ||
|
||||
STM.hasGFX90AInsts() || STM.hasGFX1250Insts() ||
|
||||
(CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGMRSrc3) &&
|
||||
static_cast<uint64_t>(PGMRSrc3) == 0));
|
||||
if (STM.hasGFX90AInsts()) {
|
||||
@ -1288,7 +1288,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
|
||||
}
|
||||
|
||||
if (AMDGPU::isGFX1250(STM))
|
||||
if (STM.hasGFX1250Insts())
|
||||
ProgInfo.ComputePGMRSrc3 =
|
||||
SetBits(ProgInfo.ComputePGMRSrc3, ProgInfo.NamedBarCnt,
|
||||
amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
|
||||
|
||||
@ -71,7 +71,7 @@ public:
|
||||
if (SIInstrInfo::isTRANS(MI))
|
||||
return TRANS;
|
||||
// WMMA XDL ops are treated the same as TRANS.
|
||||
if (AMDGPU::isGFX1250(*ST) && SII->isXDLWMMA(MI))
|
||||
if (ST->hasGFX1250Insts() && SII->isXDLWMMA(MI))
|
||||
return TRANS;
|
||||
if (SIInstrInfo::isVALU(MI))
|
||||
return VALU;
|
||||
|
||||
@ -6153,7 +6153,7 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
|
||||
// On GFX1250+, voffset and immoffset are zero-extended from 32 bits before
|
||||
// being added, so we can only safely match a 32-bit addition with no unsigned
|
||||
// overflow.
|
||||
bool CheckNUW = AMDGPU::isGFX1250(ST);
|
||||
bool CheckNUW = ST.hasGFX1250Insts();
|
||||
std::tie(BaseReg, ImmOffset) = AMDGPU::getBaseWithConstantOffset(
|
||||
MRI, OrigOffset, /*KnownBits=*/nullptr, CheckNUW);
|
||||
|
||||
|
||||
@ -42,6 +42,7 @@ public:
|
||||
GFX10 = 9,
|
||||
GFX11 = 10,
|
||||
GFX12 = 11,
|
||||
GFX13 = 12,
|
||||
};
|
||||
|
||||
private:
|
||||
|
||||
@ -1551,6 +1551,12 @@ public:
|
||||
|
||||
// Subtarget queries for the parser: each one forwards to the AMDGPU:: helper
// of the same name, passing the subtarget info returned by getSTI().
bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
|
||||
|
||||
// Forwards to AMDGPU::isGFX1250Plus for the current subtarget.
bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }
|
||||
|
||||
// Forwards to AMDGPU::isGFX13 for the current subtarget.
bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }
|
||||
|
||||
// Forwards to AMDGPU::isGFX13Plus for the current subtarget.
bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }
|
||||
|
||||
bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
|
||||
|
||||
bool isGFX10_BEncoding() const {
|
||||
@ -2931,7 +2937,7 @@ MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
|
||||
return AMDGPU::NoRegister;
|
||||
}
|
||||
|
||||
if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
|
||||
if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
|
||||
Error(Loc, "register index is out of range");
|
||||
return MCRegister();
|
||||
}
|
||||
@ -3953,7 +3959,7 @@ AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
|
||||
bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
|
||||
Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
|
||||
Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250;
|
||||
bool AllowSameVGPR = isGFX1250();
|
||||
bool AllowSameVGPR = isGFX1250Plus();
|
||||
|
||||
if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
|
||||
for (auto OpName : {OpName::src0X, OpName::src0Y}) {
|
||||
@ -4087,7 +4093,7 @@ bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
|
||||
// form but switch to VOPD3 otherwise.
|
||||
bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
|
||||
const unsigned Opcode = Inst.getOpcode();
|
||||
if (!isGFX1250() || !isVOPD(Opcode))
|
||||
if (!isGFX1250Plus() || !isVOPD(Opcode))
|
||||
return false;
|
||||
|
||||
if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
|
||||
@ -5377,7 +5383,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
|
||||
|
||||
unsigned CPol = Inst.getOperand(CPolPos).getImm();
|
||||
|
||||
if (!isGFX1250()) {
|
||||
if (!isGFX1250Plus()) {
|
||||
if (CPol & CPol::SCAL) {
|
||||
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
|
||||
StringRef CStr(S.getPointer());
|
||||
@ -6176,7 +6182,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
||||
return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
|
||||
AccumOffset = ExprVal;
|
||||
} else if (ID == ".amdhsa_named_barrier_count") {
|
||||
if (!isGFX1250())
|
||||
if (!isGFX1250Plus())
|
||||
return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
|
||||
NamedBarCnt = ExprVal;
|
||||
} else if (ID == ".amdhsa_reserve_vcc") {
|
||||
@ -6376,7 +6382,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
||||
return TokError("amdgpu_user_sgpr_count smaller than than implied by "
|
||||
"enabled user SGPRs");
|
||||
|
||||
if (isGFX1250()) {
|
||||
if (isGFX1250Plus()) {
|
||||
if (!isUInt<COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
|
||||
return TokError("too many user SGPRs enabled");
|
||||
AMDGPU::MCKernelDescriptor::bits_set(
|
||||
@ -6431,7 +6437,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
|
||||
getContext());
|
||||
}
|
||||
|
||||
if (isGFX1250())
|
||||
if (isGFX1250Plus())
|
||||
MCKernelDescriptor::bits_set(KD.compute_pgm_rsrc3, NamedBarCnt,
|
||||
COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
|
||||
COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
|
||||
|
||||
@ -580,7 +580,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
|
||||
|
||||
// Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
|
||||
// encodings
|
||||
if (isGFX1250() && Bytes.size() >= 16) {
|
||||
if (isGFX1250Plus() && Bytes.size() >= 16) {
|
||||
std::bitset<128> DecW = eat16Bytes(Bytes);
|
||||
if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
|
||||
break;
|
||||
@ -694,6 +694,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
|
||||
tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
|
||||
break;
|
||||
|
||||
if (isGFX13() && tryDecodeInst(DecoderTableGFX1364, MI, QW, Address, CS))
|
||||
break;
|
||||
|
||||
// Reinitialize Bytes
|
||||
Bytes = Bytes_.slice(0, MaxInstBytesNum);
|
||||
}
|
||||
@ -2242,6 +2245,16 @@ bool AMDGPUDisassembler::isGFX12Plus() const {
|
||||
|
||||
// Disassembler subtarget queries: each forwards to the AMDGPU:: helper of the
// same name using this disassembler's STI.
bool AMDGPUDisassembler::isGFX1250() const { return AMDGPU::isGFX1250(STI); }
|
||||
|
||||
// Forwards to AMDGPU::isGFX1250Plus(STI).
bool AMDGPUDisassembler::isGFX1250Plus() const {
|
||||
return AMDGPU::isGFX1250Plus(STI);
|
||||
}
|
||||
|
||||
// Forwards to AMDGPU::isGFX13(STI).
bool AMDGPUDisassembler::isGFX13() const { return AMDGPU::isGFX13(STI); }
|
||||
|
||||
// Forwards to AMDGPU::isGFX13Plus(STI).
bool AMDGPUDisassembler::isGFX13Plus() const {
|
||||
return AMDGPU::isGFX13Plus(STI);
|
||||
}
|
||||
|
||||
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
|
||||
return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
|
||||
}
|
||||
@ -2398,7 +2411,7 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
|
||||
}
|
||||
|
||||
// Bits [27].
|
||||
if (isGFX1250()) {
|
||||
if (isGFX1250Plus()) {
|
||||
PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
|
||||
COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
|
||||
} else {
|
||||
@ -2412,7 +2425,7 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
|
||||
// Bits [29-31].
|
||||
if (isGFX10Plus()) {
|
||||
// WGP_MODE is not available on GFX1250.
|
||||
if (!isGFX1250()) {
|
||||
if (!isGFX1250Plus()) {
|
||||
PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
|
||||
COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
|
||||
}
|
||||
@ -2543,7 +2556,7 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
|
||||
}
|
||||
|
||||
// Bits [14-21].
|
||||
if (isGFX1250()) {
|
||||
if (isGFX1250Plus()) {
|
||||
PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
|
||||
COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
|
||||
PRINT_PSEUDO_DIRECTIVE_COMMENT(
|
||||
|
||||
@ -182,6 +182,9 @@ public:
|
||||
bool isGFX12() const;
|
||||
bool isGFX12Plus() const;
|
||||
bool isGFX1250() const;
|
||||
bool isGFX1250Plus() const;
|
||||
bool isGFX13() const;
|
||||
bool isGFX13Plus() const;
|
||||
|
||||
bool hasArchitectedFlatScratch() const;
|
||||
bool hasKernargPreload() const;
|
||||
|
||||
@ -1932,7 +1932,7 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
|
||||
}
|
||||
|
||||
bool GCNHazardRecognizer::fixVALUTransCoexecutionHazards(MachineInstr *MI) {
|
||||
if (!AMDGPU::isGFX1250(ST) || // Coexecution disabled.
|
||||
if (!ST.hasGFX1250Insts() || // Coexecution disabled.
|
||||
!SIInstrInfo::isVALU(*MI) || SIInstrInfo::isTRANS(*MI))
|
||||
return false;
|
||||
|
||||
@ -2077,7 +2077,7 @@ static bool IsWMMAHazardInstInCategory(const MachineInstr &MI,
|
||||
}
|
||||
|
||||
int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) {
|
||||
if (!AMDGPU::isGFX1250(ST))
|
||||
if (!ST.hasGFX1250Insts())
|
||||
return 0;
|
||||
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
|
||||
@ -339,5 +339,5 @@ def : ProcessorModel<"gfx1251", GFX1250SpeedModel,
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def : ProcessorModel<"gfx1310", GFX12SpeedModel,
|
||||
FeatureISAVersion12_50.Features
|
||||
FeatureISAVersion13.Features
|
||||
>;
|
||||
|
||||
@ -401,7 +401,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
|
||||
EmitMCExpr(KD.kernarg_size);
|
||||
OS << '\n';
|
||||
|
||||
if (isGFX1250(STI)) {
|
||||
if (isGFX1250Plus(STI)) {
|
||||
PrintField(KD.compute_pgm_rsrc2,
|
||||
amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
|
||||
amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
|
||||
@ -515,7 +515,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
|
||||
OS << '\n';
|
||||
}
|
||||
|
||||
if (AMDGPU::isGFX1250(STI))
|
||||
if (isGFX1250Plus(STI))
|
||||
PrintField(KD.compute_pgm_rsrc3,
|
||||
amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
|
||||
amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
|
||||
|
||||
@ -46,6 +46,7 @@ enum {
|
||||
GFX11 = 10,
|
||||
GFX12 = 11,
|
||||
GFX1250 = 12,
|
||||
GFX13 = 13,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -11743,7 +11743,7 @@ SITargetLowering::splitBufferOffsets(SDValue Offset, SelectionDAG &DAG) const {
|
||||
// On GFX1250+, voffset and immoffset are zero-extended from 32 bits before
|
||||
// being added, so we can only safely match a 32-bit addition with no
|
||||
// unsigned overflow.
|
||||
bool CheckNUW = AMDGPU::isGFX1250(*Subtarget);
|
||||
bool CheckNUW = Subtarget->hasGFX1250Insts();
|
||||
if (!CheckNUW || isNoUnsignedWrap(N0)) {
|
||||
C1 = cast<ConstantSDNode>(N0.getOperand(1));
|
||||
N0 = N0.getOperand(0);
|
||||
|
||||
@ -10239,6 +10239,8 @@ static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) {
|
||||
case AMDGPUSubtarget::GFX12:
|
||||
return ST.hasGFX1250Insts() ? SIEncodingFamily::GFX1250
|
||||
: SIEncodingFamily::GFX12;
|
||||
case AMDGPUSubtarget::GFX13:
|
||||
return SIEncodingFamily::GFX13;
|
||||
}
|
||||
llvm_unreachable("Unknown subtarget generation!");
|
||||
}
|
||||
@ -11260,7 +11262,7 @@ bool SIInstrInfo::isXDLWMMA(const MachineInstr &MI) const {
|
||||
if (!isWMMA(MI) && !isSWMMAC(MI))
|
||||
return false;
|
||||
|
||||
if (AMDGPU::isGFX1250(ST))
|
||||
if (ST.hasGFX1250Insts())
|
||||
return AMDGPU::getWMMAIsXDL(MI.getOpcode());
|
||||
|
||||
return true;
|
||||
|
||||
@ -27,6 +27,7 @@ def SIEncodingFamily {
|
||||
int GFX11 = 10;
|
||||
int GFX12 = 11;
|
||||
int GFX1250 = 12;
|
||||
int GFX13 = 13;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -3365,7 +3366,8 @@ def getMCOpcodeGen : InstrMapping {
|
||||
[!cast<string>(SIEncodingFamily.GFX940)],
|
||||
[!cast<string>(SIEncodingFamily.GFX11)],
|
||||
[!cast<string>(SIEncodingFamily.GFX12)],
|
||||
[!cast<string>(SIEncodingFamily.GFX1250)]];
|
||||
[!cast<string>(SIEncodingFamily.GFX1250)],
|
||||
[!cast<string>(SIEncodingFamily.GFX13)]];
|
||||
}
|
||||
|
||||
// Get equivalent SOPK instruction.
|
||||
|
||||
@ -512,9 +512,9 @@ protected:
|
||||
|
||||
public:
|
||||
SIGfx12CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {
|
||||
// GFX12.0 and GFX12.5 memory models greatly overlap, and in some cases
|
||||
// the behavior is the same if assuming GFX12.0 in CU mode.
|
||||
assert(!ST.hasGFX1250Insts() || ST.isCuModeEnabled());
|
||||
// GFX120x and GFX125x memory models greatly overlap, and in some cases
|
||||
// the behavior is the same if assuming GFX120x in CU mode.
|
||||
assert(!ST.hasGFX1250Insts() || ST.hasGFX13Insts() || ST.isCuModeEnabled());
|
||||
}
|
||||
|
||||
bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
|
||||
|
||||
@ -1464,7 +1464,7 @@ class SMEM_Real_gfx12Plus<bits<6> op, SM_Pseudo ps, string opName,
|
||||
class SMEM_Real_gfx12<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
|
||||
SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
|
||||
SGPR_NULL_gfx11plus> {
|
||||
let AssemblerPredicate = isGFX12Plus;
|
||||
let AssemblerPredicate = isGFX12Only;
|
||||
let DecoderNamespace = "GFX12";
|
||||
|
||||
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
|
||||
@ -1537,3 +1537,84 @@ multiclass SMEM_Real_Probe_gfx12<bits<6> op> {
|
||||
|
||||
defm S_ATC_PROBE : SMEM_Real_Probe_gfx12<0x22>;
|
||||
defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX13.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Real (encoded) SMEM instruction for GFX13. Builds on SMEM_Real_gfx12Plus
// with the GFX13 encoding family and SGPR_NULL_gfx11plus, is guarded by the
// isGFX13Plus assembler predicate, and decodes from the "GFX13" namespace.
//   op     - 6-bit opcode.
//   ps     - the SM_Pseudo this real instruction encodes.
//   opName - mnemonic; defaults to the pseudo's Mnemonic.
class SMEM_Real_gfx13<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
|
||||
SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX13,
|
||||
SGPR_NULL_gfx11plus> {
|
||||
let AssemblerPredicate = isGFX13Plus;
|
||||
let DecoderNamespace = "GFX13";
|
||||
|
||||
// sbase is encoded without its lowest bit (bits 6-1) in Inst{5-0}; the field
// is left unset when the pseudo has no sbase operand.
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
|
||||
// sdst occupies Inst{12-6}; left unset when the pseudo has no sdst operand.
let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
|
||||
}
|
||||
|
||||
// GFX13 real encoding for prefetch-style SMEM instructions. The sdst field of
// SMEM_Real_gfx13 is cleared and the Inst{12-6} range is instead filled from
// the 5 low bits of sdata, with the two upper bits forced to zero.
class SMEM_Real_Prefetch_gfx13<bits<6> op, SM_Pseudo ps> :
|
||||
SMEM_Real_gfx13<op, ps> {
|
||||
bits<7> sdata; // Only 5 bits of sdata are supported.
|
||||
|
||||
// Drop the inherited sdst binding so it does not conflict with sdata below.
let sdst = ?;
|
||||
let Inst{12-11} = 0; // Unused sdata bits.
|
||||
// Note the guard still tests ps.has_sdst even though the value comes from
// sdata.
let Inst{10-6} = !if(ps.has_sdst, sdata{4-0}, ?);
|
||||
}
|
||||
|
||||
// GFX13 real encoding for scalar loads. The pseudo is looked up by name as
// `ps # offsets.Variant`, so one instantiation exists per offset mode.
//   op      - 6-bit opcode.
//   ps      - base name of the load pseudo (without the offset-variant suffix).
//   opName  - mnemonic used for the real instruction.
//   offsets - offset mode; supplies the variant suffix and the offset operands.
class SMEM_Real_Load_gfx13<bits<6> op, string ps, string opName, OffsetMode offsets> :
|
||||
SMEM_Real_gfx13<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
|
||||
// Base register class is taken from the matching load pseudo.
RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
|
||||
// Operands: base, then the offset-mode operands, then the cache policy.
let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
|
||||
|
||||
// Cache-policy (cpol) bits are scattered across the encoding:
let Inst{20} = cpol{CPolBit.NV}; // non-volatile
|
||||
let Inst{22-21} = cpol{4-3}; // scope
|
||||
let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
|
||||
let Inst{56} = cpol{CPolBit.SCAL}; // scale offset
|
||||
}
|
||||
|
||||
// Instantiates the two GFX13 real encodings of a scalar load: the immediate
// offset form (_IMM_gfx13) and the SGPR + optional immediate form
// (_SGPR_IMM_gfx13).
//   op - 6-bit opcode shared by both forms.
//   ps - base name of the pseudo; defaults to the defm NAME, and is overridden
//        when the real instruction's name differs from the pseudo's.
multiclass SM_Real_Loads_gfx13<bits<6> op, string ps = NAME> {
|
||||
// The mnemonic is the lower-cased defm name, not the pseudo name.
defvar opName = !tolower(NAME);
|
||||
def _IMM_gfx13 : SMEM_Real_Load_gfx13<op, ps, opName, IMM_Offset>;
|
||||
def _SGPR_IMM_gfx13 : SMEM_Real_Load_gfx13<op, ps, opName, SGPR_IMM_OptOffset>;
|
||||
}
|
||||
|
||||
defm S_LOAD_B32 : SM_Real_Loads_gfx13<0x00, "S_LOAD_DWORD">;
|
||||
defm S_LOAD_B64 : SM_Real_Loads_gfx13<0x01, "S_LOAD_DWORDX2">;
|
||||
defm S_LOAD_B96 : SM_Real_Loads_gfx13<0x0e, "S_LOAD_DWORDX3">;
|
||||
defm S_LOAD_B128 : SM_Real_Loads_gfx13<0x02, "S_LOAD_DWORDX4">;
|
||||
defm S_LOAD_B256 : SM_Real_Loads_gfx13<0x03, "S_LOAD_DWORDX8">;
|
||||
defm S_LOAD_B512 : SM_Real_Loads_gfx13<0x04, "S_LOAD_DWORDX16">;
|
||||
|
||||
defm S_LOAD_I8 : SM_Real_Loads_gfx13<0x30>;
|
||||
defm S_LOAD_U8 : SM_Real_Loads_gfx13<0x31>;
|
||||
defm S_LOAD_I16 : SM_Real_Loads_gfx13<0x32>;
|
||||
defm S_LOAD_U16 : SM_Real_Loads_gfx13<0x33>;
|
||||
|
||||
defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx13<0x08, "S_BUFFER_LOAD_DWORD">;
|
||||
defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx13<0x09, "S_BUFFER_LOAD_DWORDX2">;
|
||||
defm S_BUFFER_LOAD_B96 : SM_Real_Loads_gfx13<0x0d, "S_BUFFER_LOAD_DWORDX3">;
|
||||
defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx13<0x0a, "S_BUFFER_LOAD_DWORDX4">;
|
||||
defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx13<0x0b, "S_BUFFER_LOAD_DWORDX8">;
|
||||
defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx13<0x0c, "S_BUFFER_LOAD_DWORDX16">;
|
||||
|
||||
defm S_BUFFER_LOAD_I8 : SM_Real_Loads_gfx13<0x34>;
|
||||
defm S_BUFFER_LOAD_U8 : SM_Real_Loads_gfx13<0x35>;
|
||||
defm S_BUFFER_LOAD_I16 : SM_Real_Loads_gfx13<0x36>;
|
||||
defm S_BUFFER_LOAD_U16 : SM_Real_Loads_gfx13<0x37>;
|
||||
|
||||
def S_DCACHE_INV_gfx13 : SMEM_Real_gfx13<0x020, S_DCACHE_INV>;
|
||||
|
||||
def S_PREFETCH_INST_gfx13 : SMEM_Real_Prefetch_gfx13<0x22, S_PREFETCH_INST>;
|
||||
def S_PREFETCH_INST_PC_REL_gfx13 : SMEM_Real_Prefetch_gfx13<0x23, S_PREFETCH_INST_PC_REL>;
|
||||
def S_PREFETCH_DATA_gfx13 : SMEM_Real_Prefetch_gfx13<0x2c, S_PREFETCH_DATA>;
|
||||
def S_BUFFER_PREFETCH_DATA_gfx13 : SMEM_Real_Prefetch_gfx13<0x2d, S_BUFFER_PREFETCH_DATA>;
|
||||
def S_PREFETCH_DATA_PC_REL_gfx13 : SMEM_Real_Prefetch_gfx13<0x2e, S_PREFETCH_DATA_PC_REL>;
|
||||
|
||||
// Instantiates the GFX13 real encodings of a probe instruction, reusing the
// prefetch encoding class. The pseudos are looked up as NAME#_IMM and
// NAME#_SGPR_OPT_IMM; note the second real record is named _SGPR_IMM_gfx13
// even though its pseudo suffix is _SGPR_OPT_IMM.
multiclass SMEM_Real_Probe_gfx13<bits<6> op> {
|
||||
defvar ps = NAME;
|
||||
def _IMM_gfx13 : SMEM_Real_Prefetch_gfx13<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
|
||||
def _SGPR_IMM_gfx13 : SMEM_Real_Prefetch_gfx13<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_OPT_IMM)>;
|
||||
}
|
||||
|
||||
defm S_ATC_PROBE : SMEM_Real_Probe_gfx13<0x26>;
|
||||
defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx13<0x27>;
|
||||
|
||||
@ -99,7 +99,7 @@ static constexpr CustomOperand MsgOperands[] = {
|
||||
{{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10},
|
||||
{{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus},
|
||||
{{"MSG_GET_DOORBELL"}, ID_GET_DOORBELL, isGFX9_GFX10},
|
||||
{{"MSG_SAVEWAVE_HAS_TDM"}, ID_SAVEWAVE_HAS_TDM, isGFX1250},
|
||||
{{"MSG_SAVEWAVE_HAS_TDM"}, ID_SAVEWAVE_HAS_TDM, isGFX1250Plus},
|
||||
{{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10},
|
||||
{{"MSG_SYSMSG"}, ID_SYSMSG},
|
||||
{{"MSG_RTN_GET_DOORBELL"}, ID_RTN_GET_DOORBELL, isGFX11Plus},
|
||||
@ -111,7 +111,7 @@ static constexpr CustomOperand MsgOperands[] = {
|
||||
{{"MSG_RTN_GET_TBA_TO_PC"}, ID_RTN_GET_TBA_TO_PC, isGFX11Plus},
|
||||
{{"MSG_RTN_GET_SE_AID_ID"}, ID_RTN_GET_SE_AID_ID, isGFX12Plus},
|
||||
{{"MSG_RTN_GET_CLUSTER_BARRIER_STATE"}, ID_RTN_GET_CLUSTER_BARRIER_STATE,
|
||||
isGFX1250},
|
||||
isGFX1250Plus},
|
||||
};
|
||||
|
||||
static constexpr CustomOperand SysMsgOperands[] = {
|
||||
@ -213,7 +213,7 @@ static constexpr CustomOperand Operands[] = {
|
||||
{{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10},
|
||||
{{"HW_REG_WAVE_SCHED_MODE"}, ID_SCHED_MODE, isGFX12Plus},
|
||||
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
|
||||
{{"HW_REG_IB_STS2"}, ID_IB_STS2, isGFX1250},
|
||||
{{"HW_REG_IB_STS2"}, ID_IB_STS2, isGFX1250Plus},
|
||||
{{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11},
|
||||
{{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},
|
||||
{{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus},
|
||||
@ -221,8 +221,8 @@ static constexpr CustomOperand Operands[] = {
|
||||
{{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus},
|
||||
{{"HW_REG_WAVE_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus},
|
||||
{{"HW_REG_DVGPR_ALLOC_HI"}, ID_DVGPR_ALLOC_HI, isGFX12Plus},
|
||||
{{"HW_REG_XNACK_STATE_PRIV"}, ID_XNACK_STATE_PRIV, isGFX1250},
|
||||
{{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK_gfx1250, isGFX1250},
|
||||
{{"HW_REG_XNACK_STATE_PRIV"}, ID_XNACK_STATE_PRIV, isGFX1250Plus},
|
||||
{{"HW_REG_XNACK_MASK"}, ID_XNACK_MASK_gfx1250, isGFX1250Plus},
|
||||
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
@ -2514,7 +2514,7 @@ unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
|
||||
}
|
||||
|
||||
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) {
|
||||
if (isGFX1250(STI))
|
||||
if (isGFX1250Plus(STI))
|
||||
return 32;
|
||||
return 16;
|
||||
}
|
||||
@ -2581,14 +2581,26 @@ bool isGFX12(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureGFX12];
|
||||
}
|
||||
|
||||
bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
|
||||
bool isGFX12Plus(const MCSubtargetInfo &STI) {
|
||||
return isGFX12(STI) || isGFX13Plus(STI);
|
||||
}
|
||||
|
||||
bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
|
||||
|
||||
// Returns true only for GFX1250-family subtargets proper: the
// FeatureGFX1250Insts bit must be set AND the subtarget must not be GFX13.
// Use isGFX1250Plus() when GFX13 (which also sets the GFX1250 feature bit in
// FeatureISAVersion13) should be included.
bool isGFX1250(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts] && !isGFX13(STI);
|
||||
}
|
||||
|
||||
// Returns true for any subtarget with the FeatureGFX1250Insts bit set. Unlike
// isGFX1250(), this does not exclude GFX13.
bool isGFX1250Plus(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureGFX1250Insts];
|
||||
}
|
||||
|
||||
// Returns true when the subtarget has the FeatureGFX13 generation bit set.
bool isGFX13(const MCSubtargetInfo &STI) {
|
||||
return STI.getFeatureBits()[AMDGPU::FeatureGFX13];
|
||||
}
|
||||
|
||||
// Currently identical to isGFX13(): it simply forwards to it.
bool isGFX13Plus(const MCSubtargetInfo &STI) { return isGFX13(STI); }
|
||||
|
||||
bool supportsWGP(const MCSubtargetInfo &STI) {
|
||||
if (isGFX1250(STI))
|
||||
return false;
|
||||
|
||||
@ -1599,6 +1599,9 @@ bool isGFX11Plus(const MCSubtargetInfo &STI);
|
||||
bool isGFX12(const MCSubtargetInfo &STI);
|
||||
bool isGFX12Plus(const MCSubtargetInfo &STI);
|
||||
bool isGFX1250(const MCSubtargetInfo &STI);
|
||||
bool isGFX1250Plus(const MCSubtargetInfo &STI);
|
||||
bool isGFX13(const MCSubtargetInfo &STI);
|
||||
bool isGFX13Plus(const MCSubtargetInfo &STI);
|
||||
bool supportsWGP(const MCSubtargetInfo &STI);
|
||||
bool isNotGFX12Plus(const MCSubtargetInfo &STI);
|
||||
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,GFX1100,GFX1100W32 %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX1100,GFX1100W64 %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250 %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1310 < %s | FileCheck --check-prefixes=GCN,GFX1250 %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1310 < %s | FileCheck --check-prefixes=GCN,GFX1100,GFX1100W32 %s
|
||||
|
||||
; GCN-LABEL: {{^}}max_occupancy:
|
||||
; GFX9: ; Occupancy: 10
|
||||
|
||||
1428
llvm/test/MC/AMDGPU/gfx13_smem.s
Normal file
1428
llvm/test/MC/AMDGPU/gfx13_smem.s
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user