[AMDGPU] Add subtarget feature for MAD_U64/I64 bug on GFX11
Differential Revision: https://reviews.llvm.org/D133012
This commit is contained in:
parent
e04d2e20c3
commit
e58b116843
@ -279,6 +279,12 @@ def FeatureImageGather4D16Bug : SubtargetFeature<"image-gather4-d16-bug",
|
||||
"Image Gather4 D16 hardware bug"
|
||||
>;
|
||||
|
||||
// Subtarget feature marking hardware affected by the MAD_U64/I64
// intra-instruction forwarding bug. Selecting "mad-intra-fwd-bug" sets the
// subtarget attribute HasMADIntraFwdBug to "true".
def FeatureMADIntraFwdBug : SubtargetFeature<"mad-intra-fwd-bug",
|
||||
"HasMADIntraFwdBug",
|
||||
"true",
|
||||
"MAD_U64/I64 intra instruction forwarding bug"
|
||||
>;
|
||||
|
||||
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
|
||||
"ldsbankcount"#Value,
|
||||
"LDSBankCount",
|
||||
@ -1299,7 +1305,8 @@ def FeatureISAVersion11_Common : FeatureSet<
|
||||
FeatureImageInsts,
|
||||
FeaturePackedTID,
|
||||
FeatureVcmpxPermlaneHazard,
|
||||
FeatureBackOffBarrier]>;
|
||||
FeatureBackOffBarrier,
|
||||
FeatureMADIntraFwdBug]>;
|
||||
|
||||
def FeatureISAVersion11_0_0 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_Common.Features,
|
||||
@ -1782,6 +1789,10 @@ def DisableFlatScratch : Predicate<"!Subtarget->enableFlatScratch()">;
|
||||
def HasUnalignedAccessMode : Predicate<"Subtarget->hasUnalignedAccessMode()">,
|
||||
AssemblerPredicate<(all_of FeatureUnalignedAccessMode)>;
|
||||
|
||||
// Codegen predicate that holds when Subtarget->hasMADIntraFwdBug() is true.
// Declared as a plain Predicate; no AssemblerPredicate is attached here.
def HasMADIntraFwdBug : Predicate<"Subtarget->hasMADIntraFwdBug()">;
|
||||
|
||||
// Codegen predicate that holds when Subtarget->hasMADIntraFwdBug() is false,
// i.e. the logical negation of the hasMADIntraFwdBug() query.
def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">;
|
||||
|
||||
// Include AMDGPU TD files
|
||||
include "SISchedule.td"
|
||||
include "GCNProcessors.td"
|
||||
|
||||
@ -1008,7 +1008,7 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
|
||||
SDLoc SL(N);
|
||||
bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
|
||||
unsigned Opc;
|
||||
if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
|
||||
if (Subtarget->hasMADIntraFwdBug())
|
||||
Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
|
||||
: AMDGPU::V_MAD_U64_U32_gfx11_e64;
|
||||
else
|
||||
@ -1026,7 +1026,7 @@ void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
|
||||
SDLoc SL(N);
|
||||
bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
|
||||
unsigned Opc;
|
||||
if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
|
||||
if (Subtarget->hasMADIntraFwdBug())
|
||||
Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
|
||||
: AMDGPU::V_MAD_U64_U32_gfx11_e64;
|
||||
else
|
||||
|
||||
@ -465,7 +465,7 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
|
||||
const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
|
||||
|
||||
unsigned Opc;
|
||||
if (Subtarget->getGeneration() == AMDGPUSubtarget::GFX11)
|
||||
if (Subtarget->hasMADIntraFwdBug())
|
||||
Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
|
||||
: AMDGPU::V_MAD_I64_I32_gfx11_e64;
|
||||
else
|
||||
|
||||
@ -193,6 +193,7 @@ protected:
|
||||
bool HasImageStoreD16Bug = false;
|
||||
bool HasImageGather4D16Bug = false;
|
||||
bool HasGFX11FullVGPRs = false;
|
||||
bool HasMADIntraFwdBug = false;
|
||||
bool HasVOPDInsts = false;
|
||||
|
||||
// Dummy feature to use for assembler in tablegen.
|
||||
@ -910,6 +911,8 @@ public:
|
||||
|
||||
bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }
|
||||
|
||||
// Returns the HasMADIntraFwdBug flag: true when the subtarget is marked as
// having the MAD_U64/I64 intra-instruction forwarding bug.
bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
|
||||
|
||||
bool hasNSAEncoding() const { return HasNSAEncoding; }
|
||||
|
||||
unsigned getNSAMaxSize() const { return NSAMaxSize; }
|
||||
|
||||
@ -289,18 +289,17 @@ defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
|
||||
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
|
||||
} // End SubtargetPredicate = isGFX7Plus
|
||||
|
||||
let isCommutable = 1 in {
|
||||
let SchedRW = [WriteIntMul, WriteSALU] in {
|
||||
let SubtargetPredicate = isGFX7GFX8GFX9GFX10 in {
|
||||
defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
|
||||
defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
|
||||
}
|
||||
let SubtargetPredicate = isGFX11Only, Constraints = "@earlyclobber $vdst" in {
|
||||
defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32_gfx11", VOP3b_I64_I1_I32_I32_I64>;
|
||||
defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32_gfx11", VOP3b_I64_I1_I32_I32_I64>;
|
||||
} // End SubtargetPredicate = isGFX11Only, Constraints = "@earlyclobber $vdst"
|
||||
} // End SchedRW = [WriteIntMul, WriteSALU]
|
||||
} // End isCommutable = 1
|
||||
// 64-bit multiply-add instructions (u64 = u32 * u32 + u64, and the signed
// form). Two sets of pseudos share the same mnemonics and operand profile;
// which set is usable is decided by the MAD intra-forwarding-bug predicates.
let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
|
||||
// Regular variants: GFX7 and later, only when the subtarget does NOT have
// the MAD intra-forwarding bug.
let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in {
|
||||
defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
|
||||
defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
|
||||
}
|
||||
// Workaround variants: GFX11 only, when the subtarget HAS the bug. They differ
// from the regular variants by the "@earlyclobber $vdst" constraint
// (presumably so $vdst is never allocated on top of a source register --
// confirm against the hardware bug description).
let SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug],
|
||||
Constraints = "@earlyclobber $vdst" in {
|
||||
defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
|
||||
defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
|
||||
}
|
||||
} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU]
|
||||
|
||||
|
||||
let FPDPRounding = 1 in {
|
||||
@ -658,10 +657,11 @@ multiclass IMAD32_Pats <VOP3_Pseudo inst> {
|
||||
>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = isGFX9GFX10 in // exclude pre-GFX9 where it was slow
|
||||
defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
|
||||
let SubtargetPredicate = isGFX11Only in
|
||||
defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
|
||||
// Instantiate the 32-bit integer MAD patterns on top of V_MAD_U64_U32.
// exclude pre-GFX9 where it was slow
|
||||
// GFX9 and later without the MAD intra-forwarding bug: use the regular pseudo.
let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in
|
||||
defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
|
||||
// GFX11 with the MAD intra-forwarding bug: use the _gfx11 pseudo instead.
let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in
|
||||
defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
|
||||
|
||||
def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
|
||||
let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user