AMDGPU/GlobalISel: Fix isExtractHiElt when selecting fma_mix (#102130)
isExtractHiElt should return new source register instead of returning instruction that defines it. Src = MI.getOperand(0).getReg() is not correct when MI(for example G_UNMERGE_VALUES) defines multiple registers. Refactor existing code to work with source registers only.
This commit is contained in:
parent
e8429986c8
commit
269cefbc02
@ -1372,8 +1372,8 @@ bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
|
||||
MachineInstrBuilder SelectedMI;
|
||||
MachineOperand &LHS = I.getOperand(2);
|
||||
MachineOperand &RHS = I.getOperand(3);
|
||||
auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
|
||||
auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
|
||||
auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
|
||||
auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
|
||||
Register Src0Reg =
|
||||
copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR*/ true);
|
||||
Register Src1Reg =
|
||||
@ -2467,14 +2467,48 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
static Register stripCopy(Register Reg, MachineRegisterInfo &MRI) {
|
||||
return getDefSrcRegIgnoringCopies(Reg, MRI)->Reg;
|
||||
}
|
||||
|
||||
static Register stripBitCast(Register Reg, MachineRegisterInfo &MRI) {
|
||||
Register BitcastSrc;
|
||||
if (mi_match(Reg, MRI, m_GBitcast(m_Reg(BitcastSrc))))
|
||||
Reg = BitcastSrc;
|
||||
return Reg;
|
||||
}
|
||||
|
||||
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In,
|
||||
Register &Out) {
|
||||
Register Trunc;
|
||||
if (!mi_match(In, MRI, m_GTrunc(m_Reg(Trunc))))
|
||||
return false;
|
||||
|
||||
Register LShlSrc;
|
||||
if (mi_match(In, MRI,
|
||||
m_GTrunc(m_GLShr(m_Reg(LShlSrc), m_SpecificICst(16))))) {
|
||||
Out = LShlSrc;
|
||||
Register Cst;
|
||||
if (mi_match(Trunc, MRI, m_GLShr(m_Reg(LShlSrc), m_Reg(Cst)))) {
|
||||
Cst = stripCopy(Cst, MRI);
|
||||
if (mi_match(Cst, MRI, m_SpecificICst(16))) {
|
||||
Out = stripBitCast(LShlSrc, MRI);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
MachineInstr *Shuffle = MRI.getVRegDef(Trunc);
|
||||
if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
|
||||
return false;
|
||||
|
||||
assert(MRI.getType(Shuffle->getOperand(0).getReg()) ==
|
||||
LLT::fixed_vector(2, 16));
|
||||
|
||||
ArrayRef<int> Mask = Shuffle->getOperand(3).getShuffleMask();
|
||||
assert(Mask.size() == 2);
|
||||
|
||||
if (Mask[0] == 1 && Mask[1] <= 1) {
|
||||
Out = Shuffle->getOperand(0).getReg();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -3550,11 +3584,8 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
|
||||
|
||||
}
|
||||
|
||||
std::pair<Register, unsigned>
|
||||
AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
|
||||
bool IsCanonicalizing,
|
||||
bool AllowAbs, bool OpSel) const {
|
||||
Register Src = Root.getReg();
|
||||
std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
|
||||
Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
|
||||
unsigned Mods = 0;
|
||||
MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);
|
||||
|
||||
@ -3617,7 +3648,7 @@ InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
|
||||
|
||||
return {{
|
||||
[=](MachineInstrBuilder &MIB) {
|
||||
@ -3633,7 +3664,7 @@ InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
|
||||
/*IsCanonicalizing=*/true,
|
||||
/*AllowAbs=*/false);
|
||||
|
||||
@ -3660,7 +3691,7 @@ InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
|
||||
|
||||
return {{
|
||||
[=](MachineInstrBuilder &MIB) {
|
||||
@ -3675,7 +3706,8 @@ AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
|
||||
MachineOperand &Root) const {
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/false);
|
||||
std::tie(Src, Mods) =
|
||||
selectVOP3ModsImpl(Root.getReg(), /*IsCanonicalizing=*/false);
|
||||
|
||||
return {{
|
||||
[=](MachineInstrBuilder &MIB) {
|
||||
@ -3689,8 +3721,9 @@ InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/true,
|
||||
/*AllowAbs=*/false);
|
||||
std::tie(Src, Mods) =
|
||||
selectVOP3ModsImpl(Root.getReg(), /*IsCanonicalizing=*/true,
|
||||
/*AllowAbs=*/false);
|
||||
|
||||
return {{
|
||||
[=](MachineInstrBuilder &MIB) {
|
||||
@ -4016,7 +4049,7 @@ InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
|
||||
|
||||
// FIXME: Handle op_sel
|
||||
return {{
|
||||
@ -4029,7 +4062,7 @@ InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
|
||||
/*IsCanonicalizing=*/true,
|
||||
/*AllowAbs=*/false,
|
||||
/*OpSel=*/false);
|
||||
@ -4047,7 +4080,7 @@ InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
|
||||
/*IsCanonicalizing=*/true,
|
||||
/*AllowAbs=*/false,
|
||||
/*OpSel=*/true);
|
||||
@ -5229,59 +5262,6 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
|
||||
[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
|
||||
}
|
||||
|
||||
// Variant of stripBitCast that returns the instruction instead of a
|
||||
// MachineOperand.
|
||||
static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) {
|
||||
if (MI->getOpcode() == AMDGPU::G_BITCAST)
|
||||
return getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
|
||||
return MI;
|
||||
}
|
||||
|
||||
// Figure out if this is really an extract of the high 16-bits of a dword,
|
||||
// returns nullptr if it isn't.
|
||||
static MachineInstr *isExtractHiElt(MachineInstr *Inst,
|
||||
MachineRegisterInfo &MRI) {
|
||||
Inst = stripBitCast(Inst, MRI);
|
||||
|
||||
if (Inst->getOpcode() != AMDGPU::G_TRUNC)
|
||||
return nullptr;
|
||||
|
||||
MachineInstr *TruncOp =
|
||||
getDefIgnoringCopies(Inst->getOperand(1).getReg(), MRI);
|
||||
TruncOp = stripBitCast(TruncOp, MRI);
|
||||
|
||||
// G_LSHR x, (G_CONSTANT i32 16)
|
||||
if (TruncOp->getOpcode() == AMDGPU::G_LSHR) {
|
||||
auto SrlAmount = getIConstantVRegValWithLookThrough(
|
||||
TruncOp->getOperand(2).getReg(), MRI);
|
||||
if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) {
|
||||
MachineInstr *SrlOp =
|
||||
getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
|
||||
return stripBitCast(SrlOp, MRI);
|
||||
}
|
||||
}
|
||||
|
||||
// G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0)
|
||||
// 1, 0 swaps the low/high 16 bits.
|
||||
// 1, 1 sets the high 16 bits to be the same as the low 16.
|
||||
// in any case, it selects the high elts.
|
||||
if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) {
|
||||
assert(MRI.getType(TruncOp->getOperand(0).getReg()) ==
|
||||
LLT::fixed_vector(2, 16));
|
||||
|
||||
ArrayRef<int> Mask = TruncOp->getOperand(3).getShuffleMask();
|
||||
assert(Mask.size() == 2);
|
||||
|
||||
if (Mask[0] == 1 && Mask[1] <= 1) {
|
||||
MachineInstr *LHS =
|
||||
getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
|
||||
return stripBitCast(LHS, MRI);
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::pair<Register, unsigned>
|
||||
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
|
||||
bool &Matched) const {
|
||||
@ -5289,37 +5269,34 @@ AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
|
||||
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
|
||||
|
||||
MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);
|
||||
if (MI->getOpcode() == AMDGPU::G_FPEXT) {
|
||||
MachineOperand *MO = &MI->getOperand(1);
|
||||
Src = MO->getReg();
|
||||
MI = getDefIgnoringCopies(Src, *MRI);
|
||||
std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
|
||||
|
||||
if (mi_match(Src, *MRI, m_GFPExt(m_Reg(Src)))) {
|
||||
assert(MRI->getType(Src) == LLT::scalar(16));
|
||||
|
||||
// See through bitcasts.
|
||||
// FIXME: Would be nice to use stripBitCast here.
|
||||
if (MI->getOpcode() == AMDGPU::G_BITCAST) {
|
||||
MO = &MI->getOperand(1);
|
||||
Src = MO->getReg();
|
||||
MI = getDefIgnoringCopies(Src, *MRI);
|
||||
}
|
||||
// Only change Src if src modifier could be gained. In such cases new Src
|
||||
// could be sgpr but this does not violate constant bus restriction for
|
||||
// instruction that is being selected.
|
||||
// Note: Src is not changed when there is only a simple sgpr to vgpr copy
|
||||
// since this could violate constant bus restriction.
|
||||
Register PeekSrc = stripCopy(Src, *MRI);
|
||||
|
||||
const auto CheckAbsNeg = [&]() {
|
||||
// Be careful about folding modifiers if we already have an abs. fneg is
|
||||
// applied last, so we don't want to apply an earlier fneg.
|
||||
if ((Mods & SISrcMods::ABS) == 0) {
|
||||
unsigned ModsTmp;
|
||||
std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO);
|
||||
MI = getDefIgnoringCopies(Src, *MRI);
|
||||
std::tie(PeekSrc, ModsTmp) = selectVOP3ModsImpl(PeekSrc);
|
||||
|
||||
if ((ModsTmp & SISrcMods::NEG) != 0)
|
||||
if ((ModsTmp & SISrcMods::NEG) != 0) {
|
||||
Mods ^= SISrcMods::NEG;
|
||||
Src = PeekSrc;
|
||||
}
|
||||
|
||||
if ((ModsTmp & SISrcMods::ABS) != 0)
|
||||
if ((ModsTmp & SISrcMods::ABS) != 0) {
|
||||
Mods |= SISrcMods::ABS;
|
||||
Src = PeekSrc;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -5332,12 +5309,9 @@ AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
|
||||
|
||||
Mods |= SISrcMods::OP_SEL_1;
|
||||
|
||||
if (MachineInstr *ExtractHiEltMI = isExtractHiElt(MI, *MRI)) {
|
||||
if (isExtractHiElt(*MRI, PeekSrc, PeekSrc)) {
|
||||
Src = PeekSrc;
|
||||
Mods |= SISrcMods::OP_SEL_0;
|
||||
MI = ExtractHiEltMI;
|
||||
MO = &MI->getOperand(0);
|
||||
Src = MO->getReg();
|
||||
|
||||
CheckAbsNeg();
|
||||
}
|
||||
|
||||
|
||||
@ -150,7 +150,7 @@ private:
|
||||
bool selectSBarrierSignalIsfirst(MachineInstr &I, Intrinsic::ID IID) const;
|
||||
bool selectSBarrierLeave(MachineInstr &I) const;
|
||||
|
||||
std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
|
||||
std::pair<Register, unsigned> selectVOP3ModsImpl(Register Src,
|
||||
bool IsCanonicalizing = true,
|
||||
bool AllowAbs = true,
|
||||
bool OpSel = false) const;
|
||||
|
||||
@ -446,28 +446,28 @@ define amdgpu_ps float @test_matching_source_from_unmerge(ptr addrspace(3) %aptr
|
||||
; GFX9-DENORM: ; %bb.0: ; %.entry
|
||||
; GFX9-DENORM-NEXT: ds_read_b64 v[2:3], v0
|
||||
; GFX9-DENORM-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, v3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: test_matching_source_from_unmerge:
|
||||
; GFX10: ; %bb.0: ; %.entry
|
||||
; GFX10-NEXT: ds_read_b64 v[2:3], v0
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
||||
; GFX10-NEXT: v_fma_mix_f32 v0, v2, v3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-CONTRACT-LABEL: test_matching_source_from_unmerge:
|
||||
; GFX10-CONTRACT: ; %bb.0: ; %.entry
|
||||
; GFX10-CONTRACT-NEXT: ds_read_b64 v[2:3], v0
|
||||
; GFX10-CONTRACT-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
||||
; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v2, v3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
||||
; GFX10-CONTRACT-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-DENORM-LABEL: test_matching_source_from_unmerge:
|
||||
; GFX10-DENORM: ; %bb.0: ; %.entry
|
||||
; GFX10-DENORM-NEXT: ds_read_b64 v[2:3], v0
|
||||
; GFX10-DENORM-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v2, v2, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
||||
; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v2, v3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0]
|
||||
; GFX10-DENORM-NEXT: ; return to shader part epilog
|
||||
.entry:
|
||||
%a = load <4 x half>, ptr addrspace(3) %aptr, align 16
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user