[AMDGPU] Fix and simplify patterns selecting fsub to v_fma_mix_f32 (#180169)
Select (fsub x, y) -> (fma y, -1.0, x). Using -1.0 as the constant avoids the need for ComplexPatterns to negate x or y. This also fixes the bad pattern (fsub x, y) -> (fma -x, 1.0, y), which computed y - x instead of x - y.
This commit is contained in:
parent
5283f46615
commit
4a6697f393
@ -205,10 +205,6 @@ def gi_vop3_mad_mix_mods_ext :
|
||||
GIComplexOperandMatcher<s64, "selectVOP3PMadMixModsExt">,
|
||||
GIComplexPatternEquiv<VOP3PMadMixModsExt>;
|
||||
|
||||
def gi_vop3_mad_mix_mods_neg :
|
||||
GIComplexOperandMatcher<s64, "selectVOP3PMadMixModsNeg">,
|
||||
GIComplexPatternEquiv<VOP3PMadMixModsNeg>;
|
||||
|
||||
// Separate load nodes are defined to glue m0 initialization in
|
||||
// SelectionDAG. The GISel selector can just insert m0 initialization
|
||||
// directly before selecting a glue-less load, so hide this
|
||||
|
||||
@ -4204,24 +4204,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsNeg(SDValue In, SDValue &Src,
|
||||
SDValue &SrcMods) const {
|
||||
unsigned Mods = 0;
|
||||
SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
|
||||
Mods ^= SISrcMods::NEG;
|
||||
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsNeg(SDValue In, SDValue &Src,
|
||||
SDValue &SrcMods) const {
|
||||
unsigned Mods = 0;
|
||||
SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
|
||||
Mods ^= SISrcMods::NEG;
|
||||
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Match BITOP3 operation and return a number of matched instructions plus
|
||||
// truth table.
|
||||
static std::pair<unsigned, uint8_t> BitOp3_Op(SDValue In,
|
||||
|
||||
@ -260,11 +260,6 @@ private:
|
||||
bool SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,
|
||||
SDValue &SrcMods) const;
|
||||
|
||||
bool SelectVOP3PMadMixModsNeg(SDValue In, SDValue &Src,
|
||||
SDValue &SrcMods) const;
|
||||
bool SelectVOP3PMadMixBF16ModsNeg(SDValue In, SDValue &Src,
|
||||
SDValue &SrcMods) const;
|
||||
|
||||
bool SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1, SDValue &Src2,
|
||||
SDValue &Tbl) const;
|
||||
|
||||
|
||||
@ -6930,21 +6930,6 @@ AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
|
||||
}};
|
||||
}
|
||||
|
||||
InstructionSelector::ComplexRendererFns
|
||||
AMDGPUInstructionSelector::selectVOP3PMadMixModsNeg(
|
||||
MachineOperand &Root) const {
|
||||
Register Src;
|
||||
unsigned Mods;
|
||||
bool Matched;
|
||||
std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
|
||||
Mods ^= SISrcMods::NEG;
|
||||
|
||||
return {{
|
||||
[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
|
||||
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
|
||||
}};
|
||||
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
|
||||
MachineInstr &I, Intrinsic::ID IntrID) const {
|
||||
MachineBasicBlock *MBB = I.getParent();
|
||||
|
||||
@ -343,7 +343,6 @@ private:
|
||||
bool &Matched) const;
|
||||
ComplexRendererFns selectVOP3PMadMixModsExt(MachineOperand &Root) const;
|
||||
ComplexRendererFns selectVOP3PMadMixMods(MachineOperand &Root) const;
|
||||
ComplexRendererFns selectVOP3PMadMixModsNeg(MachineOperand &Root) const;
|
||||
|
||||
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
int OpIdx = -1) const;
|
||||
|
||||
@ -744,6 +744,7 @@ int FP32_NEG_ONE = 0xbf800000;
|
||||
int FP64_ONE = 0x3ff0000000000000;
|
||||
int FP64_NEG_ONE = 0xbff0000000000000;
|
||||
int BF16_ONE = 0x3F80;
|
||||
int BF16_NEG_ONE = 0xBF80;
|
||||
}
|
||||
def CONST : Constants;
|
||||
|
||||
|
||||
@ -1712,8 +1712,6 @@ def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
|
||||
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
|
||||
def VOP3PMadMixBF16ModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16ModsExt">;
|
||||
def VOP3PMadMixBF16Mods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16Mods">;
|
||||
def VOP3PMadMixModsNeg : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsNeg">;
|
||||
def VOP3PMadMixBF16ModsNeg : ComplexPattern<untyped, 2, "SelectVOP3PMadMixBF16ModsNeg">;
|
||||
|
||||
def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
|
||||
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
|
||||
|
||||
@ -182,8 +182,8 @@ multiclass MadFmaMixFP32Pats<SDPatternOperator fma_like,
|
||||
ValueType VT = f16> {
|
||||
defvar VOP3PMadMixModsPat = !if (!eq(VT, bf16), VOP3PMadMixBF16Mods, VOP3PMadMixMods);
|
||||
defvar VOP3PMadMixModsExtPat = !if (!eq(VT, bf16), VOP3PMadMixBF16ModsExt, VOP3PMadMixModsExt);
|
||||
defvar VOP3PMadMixModsNegPat = !if (!eq(VT, bf16), VOP3PMadMixBF16ModsNeg, VOP3PMadMixModsNeg);
|
||||
defvar OneImm = !if (!eq(VT, bf16), CONST.BF16_ONE, CONST.FP16_ONE);
|
||||
defvar NegOneImm = !if (!eq(VT, bf16), CONST.BF16_NEG_ONE, CONST.FP16_NEG_ONE);
|
||||
// At least one of the operands needs to be an fpextend of an f16
|
||||
// for this to be worthwhile, so we need three patterns here.
|
||||
// TODO: Could we use a predicate to inspect src0/1/2 instead?
|
||||
@ -206,28 +206,33 @@ multiclass MadFmaMixFP32Pats<SDPatternOperator fma_like,
|
||||
(mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
|
||||
DSTCLAMP.NONE)>;
|
||||
|
||||
// (fadd x, y) -> (fma x, 1.0, y)
|
||||
def : GCNPat <
|
||||
(f32 (fadd (f32 (VOP3PMadMixModsExtPat VT:$src0, i32:$src0_mods)),
|
||||
(f32 (VOP3PMadMixModsPat f32:$src1, i32:$src1_mods)))),
|
||||
(mix_inst $src0_mods, $src0, (i32 8), (i32 OneImm), $src1_mods, $src1,
|
||||
DSTCLAMP.NONE)>;
|
||||
|
||||
// (fmul x, y) -> (fma x, y, 0.0)
|
||||
// FIXME: This is only valid with nsz.
|
||||
def : GCNPat <
|
||||
(f32 (fmul (f32 (VOP3PMadMixModsExtPat VT:$src0, i32:$src0_mods)),
|
||||
(f32 (VOP3PMadMixModsPat f32:$src1, i32:$src1_mods)))),
|
||||
(mix_inst $src0_mods, $src0, $src1_mods, $src1, (i32 0), (i32 0),
|
||||
DSTCLAMP.NONE)>;
|
||||
|
||||
// (fsub x, y) -> (fma y, -1.0, x), where x is the VT operand matched by ModsExt
|
||||
def : GCNPat <
|
||||
(f32 (fsub (f32 (VOP3PMadMixModsExtPat VT:$src0, i32:$src0_mods)),
|
||||
(f32 (VOP3PMadMixModsNegPat f32:$src1, i32:$src1_mods)))),
|
||||
(mix_inst $src0_mods, $src0, (i32 8), (i32 OneImm), $src1_mods, $src1,
|
||||
(f32 (VOP3PMadMixModsPat f32:$src1, i32:$src1_mods)))),
|
||||
(mix_inst $src1_mods, $src1, (i32 8), (i32 NegOneImm), $src0_mods, $src0,
|
||||
DSTCLAMP.NONE)>;
|
||||
|
||||
// (fsub x, y) -> (fma y, -1.0, x), where y is the VT operand matched by ModsExt
|
||||
def : GCNPat <
|
||||
(f32 (fsub (f32 (VOP3PMadMixModsNegPat f32:$src0, i32:$src0_mods)),
|
||||
(f32 (fsub (f32 (VOP3PMadMixModsPat f32:$src0, i32:$src0_mods)),
|
||||
(f32 (VOP3PMadMixModsExtPat VT:$src1, i32:$src1_mods)))),
|
||||
(mix_inst $src0_mods, $src0, (i32 8), (i32 OneImm), $src1_mods, $src1,
|
||||
(mix_inst $src1_mods, $src1, (i32 8), (i32 NegOneImm), $src0_mods, $src0,
|
||||
DSTCLAMP.NONE)>;
|
||||
}
|
||||
|
||||
|
||||
@ -44,10 +44,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul(<4 x half> %x, <4
|
||||
; GFX9-DENORM: ; %bb.0: ; %entry
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v2, v0, v2
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v3, v1, v3
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, 1.0, -v4 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v2, 1.0, -v5 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, 1.0, -v6 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v3, 1.0, -v7 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v2 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v5, -1.0, v2 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v6, -1.0, v3 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v7, -1.0, v3 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul:
|
||||
@ -72,10 +72,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul_rhs(<4 x float> %x
|
||||
; GFX9-DENORM: ; %bb.0: ; %.entry
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v0, 1.0, v4 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, -v1, 1.0, v4 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, -v2, 1.0, v5 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, -v3, 1.0, v5 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v0 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v4, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v5, -1.0, v2 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v5, -1.0, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul_rhs:
|
||||
|
||||
@ -85,10 +85,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_neg_mul(<4 x half> %x,
|
||||
; GFX9-DENORM: ; %bb.0: ; %entry
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v2, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v3, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, 1.0, -v4 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v2, 1.0, -v5 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, 1.0, -v6 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v3, 1.0, -v7 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v2 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v5, -1.0, v2 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v6, -1.0, v3 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v7, -1.0, v3 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul:
|
||||
@ -115,10 +115,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul(<4 x half> %x,
|
||||
; GFX9-DENORM: ; %bb.0: ; %entry
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v2, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v3, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, 1.0, -v4 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v2, 1.0, -v5 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, 1.0, -v6 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v3, 1.0, -v7 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v2 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v5, -1.0, v2 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v6, -1.0, v3 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v7, -1.0, v3 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul:
|
||||
@ -146,10 +146,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_neg_mul2(<4 x float> %
|
||||
; GFX9-DENORM: ; %bb.0: ; %entry
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v0, 1.0, v4 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, -v1, 1.0, v4 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, -v2, 1.0, v5 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, -v3, 1.0, v5 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v0 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v4, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v5, -1.0, v2 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v5, -1.0, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul2:
|
||||
@ -175,10 +175,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul2(<4 x float> %
|
||||
; GFX9-DENORM: ; %bb.0: ; %entry
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7 neg_lo:[0,1] neg_hi:[0,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v0, 1.0, v4 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, -v1, 1.0, v4 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, -v2, 1.0, v5 op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, -v3, 1.0, v5 op_sel:[0,0,1] op_sel_hi:[0,1,1]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v0 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v4, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v5, -1.0, v2 op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v5, -1.0, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0]
|
||||
; GFX9-DENORM-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul2:
|
||||
|
||||
@ -13969,7 +13969,7 @@ define bfloat @v_fsub_bf16(bfloat %a, bfloat %b) {
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, v0 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
@ -14372,9 +14372,9 @@ define <3 x bfloat> @v_fsub_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
|
||||
; GFX1250TRUE16: ; %bb.0:
|
||||
; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v1, v1, 1.0, -v3 op_sel_hi:[1,1,1]
|
||||
; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v3, v0, 1.0, -v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,1]
|
||||
; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v1, v3, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v3, v2, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[1,1,1]
|
||||
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
|
||||
; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v3
|
||||
@ -14384,9 +14384,9 @@ define <3 x bfloat> @v_fsub_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
|
||||
; GFX1250FAKE16: ; %bb.0:
|
||||
; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v4, v0, 1.0, -v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,1]
|
||||
; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v1, v1, 1.0, -v3 op_sel_hi:[1,1,1]
|
||||
; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v4, v2, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[1,1,1]
|
||||
; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v1, v3, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4
|
||||
; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
|
||||
@ -14670,10 +14670,10 @@ define <4 x bfloat> @v_fsub_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v4, v1, 1.0, -v3 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v5, v0, 1.0, -v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v1, v1, 1.0, -v3 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v4, v3, -1.0, v1 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v5, v2, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v1, v3, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v5
|
||||
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, v4
|
||||
@ -31855,7 +31855,7 @@ define bfloat @v_round_bf16(bfloat %a) {
|
||||
; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
|
||||
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250TRUE16-NEXT: v_trunc_f32_e32 v2, v1
|
||||
; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250TRUE16-NEXT: v_cmp_ge_f32_e64 s0, |v0|, 0.5
|
||||
; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
|
||||
@ -31873,7 +31873,7 @@ define bfloat @v_round_bf16(bfloat %a) {
|
||||
; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
|
||||
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250FAKE16-NEXT: v_trunc_f32_e32 v2, v1
|
||||
; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX1250FAKE16-NEXT: v_cmp_ge_f32_e64 s0, |v0|, 0.5
|
||||
; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
|
||||
|
||||
@ -624,7 +624,7 @@ define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: fsub_fpext_fmul_f16_to_f32:
|
||||
@ -632,7 +632,7 @@ define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1
|
||||
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32:
|
||||
@ -677,7 +677,7 @@ define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0
|
||||
; GFX11-F32DENORM-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-F32DENORM-TRUE16-NEXT: v_mul_f16_e32 v1.l, v1.l, v2.l
|
||||
; GFX11-F32DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-F32DENORM-TRUE16-NEXT: v_fma_mix_f32 v0, -v0, 1.0, v1 op_sel_hi:[0,1,1]
|
||||
; GFX11-F32DENORM-TRUE16-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,0]
|
||||
; GFX11-F32DENORM-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-F32DENORM-FAKE16-LABEL: fsub_fpext_fmul_f16_to_f32_commute:
|
||||
@ -685,7 +685,7 @@ define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0
|
||||
; GFX11-F32DENORM-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-F32DENORM-FAKE16-NEXT: v_mul_f16_e32 v1, v1, v2
|
||||
; GFX11-F32DENORM-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-F32DENORM-FAKE16-NEXT: v_fma_mix_f32 v0, -v0, 1.0, v1 op_sel_hi:[0,1,1]
|
||||
; GFX11-F32DENORM-FAKE16-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,0]
|
||||
; GFX11-F32DENORM-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32_commute:
|
||||
@ -724,7 +724,7 @@ define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, -v1.l
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: fsub_fpext_fneg_fmul_f16_to_f32:
|
||||
@ -732,7 +732,7 @@ define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
|
||||
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-F32FLUSH-LABEL: fsub_fpext_fneg_fmul_f16_to_f32:
|
||||
@ -772,7 +772,7 @@ define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
|
||||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, -v1.l
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: fsub_fneg_fpext_fmul_f16_to_f32:
|
||||
@ -780,7 +780,7 @@ define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 {
|
||||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX11-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1
|
||||
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-F32FLUSH-LABEL: fsub_fneg_fpext_fmul_f16_to_f32:
|
||||
@ -886,7 +886,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half
|
||||
; GFX11-TRUE16-NEXT: v_mul_f16_e32 v3.l, v3.l, v4.l
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v3.l, v0.l, v1.l
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v3, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v3 op_sel_hi:[0,1,1]
|
||||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: fsub_fpext_muladd_mul_f16_to_f32:
|
||||
@ -895,7 +895,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half
|
||||
; GFX11-FAKE16-NEXT: v_mul_f16_e32 v3, v3, v4
|
||||
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-FAKE16-NEXT: v_fmac_f16_e32 v3, v0, v1
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v3, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v3 op_sel_hi:[0,1,1]
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-F32FLUSH-LABEL: fsub_fpext_muladd_mul_f16_to_f32:
|
||||
@ -903,7 +903,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half
|
||||
; GFX9-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-F32FLUSH-NEXT: v_mul_f16_e32 v3, v3, v4
|
||||
; GFX9-F32FLUSH-NEXT: v_fma_f16 v0, v0, v1, v3
|
||||
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0]
|
||||
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX9-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-F32DENORM-LABEL: fsub_fpext_muladd_mul_f16_to_f32:
|
||||
@ -1004,7 +1004,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %
|
||||
; GFX11-TRUE16-NEXT: v_mul_f16_e32 v3.l, v3.l, v4.l
|
||||
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v3.l, v1.l, v2.l
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, -v0, 1.0, v3 op_sel_hi:[0,1,1]
|
||||
; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v3, -1.0, v0 op_sel_hi:[1,1,0]
|
||||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX11-FAKE16-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute:
|
||||
@ -1013,7 +1013,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %
|
||||
; GFX11-FAKE16-NEXT: v_mul_f16_e32 v3, v3, v4
|
||||
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
|
||||
; GFX11-FAKE16-NEXT: v_fmac_f16_e32 v3, v1, v2
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, -v0, 1.0, v3 op_sel_hi:[0,1,1]
|
||||
; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v3, -1.0, v0 op_sel_hi:[1,1,0]
|
||||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-F32FLUSH-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute:
|
||||
@ -1021,7 +1021,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %
|
||||
; GFX9-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-F32FLUSH-NEXT: v_mul_f16_e32 v3, v3, v4
|
||||
; GFX9-F32FLUSH-NEXT: v_fma_f16 v1, v1, v2, v3
|
||||
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v0, 1.0, v1 op_sel_hi:[0,1,1]
|
||||
; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,0]
|
||||
; GFX9-F32FLUSH-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-F32DENORM-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute:
|
||||
|
||||
@ -703,7 +703,7 @@ define float @v_mad_mix_f32_negbf16lo_add_bf16lo(bfloat %src0, bfloat %src1) {
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.ext = fpext bfloat %src0 to float
|
||||
%src1.ext = fpext bfloat %src1 to float
|
||||
@ -731,7 +731,7 @@ define float @v_mad_mix_f32_negabsbf16lo_add_bf16lo(bfloat %src0, bfloat %src1)
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.ext = fpext bfloat %src0 to float
|
||||
%src1.ext = fpext bfloat %src1 to float
|
||||
@ -758,7 +758,7 @@ define float @v_mad_mix_f32_bf16lo_add_negf32(bfloat %src0, float %src1) {
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v1 op_sel_hi:[1,1,0]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.ext = fpext bfloat %src0 to float
|
||||
%src1.neg = fneg float %src1
|
||||
@ -784,7 +784,7 @@ define float @v_mad_mix_f32_bf16lo_add_negabsf32(bfloat %src0, float %src1) {
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -|v1| op_sel_hi:[1,1,0]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v1|, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.ext = fpext bfloat %src0 to float
|
||||
%src1.abs = call float @llvm.fabs.f32(float %src1)
|
||||
@ -838,7 +838,7 @@ define float @v_mad_mix_f32_negprecvtbf16lo_add_bf16lo(i32 %src0.arg, bfloat %sr
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.arg.bc = bitcast i32 %src0.arg to <2 x bfloat>
|
||||
%src0 = extractelement <2 x bfloat> %src0.arg.bc, i32 0
|
||||
@ -870,7 +870,7 @@ define float @v_mad_mix_f32_negabsprecvtbf16lo_add_bf16lo(i32 %src0.arg, bfloat
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.arg.bc = bitcast i32 %src0.arg to <2 x bfloat>
|
||||
%src0 = extractelement <2 x bfloat> %src0.arg.bc, i32 0
|
||||
@ -1324,7 +1324,7 @@ define float @v_mad_mix_f32_bf16lo_sub_bf16lo(bfloat %src0, bfloat %src1) {
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, v0 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.ext = fpext bfloat %src0 to float
|
||||
%src1.ext = fpext bfloat %src1 to float
|
||||
@ -1337,7 +1337,7 @@ define float @v_mad_mix_f32_absbf16lo_sub_bf16lo(bfloat %src0, bfloat %src1) {
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v0|, 1.0, -v1 op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, |v0| op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.ext = fpext bfloat %src0 to float
|
||||
%src1.ext = fpext bfloat %src1 to float
|
||||
@ -1351,7 +1351,7 @@ define float @v_mad_mix_f32_bf16hi_fsub_bf16hi(i32 %src0, i32 %src1) {
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.hi = lshr i32 %src0, 16
|
||||
%src1.hi = lshr i32 %src1, 16
|
||||
@ -1370,7 +1370,7 @@ define float @v_mad_mix_f32_absbf16hi_fsub_bf16hi(i32 %src0, i32 %src1) {
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v0|, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, |v0| op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
|
||||
%src0.hi = lshr i32 %src0, 16
|
||||
%src1.hi = lshr i32 %src1, 16
|
||||
|
||||
@ -2946,7 +2946,7 @@ define float @v_mad_mix_f32_negf16lo_add_f16lo(half %src0, half %src1) {
|
||||
; SDAG-GFX1100-LABEL: v_mad_mix_f32_negf16lo_add_f16lo:
|
||||
; SDAG-GFX1100: ; %bb.0:
|
||||
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX900-LABEL: v_mad_mix_f32_negf16lo_add_f16lo:
|
||||
@ -2960,7 +2960,7 @@ define float @v_mad_mix_f32_negf16lo_add_f16lo(half %src0, half %src1) {
|
||||
; SDAG-GFX906-LABEL: v_mad_mix_f32_negf16lo_add_f16lo:
|
||||
; SDAG-GFX906: ; %bb.0:
|
||||
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_add_f16lo:
|
||||
@ -3100,7 +3100,7 @@ define float @v_mad_mix_f32_negabsf16lo_add_f16lo(half %src0, half %src1) {
|
||||
; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_add_f16lo:
|
||||
; GFX1100: ; %bb.0:
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX900-LABEL: v_mad_mix_f32_negabsf16lo_add_f16lo:
|
||||
@ -3114,7 +3114,7 @@ define float @v_mad_mix_f32_negabsf16lo_add_f16lo(half %src0, half %src1) {
|
||||
; GFX906-LABEL: v_mad_mix_f32_negabsf16lo_add_f16lo:
|
||||
; GFX906: ; %bb.0:
|
||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_add_f16lo:
|
||||
@ -3205,7 +3205,7 @@ define float @v_mad_mix_f32_f16lo_add_negf32(half %src0, float %src1) {
|
||||
; GFX1100-LABEL: v_mad_mix_f32_f16lo_add_negf32:
|
||||
; GFX1100: ; %bb.0:
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel_hi:[1,1,0]
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX900-LABEL: v_mad_mix_f32_f16lo_add_negf32:
|
||||
@ -3218,7 +3218,7 @@ define float @v_mad_mix_f32_f16lo_add_negf32(half %src0, float %src1) {
|
||||
; GFX906-LABEL: v_mad_mix_f32_f16lo_add_negf32:
|
||||
; GFX906: ; %bb.0:
|
||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel_hi:[1,1,0]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_add_negf32:
|
||||
@ -3297,7 +3297,7 @@ define float @v_mad_mix_f32_f16lo_add_negabsf32(half %src0, float %src1) {
|
||||
; GFX1100-LABEL: v_mad_mix_f32_f16lo_add_negabsf32:
|
||||
; GFX1100: ; %bb.0:
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, 1.0, -|v1| op_sel_hi:[1,1,0]
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, |v1|, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX900-LABEL: v_mad_mix_f32_f16lo_add_negabsf32:
|
||||
@ -3310,7 +3310,7 @@ define float @v_mad_mix_f32_f16lo_add_negabsf32(half %src0, float %src1) {
|
||||
; GFX906-LABEL: v_mad_mix_f32_f16lo_add_negabsf32:
|
||||
; GFX906: ; %bb.0:
|
||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, 1.0, -|v1| op_sel_hi:[1,1,0]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, |v1|, -1.0, v0 op_sel_hi:[0,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_add_negabsf32:
|
||||
@ -3491,7 +3491,7 @@ define float @v_mad_mix_f32_negprecvtf16lo_add_f16lo(i32 %src0.arg, half %src1)
|
||||
; SDAG-GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_add_f16lo:
|
||||
; SDAG-GFX1100: ; %bb.0:
|
||||
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX900-LABEL: v_mad_mix_f32_negprecvtf16lo_add_f16lo:
|
||||
@ -3505,7 +3505,7 @@ define float @v_mad_mix_f32_negprecvtf16lo_add_f16lo(i32 %src0.arg, half %src1)
|
||||
; SDAG-GFX906-LABEL: v_mad_mix_f32_negprecvtf16lo_add_f16lo:
|
||||
; SDAG-GFX906: ; %bb.0:
|
||||
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_add_f16lo:
|
||||
@ -3641,7 +3641,7 @@ define float @v_mad_mix_f32_negabsprecvtf16lo_add_f16lo(i32 %src0.arg, half %src
|
||||
; SDAG-GFX1100-LABEL: v_mad_mix_f32_negabsprecvtf16lo_add_f16lo:
|
||||
; SDAG-GFX1100: ; %bb.0:
|
||||
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX900-LABEL: v_mad_mix_f32_negabsprecvtf16lo_add_f16lo:
|
||||
@ -3655,7 +3655,7 @@ define float @v_mad_mix_f32_negabsprecvtf16lo_add_f16lo(i32 %src0.arg, half %src
|
||||
; SDAG-GFX906-LABEL: v_mad_mix_f32_negabsprecvtf16lo_add_f16lo:
|
||||
; SDAG-GFX906: ; %bb.0:
|
||||
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negabsprecvtf16lo_add_f16lo:
|
||||
@ -3880,7 +3880,7 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_add_f16lo(i32 %src0.arg, half %
|
||||
; SDAG-GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_add_f16lo:
|
||||
; SDAG-GFX1100: ; %bb.0:
|
||||
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel:[0,0,1] op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX900-LABEL: v_mad_mix_f32_preextractfneg_f16hi_add_f16lo:
|
||||
@ -3894,7 +3894,7 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_add_f16lo(i32 %src0.arg, half %
|
||||
; SDAG-GFX906-LABEL: v_mad_mix_f32_preextractfneg_f16hi_add_f16lo:
|
||||
; SDAG-GFX906: ; %bb.0:
|
||||
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel:[0,0,1] op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_add_f16lo:
|
||||
@ -4074,7 +4074,7 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo(i32 %src0.arg, ha
|
||||
; SDAG-GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo:
|
||||
; SDAG-GFX1100: ; %bb.0:
|
||||
; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel:[0,0,1] op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX900-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo:
|
||||
@ -4088,7 +4088,7 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo(i32 %src0.arg, ha
|
||||
; SDAG-GFX906-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo:
|
||||
; SDAG-GFX906: ; %bb.0:
|
||||
; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel:[0,0,1] op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo:
|
||||
@ -5658,7 +5658,7 @@ define float @v_mad_mix_f32_f16lo_sub_f16lo(half %src0, half %src1) {
|
||||
; GFX1100-LABEL: v_mad_mix_f32_f16lo_sub_f16lo:
|
||||
; GFX1100: ; %bb.0:
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX900-LABEL: v_mad_mix_f32_f16lo_sub_f16lo:
|
||||
@ -5672,7 +5672,7 @@ define float @v_mad_mix_f32_f16lo_sub_f16lo(half %src0, half %src1) {
|
||||
; GFX906-LABEL: v_mad_mix_f32_f16lo_sub_f16lo:
|
||||
; GFX906: ; %bb.0:
|
||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_sub_f16lo:
|
||||
@ -5708,7 +5708,7 @@ define float @v_mad_mix_f32_absf16lo_sub_f16lo(half %src0, half %src1) {
|
||||
; GFX1100-LABEL: v_mad_mix_f32_absf16lo_sub_f16lo:
|
||||
; GFX1100: ; %bb.0:
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, 1.0, -v1 op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, |v0| op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX900-LABEL: v_mad_mix_f32_absf16lo_sub_f16lo:
|
||||
@ -5722,7 +5722,7 @@ define float @v_mad_mix_f32_absf16lo_sub_f16lo(half %src0, half %src1) {
|
||||
; GFX906-LABEL: v_mad_mix_f32_absf16lo_sub_f16lo:
|
||||
; GFX906: ; %bb.0:
|
||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, 1.0, -v1 op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, |v0| op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_sub_f16lo:
|
||||
@ -5767,7 +5767,7 @@ define float @v_mad_mix_f32_f16hi_fsub_f16hi(i32 %src0, i32 %src1) {
|
||||
; GFX1100-LABEL: v_mad_mix_f32_f16hi_fsub_f16hi:
|
||||
; GFX1100: ; %bb.0:
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX900-LABEL: v_mad_mix_f32_f16hi_fsub_f16hi:
|
||||
@ -5781,7 +5781,7 @@ define float @v_mad_mix_f32_f16hi_fsub_f16hi(i32 %src0, i32 %src1) {
|
||||
; GFX906-LABEL: v_mad_mix_f32_f16hi_fsub_f16hi:
|
||||
; GFX906: ; %bb.0:
|
||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_fsub_f16hi:
|
||||
@ -5825,7 +5825,7 @@ define float @v_mad_mix_f32_absf16hi_fsub_f16hi(i32 %src0, i32 %src1) {
|
||||
; GFX1100-LABEL: v_mad_mix_f32_absf16hi_fsub_f16hi:
|
||||
; GFX1100: ; %bb.0:
|
||||
; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, |v0| op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX1100-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX900-LABEL: v_mad_mix_f32_absf16hi_fsub_f16hi:
|
||||
@ -5839,7 +5839,7 @@ define float @v_mad_mix_f32_absf16hi_fsub_f16hi(i32 %src0, i32 %src1) {
|
||||
; GFX906-LABEL: v_mad_mix_f32_absf16hi_fsub_f16hi:
|
||||
; GFX906: ; %bb.0:
|
||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, |v0| op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9GEN-LABEL: v_mad_mix_f32_absf16hi_fsub_f16hi:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user