From 4a6697f3931665e9bf72c7ce7c86f0bc92a5bbe3 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 6 Feb 2026 14:39:13 +0000 Subject: [PATCH] [AMDGPU] Fix and simplify patterns selecting fsub to v_fma_mix_f32 (#180169) Select (fsub x, y) -> (fma y, -1.0, x). Using -1.0 as the constant avoids the need for ComplexPatterns to negate x or y. This also fixes the bad pattern (fsub x, y) -> (fma -x, 1.0, y). --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 4 -- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 18 ------- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 5 -- .../AMDGPU/AMDGPUInstructionSelector.cpp | 15 ------ .../Target/AMDGPU/AMDGPUInstructionSelector.h | 1 - llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 1 + llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 - llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 15 ++++-- .../GlobalISel/combine-fma-sub-ext-mul.ll | 16 +++---- .../GlobalISel/combine-fma-sub-ext-neg-mul.ll | 32 ++++++------- llvm/test/CodeGen/AMDGPU/bf16.ll | 26 +++++----- llvm/test/CodeGen/AMDGPU/fpext-free.ll | 28 +++++------ llvm/test/CodeGen/AMDGPU/mad-mix-bf16.ll | 20 ++++---- llvm/test/CodeGen/AMDGPU/mad-mix.ll | 48 +++++++++---------- 14 files changed, 96 insertions(+), 135 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 76c1be8690e2..2781618a1707 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -205,10 +205,6 @@ def gi_vop3_mad_mix_mods_ext : GIComplexOperandMatcher, GIComplexPatternEquiv; -def gi_vop3_mad_mix_mods_neg : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - // Separate load nodes are defined to glue m0 initialization in // SelectionDAG. The GISel selector can just insert m0 initialization // directly before selecting a glue-less load, so hide this diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 6a35661c9e61..238f06fbd33c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -4204,24 +4204,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src, return true; } -bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsNeg(SDValue In, SDValue &Src, - SDValue &SrcMods) const { - unsigned Mods = 0; - SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16); - Mods ^= SISrcMods::NEG; - SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); - return true; -} - -bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsNeg(SDValue In, SDValue &Src, - SDValue &SrcMods) const { - unsigned Mods = 0; - SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16); - Mods ^= SISrcMods::NEG; - SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); - return true; -} - // Match BITOP3 operation and return a number of matched instructions plus // truth table. static std::pair BitOp3_Op(SDValue In, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index e8873666b3ed..a86b75458923 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -260,11 +260,6 @@ private: bool SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3PMadMixModsNeg(SDValue In, SDValue &Src, - SDValue &SrcMods) const; - bool SelectVOP3PMadMixBF16ModsNeg(SDValue In, SDValue &Src, - SDValue &SrcMods) const; - bool SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1, SDValue &Src2, SDValue &Tbl) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 690b2c92c371..b96c2ef70dd8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -6930,21 +6930,6 @@ AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const { }}; } -InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectVOP3PMadMixModsNeg( - MachineOperand &Root) const { - Register Src; - unsigned Mods; - bool Matched; - std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched); - Mods ^= SISrcMods::NEG; - - return {{ - [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods - }}; -} - bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst( MachineInstr &I, Intrinsic::ID IntrID) const { MachineBasicBlock *MBB = I.getParent(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index f8a30a03c524..627cce277ae3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -343,7 +343,6 @@ private: bool &Matched) const; ComplexRendererFns selectVOP3PMadMixModsExt(MachineOperand &Root) const; ComplexRendererFns selectVOP3PMadMixMods(MachineOperand &Root) const; - ComplexRendererFns selectVOP3PMadMixModsNeg(MachineOperand &Root) const; void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx = -1) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 022bc65863b7..f77b4c9d9642 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -744,6 +744,7 @@ int FP32_NEG_ONE = 0xbf800000; int FP64_ONE = 0x3ff0000000000000; int FP64_NEG_ONE = 0xbff0000000000000; int BF16_ONE = 0x3F80; +int BF16_NEG_ONE = 0xBF80; } def CONST : Constants; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 3cc0008334a3..69960d62f51a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1712,8 +1712,6 @@ def VOP3PMadMixModsExt : ComplexPattern; def VOP3PMadMixMods : ComplexPattern; def VOP3PMadMixBF16ModsExt : ComplexPattern; def VOP3PMadMixBF16Mods : ComplexPattern; -def VOP3PMadMixModsNeg : ComplexPattern; -def VOP3PMadMixBF16ModsNeg : ComplexPattern; def VINTERPMods : ComplexPattern; def VINTERPModsHi : ComplexPattern; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 739448caee87..6f9113e3bcf6 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -182,8 +182,8 @@ multiclass MadFmaMixFP32Pats { defvar VOP3PMadMixModsPat = !if (!eq(VT, bf16), VOP3PMadMixBF16Mods, VOP3PMadMixMods); defvar VOP3PMadMixModsExtPat = !if (!eq(VT, bf16), VOP3PMadMixBF16ModsExt, VOP3PMadMixModsExt); - defvar VOP3PMadMixModsNegPat = !if (!eq(VT, bf16), VOP3PMadMixBF16ModsNeg, VOP3PMadMixModsNeg); defvar OneImm = !if (!eq(VT, bf16), CONST.BF16_ONE, CONST.FP16_ONE); + defvar NegOneImm = !if (!eq(VT, bf16), CONST.BF16_NEG_ONE, CONST.FP16_NEG_ONE); // At least one of the operands needs to be an fpextend of an f16 // for this to be worthwhile, so we need three patterns here. // TODO: Could we use a predicate to inspect src1/2/3 instead? @@ -206,28 +206,33 @@ multiclass MadFmaMixFP32Pats; + // (fadd x, y) -> (fma x, 1.0, y) def : GCNPat < (f32 (fadd (f32 (VOP3PMadMixModsExtPat VT:$src0, i32:$src0_mods)), (f32 (VOP3PMadMixModsPat f32:$src1, i32:$src1_mods)))), (mix_inst $src0_mods, $src0, (i32 8), (i32 OneImm), $src1_mods, $src1, DSTCLAMP.NONE)>; + // (fmul x, y) -> (fma x, y, 0.0) + // FIXME: This is only valid with nsz. def : GCNPat < (f32 (fmul (f32 (VOP3PMadMixModsExtPat VT:$src0, i32:$src0_mods)), (f32 (VOP3PMadMixModsPat f32:$src1, i32:$src1_mods)))), (mix_inst $src0_mods, $src0, $src1_mods, $src1, (i32 0), (i32 0), DSTCLAMP.NONE)>; + // (fsub x, y) -> (fma y, -1.0, x) def : GCNPat < (f32 (fsub (f32 (VOP3PMadMixModsExtPat VT:$src0, i32:$src0_mods)), - (f32 (VOP3PMadMixModsNegPat f32:$src1, i32:$src1_mods)))), - (mix_inst $src0_mods, $src0, (i32 8), (i32 OneImm), $src1_mods, $src1, + (f32 (VOP3PMadMixModsPat f32:$src1, i32:$src1_mods)))), + (mix_inst $src1_mods, $src1, (i32 8), (i32 NegOneImm), $src0_mods, $src0, DSTCLAMP.NONE)>; + // (fsub x, y) -> (fma y, -1.0, x) def : GCNPat < - (f32 (fsub (f32 (VOP3PMadMixModsNegPat f32:$src0, i32:$src0_mods)), + (f32 (fsub (f32 (VOP3PMadMixModsPat f32:$src0, i32:$src0_mods)), (f32 (VOP3PMadMixModsExtPat VT:$src1, i32:$src1_mods)))), - (mix_inst $src0_mods, $src0, (i32 8), (i32 OneImm), $src1_mods, $src1, + (mix_inst $src1_mods, $src1, (i32 8), (i32 NegOneImm), $src0_mods, $src0, DSTCLAMP.NONE)>; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll index e424caeebc4a..aa6ce031ff93 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll @@ -44,10 +44,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul(<4 x half> %x, <4 ; GFX9-DENORM: ; %bb.0: ; %entry ; GFX9-DENORM-NEXT: v_pk_mul_f16 v2, v0, v2 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v3, v1, v3 -; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, 1.0, -v4 op_sel_hi:[1,1,0] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v2, 1.0, -v5 op_sel:[1,0,0] op_sel_hi:[1,1,0] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, 1.0, -v6 op_sel_hi:[1,1,0] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v3, 1.0, -v7 op_sel:[1,0,0] op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v2 op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v5, -1.0, v2 op_sel:[0,0,1] op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v6, -1.0, v3 op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v7, -1.0, v3 op_sel:[0,0,1] op_sel_hi:[0,1,1] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul: @@ -72,10 +72,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul_rhs(<4 x float> %x ; GFX9-DENORM: ; %bb.0: ; %.entry ; GFX9-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7 -; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v0, 1.0, v4 op_sel_hi:[0,1,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, -v1, 1.0, v4 op_sel:[0,0,1] op_sel_hi:[0,1,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, -v2, 1.0, v5 op_sel_hi:[0,1,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, -v3, 1.0, v5 op_sel:[0,0,1] op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v0 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v4, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v5, -1.0, v2 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v5, -1.0, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul_rhs: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll index 62f34733df5f..8febf4a53589 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll @@ -85,10 +85,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_neg_mul(<4 x half> %x, ; GFX9-DENORM: ; %bb.0: ; %entry ; GFX9-DENORM-NEXT: v_pk_mul_f16 v2, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-DENORM-NEXT: v_pk_mul_f16 v3, v1, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, 1.0, -v4 op_sel_hi:[1,1,0] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v2, 1.0, -v5 op_sel:[1,0,0] op_sel_hi:[1,1,0] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, 1.0, -v6 op_sel_hi:[1,1,0] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v3, 1.0, -v7 op_sel:[1,0,0] op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v2 op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v5, -1.0, v2 op_sel:[0,0,1] op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v6, -1.0, v3 op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v7, -1.0, v3 op_sel:[0,0,1] op_sel_hi:[0,1,1] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul: @@ -115,10 +115,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul(<4 x half> %x, ; GFX9-DENORM: ; %bb.0: ; %entry ; GFX9-DENORM-NEXT: v_pk_mul_f16 v2, v0, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-DENORM-NEXT: v_pk_mul_f16 v3, v1, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v2, 1.0, -v4 op_sel_hi:[1,1,0] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v2, 1.0, -v5 op_sel:[1,0,0] op_sel_hi:[1,1,0] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, 1.0, -v6 op_sel_hi:[1,1,0] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v3, 1.0, -v7 op_sel:[1,0,0] op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v2 op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v5, -1.0, v2 op_sel:[0,0,1] op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v6, -1.0, v3 op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v7, -1.0, v3 op_sel:[0,0,1] op_sel_hi:[0,1,1] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul: @@ -146,10 +146,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_neg_mul2(<4 x float> % ; GFX9-DENORM: ; %bb.0: ; %entry ; GFX9-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7 neg_lo:[0,1] neg_hi:[0,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v0, 1.0, v4 op_sel_hi:[0,1,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, -v1, 1.0, v4 op_sel:[0,0,1] op_sel_hi:[0,1,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, -v2, 1.0, v5 op_sel_hi:[0,1,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, -v3, 1.0, v5 op_sel:[0,0,1] op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v0 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v4, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v5, -1.0, v2 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v5, -1.0, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul2: @@ -175,10 +175,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul2(<4 x float> % ; GFX9-DENORM: ; %bb.0: ; %entry ; GFX9-DENORM-NEXT: v_pk_mul_f16 v4, v4, v6 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-DENORM-NEXT: v_pk_mul_f16 v5, v5, v7 neg_lo:[0,1] neg_hi:[0,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v0, 1.0, v4 op_sel_hi:[0,1,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, -v1, 1.0, v4 op_sel:[0,0,1] op_sel_hi:[0,1,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, -v2, 1.0, v5 op_sel_hi:[0,1,1] -; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, -v3, 1.0, v5 op_sel:[0,0,1] op_sel_hi:[0,1,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v4, -1.0, v0 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v4, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v5, -1.0, v2 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v5, -1.0, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul2: diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 5452608e1e68..e843e125b843 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -13969,7 +13969,7 @@ define bfloat @v_fsub_bf16(bfloat %a, bfloat %b) { ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v1 op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, v0 op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] @@ -14372,9 +14372,9 @@ define <3 x bfloat> @v_fsub_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) { ; GFX1250TRUE16: ; %bb.0: ; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v1, v1, 1.0, -v3 op_sel_hi:[1,1,1] -; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v3, v0, 1.0, -v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] -; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,1] +; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v1, v3, -1.0, v1 op_sel_hi:[1,1,1] +; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v3, v2, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[1,1,1] ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0 ; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v3 @@ -14384,9 +14384,9 @@ define <3 x bfloat> @v_fsub_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) { ; GFX1250FAKE16: ; %bb.0: ; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v4, v0, 1.0, -v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] -; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,1] -; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v1, v1, 1.0, -v3 op_sel_hi:[1,1,1] +; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v4, v2, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[1,1,1] +; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v1, v3, -1.0, v1 op_sel_hi:[1,1,1] ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4 ; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0 @@ -14670,10 +14670,10 @@ define <4 x bfloat> @v_fsub_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) { ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v4, v1, 1.0, -v3 op_sel:[1,0,1] op_sel_hi:[1,1,1] -; GFX1250-NEXT: v_fma_mix_f32_bf16 v5, v0, 1.0, -v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,1] -; GFX1250-NEXT: v_fma_mix_f32_bf16 v1, v1, 1.0, -v3 op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v4, v3, -1.0, v1 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v5, v2, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v1, v3, -1.0, v1 op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v5 ; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, v4 @@ -31855,7 +31855,7 @@ define bfloat @v_round_bf16(bfloat %a) { ; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1250TRUE16-NEXT: v_trunc_f32_e32 v2, v1 -; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[0,1,1] ; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1250TRUE16-NEXT: v_cmp_ge_f32_e64 s0, |v0|, 0.5 ; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0 @@ -31873,7 +31873,7 @@ define bfloat @v_round_bf16(bfloat %a) { ; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1250FAKE16-NEXT: v_trunc_f32_e32 v2, v1 -; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v2, -1.0, v0 op_sel_hi:[0,1,1] ; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1250FAKE16-NEXT: v_cmp_ge_f32_e64 s0, |v0|, 0.5 ; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/fpext-free.ll b/llvm/test/CodeGen/AMDGPU/fpext-free.ll index f48882308c60..10a925f9df68 100644 --- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll +++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll @@ -624,7 +624,7 @@ define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_mul_f16_e32 v0.l, v0.l, v1.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1] ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: fsub_fpext_fmul_f16_to_f32: @@ -632,7 +632,7 @@ define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1] ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32: @@ -677,7 +677,7 @@ define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 ; GFX11-F32DENORM-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-F32DENORM-TRUE16-NEXT: v_mul_f16_e32 v1.l, v1.l, v2.l ; GFX11-F32DENORM-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-F32DENORM-TRUE16-NEXT: v_fma_mix_f32 v0, -v0, 1.0, v1 op_sel_hi:[0,1,1] +; GFX11-F32DENORM-TRUE16-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,0] ; GFX11-F32DENORM-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-F32DENORM-FAKE16-LABEL: fsub_fpext_fmul_f16_to_f32_commute: @@ -685,7 +685,7 @@ define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 ; GFX11-F32DENORM-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-F32DENORM-FAKE16-NEXT: v_mul_f16_e32 v1, v1, v2 ; GFX11-F32DENORM-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-F32DENORM-FAKE16-NEXT: v_fma_mix_f32 v0, -v0, 1.0, v1 op_sel_hi:[0,1,1] +; GFX11-F32DENORM-FAKE16-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,0] ; GFX11-F32DENORM-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-F32FLUSH-LABEL: fsub_fpext_fmul_f16_to_f32_commute: @@ -724,7 +724,7 @@ define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, -v1.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1] ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: fsub_fpext_fneg_fmul_f16_to_f32: @@ -732,7 +732,7 @@ define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1] ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-F32FLUSH-LABEL: fsub_fpext_fneg_fmul_f16_to_f32: @@ -772,7 +772,7 @@ define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_mul_f16_e64 v0.l, v0.l, -v1.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1] ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: fsub_fneg_fpext_fmul_f16_to_f32: @@ -780,7 +780,7 @@ define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_mul_f16_e64 v0, v0, -v1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1] ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-F32FLUSH-LABEL: fsub_fneg_fpext_fmul_f16_to_f32: @@ -886,7 +886,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half ; GFX11-TRUE16-NEXT: v_mul_f16_e32 v3.l, v3.l, v4.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v3.l, v0.l, v1.l -; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v3, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v3 op_sel_hi:[0,1,1] ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: fsub_fpext_muladd_mul_f16_to_f32: @@ -895,7 +895,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half ; GFX11-FAKE16-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_fmac_f16_e32 v3, v0, v1 -; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v3, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v2, -1.0, v3 op_sel_hi:[0,1,1] ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-F32FLUSH-LABEL: fsub_fpext_muladd_mul_f16_to_f32: @@ -903,7 +903,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half ; GFX9-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-F32FLUSH-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX9-F32FLUSH-NEXT: v_fma_f16 v0, v0, v1, v3 -; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, 1.0, -v2 op_sel_hi:[1,1,0] +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v2, -1.0, v0 op_sel_hi:[0,1,1] ; GFX9-F32FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-F32DENORM-LABEL: fsub_fpext_muladd_mul_f16_to_f32: @@ -1004,7 +1004,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half % ; GFX11-TRUE16-NEXT: v_mul_f16_e32 v3.l, v3.l, v4.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v3.l, v1.l, v2.l -; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, -v0, 1.0, v3 op_sel_hi:[0,1,1] +; GFX11-TRUE16-NEXT: v_fma_mix_f32 v0, v3, -1.0, v0 op_sel_hi:[1,1,0] ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute: @@ -1013,7 +1013,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half % ; GFX11-FAKE16-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_fmac_f16_e32 v3, v1, v2 -; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, -v0, 1.0, v3 op_sel_hi:[0,1,1] +; GFX11-FAKE16-NEXT: v_fma_mix_f32 v0, v3, -1.0, v0 op_sel_hi:[1,1,0] ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-F32FLUSH-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute: @@ -1021,7 +1021,7 @@ define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half % ; GFX9-F32FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-F32FLUSH-NEXT: v_mul_f16_e32 v3, v3, v4 ; GFX9-F32FLUSH-NEXT: v_fma_f16 v1, v1, v2, v3 -; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v0, 1.0, v1 op_sel_hi:[0,1,1] +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,0] ; GFX9-F32FLUSH-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-F32DENORM-LABEL: fsub_fpext_muladd_mul_f16_to_f32_commute: diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-bf16.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-bf16.ll index 02c2336844a0..2f5d732da0fb 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix-bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-bf16.ll @@ -703,7 +703,7 @@ define float @v_mad_mix_f32_negbf16lo_add_bf16lo(bfloat %src0, bfloat %src1) { ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, -1.0, v1 op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.ext = fpext bfloat %src0 to float %src1.ext = fpext bfloat %src1 to float @@ -731,7 +731,7 @@ define float @v_mad_mix_f32_negabsbf16lo_add_bf16lo(bfloat %src0, bfloat %src1) ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.ext = fpext bfloat %src0 to float %src1.ext = fpext bfloat %src1 to float @@ -758,7 +758,7 @@ define float @v_mad_mix_f32_bf16lo_add_negf32(bfloat %src0, float %src1) { ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v1 op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, v0 op_sel_hi:[0,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.ext = fpext bfloat %src0 to float %src1.neg = fneg float %src1 @@ -784,7 +784,7 @@ define float @v_mad_mix_f32_bf16lo_add_negabsf32(bfloat %src0, float %src1) { ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -|v1| op_sel_hi:[1,1,0] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v1|, -1.0, v0 op_sel_hi:[0,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.ext = fpext bfloat %src0 to float %src1.abs = call float @llvm.fabs.f32(float %src1) @@ -838,7 +838,7 @@ define float @v_mad_mix_f32_negprecvtbf16lo_add_bf16lo(i32 %src0.arg, bfloat %sr ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, -1.0, v1 op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x bfloat> %src0 = extractelement <2 x bfloat> %src0.arg.bc, i32 0 @@ -870,7 +870,7 @@ define float @v_mad_mix_f32_negabsprecvtbf16lo_add_bf16lo(i32 %src0.arg, bfloat ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x bfloat> %src0 = extractelement <2 x bfloat> %src0.arg.bc, i32 0 @@ -1324,7 +1324,7 @@ define float @v_mad_mix_f32_bf16lo_sub_bf16lo(bfloat %src0, bfloat %src1) { ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v1 op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, v0 op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.ext = fpext bfloat %src0 to float %src1.ext = fpext bfloat %src1 to float @@ -1337,7 +1337,7 @@ define float @v_mad_mix_f32_absbf16lo_sub_bf16lo(bfloat %src0, bfloat %src1) { ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v0|, 1.0, -v1 op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, |v0| op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.ext = fpext bfloat %src0 to float %src1.ext = fpext bfloat %src1 to float @@ -1351,7 +1351,7 @@ define float @v_mad_mix_f32_bf16hi_fsub_bf16hi(i32 %src0, i32 %src1) { ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.hi = lshr i32 %src0, 16 %src1.hi = lshr i32 %src1, 16 @@ -1370,7 +1370,7 @@ define float @v_mad_mix_f32_absbf16hi_fsub_bf16hi(i32 %src0, i32 %src1) { ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, |v0|, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v1, -1.0, |v0| op_sel:[1,0,1] op_sel_hi:[1,1,1] ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %src0.hi = lshr i32 %src0, 16 %src1.hi = lshr i32 %src1, 16 diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll index 8ae378079656..01d76c162cc1 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll @@ -2946,7 +2946,7 @@ define float @v_mad_mix_f32_negf16lo_add_f16lo(half %src0, half %src1) { ; SDAG-GFX1100-LABEL: v_mad_mix_f32_negf16lo_add_f16lo: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1] +; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel_hi:[1,1,1] ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX900-LABEL: v_mad_mix_f32_negf16lo_add_f16lo: @@ -2960,7 +2960,7 @@ define float @v_mad_mix_f32_negf16lo_add_f16lo(half %src0, half %src1) { ; SDAG-GFX906-LABEL: v_mad_mix_f32_negf16lo_add_f16lo: ; SDAG-GFX906: ; %bb.0: ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel_hi:[1,1,1] ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_add_f16lo: @@ -3100,7 +3100,7 @@ define float @v_mad_mix_f32_negabsf16lo_add_f16lo(half %src0, half %src1) { ; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_add_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1] +; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_mad_mix_f32_negabsf16lo_add_f16lo: @@ -3114,7 +3114,7 @@ define float @v_mad_mix_f32_negabsf16lo_add_f16lo(half %src0, half %src1) { ; GFX906-LABEL: v_mad_mix_f32_negabsf16lo_add_f16lo: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1] +; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_add_f16lo: @@ -3205,7 +3205,7 @@ define float @v_mad_mix_f32_f16lo_add_negf32(half %src0, float %src1) { ; GFX1100-LABEL: v_mad_mix_f32_f16lo_add_negf32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel_hi:[1,1,0] +; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[0,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_mad_mix_f32_f16lo_add_negf32: @@ -3218,7 +3218,7 @@ define float @v_mad_mix_f32_f16lo_add_negf32(half %src0, float %src1) { ; GFX906-LABEL: v_mad_mix_f32_f16lo_add_negf32: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel_hi:[1,1,0] +; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[0,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_add_negf32: @@ -3297,7 +3297,7 @@ define float @v_mad_mix_f32_f16lo_add_negabsf32(half %src0, float %src1) { ; GFX1100-LABEL: v_mad_mix_f32_f16lo_add_negabsf32: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_fma_mix_f32 v0, v0, 1.0, -|v1| op_sel_hi:[1,1,0] +; GFX1100-NEXT: v_fma_mix_f32 v0, |v1|, -1.0, v0 op_sel_hi:[0,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_mad_mix_f32_f16lo_add_negabsf32: @@ -3310,7 +3310,7 @@ define float @v_mad_mix_f32_f16lo_add_negabsf32(half %src0, float %src1) { ; GFX906-LABEL: v_mad_mix_f32_f16lo_add_negabsf32: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v0, v0, 1.0, -|v1| op_sel_hi:[1,1,0] +; GFX906-NEXT: v_fma_mix_f32 v0, |v1|, -1.0, v0 op_sel_hi:[0,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_add_negabsf32: @@ -3491,7 +3491,7 @@ define float @v_mad_mix_f32_negprecvtf16lo_add_f16lo(i32 %src0.arg, half %src1) ; SDAG-GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_add_f16lo: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1] +; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel_hi:[1,1,1] ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX900-LABEL: v_mad_mix_f32_negprecvtf16lo_add_f16lo: @@ -3505,7 +3505,7 @@ define float @v_mad_mix_f32_negprecvtf16lo_add_f16lo(i32 %src0.arg, half %src1) ; SDAG-GFX906-LABEL: v_mad_mix_f32_negprecvtf16lo_add_f16lo: ; SDAG-GFX906: ; %bb.0: ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel_hi:[1,1,1] ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_add_f16lo: @@ -3641,7 +3641,7 @@ define float @v_mad_mix_f32_negabsprecvtf16lo_add_f16lo(i32 %src0.arg, half %src ; SDAG-GFX1100-LABEL: v_mad_mix_f32_negabsprecvtf16lo_add_f16lo: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1] +; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1] ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX900-LABEL: v_mad_mix_f32_negabsprecvtf16lo_add_f16lo: @@ -3655,7 +3655,7 @@ define float @v_mad_mix_f32_negabsprecvtf16lo_add_f16lo(i32 %src0.arg, half %src ; SDAG-GFX906-LABEL: v_mad_mix_f32_negabsprecvtf16lo_add_f16lo: ; SDAG-GFX906: ; %bb.0: ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel_hi:[1,1,1] ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negabsprecvtf16lo_add_f16lo: @@ -3880,7 +3880,7 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_add_f16lo(i32 %src0.arg, half % ; SDAG-GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_add_f16lo: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel:[0,0,1] op_sel_hi:[1,1,1] +; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX900-LABEL: v_mad_mix_f32_preextractfneg_f16hi_add_f16lo: @@ -3894,7 +3894,7 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_add_f16lo(i32 %src0.arg, half % ; SDAG-GFX906-LABEL: v_mad_mix_f32_preextractfneg_f16hi_add_f16lo: ; SDAG-GFX906: ; %bb.0: ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -v0 op_sel:[0,0,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_add_f16lo: @@ -4074,7 +4074,7 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo(i32 %src0.arg, ha ; SDAG-GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo: ; SDAG-GFX1100: ; %bb.0: ; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel:[0,0,1] op_sel_hi:[1,1,1] +; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX900-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo: @@ -4088,7 +4088,7 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo(i32 %src0.arg, ha ; SDAG-GFX906-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo: ; SDAG-GFX906: ; %bb.0: ; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v1, 1.0, -|v0| op_sel:[0,0,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, |v0|, -1.0, v1 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_add_f16lo: @@ -5658,7 +5658,7 @@ define float @v_mad_mix_f32_f16lo_sub_f16lo(half %src0, half %src1) { ; GFX1100-LABEL: v_mad_mix_f32_f16lo_sub_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel_hi:[1,1,1] +; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_mad_mix_f32_f16lo_sub_f16lo: @@ -5672,7 +5672,7 @@ define float @v_mad_mix_f32_f16lo_sub_f16lo(half %src0, half %src1) { ; GFX906-LABEL: v_mad_mix_f32_f16lo_sub_f16lo: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel_hi:[1,1,1] +; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_sub_f16lo: @@ -5708,7 +5708,7 @@ define float @v_mad_mix_f32_absf16lo_sub_f16lo(half %src0, half %src1) { ; GFX1100-LABEL: v_mad_mix_f32_absf16lo_sub_f16lo: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, 1.0, -v1 op_sel_hi:[1,1,1] +; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, |v0| op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_mad_mix_f32_absf16lo_sub_f16lo: @@ -5722,7 +5722,7 @@ define float @v_mad_mix_f32_absf16lo_sub_f16lo(half %src0, half %src1) { ; GFX906-LABEL: v_mad_mix_f32_absf16lo_sub_f16lo: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, 1.0, -v1 op_sel_hi:[1,1,1] +; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, |v0| op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_sub_f16lo: @@ -5767,7 +5767,7 @@ define float @v_mad_mix_f32_f16hi_fsub_f16hi(i32 %src0, i32 %src1) { ; GFX1100-LABEL: v_mad_mix_f32_f16hi_fsub_f16hi: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_mad_mix_f32_f16hi_fsub_f16hi: @@ -5781,7 +5781,7 @@ define float @v_mad_mix_f32_f16hi_fsub_f16hi(i32 %src0, i32 %src1) { ; GFX906-LABEL: v_mad_mix_f32_f16hi_fsub_f16hi: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v0, v0, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, v0 op_sel:[1,0,1] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_fsub_f16hi: @@ -5825,7 +5825,7 @@ define float @v_mad_mix_f32_absf16hi_fsub_f16hi(i32 %src0, i32 %src1) { ; GFX1100-LABEL: v_mad_mix_f32_absf16hi_fsub_f16hi: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX1100-NEXT: v_fma_mix_f32 v0, v1, -1.0, |v0| op_sel:[1,0,1] op_sel_hi:[1,1,1] ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_mad_mix_f32_absf16hi_fsub_f16hi: @@ -5839,7 +5839,7 @@ define float @v_mad_mix_f32_absf16hi_fsub_f16hi(i32 %src0, i32 %src1) { ; GFX906-LABEL: v_mad_mix_f32_absf16hi_fsub_f16hi: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, 1.0, -v1 op_sel:[1,0,1] op_sel_hi:[1,1,1] +; GFX906-NEXT: v_fma_mix_f32 v0, v1, -1.0, |v0| op_sel:[1,0,1] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX9GEN-LABEL: v_mad_mix_f32_absf16hi_fsub_f16hi: