[X86] Remove UnsafeFPMath uses (#151667)

Remove `UnsafeFPMath` uses in the X86 backend. This global flag blocks some bugfixes on the clang side, and the ultimate goal is to remove the `resetTargetOptions` method in `TargetMachine`; see the FIXME in `resetTargetOptions`.

See also:
https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast
https://discourse.llvm.org/t/allowfpopfusion-vs-sdnodeflags-hasallowcontract
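As a minimal sketch of the pattern this change affects (the function name @mul_addsub_sketch is hypothetical; it mirrors the updated tests below): fusion of MUL + ADDSUB into FMADDSUB is now gated by the per-instruction `contract` fast-math flag rather than by the removed "unsafe-fp-math"="true" function attribute or the global UnsafeFPMath option.

    define <2 x double> @mul_addsub_sketch(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
    entry:
      ; The contract flag on each op is what now permits MUL + ADDSUB -> FMADDSUB fusion.
      %AB = fmul contract <2 x double> %A, %B
      %Sub = fsub contract <2 x double> %AB, %C
      %Add = fadd contract <2 x double> %AB, %C
      %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, <2 x i32> <i32 0, i32 3>
      ret <2 x double> %Addsub
    }

Without the `contract` flags, the same IR keeps the separate multiply and addsub instructions.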
paperchalice 2025-08-05 08:24:52 +08:00 committed by GitHub
parent 862fb42b06
commit 03e902cc68
4 changed files with 180 additions and 173 deletions

View File

@ -8279,8 +8279,8 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1,
unsigned &NumExtracts,
bool &IsSubAdd) {
unsigned &NumExtracts, bool &IsSubAdd,
bool &HasAllowContract) {
using namespace SDPatternMatch;
MVT VT = BV->getSimpleValueType(0);
@ -8292,6 +8292,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
SDValue InVec1 = DAG.getUNDEF(VT);
NumExtracts = 0;
HasAllowContract = NumElts != 0;
// Odd-numbered elements in the input build vector are obtained from
// adding/subtracting two integer/float elements.
@ -8350,6 +8351,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
// Increment the number of extractions done.
++NumExtracts;
HasAllowContract &= Op->getFlags().hasAllowContract();
}
// Ensure we have found an opcode for both parities and that they are
@ -8393,9 +8395,10 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
SelectionDAG &DAG,
SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
unsigned ExpectedUses) {
SelectionDAG &DAG, SDValue &Opnd0,
SDValue &Opnd1, SDValue &Opnd2,
unsigned ExpectedUses,
bool AllowSubAddOrAddSubContract) {
if (Opnd0.getOpcode() != ISD::FMUL ||
!Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA())
return false;
@ -8406,7 +8409,8 @@ static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
// or MUL + ADDSUB to FMADDSUB.
const TargetOptions &Options = DAG.getTarget().Options;
bool AllowFusion =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
Options.AllowFPOpFusion == FPOpFusion::Fast ||
(AllowSubAddOrAddSubContract && Opnd0->getFlags().hasAllowContract());
if (!AllowFusion)
return false;
@ -8427,15 +8431,17 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
SDValue Opnd0, Opnd1;
unsigned NumExtracts;
bool IsSubAdd;
if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
IsSubAdd))
bool HasAllowContract;
if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts, IsSubAdd,
HasAllowContract))
return SDValue();
MVT VT = BV->getSimpleValueType(0);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) {
if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts,
HasAllowContract)) {
unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
}
@ -43180,7 +43186,7 @@ static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) {
/// the fact that they're unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
bool &IsSubAdd) {
bool &IsSubAdd, bool &HasAllowContract) {
EVT VT = N->getValueType(0);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@ -43231,6 +43237,8 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
// It's a subadd if the vector in the even parity is an FADD.
IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
: V2->getOpcode() == ISD::FADD;
HasAllowContract =
V1->getFlags().hasAllowContract() && V2->getFlags().hasAllowContract();
Opnd0 = LHS;
Opnd1 = RHS;
@ -43288,14 +43296,17 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const SDLoc &DL,
SDValue Opnd0, Opnd1;
bool IsSubAdd;
if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd))
bool HasAllowContract;
if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd,
HasAllowContract))
return SDValue();
MVT VT = N->getSimpleValueType(0);
// Try to generate X86ISD::FMADDSUB node here.
SDValue Opnd2;
if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) {
if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2,
HasAllowContract)) {
unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
}

View File

@ -4,7 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=F16C
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
define <2 x half> @foo(<2 x half> %0) "unsafe-fp-math"="true" nounwind {
define <2 x half> @foo(<2 x half> %0) nounwind {
; AVX2-LABEL: foo:
; AVX2: # %bb.0:
; AVX2-NEXT: subq $40, %rsp

View File

@ -6,7 +6,7 @@
; This test checks the fusing of MUL + ADDSUB to FMADDSUB.
define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; NOFMA-LABEL: mul_addsub_pd128:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@ -23,14 +23,14 @@ define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x do
; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
entry:
%AB = fmul <2 x double> %A, %B
%Sub = fsub <2 x double> %AB, %C
%Add = fadd <2 x double> %AB, %C
%AB = fmul contract <2 x double> %A, %B
%Sub = fsub contract <2 x double> %AB, %C
%Add = fadd contract <2 x double> %AB, %C
%Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add, <2 x i32> <i32 0, i32 3>
ret <2 x double> %Addsub
}
define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; NOFMA-LABEL: mul_addsub_ps128:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@ -47,14 +47,14 @@ define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float>
; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
entry:
%AB = fmul <4 x float> %A, %B
%Sub = fsub <4 x float> %AB, %C
%Add = fadd <4 x float> %AB, %C
%AB = fmul contract <4 x float> %A, %B
%Sub = fsub contract <4 x float> %AB, %C
%Add = fadd contract <4 x float> %AB, %C
%Addsub = shufflevector <4 x float> %Sub, <4 x float> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x float> %Addsub
}
define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) {
; NOFMA-LABEL: mul_addsub_pd256:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
@ -71,14 +71,14 @@ define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x dou
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
entry:
%AB = fmul <4 x double> %A, %B
%Sub = fsub <4 x double> %AB, %C
%Add = fadd <4 x double> %AB, %C
%AB = fmul contract <4 x double> %A, %B
%Sub = fsub contract <4 x double> %AB, %C
%Add = fadd contract <4 x double> %AB, %C
%Addsub = shufflevector <4 x double> %Sub, <4 x double> %Add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x double> %Addsub
}
define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) {
; NOFMA-LABEL: mul_addsub_ps256:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
@ -95,14 +95,14 @@ define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float>
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
entry:
%AB = fmul <8 x float> %A, %B
%Sub = fsub <8 x float> %AB, %C
%Add = fadd <8 x float> %AB, %C
%AB = fmul contract <8 x float> %A, %B
%Sub = fsub contract <8 x float> %AB, %C
%Add = fadd contract <8 x float> %AB, %C
%Addsub = shufflevector <8 x float> %Sub, <8 x float> %Add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x float> %Addsub
}
define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) {
; NOFMA-LABEL: mul_addsub_pd512:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
@ -128,14 +128,14 @@ define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x dou
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
entry:
%AB = fmul <8 x double> %A, %B
%Sub = fsub <8 x double> %AB, %C
%Add = fadd <8 x double> %AB, %C
%AB = fmul contract <8 x double> %A, %B
%Sub = fsub contract <8 x double> %AB, %C
%Add = fadd contract <8 x double> %AB, %C
%Addsub = shufflevector <8 x double> %Sub, <8 x double> %Add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x double> %Addsub
}
define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) {
; NOFMA-LABEL: mul_addsub_ps512:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
@ -161,14 +161,14 @@ define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x fl
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
entry:
%AB = fmul <16 x float> %A, %B
%Sub = fsub <16 x float> %AB, %C
%Add = fadd <16 x float> %AB, %C
%AB = fmul contract <16 x float> %A, %B
%Sub = fsub contract <16 x float> %AB, %C
%Add = fadd contract <16 x float> %AB, %C
%Addsub = shufflevector <16 x float> %Sub, <16 x float> %Add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
ret <16 x float> %Addsub
}
define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 {
define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_ps128:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@ -185,19 +185,19 @@ define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D,
; FMA4-NEXT: vfmaddsubps {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x float> %C, %D
%A = fmul contract <4 x float> %C, %D
%A0 = extractelement <4 x float> %A, i32 0
%B0 = extractelement <4 x float> %B, i32 0
%sub0 = fsub float %A0, %B0
%sub0 = fsub contract float %A0, %B0
%A2 = extractelement <4 x float> %A, i32 2
%B2 = extractelement <4 x float> %B, i32 2
%sub2 = fsub float %A2, %B2
%sub2 = fsub contract float %A2, %B2
%A1 = extractelement <4 x float> %A, i32 1
%B1 = extractelement <4 x float> %B, i32 1
%add1 = fadd float %A1, %B1
%add1 = fadd contract float %A1, %B1
%A3 = extractelement <4 x float> %A, i32 3
%B3 = extractelement <4 x float> %B, i32 3
%add3 = fadd float %A3, %B3
%add3 = fadd contract float %A3, %B3
%vecinsert1 = insertelement <4 x float> undef, float %sub0, i32 0
%vecinsert2 = insertelement <4 x float> %vecinsert1, float %add1, i32 1
%vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub2, i32 2
@ -205,7 +205,7 @@ bb:
ret <4 x float> %vecinsert4
}
define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 {
define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_pd128:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@ -222,19 +222,19 @@ define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double>
; FMA4-NEXT: vfmaddsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) +/- xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <2 x double> %C, %D
%A = fmul contract <2 x double> %C, %D
%A0 = extractelement <2 x double> %A, i32 0
%B0 = extractelement <2 x double> %B, i32 0
%sub0 = fsub double %A0, %B0
%sub0 = fsub contract double %A0, %B0
%A1 = extractelement <2 x double> %A, i32 1
%B1 = extractelement <2 x double> %B, i32 1
%add1 = fadd double %A1, %B1
%add1 = fadd contract double %A1, %B1
%vecinsert1 = insertelement <2 x double> undef, double %sub0, i32 0
%vecinsert2 = insertelement <2 x double> %vecinsert1, double %add1, i32 1
ret <2 x double> %vecinsert2
}
define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 {
define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_ps256:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
@ -251,31 +251,31 @@ define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D,
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <8 x float> %C, %D
%A = fmul contract <8 x float> %C, %D
%A0 = extractelement <8 x float> %A, i32 0
%B0 = extractelement <8 x float> %B, i32 0
%sub0 = fsub float %A0, %B0
%sub0 = fsub contract float %A0, %B0
%A2 = extractelement <8 x float> %A, i32 2
%B2 = extractelement <8 x float> %B, i32 2
%sub2 = fsub float %A2, %B2
%sub2 = fsub contract float %A2, %B2
%A4 = extractelement <8 x float> %A, i32 4
%B4 = extractelement <8 x float> %B, i32 4
%sub4 = fsub float %A4, %B4
%sub4 = fsub contract float %A4, %B4
%A6 = extractelement <8 x float> %A, i32 6
%B6 = extractelement <8 x float> %B, i32 6
%sub6 = fsub float %A6, %B6
%sub6 = fsub contract float %A6, %B6
%A1 = extractelement <8 x float> %A, i32 1
%B1 = extractelement <8 x float> %B, i32 1
%add1 = fadd float %A1, %B1
%add1 = fadd contract float %A1, %B1
%A3 = extractelement <8 x float> %A, i32 3
%B3 = extractelement <8 x float> %B, i32 3
%add3 = fadd float %A3, %B3
%add3 = fadd contract float %A3, %B3
%A5 = extractelement <8 x float> %A, i32 5
%B5 = extractelement <8 x float> %B, i32 5
%add5 = fadd float %A5, %B5
%add5 = fadd contract float %A5, %B5
%A7 = extractelement <8 x float> %A, i32 7
%B7 = extractelement <8 x float> %B, i32 7
%add7 = fadd float %A7, %B7
%add7 = fadd contract float %A7, %B7
%vecinsert1 = insertelement <8 x float> undef, float %sub0, i32 0
%vecinsert2 = insertelement <8 x float> %vecinsert1, float %add1, i32 1
%vecinsert3 = insertelement <8 x float> %vecinsert2, float %sub2, i32 2
@ -287,7 +287,7 @@ bb:
ret <8 x float> %vecinsert8
}
define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 {
define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_pd256:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
@ -304,19 +304,19 @@ define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double>
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) +/- ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x double> %C, %D
%A = fmul contract <4 x double> %C, %D
%A0 = extractelement <4 x double> %A, i32 0
%B0 = extractelement <4 x double> %B, i32 0
%sub0 = fsub double %A0, %B0
%sub0 = fsub contract double %A0, %B0
%A2 = extractelement <4 x double> %A, i32 2
%B2 = extractelement <4 x double> %B, i32 2
%sub2 = fsub double %A2, %B2
%sub2 = fsub contract double %A2, %B2
%A1 = extractelement <4 x double> %A, i32 1
%B1 = extractelement <4 x double> %B, i32 1
%add1 = fadd double %A1, %B1
%add1 = fadd contract double %A1, %B1
%A3 = extractelement <4 x double> %A, i32 3
%B3 = extractelement <4 x double> %B, i32 3
%add3 = fadd double %A3, %B3
%add3 = fadd contract double %A3, %B3
%vecinsert1 = insertelement <4 x double> undef, double %sub0, i32 0
%vecinsert2 = insertelement <4 x double> %vecinsert1, double %add1, i32 1
%vecinsert3 = insertelement <4 x double> %vecinsert2, double %sub2, i32 2
@ -324,7 +324,7 @@ bb:
ret <4 x double> %vecinsert4
}
define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_ps512:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
@ -350,55 +350,55 @@ define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float>
; FMA4-NEXT: vfmaddsubps {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <16 x float> %C, %D
%A = fmul contract <16 x float> %C, %D
%A0 = extractelement <16 x float> %A, i32 0
%B0 = extractelement <16 x float> %B, i32 0
%sub0 = fsub float %A0, %B0
%sub0 = fsub contract float %A0, %B0
%A2 = extractelement <16 x float> %A, i32 2
%B2 = extractelement <16 x float> %B, i32 2
%sub2 = fsub float %A2, %B2
%sub2 = fsub contract float %A2, %B2
%A4 = extractelement <16 x float> %A, i32 4
%B4 = extractelement <16 x float> %B, i32 4
%sub4 = fsub float %A4, %B4
%sub4 = fsub contract float %A4, %B4
%A6 = extractelement <16 x float> %A, i32 6
%B6 = extractelement <16 x float> %B, i32 6
%sub6 = fsub float %A6, %B6
%sub6 = fsub contract float %A6, %B6
%A8 = extractelement <16 x float> %A, i32 8
%B8 = extractelement <16 x float> %B, i32 8
%sub8 = fsub float %A8, %B8
%sub8 = fsub contract float %A8, %B8
%A10 = extractelement <16 x float> %A, i32 10
%B10 = extractelement <16 x float> %B, i32 10
%sub10 = fsub float %A10, %B10
%sub10 = fsub contract float %A10, %B10
%A12 = extractelement <16 x float> %A, i32 12
%B12 = extractelement <16 x float> %B, i32 12
%sub12 = fsub float %A12, %B12
%sub12 = fsub contract float %A12, %B12
%A14 = extractelement <16 x float> %A, i32 14
%B14 = extractelement <16 x float> %B, i32 14
%sub14 = fsub float %A14, %B14
%sub14 = fsub contract float %A14, %B14
%A1 = extractelement <16 x float> %A, i32 1
%B1 = extractelement <16 x float> %B, i32 1
%add1 = fadd float %A1, %B1
%add1 = fadd contract float %A1, %B1
%A3 = extractelement <16 x float> %A, i32 3
%B3 = extractelement <16 x float> %B, i32 3
%add3 = fadd float %A3, %B3
%add3 = fadd contract float %A3, %B3
%A5 = extractelement <16 x float> %A, i32 5
%B5 = extractelement <16 x float> %B, i32 5
%add5 = fadd float %A5, %B5
%add5 = fadd contract float %A5, %B5
%A7 = extractelement <16 x float> %A, i32 7
%B7 = extractelement <16 x float> %B, i32 7
%add7 = fadd float %A7, %B7
%add7 = fadd contract float %A7, %B7
%A9 = extractelement <16 x float> %A, i32 9
%B9 = extractelement <16 x float> %B, i32 9
%add9 = fadd float %A9, %B9
%add9 = fadd contract float %A9, %B9
%A11 = extractelement <16 x float> %A, i32 11
%B11 = extractelement <16 x float> %B, i32 11
%add11 = fadd float %A11, %B11
%add11 = fadd contract float %A11, %B11
%A13 = extractelement <16 x float> %A, i32 13
%B13 = extractelement <16 x float> %B, i32 13
%add13 = fadd float %A13, %B13
%add13 = fadd contract float %A13, %B13
%A15 = extractelement <16 x float> %A, i32 15
%B15 = extractelement <16 x float> %B, i32 15
%add15 = fadd float %A15, %B15
%add15 = fadd contract float %A15, %B15
%vecinsert1 = insertelement <16 x float> undef, float %sub0, i32 0
%vecinsert2 = insertelement <16 x float> %vecinsert1, float %add1, i32 1
%vecinsert3 = insertelement <16 x float> %vecinsert2, float %sub2, i32 2
@ -418,7 +418,7 @@ bb:
ret <16 x float> %vecinsert16
}
define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 {
define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) {
; NOFMA-LABEL: buildvector_mul_addsub_pd512:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
@ -444,28 +444,28 @@ define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double>
; FMA4-NEXT: vfmaddsubpd {{.*#+}} ymm1 = (ymm1 * ymm3) +/- ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <8 x double> %C, %D
%A = fmul contract <8 x double> %C, %D
%A0 = extractelement <8 x double> %A, i32 0
%B0 = extractelement <8 x double> %B, i32 0
%sub0 = fsub double %A0, %B0
%sub0 = fsub contract double %A0, %B0
%A2 = extractelement <8 x double> %A, i32 2
%B2 = extractelement <8 x double> %B, i32 2
%sub2 = fsub double %A2, %B2
%sub2 = fsub contract double %A2, %B2
%A4 = extractelement <8 x double> %A, i32 4
%B4 = extractelement <8 x double> %B, i32 4
%sub4 = fsub double %A4, %B4
%sub4 = fsub contract double %A4, %B4
%A6 = extractelement <8 x double> %A, i32 6
%B6 = extractelement <8 x double> %B, i32 6
%sub6 = fsub double %A6, %B6
%sub6 = fsub contract double %A6, %B6
%A1 = extractelement <8 x double> %A, i32 1
%B1 = extractelement <8 x double> %B, i32 1
%add1 = fadd double %A1, %B1
%add1 = fadd contract double %A1, %B1
%A3 = extractelement <8 x double> %A, i32 3
%B3 = extractelement <8 x double> %B, i32 3
%add3 = fadd double %A3, %B3
%add3 = fadd contract double %A3, %B3
%A7 = extractelement <8 x double> %A, i32 7
%B7 = extractelement <8 x double> %B, i32 7
%add7 = fadd double %A7, %B7
%add7 = fadd contract double %A7, %B7
%vecinsert1 = insertelement <8 x double> undef, double %sub0, i32 0
%vecinsert2 = insertelement <8 x double> %vecinsert1, double %add1, i32 1
%vecinsert3 = insertelement <8 x double> %vecinsert2, double %sub2, i32 2
@ -477,7 +477,7 @@ bb:
ret <8 x double> %vecinsert8
}
define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 {
define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_ps128:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@ -506,19 +506,19 @@ define <4 x float> @buildvector_mul_subadd_ps128(<4 x float> %C, <4 x float> %D,
; FMA4-NEXT: vfmsubaddps {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x float> %C, %D
%A = fmul contract <4 x float> %C, %D
%A0 = extractelement <4 x float> %A, i32 0
%B0 = extractelement <4 x float> %B, i32 0
%sub0 = fadd float %A0, %B0
%sub0 = fadd contract float %A0, %B0
%A2 = extractelement <4 x float> %A, i32 2
%B2 = extractelement <4 x float> %B, i32 2
%sub2 = fadd float %A2, %B2
%sub2 = fadd contract float %A2, %B2
%A1 = extractelement <4 x float> %A, i32 1
%B1 = extractelement <4 x float> %B, i32 1
%add1 = fsub float %A1, %B1
%add1 = fsub contract float %A1, %B1
%A3 = extractelement <4 x float> %A, i32 3
%B3 = extractelement <4 x float> %B, i32 3
%add3 = fsub float %A3, %B3
%add3 = fsub contract float %A3, %B3
%vecinsert1 = insertelement <4 x float> undef, float %sub0, i32 0
%vecinsert2 = insertelement <4 x float> %vecinsert1, float %add1, i32 1
%vecinsert3 = insertelement <4 x float> %vecinsert2, float %sub2, i32 2
@ -526,7 +526,7 @@ bb:
ret <4 x float> %vecinsert4
}
define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 {
define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_pd128:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@ -547,19 +547,19 @@ define <2 x double> @buildvector_mul_subadd_pd128(<2 x double> %C, <2 x double>
; FMA4-NEXT: vfmsubaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
bb:
%A = fmul <2 x double> %C, %D
%A = fmul contract <2 x double> %C, %D
%A0 = extractelement <2 x double> %A, i32 0
%B0 = extractelement <2 x double> %B, i32 0
%sub0 = fadd double %A0, %B0
%sub0 = fadd contract double %A0, %B0
%A1 = extractelement <2 x double> %A, i32 1
%B1 = extractelement <2 x double> %B, i32 1
%add1 = fsub double %A1, %B1
%add1 = fsub contract double %A1, %B1
%vecinsert1 = insertelement <2 x double> undef, double %sub0, i32 0
%vecinsert2 = insertelement <2 x double> %vecinsert1, double %add1, i32 1
ret <2 x double> %vecinsert2
}
define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 {
define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_ps256:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
@ -604,31 +604,31 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <8 x float> %C, %D
%A = fmul contract <8 x float> %C, %D
%A0 = extractelement <8 x float> %A, i32 0
%B0 = extractelement <8 x float> %B, i32 0
%sub0 = fadd float %A0, %B0
%sub0 = fadd contract float %A0, %B0
%A2 = extractelement <8 x float> %A, i32 2
%B2 = extractelement <8 x float> %B, i32 2
%sub2 = fadd float %A2, %B2
%sub2 = fadd contract float %A2, %B2
%A4 = extractelement <8 x float> %A, i32 4
%B4 = extractelement <8 x float> %B, i32 4
%sub4 = fadd float %A4, %B4
%sub4 = fadd contract float %A4, %B4
%A6 = extractelement <8 x float> %A, i32 6
%B6 = extractelement <8 x float> %B, i32 6
%sub6 = fadd float %A6, %B6
%sub6 = fadd contract float %A6, %B6
%A1 = extractelement <8 x float> %A, i32 1
%B1 = extractelement <8 x float> %B, i32 1
%add1 = fsub float %A1, %B1
%add1 = fsub contract float %A1, %B1
%A3 = extractelement <8 x float> %A, i32 3
%B3 = extractelement <8 x float> %B, i32 3
%add3 = fsub float %A3, %B3
%add3 = fsub contract float %A3, %B3
%A5 = extractelement <8 x float> %A, i32 5
%B5 = extractelement <8 x float> %B, i32 5
%add5 = fsub float %A5, %B5
%add5 = fsub contract float %A5, %B5
%A7 = extractelement <8 x float> %A, i32 7
%B7 = extractelement <8 x float> %B, i32 7
%add7 = fsub float %A7, %B7
%add7 = fsub contract float %A7, %B7
%vecinsert1 = insertelement <8 x float> undef, float %sub0, i32 0
%vecinsert2 = insertelement <8 x float> %vecinsert1, float %add1, i32 1
%vecinsert3 = insertelement <8 x float> %vecinsert2, float %sub2, i32 2
@ -640,7 +640,7 @@ bb:
ret <8 x float> %vecinsert8
}
define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 {
define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_pd256:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
@ -669,19 +669,19 @@ define <4 x double> @buildvector_mul_subadd_pd256(<4 x double> %C, <4 x double>
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
bb:
%A = fmul <4 x double> %C, %D
%A = fmul contract <4 x double> %C, %D
%A0 = extractelement <4 x double> %A, i32 0
%B0 = extractelement <4 x double> %B, i32 0
%sub0 = fadd double %A0, %B0
%sub0 = fadd contract double %A0, %B0
%A2 = extractelement <4 x double> %A, i32 2
%B2 = extractelement <4 x double> %B, i32 2
%sub2 = fadd double %A2, %B2
%sub2 = fadd contract double %A2, %B2
%A1 = extractelement <4 x double> %A, i32 1
%B1 = extractelement <4 x double> %B, i32 1
%add1 = fsub double %A1, %B1
%add1 = fsub contract double %A1, %B1
%A3 = extractelement <4 x double> %A, i32 3
%B3 = extractelement <4 x double> %B, i32 3
%add3 = fsub double %A3, %B3
%add3 = fsub contract double %A3, %B3
%vecinsert1 = insertelement <4 x double> undef, double %sub0, i32 0
%vecinsert2 = insertelement <4 x double> %vecinsert1, double %add1, i32 1
%vecinsert3 = insertelement <4 x double> %vecinsert2, double %sub2, i32 2
@ -689,7 +689,7 @@ bb:
ret <4 x double> %vecinsert4
}
define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_ps512:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
@ -765,55 +765,55 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <16 x float> %C, %D
%A = fmul contract <16 x float> %C, %D
%A0 = extractelement <16 x float> %A, i32 0
%B0 = extractelement <16 x float> %B, i32 0
%sub0 = fadd float %A0, %B0
%sub0 = fadd contract float %A0, %B0
%A2 = extractelement <16 x float> %A, i32 2
%B2 = extractelement <16 x float> %B, i32 2
%sub2 = fadd float %A2, %B2
%sub2 = fadd contract float %A2, %B2
%A4 = extractelement <16 x float> %A, i32 4
%B4 = extractelement <16 x float> %B, i32 4
%sub4 = fadd float %A4, %B4
%sub4 = fadd contract float %A4, %B4
%A6 = extractelement <16 x float> %A, i32 6
%B6 = extractelement <16 x float> %B, i32 6
%sub6 = fadd float %A6, %B6
%sub6 = fadd contract float %A6, %B6
%A8 = extractelement <16 x float> %A, i32 8
%B8 = extractelement <16 x float> %B, i32 8
%sub8 = fadd float %A8, %B8
%sub8 = fadd contract float %A8, %B8
%A10 = extractelement <16 x float> %A, i32 10
%B10 = extractelement <16 x float> %B, i32 10
%sub10 = fadd float %A10, %B10
%sub10 = fadd contract float %A10, %B10
%A12 = extractelement <16 x float> %A, i32 12
%B12 = extractelement <16 x float> %B, i32 12
%sub12 = fadd float %A12, %B12
%sub12 = fadd contract float %A12, %B12
%A14 = extractelement <16 x float> %A, i32 14
%B14 = extractelement <16 x float> %B, i32 14
%sub14 = fadd float %A14, %B14
%sub14 = fadd contract float %A14, %B14
%A1 = extractelement <16 x float> %A, i32 1
%B1 = extractelement <16 x float> %B, i32 1
%add1 = fsub float %A1, %B1
%add1 = fsub contract float %A1, %B1
%A3 = extractelement <16 x float> %A, i32 3
%B3 = extractelement <16 x float> %B, i32 3
%add3 = fsub float %A3, %B3
%add3 = fsub contract float %A3, %B3
%A5 = extractelement <16 x float> %A, i32 5
%B5 = extractelement <16 x float> %B, i32 5
%add5 = fsub float %A5, %B5
%add5 = fsub contract float %A5, %B5
%A7 = extractelement <16 x float> %A, i32 7
%B7 = extractelement <16 x float> %B, i32 7
%add7 = fsub float %A7, %B7
%add7 = fsub contract float %A7, %B7
%A9 = extractelement <16 x float> %A, i32 9
%B9 = extractelement <16 x float> %B, i32 9
%add9 = fsub float %A9, %B9
%add9 = fsub contract float %A9, %B9
%A11 = extractelement <16 x float> %A, i32 11
%B11 = extractelement <16 x float> %B, i32 11
%add11 = fsub float %A11, %B11
%add11 = fsub contract float %A11, %B11
%A13 = extractelement <16 x float> %A, i32 13
%B13 = extractelement <16 x float> %B, i32 13
%add13 = fsub float %A13, %B13
%add13 = fsub contract float %A13, %B13
%A15 = extractelement <16 x float> %A, i32 15
%B15 = extractelement <16 x float> %B, i32 15
%add15 = fsub float %A15, %B15
%add15 = fsub contract float %A15, %B15
%vecinsert1 = insertelement <16 x float> undef, float %sub0, i32 0
%vecinsert2 = insertelement <16 x float> %vecinsert1, float %add1, i32 1
%vecinsert3 = insertelement <16 x float> %vecinsert2, float %sub2, i32 2
@ -833,7 +833,7 @@ bb:
ret <16 x float> %vecinsert16
}
define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 {
define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) {
; NOFMA-LABEL: buildvector_mul_subadd_pd512:
; NOFMA: # %bb.0: # %bb
; NOFMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
@ -879,28 +879,28 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
bb:
%A = fmul <8 x double> %C, %D
%A = fmul contract <8 x double> %C, %D
%A0 = extractelement <8 x double> %A, i32 0
%B0 = extractelement <8 x double> %B, i32 0
%sub0 = fadd double %A0, %B0
%sub0 = fadd contract double %A0, %B0
%A2 = extractelement <8 x double> %A, i32 2
%B2 = extractelement <8 x double> %B, i32 2
%sub2 = fadd double %A2, %B2
%sub2 = fadd contract double %A2, %B2
%A4 = extractelement <8 x double> %A, i32 4
%B4 = extractelement <8 x double> %B, i32 4
%sub4 = fadd double %A4, %B4
%sub4 = fadd contract double %A4, %B4
%A6 = extractelement <8 x double> %A, i32 6
%B6 = extractelement <8 x double> %B, i32 6
%sub6 = fadd double %A6, %B6
%sub6 = fadd contract double %A6, %B6
%A1 = extractelement <8 x double> %A, i32 1
%B1 = extractelement <8 x double> %B, i32 1
%add1 = fsub double %A1, %B1
%add1 = fsub contract double %A1, %B1
%A3 = extractelement <8 x double> %A, i32 3
%B3 = extractelement <8 x double> %B, i32 3
%add3 = fsub double %A3, %B3
%add3 = fsub contract double %A3, %B3
%A7 = extractelement <8 x double> %A, i32 7
%B7 = extractelement <8 x double> %B, i32 7
%add7 = fsub double %A7, %B7
%add7 = fsub contract double %A7, %B7
%vecinsert1 = insertelement <8 x double> undef, double %sub0, i32 0
%vecinsert2 = insertelement <8 x double> %vecinsert1, double %add1, i32 1
%vecinsert3 = insertelement <8 x double> %vecinsert2, double %sub2, i32 2
@ -911,5 +911,3 @@ bb:
%vecinsert8 = insertelement <8 x double> %vecinsert7, double %add7, i32 7
ret <8 x double> %vecinsert8
}
attributes #0 = { nounwind "unsafe-fp-math"="true" }

View File

@ -6,7 +6,7 @@
; This test checks the fusing of MUL + SUB/ADD to FMSUBADD.
define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; NOFMA-LABEL: mul_subadd_pd128:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@ -25,14 +25,14 @@ define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x dou
; FMA4-NEXT: vfmsubaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
entry:
%AB = fmul <2 x double> %A, %B
%Sub = fsub <2 x double> %AB, %C
%Add = fadd <2 x double> %AB, %C
%AB = fmul contract <2 x double> %A, %B
%Sub = fsub contract <2 x double> %AB, %C
%Add = fadd contract <2 x double> %AB, %C
%subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
ret <2 x double> %subadd
}
define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
; NOFMA-LABEL: mul_subadd_ps128:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@ -51,14 +51,14 @@ define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float>
; FMA4-NEXT: vfmsubaddps {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
; FMA4-NEXT: retq
entry:
%AB = fmul <4 x float> %A, %B
%Sub = fsub <4 x float> %AB, %C
%Add = fadd <4 x float> %AB, %C
%AB = fmul contract <4 x float> %A, %B
%Sub = fsub contract <4 x float> %AB, %C
%Add = fadd contract <4 x float> %AB, %C
%subadd = shufflevector <4 x float> %Add, <4 x float> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x float> %subadd
}
define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) {
; NOFMA-LABEL: mul_subadd_pd256:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
@ -77,14 +77,14 @@ define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x dou
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
entry:
%AB = fmul <4 x double> %A, %B
%Sub = fsub <4 x double> %AB, %C
%Add = fadd <4 x double> %AB, %C
%AB = fmul contract <4 x double> %A, %B
%Sub = fsub contract <4 x double> %AB, %C
%Add = fadd contract <4 x double> %AB, %C
%subadd = shufflevector <4 x double> %Add, <4 x double> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x double> %subadd
}
define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) {
; NOFMA-LABEL: mul_subadd_ps256:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
@ -103,14 +103,14 @@ define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float>
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
; FMA4-NEXT: retq
entry:
%AB = fmul <8 x float> %A, %B
%Sub = fsub <8 x float> %AB, %C
%Add = fadd <8 x float> %AB, %C
%AB = fmul contract <8 x float> %A, %B
%Sub = fsub contract <8 x float> %AB, %C
%Add = fadd contract <8 x float> %AB, %C
%subadd = shufflevector <8 x float> %Add, <8 x float> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x float> %subadd
}
define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) {
; NOFMA-LABEL: mul_subadd_pd512:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0
@ -140,14 +140,14 @@ define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x dou
; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
entry:
%AB = fmul <8 x double> %A, %B
%Sub = fsub <8 x double> %AB, %C
%Add = fadd <8 x double> %AB, %C
%AB = fmul contract <8 x double> %A, %B
%Sub = fsub contract <8 x double> %AB, %C
%Add = fadd contract <8 x double> %AB, %C
%subadd = shufflevector <8 x double> %Add, <8 x double> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x double> %subadd
}
define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) {
; NOFMA-LABEL: mul_subadd_ps512:
; NOFMA: # %bb.0: # %entry
; NOFMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
@ -177,15 +177,15 @@ define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x fl
; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
; FMA4-NEXT: retq
entry:
%AB = fmul <16 x float> %A, %B
%Sub = fsub <16 x float> %AB, %C
%Add = fadd <16 x float> %AB, %C
%AB = fmul contract <16 x float> %A, %B
%Sub = fsub contract <16 x float> %AB, %C
%Add = fadd contract <16 x float> %AB, %C
%subadd = shufflevector <16 x float> %Add, <16 x float> %Sub, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
ret <16 x float> %subadd
}
; This should not be matched to fmsubadd because the mul is on the wrong side of the fsub.
define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
; CHECK-LABEL: mul_subadd_bad_commute:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmulpd %xmm1, %xmm0, %xmm0
@ -194,11 +194,9 @@ define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; CHECK-NEXT: retq
entry:
%AB = fmul <2 x double> %A, %B
%AB = fmul contract <2 x double> %A, %B
%Sub = fsub <2 x double> %C, %AB
%Add = fadd <2 x double> %AB, %C
%subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
ret <2 x double> %subadd
}
attributes #0 = { nounwind "unsafe-fp-math"="true" }