fix llvm.fma.f16 double rounding issue when there is no native support (#171904)
Fixes https://github.com/llvm/llvm-project/issues/98389. As the issue describes, promoting `llvm.fma.f16` to `llvm.fma.f32` does not work, because `f32` does not have enough precision to handle the repeated rounding. `f64` does have sufficient precision, so this PR explicitly promotes the 16-bit fma to a 64-bit fma. I could not find examples of a libcall being used for fma, but that is something that could be looked into separately to work around code size issues.
This commit is contained in:
parent
558760009c
commit
a587ccd87d
@ -3510,6 +3510,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
|
||||
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
|
||||
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
|
||||
SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
|
||||
SDNodeFlags Flags = N->getFlags();
|
||||
SDLoc dl(N);
|
||||
|
||||
// Promote to the larger FP type.
|
||||
@ -3518,9 +3519,28 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
|
||||
Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
|
||||
Op2 = DAG.getNode(PromotionOpcode, dl, NVT, Op2);
|
||||
|
||||
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2);
|
||||
SDValue Res;
|
||||
if (OVT == MVT::f16) {
|
||||
// If f16 fma is not natively supported, the value must be promoted to an
|
||||
// f64 (and not to f32!) to prevent double rounding issues.
|
||||
SDValue A64 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Op0, Flags);
|
||||
SDValue B64 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Op1, Flags);
|
||||
SDValue C64 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Op2, Flags);
|
||||
|
||||
// Convert back to FP16 as an integer.
|
||||
// Prefer a wide FMA node if available; otherwise expand to mul+add.
|
||||
SDValue WideRes;
|
||||
if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), MVT::f64)) {
|
||||
WideRes = DAG.getNode(ISD::FMA, dl, MVT::f64, A64, B64, C64, Flags);
|
||||
} else {
|
||||
SDValue Mul = DAG.getNode(ISD::FMUL, dl, MVT::f64, A64, B64, Flags);
|
||||
WideRes = DAG.getNode(ISD::FADD, dl, MVT::f64, Mul, C64, Flags);
|
||||
}
|
||||
|
||||
return DAG.getNode(GetPromotionOpcode(MVT::f64, OVT), dl, MVT::i16,
|
||||
WideRes);
|
||||
}
|
||||
|
||||
Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2, Flags);
|
||||
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
|
||||
}
|
||||
|
||||
|
||||
@ -1041,6 +1041,11 @@ void TargetLoweringBase::initActions() {
|
||||
}
|
||||
}
|
||||
|
||||
// If f16 fma is not natively supported, the value must be promoted to an f64
|
||||
// (and not to f32!) to prevent double rounding issues.
|
||||
AddPromotedToType(ISD::FMA, MVT::f16, MVT::f64);
|
||||
AddPromotedToType(ISD::STRICT_FMA, MVT::f16, MVT::f64);
|
||||
|
||||
// Set default actions for various operations.
|
||||
for (MVT VT : MVT::all_valuetypes()) {
|
||||
// Default all indexed load / store to expand.
|
||||
|
||||
@ -570,6 +570,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
||||
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, XLenVT, Custom);
|
||||
}
|
||||
|
||||
if (!Subtarget.hasStdExtD()) {
|
||||
// FIXME: handle f16 fma when f64 is not legal. Using an f32 fma
|
||||
// instruction runs into double rounding issues, so this is wrong.
|
||||
// Normally we'd use an f64 fma, but without the D extension the f64 type
|
||||
// is not legal. This should probably be a libcall.
|
||||
AddPromotedToType(ISD::FMA, MVT::f16, MVT::f32);
|
||||
AddPromotedToType(ISD::STRICT_FMA, MVT::f16, MVT::f32);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
|
||||
|
||||
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
|
||||
|
||||
@ -1378,11 +1378,11 @@ define half @test_log2(half %a) #0 {
|
||||
define half @test_fma(half %a, half %b, half %c) #0 {
|
||||
; CHECK-CVT-SD-LABEL: test_fma:
|
||||
; CHECK-CVT-SD: // %bb.0:
|
||||
; CHECK-CVT-SD-NEXT: fcvt s2, h2
|
||||
; CHECK-CVT-SD-NEXT: fcvt s1, h1
|
||||
; CHECK-CVT-SD-NEXT: fcvt s0, h0
|
||||
; CHECK-CVT-SD-NEXT: fmadd s0, s0, s1, s2
|
||||
; CHECK-CVT-SD-NEXT: fcvt h0, s0
|
||||
; CHECK-CVT-SD-NEXT: fcvt d2, h2
|
||||
; CHECK-CVT-SD-NEXT: fcvt d1, h1
|
||||
; CHECK-CVT-SD-NEXT: fcvt d0, h0
|
||||
; CHECK-CVT-SD-NEXT: fmadd d0, d0, d1, d2
|
||||
; CHECK-CVT-SD-NEXT: fcvt h0, d0
|
||||
; CHECK-CVT-SD-NEXT: ret
|
||||
;
|
||||
; CHECK-FP16-LABEL: test_fma:
|
||||
|
||||
@ -27,11 +27,11 @@ entry:
|
||||
define half @fma_f16(half %a, half %b, half %c) {
|
||||
; CHECK-SD-NOFP16-LABEL: fma_f16:
|
||||
; CHECK-SD-NOFP16: // %bb.0: // %entry
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d0, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d0, d0, d1, d2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, d0
|
||||
; CHECK-SD-NOFP16-NEXT: ret
|
||||
;
|
||||
; CHECK-SD-FP16-LABEL: fma_f16:
|
||||
@ -178,69 +178,69 @@ define <7 x half> @fma_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
|
||||
; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d6, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h0
|
||||
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d3, h3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d6, d16, d7, d6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d17, h19
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d4, d5, d4, d3
|
||||
; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h3, d6
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d6, d17, d16, d7
|
||||
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h19
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, d4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h6, d6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d17, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d18, h18
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d5, d16, d7, d5
|
||||
; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d19, h19
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h5, d5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d17, d19, d18, d17
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d4, d16, d7, d4
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s6, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d6, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h16, d17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d0, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, d4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d5, d7, d6, d5
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d0, d0, d1, d2
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, d5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, d0
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
|
||||
@ -301,34 +301,34 @@ define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
|
||||
; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d6, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h0
|
||||
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d3, h3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d6, d16, d7, d6
|
||||
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s3, s5, s4, s3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s4, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, s6
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s4, s7, s5, s4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d3, d5, d4, d3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d4, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, d6
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d4, d7, d5, d4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h3, d3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h16
|
||||
; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v3.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h3, s4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s1, s5, s1, s2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h3, d4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d1, d5, d1, d2
|
||||
; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v3.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h1, d1
|
||||
; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
|
||||
; CHECK-SD-NOFP16-NEXT: ret
|
||||
@ -364,69 +364,69 @@ define <8 x half> @fma_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
|
||||
; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d6, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h0
|
||||
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d3, h3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d6, d16, d7, d6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d17, h19
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d4, d5, d4, d3
|
||||
; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h3, d6
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d6, d17, d16, d7
|
||||
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h19
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, d4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h6, d6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d17, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d18, h18
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d5, d16, d7, d5
|
||||
; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d19, h19
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h5, d5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d17, d19, d18, d17
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d4, d16, d7, d4
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s6, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d6, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h16, d17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d0, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, d4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d5, d7, d6, d5
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d0, d0, d1, d2
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, d5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, d0
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
|
||||
@ -468,136 +468,136 @@ define <16 x half> @fma_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
|
||||
; CHECK-SD-NOFP16-NEXT: mov h6, v4.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s17, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s18, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s19, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d17, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d18, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d19, h0
|
||||
; CHECK-SD-NOFP16-NEXT: mov h20, v4.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h21, v2.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h23, v4.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h24, v2.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h25, v0.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d6, h6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d7, h7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h16
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d17, d19, d18, d17
|
||||
; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s27, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s18, h20
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s19, h21
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s21, h23
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s22, h24
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s23, h25
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s7, s16, s7, s6
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d27, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d18, h20
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d19, h21
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d20, h22
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d21, h23
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d22, h24
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d23, h25
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d7, d16, d7, d6
|
||||
; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[1]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s28, h3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s29, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s19, s20, s19, s18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s26, h26
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h6, d17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d28, h3
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d29, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d19, d20, d19, d18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d26, h26
|
||||
; CHECK-SD-NOFP16-NEXT: mov h16, v4.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s21, s23, s22, s21
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d21, d23, d22, d21
|
||||
; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h23, v1.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h20, s7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s24, h24
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s25, h25
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h20, d7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d24, h24
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d25, h25
|
||||
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h7, v4.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h19, d19
|
||||
; CHECK-SD-NOFP16-NEXT: mov h30, v2.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h21, s21
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d16, h16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h21, d21
|
||||
; CHECK-SD-NOFP16-NEXT: mov h31, v1.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s24, s26, s25, s24
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s25, s29, s28, s27
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d24, d26, d25, d24
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d25, d29, d28, d27
|
||||
; CHECK-SD-NOFP16-NEXT: mov v6.h[1], v20.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[2]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h26, v5.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h28, v1.h[3]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s29, h7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s30, h30
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d17, h17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d18, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d29, h7
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d30, h30
|
||||
; CHECK-SD-NOFP16-NEXT: mov v6.h[2], v19.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h24, s24
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h7, s25
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s19, h20
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s22, h23
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s16, s18, s17, s16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h24, d24
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h7, d25
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d19, h20
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d20, h22
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d22, h23
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d16, d18, d17, d16
|
||||
; CHECK-SD-NOFP16-NEXT: mov h23, v0.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s27, h28
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d25, h26
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d26, h27
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d27, h28
|
||||
; CHECK-SD-NOFP16-NEXT: mov h18, v4.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v6.h[3], v21.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v7.h[1], v24.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s19, s22, s20, s19
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d19, d22, d20, d19
|
||||
; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[4]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s23, h23
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d23, h23
|
||||
; CHECK-SD-NOFP16-NEXT: mov h28, v0.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h16, d16
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d18, h18
|
||||
; CHECK-SD-NOFP16-NEXT: mov h4, v4.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s21, h22
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s22, h31
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s17, s23, s30, s29
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s23, s27, s26, s25
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d20, h20
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d21, h22
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d22, h31
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d17, d23, d30, d29
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d23, d27, d26, d25
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h19, d19
|
||||
; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[5]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h27, v2.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h29, v1.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s20, s22, s21, s20
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d20, d22, d21, d20
|
||||
; CHECK-SD-NOFP16-NEXT: mov h21, v5.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[6]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v7.h[2], v19.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h19, s23
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s23, h24
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s24, h25
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s27, h28
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s28, h29
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h19, d23
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d23, h24
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d24, h25
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d25, h26
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d26, h27
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d27, h28
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d28, h29
|
||||
; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s22, h22
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d21, h21
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d22, h22
|
||||
; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[7]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v7.h[3], v19.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h19, s20
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h19, d20
|
||||
; CHECK-SD-NOFP16-NEXT: mov v6.h[4], v16.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s20, s25, s24, s23
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s18, s27, s26, s18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s21, s28, s22, s21
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d20, d25, d24, d23
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h16, d17
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d4, h4
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d18, d27, d26, d18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d2, h2
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d0, h0
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d21, d28, d22, d21
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d5, h5
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d3, h3
|
||||
; CHECK-SD-NOFP16-NEXT: mov v7.h[4], v19.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h17, s20
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt d1, h1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h17, d20
|
||||
; CHECK-SD-NOFP16-NEXT: mov v6.h[5], v16.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s2, s4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h2, s18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, s21
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd s1, s1, s3, s5
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d0, d0, d2, d4
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h2, d18
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h4, d21
|
||||
; CHECK-SD-NOFP16-NEXT: fmadd d1, d1, d3, d5
|
||||
; CHECK-SD-NOFP16-NEXT: mov v7.h[5], v17.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v6.h[6], v2.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h0, d0
|
||||
; CHECK-SD-NOFP16-NEXT: fcvt h1, d1
|
||||
; CHECK-SD-NOFP16-NEXT: mov v7.h[6], v4.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v6.h[7], v0.h[0]
|
||||
; CHECK-SD-NOFP16-NEXT: mov v7.h[7], v1.h[0]
|
||||
|
||||
@ -170,11 +170,11 @@ define half @frem_f16(half %x, half %y) #0 {
|
||||
define half @fma_f16(half %x, half %y, half %z) #0 {
|
||||
; CHECK-NOFP16-LABEL: fma_f16:
|
||||
; CHECK-NOFP16: // %bb.0:
|
||||
; CHECK-NOFP16-NEXT: fcvt s2, h2
|
||||
; CHECK-NOFP16-NEXT: fcvt s1, h1
|
||||
; CHECK-NOFP16-NEXT: fcvt s0, h0
|
||||
; CHECK-NOFP16-NEXT: fmadd s0, s0, s1, s2
|
||||
; CHECK-NOFP16-NEXT: fcvt h0, s0
|
||||
; CHECK-NOFP16-NEXT: fcvt d2, h2
|
||||
; CHECK-NOFP16-NEXT: fcvt d1, h1
|
||||
; CHECK-NOFP16-NEXT: fcvt d0, h0
|
||||
; CHECK-NOFP16-NEXT: fmadd d0, d0, d1, d2
|
||||
; CHECK-NOFP16-NEXT: fcvt h0, d0
|
||||
; CHECK-NOFP16-NEXT: ret
|
||||
;
|
||||
; CHECK-FP16-LABEL: fma_f16:
|
||||
@ -1382,3 +1382,5 @@ declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadat
|
||||
|
||||
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
|
||||
declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; CHECK-GI: {{.*}}
|
||||
|
||||
@ -1043,38 +1043,38 @@ define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3)
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
|
||||
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
|
||||
; NONEON-NOSVE-NEXT: add sp, sp, #32
|
||||
@ -1103,38 +1103,38 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
|
||||
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
|
||||
; NONEON-NOSVE-NEXT: add sp, sp, #32
|
||||
@ -1163,74 +1163,74 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #46]
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #30]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #14]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #28]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #12]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #62]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #44]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #26]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #10]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #60]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #42]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #24]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #8]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #58]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #40]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #22]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #56]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #38]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #20]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #54]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #36]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #18]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #52]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #34]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #16]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #50]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #48]
|
||||
; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
|
||||
; NONEON-NOSVE-NEXT: add sp, sp, #64
|
||||
@ -1264,146 +1264,146 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) {
|
||||
; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32]
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #78]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #62]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #76]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #60]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #126]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #92]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #74]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #58]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #124]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #90]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #72]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #56]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #122]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #88]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #70]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #54]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #120]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #86]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #68]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #52]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #118]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #84]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #66]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #50]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #116]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #82]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #64]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #48]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #114]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #80]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #30]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #14]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #112]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #46]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #28]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #12]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #110]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #44]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #26]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #10]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #108]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #42]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #24]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #8]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #106]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #40]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #22]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #104]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #38]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #20]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #102]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #36]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #18]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #100]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #34]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: ldr h1, [sp, #16]
|
||||
; NONEON-NOSVE-NEXT: ldr h2, [sp]
|
||||
; NONEON-NOSVE-NEXT: fcvt s1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt s2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d1, h1
|
||||
; NONEON-NOSVE-NEXT: fcvt d2, h2
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #98]
|
||||
; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
|
||||
; NONEON-NOSVE-NEXT: fcvt s0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, s0
|
||||
; NONEON-NOSVE-NEXT: fcvt d0, h0
|
||||
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
|
||||
; NONEON-NOSVE-NEXT: fcvt h0, d0
|
||||
; NONEON-NOSVE-NEXT: str h0, [sp, #96]
|
||||
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
|
||||
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
|
||||
|
||||
@ -1508,61 +1508,81 @@ define void @test_fma(ptr %p, ptr %q, ptr %r) #0 {
|
||||
; CHECK-FP16-NEXT: push {r4, lr}
|
||||
; CHECK-FP16-NEXT: mov r4, r0
|
||||
; CHECK-FP16-NEXT: ldrh r0, [r1]
|
||||
; CHECK-FP16-NEXT: ldrh r1, [r4]
|
||||
; CHECK-FP16-NEXT: ldrh r2, [r2]
|
||||
; CHECK-FP16-NEXT: vmov s2, r0
|
||||
; CHECK-FP16-NEXT: vmov s0, r1
|
||||
; CHECK-FP16-NEXT: vcvtb.f32.f16 s1, s2
|
||||
; CHECK-FP16-NEXT: vmov s2, r2
|
||||
; CHECK-FP16-NEXT: ldrh r1, [r2]
|
||||
; CHECK-FP16-NEXT: vmov s0, r0
|
||||
; CHECK-FP16-NEXT: ldrh r0, [r4]
|
||||
; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
|
||||
; CHECK-FP16-NEXT: vcvtb.f32.f16 s2, s2
|
||||
; CHECK-FP16-NEXT: bl fmaf
|
||||
; CHECK-FP16-NEXT: vcvtb.f16.f32 s0, s0
|
||||
; CHECK-FP16-NEXT: vmov r0, s0
|
||||
; CHECK-FP16-NEXT: vcvt.f64.f32 d16, s0
|
||||
; CHECK-FP16-NEXT: vmov s0, r0
|
||||
; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
|
||||
; CHECK-FP16-NEXT: vcvt.f64.f32 d17, s0
|
||||
; CHECK-FP16-NEXT: vmov s0, r1
|
||||
; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
|
||||
; CHECK-FP16-NEXT: vcvt.f64.f32 d18, s0
|
||||
; CHECK-FP16-NEXT: vmla.f64 d18, d17, d16
|
||||
; CHECK-FP16-NEXT: vmov r0, r1, d18
|
||||
; CHECK-FP16-NEXT: bl __aeabi_d2h
|
||||
; CHECK-FP16-NEXT: strh r0, [r4]
|
||||
; CHECK-FP16-NEXT: pop {r4, pc}
|
||||
;
|
||||
; CHECK-LIBCALL-VFP-LABEL: test_fma:
|
||||
; CHECK-LIBCALL-VFP: .save {r4, r5, r6, lr}
|
||||
; CHECK-LIBCALL-VFP-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-LIBCALL-VFP-NEXT: .vsave {d8, d9}
|
||||
; CHECK-LIBCALL-VFP-NEXT: vpush {d8, d9}
|
||||
; CHECK-LIBCALL-VFP-NEXT: mov r4, r0
|
||||
; CHECK-LIBCALL-VFP-NEXT: ldrh r0, [r2]
|
||||
; CHECK-LIBCALL-VFP-NEXT: mov r5, r1
|
||||
; CHECK-LIBCALL-VFP-NEXT: ldrh r0, [r0]
|
||||
; CHECK-LIBCALL-VFP-NEXT: mov r5, r2
|
||||
; CHECK-LIBCALL-VFP-NEXT: mov r6, r1
|
||||
; CHECK-LIBCALL-VFP-NEXT: bl __aeabi_h2f
|
||||
; CHECK-LIBCALL-VFP-NEXT: mov r6, r0
|
||||
; CHECK-LIBCALL-VFP-NEXT: ldrh r0, [r5]
|
||||
; CHECK-LIBCALL-VFP-NEXT: ldrh r1, [r6]
|
||||
; CHECK-LIBCALL-VFP-NEXT: vmov s16, r0
|
||||
; CHECK-LIBCALL-VFP-NEXT: ldrh r5, [r5]
|
||||
; CHECK-LIBCALL-VFP-NEXT: mov r0, r1
|
||||
; CHECK-LIBCALL-VFP-NEXT: bl __aeabi_h2f
|
||||
; CHECK-LIBCALL-VFP-NEXT: mov r5, r0
|
||||
; CHECK-LIBCALL-VFP-NEXT: ldrh r0, [r4]
|
||||
; CHECK-LIBCALL-VFP-NEXT: vmov s18, r0
|
||||
; CHECK-LIBCALL-VFP-NEXT: mov r0, r5
|
||||
; CHECK-LIBCALL-VFP-NEXT: bl __aeabi_h2f
|
||||
; CHECK-LIBCALL-VFP-NEXT: vmov s0, r0
|
||||
; CHECK-LIBCALL-VFP-NEXT: vmov s1, r5
|
||||
; CHECK-LIBCALL-VFP-NEXT: vmov s2, r6
|
||||
; CHECK-LIBCALL-VFP-NEXT: bl fmaf
|
||||
; CHECK-LIBCALL-VFP-NEXT: vmov r0, s0
|
||||
; CHECK-LIBCALL-VFP-NEXT: bl __aeabi_f2h
|
||||
; CHECK-LIBCALL-VFP-NEXT: vcvt.f64.f32 d16, s18
|
||||
; CHECK-LIBCALL-VFP-NEXT: vcvt.f64.f32 d17, s16
|
||||
; CHECK-LIBCALL-VFP-NEXT: vcvt.f64.f32 d18, s0
|
||||
; CHECK-LIBCALL-VFP-NEXT: vmla.f64 d18, d17, d16
|
||||
; CHECK-LIBCALL-VFP-NEXT: vmov r0, r1, d18
|
||||
; CHECK-LIBCALL-VFP-NEXT: bl __aeabi_d2h
|
||||
; CHECK-LIBCALL-VFP-NEXT: strh r0, [r4]
|
||||
; CHECK-LIBCALL-VFP-NEXT: vpop {d8, d9}
|
||||
; CHECK-LIBCALL-VFP-NEXT: pop {r4, r5, r6, pc}
|
||||
;
|
||||
; CHECK-NOVFP-LABEL: test_fma:
|
||||
; CHECK-NOVFP: .save {r4, r5, r6, lr}
|
||||
; CHECK-NOVFP-NEXT: push {r4, r5, r6, lr}
|
||||
; CHECK-NOVFP: .save {r4, r5, r6, r7, r11, lr}
|
||||
; CHECK-NOVFP-NEXT: push {r4, r5, r6, r7, r11, lr}
|
||||
; CHECK-NOVFP-NEXT: mov r4, r0
|
||||
; CHECK-NOVFP-NEXT: ldrh r0, [r1]
|
||||
; CHECK-NOVFP-NEXT: mov r5, r2
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_h2f
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_f2d
|
||||
; CHECK-NOVFP-NEXT: mov r6, r0
|
||||
; CHECK-NOVFP-NEXT: ldrh r0, [r4]
|
||||
; CHECK-NOVFP-NEXT: mov r7, r1
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_h2f
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_f2d
|
||||
; CHECK-NOVFP-NEXT: mov r2, r6
|
||||
; CHECK-NOVFP-NEXT: mov r3, r7
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_dmul
|
||||
; CHECK-NOVFP-NEXT: mov r6, r0
|
||||
; CHECK-NOVFP-NEXT: ldrh r0, [r5]
|
||||
; CHECK-NOVFP-NEXT: mov r7, r1
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_h2f
|
||||
; CHECK-NOVFP-NEXT: mov r5, r0
|
||||
; CHECK-NOVFP-NEXT: ldrh r0, [r4]
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_h2f
|
||||
; CHECK-NOVFP-NEXT: mov r1, r6
|
||||
; CHECK-NOVFP-NEXT: mov r2, r5
|
||||
; CHECK-NOVFP-NEXT: bl fmaf
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_f2h
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_f2d
|
||||
; CHECK-NOVFP-NEXT: mov r2, r0
|
||||
; CHECK-NOVFP-NEXT: mov r3, r1
|
||||
; CHECK-NOVFP-NEXT: mov r0, r6
|
||||
; CHECK-NOVFP-NEXT: mov r1, r7
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_dadd
|
||||
; CHECK-NOVFP-NEXT: bl __aeabi_d2h
|
||||
; CHECK-NOVFP-NEXT: strh r0, [r4]
|
||||
; CHECK-NOVFP-NEXT: pop {r4, r5, r6, pc}
|
||||
; CHECK-NOVFP-NEXT: pop {r4, r5, r6, r7, r11, pc}
|
||||
%a = load half, ptr %p, align 2
|
||||
%b = load half, ptr %q, align 2
|
||||
%c = load half, ptr %r, align 2
|
||||
|
||||
@ -8,39 +8,39 @@
|
||||
; RUN: %if aarch64-registered-target %{ llc %s -o - -mtriple=arm64ec-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if amdgpu-registered-target %{ llc %s -o - -mtriple=amdgcn-amd-amdhsa | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if arc-registered-target %{ llc %s -o - -mtriple=arc-elf | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if avr-registered-target %{ llc %s -o - -mtriple=avr-none | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=arm-unknown-linux-gnueabi | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN %}
|
||||
; RUN: %if arm-registered-target %{ llc %s -o - -mtriple=thumbv7em-none-eabi | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if avr-registered-target %{ llc %s -o - -mtriple=avr-none | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; FIXME: BPF has a compiler error
|
||||
; RUN: %if csky-registered-target %{ llc %s -o - -mtriple=csky-unknown-linux-gnuabiv2 | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if csky-registered-target %{ llc %s -o - -mtriple=csky-unknown-linux-gnuabiv2 -mcpu=ck860fv -mattr=+hard-float | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; FIXME: directx has a compiler error
|
||||
; RUN: %if hexagon-registered-target %{ llc %s -o - -mtriple=hexagon-unknown-linux-musl | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if hexagon-registered-target %{ llc %s -o - -mtriple=hexagon-unknown-linux-musl | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if lanai-registered-target %{ llc %s -o - -mtriple=lanai-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu -mattr=+f | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if loongarch-registered-target %{ llc %s -o - -mtriple=loongarch64-unknown-linux-gnu -mattr=+f | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if m68k-registered-target %{ llc %s -o - -mtriple=m68k-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mips-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mips64-unknown-linux-gnuabi64 | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mips64el-unknown-linux-gnuabi64 | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mipsel-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mips-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mips64-unknown-linux-gnuabi64 | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mips64el-unknown-linux-gnuabi64 | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if mips-registered-target %{ llc %s -o - -mtriple=mipsel-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if msp430-registered-target %{ llc %s -o - -mtriple=msp430-none-elf | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if nvptx-registered-target %{ llc %s -o - -mtriple=nvptx64-nvidia-cuda | FileCheck %s --check-prefixes=NOCRASH %}
|
||||
; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if powerpc-registered-target %{ llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if sparc-registered-target %{ llc %s -o - -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if sparc-registered-target %{ llc %s -o - -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv32-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if riscv-registered-target %{ llc %s -o - -mtriple=riscv64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if sparc-registered-target %{ llc %s -o - -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if sparc-registered-target %{ llc %s -o - -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if spirv-registered-target %{ llc %s -o - -mtriple=spirv-unknown-unknown | FileCheck %s --check-prefixes=NOCRASH %}
|
||||
; RUN: %if systemz-registered-target %{ llc %s -o - -mtriple=s390x-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if systemz-registered-target %{ llc %s -o - -mtriple=s390x-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if ve-registered-target %{ llc %s -o - -mtriple=ve-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if webassembly-registered-target %{ llc %s -o - -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-pc-windows-msvc | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if x86-registered-target %{ llc %s -o - -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,CHECK-NEG-ABS,CHECK-COPYSIGN,CHECK-FMA %}
|
||||
; RUN: %if xcore-registered-target %{ llc %s -o - -mtriple=xcore-unknown-unknown | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,BAD-FMA %}
|
||||
; RUN: %if xtensa-registered-target %{ llc %s -o - -mtriple=xtensa-none-elf | FileCheck %s --check-prefixes=ALL,BAD-NEG-ABS,BAD-COPYSIGN,CHECK-FMA %}
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
||||
; ## Full FP16 support enabled by default.
|
||||
; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
|
||||
; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
|
||||
@ -953,11 +954,11 @@ define half @test_cos(half %a) #0 {
|
||||
; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fma_param_2];
|
||||
; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
|
||||
; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
|
||||
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%r[0-9]+]], [[A]]
|
||||
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%r[0-9]+]], [[B]]
|
||||
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%r[0-9]+]], [[C]]
|
||||
; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%r[0-9]+]], [[A32]], [[B32]], [[C32]];
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
|
||||
; CHECK-NOF16-DAG: cvt.f64.f16 [[A64:%rd[0-9]+]], [[A]]
|
||||
; CHECK-NOF16-DAG: cvt.f64.f16 [[B64:%rd[0-9]+]], [[B]]
|
||||
; CHECK-NOF16-DAG: cvt.f64.f16 [[C64:%rd[0-9]+]], [[C]]
|
||||
; CHECK-NOF16-NEXT: fma.rn.f64 [[R64:%rd[0-9]+]], [[A64]], [[B64]], [[C64]];
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f64 [[R:%rs[0-9]+]], [[R64]]
|
||||
; CHECK: st.param.b16 [func_retval0], [[R]];
|
||||
; CHECK: ret
|
||||
define half @test_fma(half %a, half %b, half %c) #0 {
|
||||
@ -1151,11 +1152,11 @@ define half @test_round(half %a) #0 {
|
||||
; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fmuladd_param_2];
|
||||
; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
|
||||
; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
|
||||
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%r[0-9]+]], [[A]]
|
||||
; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%r[0-9]+]], [[B]]
|
||||
; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%r[0-9]+]], [[C]]
|
||||
; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%r[0-9]+]], [[A32]], [[B32]], [[C32]];
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
|
||||
; CHECK-NOF16-DAG: cvt.f64.f16 [[A64:%rd[0-9]+]], [[A]]
|
||||
; CHECK-NOF16-DAG: cvt.f64.f16 [[B64:%rd[0-9]+]], [[B]]
|
||||
; CHECK-NOF16-DAG: cvt.f64.f16 [[C64:%rd[0-9]+]], [[C]]
|
||||
; CHECK-NOF16-NEXT: fma.rn.f64 [[R64:%rd[0-9]+]], [[A64]], [[B64]], [[C64]];
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f64 [[R:%rs[0-9]+]], [[R64]]
|
||||
; CHECK: st.param.b16 [func_retval0], [[R]];
|
||||
; CHECK: ret;
|
||||
define half @test_fmuladd(half %a, half %b, half %c) #0 {
|
||||
@ -1183,3 +1184,9 @@ define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; CHECK: {{.*}}
|
||||
; CHECK-F16-FTZ: {{.*}}
|
||||
; CHECK-F16-NOFTZ: {{.*}}
|
||||
; CHECK-NOF16: {{.*}}
|
||||
; CHECK-NOFTZ: {{.*}}
|
||||
|
||||
@ -1766,27 +1766,28 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
|
||||
; CHECK-NOF16-LABEL: test_fma(
|
||||
; CHECK-NOF16: {
|
||||
; CHECK-NOF16-NEXT: .reg .b16 %rs<9>;
|
||||
; CHECK-NOF16-NEXT: .reg .b32 %r<13>;
|
||||
; CHECK-NOF16-NEXT: .reg .b32 %r<5>;
|
||||
; CHECK-NOF16-NEXT: .reg .b64 %rd<9>;
|
||||
; CHECK-NOF16-EMPTY:
|
||||
; CHECK-NOF16-NEXT: // %bb.0:
|
||||
; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_fma_param_2];
|
||||
; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fma_param_1];
|
||||
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fma_param_0];
|
||||
; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r3;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd1, %rs2;
|
||||
; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r2;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd2, %rs4;
|
||||
; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r1;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs6;
|
||||
; CHECK-NOF16-NEXT: fma.rn.f32 %r7, %r6, %r5, %r4;
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs7, %r7;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs3;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs5;
|
||||
; CHECK-NOF16-NEXT: fma.rn.f32 %r11, %r10, %r9, %r8;
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs8, %r11;
|
||||
; CHECK-NOF16-NEXT: mov.b32 %r12, {%rs8, %rs7};
|
||||
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd3, %rs6;
|
||||
; CHECK-NOF16-NEXT: fma.rn.f64 %rd4, %rd3, %rd2, %rd1;
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f64 %rs7, %rd4;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd5, %rs1;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd6, %rs3;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd7, %rs5;
|
||||
; CHECK-NOF16-NEXT: fma.rn.f64 %rd8, %rd7, %rd6, %rd5;
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f64 %rs8, %rd8;
|
||||
; CHECK-NOF16-NEXT: mov.b32 %r4, {%rs8, %rs7};
|
||||
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4;
|
||||
; CHECK-NOF16-NEXT: ret;
|
||||
%r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
|
||||
ret <2 x half> %r
|
||||
@ -2203,27 +2204,28 @@ define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
|
||||
; CHECK-NOF16-LABEL: test_fmuladd(
|
||||
; CHECK-NOF16: {
|
||||
; CHECK-NOF16-NEXT: .reg .b16 %rs<9>;
|
||||
; CHECK-NOF16-NEXT: .reg .b32 %r<13>;
|
||||
; CHECK-NOF16-NEXT: .reg .b32 %r<5>;
|
||||
; CHECK-NOF16-NEXT: .reg .b64 %rd<9>;
|
||||
; CHECK-NOF16-EMPTY:
|
||||
; CHECK-NOF16-NEXT: // %bb.0:
|
||||
; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_fmuladd_param_2];
|
||||
; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fmuladd_param_1];
|
||||
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fmuladd_param_0];
|
||||
; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r3;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd1, %rs2;
|
||||
; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r2;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd2, %rs4;
|
||||
; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r1;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs6;
|
||||
; CHECK-NOF16-NEXT: fma.rn.f32 %r7, %r6, %r5, %r4;
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs7, %r7;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs3;
|
||||
; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs5;
|
||||
; CHECK-NOF16-NEXT: fma.rn.f32 %r11, %r10, %r9, %r8;
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs8, %r11;
|
||||
; CHECK-NOF16-NEXT: mov.b32 %r12, {%rs8, %rs7};
|
||||
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r12;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd3, %rs6;
|
||||
; CHECK-NOF16-NEXT: fma.rn.f64 %rd4, %rd3, %rd2, %rd1;
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f64 %rs7, %rd4;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd5, %rs1;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd6, %rs3;
|
||||
; CHECK-NOF16-NEXT: cvt.f64.f16 %rd7, %rs5;
|
||||
; CHECK-NOF16-NEXT: fma.rn.f64 %rd8, %rd7, %rd6, %rd5;
|
||||
; CHECK-NOF16-NEXT: cvt.rn.f16.f64 %rs8, %rd8;
|
||||
; CHECK-NOF16-NEXT: mov.b32 %r4, {%rs8, %rs7};
|
||||
; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r4;
|
||||
; CHECK-NOF16-NEXT: ret;
|
||||
%r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
|
||||
ret <2 x half> %r
|
||||
|
||||
@ -1093,28 +1093,41 @@ define half @fmadd_h(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: mv s0, a2
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: lui a1, 16
|
||||
; RV32I-NEXT: addi s3, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s3
|
||||
; RV32I-NEXT: addi s4, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv s2, a0
|
||||
; RV32I-NEXT: and a0, s1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: and a0, s0, s3
|
||||
; RV32I-NEXT: mv s3, a1
|
||||
; RV32I-NEXT: and a0, s1, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s2
|
||||
; RV32I-NEXT: mv a1, s1
|
||||
; RV32I-NEXT: call fmaf
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: mv a1, s3
|
||||
; RV32I-NEXT: call __muldf3
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: mv s2, a1
|
||||
; RV32I-NEXT: and a0, s0, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s1
|
||||
; RV32I-NEXT: mv a1, s2
|
||||
; RV32I-NEXT: call __adddf3
|
||||
; RV32I-NEXT: call __truncdfhf2
|
||||
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: addi sp, sp, 32
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
@ -1132,17 +1145,22 @@ define half @fmadd_h(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: addi s3, a1, -1
|
||||
; RV64I-NEXT: and a0, a0, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv s2, a0
|
||||
; RV64I-NEXT: and a0, s1, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s2
|
||||
; RV64I-NEXT: call __muldf3
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: and a0, s0, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv a2, a0
|
||||
; RV64I-NEXT: mv a0, s2
|
||||
; RV64I-NEXT: mv a1, s1
|
||||
; RV64I-NEXT: call fmaf
|
||||
; RV64I-NEXT: call __truncsfhf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: call __adddf3
|
||||
; RV64I-NEXT: call __truncdfhf2
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
|
||||
@ -1194,35 +1212,48 @@ define half @fmsub_h(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: mv s0, a1
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: lui a0, 16
|
||||
; RV32I-NEXT: addi s2, a0, -1
|
||||
; RV32I-NEXT: and a0, a2, s2
|
||||
; RV32I-NEXT: addi s3, a0, -1
|
||||
; RV32I-NEXT: and a0, a2, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: li a1, 0
|
||||
; RV32I-NEXT: call __addsf3
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: lui a1, 8
|
||||
; RV32I-NEXT: xor s3, a0, a1
|
||||
; RV32I-NEXT: and a0, s1, s2
|
||||
; RV32I-NEXT: xor s4, a0, a1
|
||||
; RV32I-NEXT: and a0, s1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: and a0, s0, s2
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: and a0, s3, s2
|
||||
; RV32I-NEXT: mv s2, a1
|
||||
; RV32I-NEXT: and a0, s0, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s1
|
||||
; RV32I-NEXT: mv a1, s0
|
||||
; RV32I-NEXT: call fmaf
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: mv a1, s2
|
||||
; RV32I-NEXT: call __muldf3
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: and a0, s4, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s0
|
||||
; RV32I-NEXT: mv a1, s1
|
||||
; RV32I-NEXT: call __adddf3
|
||||
; RV32I-NEXT: call __truncdfhf2
|
||||
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: addi sp, sp, 32
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
@ -1247,17 +1278,22 @@ define half @fmsub_h(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: xor s3, a0, a1
|
||||
; RV64I-NEXT: and a0, s1, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: and a0, s0, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: call __muldf3
|
||||
; RV64I-NEXT: mv s0, a0
|
||||
; RV64I-NEXT: and a0, s3, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv a2, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: mv a1, s0
|
||||
; RV64I-NEXT: call fmaf
|
||||
; RV64I-NEXT: call __truncsfhf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s0
|
||||
; RV64I-NEXT: call __adddf3
|
||||
; RV64I-NEXT: call __truncdfhf2
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
|
||||
@ -1329,8 +1365,8 @@ define half @fnmadd_h(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: mv s0, a2
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: lui a1, 16
|
||||
; RV32I-NEXT: addi s3, a1, -1
|
||||
; RV32I-NEXT: lui s3, 16
|
||||
; RV32I-NEXT: addi s3, s3, -1
|
||||
; RV32I-NEXT: and a0, a0, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: li a1, 0
|
||||
@ -1347,17 +1383,26 @@ define half @fnmadd_h(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: xor s4, a0, a1
|
||||
; RV32I-NEXT: and a0, s1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: and a0, s2, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, s0
|
||||
; RV32I-NEXT: mv a3, s1
|
||||
; RV32I-NEXT: call __muldf3
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: and a0, s4, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a0, s1
|
||||
; RV32I-NEXT: mv a1, s0
|
||||
; RV32I-NEXT: call fmaf
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s0
|
||||
; RV32I-NEXT: mv a1, s1
|
||||
; RV32I-NEXT: call __adddf3
|
||||
; RV32I-NEXT: call __truncdfhf2
|
||||
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
@ -1378,8 +1423,8 @@ define half @fnmadd_h(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: mv s0, a2
|
||||
; RV64I-NEXT: mv s1, a1
|
||||
; RV64I-NEXT: lui a1, 16
|
||||
; RV64I-NEXT: addi s3, a1, -1
|
||||
; RV64I-NEXT: lui s3, 16
|
||||
; RV64I-NEXT: addi s3, s3, -1
|
||||
; RV64I-NEXT: and a0, a0, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: li a1, 0
|
||||
@ -1396,17 +1441,21 @@ define half @fnmadd_h(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: xor s4, a0, a1
|
||||
; RV64I-NEXT: and a0, s1, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv s0, a0
|
||||
; RV64I-NEXT: and a0, s2, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, s0
|
||||
; RV64I-NEXT: call __muldf3
|
||||
; RV64I-NEXT: mv s0, a0
|
||||
; RV64I-NEXT: and a0, s4, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv a2, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: mv a1, s0
|
||||
; RV64I-NEXT: call fmaf
|
||||
; RV64I-NEXT: call __truncsfhf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s0
|
||||
; RV64I-NEXT: call __adddf3
|
||||
; RV64I-NEXT: call __truncdfhf2
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
|
||||
@ -1491,8 +1540,8 @@ define half @fnmadd_h_2(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: mv s0, a2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: lui a0, 16
|
||||
; RV32I-NEXT: addi s3, a0, -1
|
||||
; RV32I-NEXT: lui s3, 16
|
||||
; RV32I-NEXT: addi s3, s3, -1
|
||||
; RV32I-NEXT: and a0, a1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: li a1, 0
|
||||
@ -1509,17 +1558,28 @@ define half @fnmadd_h_2(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: xor s4, a0, a1
|
||||
; RV32I-NEXT: and a0, s1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: and a0, s2, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: and a0, s4, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s0
|
||||
; RV32I-NEXT: mv a1, s1
|
||||
; RV32I-NEXT: call fmaf
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: call __muldf3
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: and a0, s4, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s0
|
||||
; RV32I-NEXT: mv a1, s1
|
||||
; RV32I-NEXT: call __adddf3
|
||||
; RV32I-NEXT: call __truncdfhf2
|
||||
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
@ -1540,8 +1600,8 @@ define half @fnmadd_h_2(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
|
||||
; RV64I-NEXT: mv s0, a2
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: lui a0, 16
|
||||
; RV64I-NEXT: addi s3, a0, -1
|
||||
; RV64I-NEXT: lui s3, 16
|
||||
; RV64I-NEXT: addi s3, s3, -1
|
||||
; RV64I-NEXT: and a0, a1, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: li a1, 0
|
||||
@ -1558,17 +1618,22 @@ define half @fnmadd_h_2(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: xor s4, a0, a1
|
||||
; RV64I-NEXT: and a0, s1, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv s0, a0
|
||||
; RV64I-NEXT: and a0, s2, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s0
|
||||
; RV64I-NEXT: call __muldf3
|
||||
; RV64I-NEXT: mv s0, a0
|
||||
; RV64I-NEXT: and a0, s4, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv a2, a0
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s0
|
||||
; RV64I-NEXT: mv a1, s1
|
||||
; RV64I-NEXT: call fmaf
|
||||
; RV64I-NEXT: call __truncsfhf2
|
||||
; RV64I-NEXT: call __adddf3
|
||||
; RV64I-NEXT: call __truncdfhf2
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
|
||||
@ -1659,23 +1724,35 @@ define half @fnmadd_h_3(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: mv s0, a2
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: lui a1, 16
|
||||
; RV32I-NEXT: addi s3, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s3
|
||||
; RV32I-NEXT: addi s4, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv s2, a0
|
||||
; RV32I-NEXT: and a0, s1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: and a0, s0, s3
|
||||
; RV32I-NEXT: mv s3, a1
|
||||
; RV32I-NEXT: and a0, s1, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s2
|
||||
; RV32I-NEXT: mv a1, s1
|
||||
; RV32I-NEXT: call fmaf
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: mv a1, s3
|
||||
; RV32I-NEXT: call __muldf3
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: mv s2, a1
|
||||
; RV32I-NEXT: and a0, s0, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s1
|
||||
; RV32I-NEXT: mv a1, s2
|
||||
; RV32I-NEXT: call __adddf3
|
||||
; RV32I-NEXT: call __truncdfhf2
|
||||
; RV32I-NEXT: lui a1, 1048568
|
||||
; RV32I-NEXT: xor a0, a0, a1
|
||||
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
@ -1683,6 +1760,7 @@ define half @fnmadd_h_3(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: addi sp, sp, 32
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
@ -1700,17 +1778,22 @@ define half @fnmadd_h_3(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: addi s3, a1, -1
|
||||
; RV64I-NEXT: and a0, a0, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv s2, a0
|
||||
; RV64I-NEXT: and a0, s1, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s2
|
||||
; RV64I-NEXT: call __muldf3
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: and a0, s0, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv a2, a0
|
||||
; RV64I-NEXT: mv a0, s2
|
||||
; RV64I-NEXT: mv a1, s1
|
||||
; RV64I-NEXT: call fmaf
|
||||
; RV64I-NEXT: call __truncsfhf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: call __adddf3
|
||||
; RV64I-NEXT: call __truncdfhf2
|
||||
; RV64I-NEXT: lui a1, 1048568
|
||||
; RV64I-NEXT: xor a0, a0, a1
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
@ -1779,23 +1862,35 @@ define half @fnmadd_nsz(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: mv s0, a2
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: lui a1, 16
|
||||
; RV32I-NEXT: addi s3, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s3
|
||||
; RV32I-NEXT: addi s4, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv s2, a0
|
||||
; RV32I-NEXT: and a0, s1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: and a0, s0, s3
|
||||
; RV32I-NEXT: mv s3, a1
|
||||
; RV32I-NEXT: and a0, s1, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s2
|
||||
; RV32I-NEXT: mv a1, s1
|
||||
; RV32I-NEXT: call fmaf
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: mv a1, s3
|
||||
; RV32I-NEXT: call __muldf3
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: mv s2, a1
|
||||
; RV32I-NEXT: and a0, s0, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s1
|
||||
; RV32I-NEXT: mv a1, s2
|
||||
; RV32I-NEXT: call __adddf3
|
||||
; RV32I-NEXT: call __truncdfhf2
|
||||
; RV32I-NEXT: lui a1, 1048568
|
||||
; RV32I-NEXT: xor a0, a0, a1
|
||||
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
@ -1803,6 +1898,7 @@ define half @fnmadd_nsz(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: addi sp, sp, 32
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
@ -1820,17 +1916,22 @@ define half @fnmadd_nsz(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: addi s3, a1, -1
|
||||
; RV64I-NEXT: and a0, a0, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv s2, a0
|
||||
; RV64I-NEXT: and a0, s1, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s2
|
||||
; RV64I-NEXT: call __muldf3
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: and a0, s0, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv a2, a0
|
||||
; RV64I-NEXT: mv a0, s2
|
||||
; RV64I-NEXT: mv a1, s1
|
||||
; RV64I-NEXT: call fmaf
|
||||
; RV64I-NEXT: call __truncsfhf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: call __adddf3
|
||||
; RV64I-NEXT: call __truncdfhf2
|
||||
; RV64I-NEXT: lui a1, 1048568
|
||||
; RV64I-NEXT: xor a0, a0, a1
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
@ -1892,34 +1993,46 @@ define half @fnmsub_h(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: mv s0, a2
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: lui a1, 16
|
||||
; RV32I-NEXT: addi s2, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s2
|
||||
; RV32I-NEXT: addi s3, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: li a1, 0
|
||||
; RV32I-NEXT: call __addsf3
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: lui a1, 8
|
||||
; RV32I-NEXT: xor s3, a0, a1
|
||||
; RV32I-NEXT: and a0, s1, s2
|
||||
; RV32I-NEXT: xor s4, a0, a1
|
||||
; RV32I-NEXT: and a0, s1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: and a0, s0, s2
|
||||
; RV32I-NEXT: mv s2, a1
|
||||
; RV32I-NEXT: and a0, s4, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: and a0, s3, s2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, s1
|
||||
; RV32I-NEXT: mv a3, s2
|
||||
; RV32I-NEXT: call __muldf3
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: mv s2, a1
|
||||
; RV32I-NEXT: and a0, s0, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv a1, s1
|
||||
; RV32I-NEXT: mv a2, s0
|
||||
; RV32I-NEXT: call fmaf
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s1
|
||||
; RV32I-NEXT: mv a1, s2
|
||||
; RV32I-NEXT: call __adddf3
|
||||
; RV32I-NEXT: call __truncdfhf2
|
||||
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: addi sp, sp, 32
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
@ -1944,16 +2057,21 @@ define half @fnmsub_h(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: xor s3, a0, a1
|
||||
; RV64I-NEXT: and a0, s1, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: and a0, s3, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, s1
|
||||
; RV64I-NEXT: call __muldf3
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: and a0, s0, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv s0, a0
|
||||
; RV64I-NEXT: and a0, s3, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv a1, s1
|
||||
; RV64I-NEXT: mv a2, s0
|
||||
; RV64I-NEXT: call fmaf
|
||||
; RV64I-NEXT: call __truncsfhf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: call __adddf3
|
||||
; RV64I-NEXT: call __truncdfhf2
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
|
||||
@ -2020,35 +2138,48 @@ define half @fnmsub_h_2(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: mv s0, a2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: lui a0, 16
|
||||
; RV32I-NEXT: addi s2, a0, -1
|
||||
; RV32I-NEXT: and a0, a1, s2
|
||||
; RV32I-NEXT: addi s3, a0, -1
|
||||
; RV32I-NEXT: and a0, a1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: li a1, 0
|
||||
; RV32I-NEXT: call __addsf3
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: lui a1, 8
|
||||
; RV32I-NEXT: xor s3, a0, a1
|
||||
; RV32I-NEXT: and a0, s1, s2
|
||||
; RV32I-NEXT: xor s4, a0, a1
|
||||
; RV32I-NEXT: and a0, s1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: and a0, s0, s2
|
||||
; RV32I-NEXT: mv s2, a1
|
||||
; RV32I-NEXT: and a0, s4, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: and a0, s3, s2
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv a1, a0
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s1
|
||||
; RV32I-NEXT: mv a2, s0
|
||||
; RV32I-NEXT: call fmaf
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: mv a1, s2
|
||||
; RV32I-NEXT: call __muldf3
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: mv s2, a1
|
||||
; RV32I-NEXT: and a0, s0, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s1
|
||||
; RV32I-NEXT: mv a1, s2
|
||||
; RV32I-NEXT: call __adddf3
|
||||
; RV32I-NEXT: call __truncdfhf2
|
||||
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: addi sp, sp, 32
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
@ -2073,17 +2204,22 @@ define half @fnmsub_h_2(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: xor s3, a0, a1
|
||||
; RV64I-NEXT: and a0, s1, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: and a0, s3, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: call __muldf3
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: and a0, s0, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv s0, a0
|
||||
; RV64I-NEXT: and a0, s3, s2
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: mv a2, s0
|
||||
; RV64I-NEXT: call fmaf
|
||||
; RV64I-NEXT: call __truncsfhf2
|
||||
; RV64I-NEXT: call __adddf3
|
||||
; RV64I-NEXT: call __truncdfhf2
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
|
||||
|
||||
@ -1690,28 +1690,41 @@ define half @fma_f16(half %a, half %b, half %c) nounwind {
|
||||
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
|
||||
; RV32I-NEXT: mv s0, a2
|
||||
; RV32I-NEXT: mv s1, a1
|
||||
; RV32I-NEXT: lui a1, 16
|
||||
; RV32I-NEXT: addi s3, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s3
|
||||
; RV32I-NEXT: addi s4, a1, -1
|
||||
; RV32I-NEXT: and a0, a0, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv s2, a0
|
||||
; RV32I-NEXT: and a0, s1, s3
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: and a0, s0, s3
|
||||
; RV32I-NEXT: mv s3, a1
|
||||
; RV32I-NEXT: and a0, s1, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s2
|
||||
; RV32I-NEXT: mv a1, s1
|
||||
; RV32I-NEXT: call fmaf
|
||||
; RV32I-NEXT: call __truncsfhf2
|
||||
; RV32I-NEXT: mv a1, s3
|
||||
; RV32I-NEXT: call __muldf3
|
||||
; RV32I-NEXT: mv s1, a0
|
||||
; RV32I-NEXT: mv s2, a1
|
||||
; RV32I-NEXT: and a0, s0, s4
|
||||
; RV32I-NEXT: call __extendhfsf2
|
||||
; RV32I-NEXT: call __extendsfdf2
|
||||
; RV32I-NEXT: mv a2, a0
|
||||
; RV32I-NEXT: mv a3, a1
|
||||
; RV32I-NEXT: mv a0, s1
|
||||
; RV32I-NEXT: mv a1, s2
|
||||
; RV32I-NEXT: call __adddf3
|
||||
; RV32I-NEXT: call __truncdfhf2
|
||||
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
|
||||
; RV32I-NEXT: addi sp, sp, 32
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
@ -1729,17 +1742,22 @@ define half @fma_f16(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: addi s3, a1, -1
|
||||
; RV64I-NEXT: and a0, a0, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv s2, a0
|
||||
; RV64I-NEXT: and a0, s1, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s2
|
||||
; RV64I-NEXT: call __muldf3
|
||||
; RV64I-NEXT: mv s1, a0
|
||||
; RV64I-NEXT: and a0, s0, s3
|
||||
; RV64I-NEXT: call __extendhfsf2
|
||||
; RV64I-NEXT: mv a2, a0
|
||||
; RV64I-NEXT: mv a0, s2
|
||||
; RV64I-NEXT: mv a1, s1
|
||||
; RV64I-NEXT: call fmaf
|
||||
; RV64I-NEXT: call __truncsfhf2
|
||||
; RV64I-NEXT: call __extendsfdf2
|
||||
; RV64I-NEXT: mv a1, a0
|
||||
; RV64I-NEXT: mv a0, s1
|
||||
; RV64I-NEXT: call __adddf3
|
||||
; RV64I-NEXT: call __truncdfhf2
|
||||
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
|
||||
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
|
||||
@ -1748,14 +1766,41 @@ define half @fma_f16(half %a, half %b, half %c) nounwind {
|
||||
; RV64I-NEXT: addi sp, sp, 48
|
||||
; RV64I-NEXT: ret
|
||||
;
|
||||
; CHECKIZFHMIN-LABEL: fma_f16:
|
||||
; CHECKIZFHMIN: # %bb.0:
|
||||
; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa2
|
||||
; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa1
|
||||
; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa0
|
||||
; CHECKIZFHMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
|
||||
; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
|
||||
; CHECKIZFHMIN-NEXT: ret
|
||||
; RV32IFZFHMIN-LABEL: fma_f16:
|
||||
; RV32IFZFHMIN: # %bb.0:
|
||||
; RV32IFZFHMIN-NEXT: fcvt.s.h fa5, fa2
|
||||
; RV32IFZFHMIN-NEXT: fcvt.s.h fa4, fa1
|
||||
; RV32IFZFHMIN-NEXT: fcvt.s.h fa3, fa0
|
||||
; RV32IFZFHMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
|
||||
; RV32IFZFHMIN-NEXT: fcvt.h.s fa0, fa5
|
||||
; RV32IFZFHMIN-NEXT: ret
|
||||
;
|
||||
; RV64IFZFHMIN-LABEL: fma_f16:
|
||||
; RV64IFZFHMIN: # %bb.0:
|
||||
; RV64IFZFHMIN-NEXT: fcvt.s.h fa5, fa2
|
||||
; RV64IFZFHMIN-NEXT: fcvt.s.h fa4, fa1
|
||||
; RV64IFZFHMIN-NEXT: fcvt.s.h fa3, fa0
|
||||
; RV64IFZFHMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
|
||||
; RV64IFZFHMIN-NEXT: fcvt.h.s fa0, fa5
|
||||
; RV64IFZFHMIN-NEXT: ret
|
||||
;
|
||||
; RV32IDZFHMIN-LABEL: fma_f16:
|
||||
; RV32IDZFHMIN: # %bb.0:
|
||||
; RV32IDZFHMIN-NEXT: fcvt.d.h fa5, fa2
|
||||
; RV32IDZFHMIN-NEXT: fcvt.d.h fa4, fa1
|
||||
; RV32IDZFHMIN-NEXT: fcvt.d.h fa3, fa0
|
||||
; RV32IDZFHMIN-NEXT: fmadd.d fa5, fa3, fa4, fa5
|
||||
; RV32IDZFHMIN-NEXT: fcvt.h.d fa0, fa5
|
||||
; RV32IDZFHMIN-NEXT: ret
|
||||
;
|
||||
; RV64IDZFHMIN-LABEL: fma_f16:
|
||||
; RV64IDZFHMIN: # %bb.0:
|
||||
; RV64IDZFHMIN-NEXT: fcvt.d.h fa5, fa2
|
||||
; RV64IDZFHMIN-NEXT: fcvt.d.h fa4, fa1
|
||||
; RV64IDZFHMIN-NEXT: fcvt.d.h fa3, fa0
|
||||
; RV64IDZFHMIN-NEXT: fmadd.d fa5, fa3, fa4, fa5
|
||||
; RV64IDZFHMIN-NEXT: fcvt.h.d fa0, fa5
|
||||
; RV64IDZFHMIN-NEXT: ret
|
||||
;
|
||||
; CHECKIZHINXMIN-LABEL: fma_f16:
|
||||
; CHECKIZHINXMIN: # %bb.0:
|
||||
|
||||
@ -8,12 +8,12 @@ declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
|
||||
|
||||
define half @f0(half %f1, half %f2, half %acc) {
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK-SCALAR: maebr %f0, %f9, %f10
|
||||
; CHECK-VECTOR: wfmasb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncsfhf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK-SCALAR: madbr %f0, %f9, %f10
|
||||
; CHECK-VECTOR: wfmadb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncdfhf2@PLT
|
||||
; CHECK: br %r14
|
||||
%res = call half @llvm.fma.f16 (half %f1, half %f2, half %acc)
|
||||
ret half %res
|
||||
|
||||
@ -10,12 +10,12 @@ define half @f0(half %f1, half %f2, half %acc) {
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK-NOT: brasl
|
||||
; CHECK: lcdfr %f{{[0-9]+}}, %f4
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK-SCALAR: maebr %f0, %f8, %f10
|
||||
; CHECK-VECTOR: wfmasb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncsfhf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK-SCALAR: madbr %f0, %f8, %f10
|
||||
; CHECK-VECTOR: wfmadb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncdfhf2@PLT
|
||||
; CHECK: br %r14
|
||||
%negacc = fneg half %acc
|
||||
%res = call half @llvm.fma.f16 (half %f1, half %f2, half %negacc)
|
||||
|
||||
@ -25,11 +25,11 @@ define double @f2(double %f1, double %f2, double %acc) {
|
||||
|
||||
define half @f3_half(half %f1, half %f2, half %acc) {
|
||||
; CHECK-LABEL: f3_half:
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: wfmasb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncsfhf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: wfmadb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncdfhf2@PLT
|
||||
; CHECK-NOT: brasl
|
||||
; CHECK: lcdfr %f0, %f0
|
||||
; CHECK-NEXT: lmg
|
||||
@ -52,11 +52,11 @@ define half @f4_half(half %f1, half %f2, half %acc) {
|
||||
; CHECK-LABEL: f4_half:
|
||||
; CHECK-NOT: brasl
|
||||
; CHECK: lcdfr %f0, %f4
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: wfmasb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncsfhf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: wfmadb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncdfhf2@PLT
|
||||
; CHECK-NOT: brasl
|
||||
; CHECK: lcdfr %f0, %f0
|
||||
; CHECK-NEXT: lmg
|
||||
|
||||
@ -8,13 +8,13 @@ declare float @llvm.experimental.constrained.fma.f32(float, float, float, metada
|
||||
|
||||
define half @f0(half %f1, half %f2, half %acc) #0 {
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfsf2@PLT
|
||||
; CHECK-SCALAR: maebr %f10, %f0, %f8
|
||||
; CHECK-SCALAR: ler %f0, %f10
|
||||
; CHECK-VECTOR: wfmasb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncsfhf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK: brasl %r14, __extendhfdf2@PLT
|
||||
; CHECK-SCALAR: madbr %f10, %f0, %f8
|
||||
; CHECK-SCALAR: ldr %f0, %f10
|
||||
; CHECK-VECTOR: wfmadb %f0, %f0, %f8, %f10
|
||||
; CHECK: brasl %r14, __truncdfhf2@PLT
|
||||
; CHECK: br %r14
|
||||
%res = call half @llvm.experimental.constrained.fma.f16 (
|
||||
half %f1, half %f2, half %acc,
|
||||
|
||||
@ -432,8 +432,7 @@ define half @fma_f16(half %a, half %b, half %c) nounwind strictfp {
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: subq $24, %rsp
|
||||
; SSE2-NEXT: movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
||||
; SSE2-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; SSE2-NEXT: callq __extendhfsf2@PLT
|
||||
; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
@ -443,12 +442,17 @@ define half @fma_f16(half %a, half %b, half %c) nounwind strictfp {
|
||||
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: callq __extendhfsf2@PLT
|
||||
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
|
||||
; SSE2-NEXT: # xmm1 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
|
||||
; SSE2-NEXT: # xmm2 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: callq fmaf@PLT
|
||||
; SSE2-NEXT: callq __truncsfhf2@PLT
|
||||
; SSE2-NEXT: xorps %xmm2, %xmm2
|
||||
; SSE2-NEXT: cvtss2sd %xmm0, %xmm2
|
||||
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
||||
; SSE2-NEXT: cvtss2sd %xmm0, %xmm1
|
||||
; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; SSE2-NEXT: cvtss2sd %xmm0, %xmm0
|
||||
; SSE2-NEXT: callq fma@PLT
|
||||
; SSE2-NEXT: callq __truncdfhf2@PLT
|
||||
; SSE2-NEXT: addq $24, %rsp
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
@ -460,38 +464,42 @@ define half @fma_f16(half %a, half %b, half %c) nounwind strictfp {
|
||||
; F16C-NEXT: vpextrw $0, %xmm2, %edx
|
||||
; F16C-NEXT: movzwl %dx, %edx
|
||||
; F16C-NEXT: vmovd %edx, %xmm0
|
||||
; F16C-NEXT: vcvtph2ps %xmm0, %xmm2
|
||||
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm2
|
||||
; F16C-NEXT: movzwl %cx, %ecx
|
||||
; F16C-NEXT: vmovd %ecx, %xmm0
|
||||
; F16C-NEXT: vcvtph2ps %xmm0, %xmm1
|
||||
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm1
|
||||
; F16C-NEXT: movzwl %ax, %eax
|
||||
; F16C-NEXT: vmovd %eax, %xmm0
|
||||
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; F16C-NEXT: callq fmaf@PLT
|
||||
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; F16C-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; F16C-NEXT: callq fma@PLT
|
||||
; F16C-NEXT: callq __truncdfhf2@PLT
|
||||
; F16C-NEXT: popq %rax
|
||||
; F16C-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: fma_f16:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vpextrw $0, %xmm1, %eax
|
||||
; AVX512-NEXT: vpextrw $0, %xmm0, %ecx
|
||||
; AVX512-NEXT: pushq %rax
|
||||
; AVX512-NEXT: vpextrw $0, %xmm0, %eax
|
||||
; AVX512-NEXT: vpextrw $0, %xmm1, %ecx
|
||||
; AVX512-NEXT: vpextrw $0, %xmm2, %edx
|
||||
; AVX512-NEXT: movzwl %dx, %edx
|
||||
; AVX512-NEXT: vmovd %edx, %xmm0
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm1
|
||||
; AVX512-NEXT: movzwl %cx, %ecx
|
||||
; AVX512-NEXT: vmovd %ecx, %xmm1
|
||||
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; AVX512-NEXT: vmovd %ecx, %xmm0
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm2
|
||||
; AVX512-NEXT: movzwl %ax, %eax
|
||||
; AVX512-NEXT: vmovd %eax, %xmm2
|
||||
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
|
||||
; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm1 * xmm2) + xmm0
|
||||
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
|
||||
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovd %eax, %xmm0
|
||||
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; AVX512-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
|
||||
; AVX512-NEXT: callq __truncdfhf2@PLT
|
||||
; AVX512-NEXT: popq %rax
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: fma_f16:
|
||||
|
||||
@ -421,10 +421,13 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
|
||||
; F16C-NEXT: pushq %rbx
|
||||
; F16C-NEXT: movq %rdi, %rbx
|
||||
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
|
||||
; F16C-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
|
||||
; F16C-NEXT: vcvtph2ps %xmm2, %xmm2
|
||||
; F16C-NEXT: callq fmaf@PLT
|
||||
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
|
||||
; F16C-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2
|
||||
; F16C-NEXT: callq fma@PLT
|
||||
; F16C-NEXT: callq __truncdfhf2@PLT
|
||||
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
|
||||
; F16C-NEXT: popq %rbx
|
||||
; F16C-NEXT: retq
|
||||
@ -440,24 +443,27 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
|
||||
; X64-NEXT: pushq %rbx
|
||||
; X64-NEXT: subq $16, %rsp
|
||||
; X64-NEXT: movq %rdi, %rbx
|
||||
; X64-NEXT: movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; X64-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; X64-NEXT: movaps %xmm1, %xmm0
|
||||
; X64-NEXT: movaps %xmm2, %xmm0
|
||||
; X64-NEXT: callq __extendhfsf2@PLT
|
||||
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: callq __extendhfsf2@PLT
|
||||
; X64-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
|
||||
; X64-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
|
||||
; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
|
||||
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: callq __extendhfsf2@PLT
|
||||
; X64-NEXT: cvtss2sd %xmm0, %xmm0
|
||||
; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
|
||||
; X64-NEXT: # xmm1 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: cvtss2sd %xmm1, %xmm1
|
||||
; X64-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
|
||||
; X64-NEXT: # xmm2 = mem[0],zero,zero,zero
|
||||
; X64-NEXT: callq fmaf@PLT
|
||||
; X64-NEXT: callq __truncsfhf2@PLT
|
||||
; X64-NEXT: cvtss2sd %xmm2, %xmm2
|
||||
; X64-NEXT: callq fma@PLT
|
||||
; X64-NEXT: callq __truncdfhf2@PLT
|
||||
; X64-NEXT: pextrw $0, %xmm0, %eax
|
||||
; X64-NEXT: movw %ax, (%rbx)
|
||||
; X64-NEXT: addq $16, %rsp
|
||||
@ -467,7 +473,7 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
|
||||
; X86-LABEL: test_half_fma:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %esi
|
||||
; X86-NEXT: subl $72, %esp
|
||||
; X86-NEXT: subl $88, %esp
|
||||
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
|
||||
; X86-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
|
||||
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
|
||||
@ -487,17 +493,17 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
|
||||
; X86-NEXT: pextrw $0, %xmm0, %eax
|
||||
; X86-NEXT: movw %ax, (%esp)
|
||||
; X86-NEXT: calll __extendhfsf2
|
||||
; X86-NEXT: fstps {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fstpl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
|
||||
; X86-NEXT: fstps {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fstpl {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
|
||||
; X86-NEXT: fstps (%esp)
|
||||
; X86-NEXT: calll fmaf
|
||||
; X86-NEXT: fstps (%esp)
|
||||
; X86-NEXT: calll __truncsfhf2
|
||||
; X86-NEXT: fstpl (%esp)
|
||||
; X86-NEXT: calll fma
|
||||
; X86-NEXT: fstpl (%esp)
|
||||
; X86-NEXT: calll __truncdfhf2
|
||||
; X86-NEXT: pextrw $0, %xmm0, %eax
|
||||
; X86-NEXT: movw %ax, (%esi)
|
||||
; X86-NEXT: addl $72, %esp
|
||||
; X86-NEXT: addl $88, %esp
|
||||
; X86-NEXT: popl %esi
|
||||
; X86-NEXT: retl
|
||||
%res = call half @llvm.fma.half(half %a0, half %a1, half %a2)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user