
; This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7. Some issues were
; discovered with the bootstrap builds, which appear to have been caused by that
; commit. I'm reverting to investigate.
; (File listing metadata: 1610 lines, 63 KiB, LLVM.)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16

; Scalar f64 llvm.fma. The shared CHECK prefix is used, so every RUN
; configuration is expected to emit a single fmadd on d registers.
define double @fma_f64(double %a, double %b, double %c) {
; CHECK-LABEL: fma_f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmadd d0, d0, d1, d2
; CHECK-NEXT: ret
entry:
  %d = call double @llvm.fma.f64(double %a, double %b, double %c)
  ret double %d
}

; Scalar f32 llvm.fma. The shared CHECK prefix is used, so every RUN
; configuration is expected to emit a single fmadd on s registers.
define float @fma_f32(float %a, float %b, float %c) {
; CHECK-LABEL: fma_f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmadd s0, s0, s1, s2
; CHECK-NEXT: ret
entry:
  %d = call float @llvm.fma.f32(float %a, float %b, float %c)
  ret float %d
}

; Scalar f16 llvm.fma. The NOFP16 prefixes (RUN lines without +fullfp16) expect
; the three operands to be converted to single precision with fcvt, a single
; fmadd on s registers, and a conversion back to half. The FP16 prefixes expect
; one fmadd on h registers.
define half @fma_f16(half %a, half %b, half %c) {
; CHECK-SD-NOFP16-LABEL: fma_f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fma_f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fma_f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fmadd s0, s0, s1, s2
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fma_f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call half @llvm.fma.f16(half %a, half %b, half %c)
  ret half %d
}

; <2 x double> llvm.fma. The shared CHECK prefix expects one fmla on .2d lanes
; that accumulates into v2, followed by a move of the result into v0.
define <2 x double> @fma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fma_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %d
}

; <3 x double> llvm.fma. The two selectors are checked separately:
;  * CHECK-SD packs d-register pairs into .2d vectors, issues an fmla for them,
;    loads one more operand from [sp], and issues a second .2d fmla.
;  * CHECK-GI packs the same pairs for one .2d fmla and uses a scalar fmadd,
;    with an operand loaded from [sp], for the remaining value.
define <3 x double> @fma_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
; CHECK-SD-LABEL: fma_v3f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d
; CHECK-SD-NEXT: ldr d3, [sp]
; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d
; CHECK-SD-NEXT: fmov d0, d6
; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-SD-NEXT: fmov d2, d3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fma_v3f64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
; CHECK-GI-NEXT: fmla v6.2d, v3.2d, v0.2d
; CHECK-GI-NEXT: ldr d0, [sp]
; CHECK-GI-NEXT: fmadd d2, d2, d5, d0
; CHECK-GI-NEXT: mov d1, v6.d[1]
; CHECK-GI-NEXT: fmov d0, d6
; CHECK-GI-NEXT: ret
entry:
  %d = call <3 x double> @llvm.fma.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c)
  ret <3 x double> %d
}

; <4 x double> llvm.fma. The shared CHECK prefix expects two fmla instructions
; on .2d lanes (accumulating into v4 and v5) and two moves into v0 and v1.
define <4 x double> @fma_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: fma_v4f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v4.2d, v2.2d, v0.2d
; CHECK-NEXT: fmla v5.2d, v3.2d, v1.2d
; CHECK-NEXT: mov v0.16b, v4.16b
; CHECK-NEXT: mov v1.16b, v5.16b
; CHECK-NEXT: ret
entry:
  %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
  ret <4 x double> %d
}

; <2 x float> llvm.fma. The shared CHECK prefix expects one fmla on .2s lanes
; that accumulates into v2, followed by an fmov of d2 into d0.
define <2 x float> @fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: fma_v2f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v2.2s, v1.2s, v0.2s
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %d = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %d
}

; <3 x float> llvm.fma. The shared CHECK prefix expects the same output as the
; <4 x float> case: one fmla on .4s lanes and a move of the result into v0.
define <3 x float> @fma_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; CHECK-LABEL: fma_v3f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %d = call <3 x float> @llvm.fma.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
  ret <3 x float> %d
}

; <4 x float> llvm.fma. The shared CHECK prefix expects one fmla on .4s lanes
; that accumulates into v2, followed by a move of the result into v0.
define <4 x float> @fma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fma_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %d
}

; <8 x float> llvm.fma. The shared CHECK prefix expects two fmla instructions
; on .4s lanes (accumulating into v4 and v5) and two moves into v0 and v1.
define <8 x float> @fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: fma_v8f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v4.4s, v2.4s, v0.4s
; CHECK-NEXT: fmla v5.4s, v3.4s, v1.4s
; CHECK-NEXT: mov v0.16b, v4.16b
; CHECK-NEXT: mov v1.16b, v5.16b
; CHECK-NEXT: ret
entry:
  %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
  ret <8 x float> %d
}

; <7 x half> llvm.fma. Expected output per prefix:
;  * CHECK-SD-NOFP16: lanes h[0]..h[7] are extracted one by one, converted to
;    single precision, combined with scalar fmadd, converted back and
;    re-inserted into v3.
;  * CHECK-GI-NOFP16: the low four lanes are widened with fcvtl and combined
;    with an fmla on .4s; lanes 4..6 are gathered into temporaries and go
;    through a second fcvtl/fmla/fcvtn sequence.
;  * CHECK-SD-FP16 / CHECK-GI-FP16: a single fmla on .8h lanes.
define <7 x half> @fma_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fma_v7f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[3]
; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3
; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3]
; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt s7, h18
; CHECK-SD-NOFP16-NEXT: fcvt s16, h19
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0]
; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5]
; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5
; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0]
; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6]
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4
; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
; CHECK-SD-NOFP16-NEXT: fcvt s6, h18
; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5
; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0]
; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fma_v7f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fma_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: mov v6.h[0], v0.h[4]
; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v4.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v3.h[0], v1.h[4]
; CHECK-GI-NOFP16-NEXT: mov v4.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: mov v6.h[1], v0.h[5]
; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v1.h[5]
; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fcvtn v5.4h, v5.4s
; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v0.h[6]
; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[6]
; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v5.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v6.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v5.h[1]
; CHECK-GI-NOFP16-NEXT: fmla v3.4s, v2.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v5.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fma_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call <7 x half> @llvm.fma.v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c)
  ret <7 x half> %d
}

; <4 x half> llvm.fma. Expected output per prefix:
;  * CHECK-SD-NOFP16: each of the four lanes is extracted, converted to single
;    precision, combined with a scalar fmadd, converted back and re-inserted.
;  * CHECK-GI-NOFP16: all operands are widened with fcvtl, combined with one
;    fmla on .4s lanes, and narrowed with fcvtn.
;  * CHECK-SD-FP16 / CHECK-GI-FP16: a single fmla on .4h lanes.
define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fma_v4f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NOFP16-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[3]
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[3]
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[3]
; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fmadd s3, s5, s4, s3
; CHECK-SD-NOFP16-NEXT: fcvt s4, h17
; CHECK-SD-NOFP16-NEXT: fcvt s5, h18
; CHECK-SD-NOFP16-NEXT: fcvt h0, s6
; CHECK-SD-NOFP16-NEXT: fmadd s4, s7, s5, s4
; CHECK-SD-NOFP16-NEXT: fcvt h3, s3
; CHECK-SD-NOFP16-NEXT: fcvt s5, h16
; CHECK-SD-NOFP16-NEXT: mov v0.h[1], v3.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt h3, s4
; CHECK-SD-NOFP16-NEXT: fmadd s1, s5, s1, s2
; CHECK-SD-NOFP16-NEXT: mov v0.h[2], v3.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
; CHECK-SD-NOFP16-NEXT: mov v0.h[3], v1.h[0]
; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fma_v4f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
; CHECK-SD-FP16-NEXT: fmov d0, d2
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fma_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fmla v2.4s, v1.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fma_v4f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
; CHECK-GI-FP16-NEXT: fmov d0, d2
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %d
}

; <8 x half> llvm.fma. Expected output per prefix:
;  * CHECK-SD-NOFP16: each of the eight lanes is extracted, converted to single
;    precision, combined with a scalar fmadd, converted back and re-inserted
;    into v3.
;  * CHECK-GI-NOFP16: low and high halves are widened with fcvtl/fcvtl2,
;    combined with two fmla on .4s lanes, and narrowed with fcvtn/fcvtn2.
;  * CHECK-SD-FP16 / CHECK-GI-FP16: a single fmla on .8h lanes.
define <8 x half> @fma_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fma_v8f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: mov h3, v2.h[1]
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
; CHECK-SD-NOFP16-NEXT: mov h5, v0.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h2
; CHECK-SD-NOFP16-NEXT: fcvt s7, h1
; CHECK-SD-NOFP16-NEXT: fcvt s16, h0
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[2]
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[2]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[2]
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: fmadd s6, s16, s7, s6
; CHECK-SD-NOFP16-NEXT: fcvt s7, h17
; CHECK-SD-NOFP16-NEXT: fcvt s16, h18
; CHECK-SD-NOFP16-NEXT: fcvt s17, h19
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[3]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[3]
; CHECK-SD-NOFP16-NEXT: fmadd s4, s5, s4, s3
; CHECK-SD-NOFP16-NEXT: mov h5, v2.h[3]
; CHECK-SD-NOFP16-NEXT: fcvt h3, s6
; CHECK-SD-NOFP16-NEXT: fmadd s6, s17, s16, s7
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt s7, h18
; CHECK-SD-NOFP16-NEXT: fcvt s16, h19
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt h6, s6
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: mov v3.h[1], v4.h[0]
; CHECK-SD-NOFP16-NEXT: mov h4, v2.h[5]
; CHECK-SD-NOFP16-NEXT: fmadd s5, s16, s7, s5
; CHECK-SD-NOFP16-NEXT: mov h7, v1.h[5]
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s19, h19
; CHECK-SD-NOFP16-NEXT: mov v3.h[2], v6.h[0]
; CHECK-SD-NOFP16-NEXT: mov h6, v2.h[6]
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt h5, s5
; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
; CHECK-SD-NOFP16-NEXT: mov h18, v1.h[6]
; CHECK-SD-NOFP16-NEXT: mov h19, v0.h[6]
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT: fmadd s4, s16, s7, s4
; CHECK-SD-NOFP16-NEXT: mov v3.h[3], v5.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt s5, h6
; CHECK-SD-NOFP16-NEXT: fcvt s6, h18
; CHECK-SD-NOFP16-NEXT: fcvt s7, h19
; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: fcvt h4, s4
; CHECK-SD-NOFP16-NEXT: fmadd s5, s7, s6, s5
; CHECK-SD-NOFP16-NEXT: mov v3.h[4], v16.h[0]
; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s1, s2
; CHECK-SD-NOFP16-NEXT: mov v3.h[5], v4.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt h4, s5
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: mov v3.h[6], v4.h[0]
; CHECK-SD-NOFP16-NEXT: mov v3.h[7], v0.h[0]
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fma_v8f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fma_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v4.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: fmla v2.4s, v1.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v5.4s
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fma_v8f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %d
}

; <16 x half> llvm.fma. Expected output per prefix:
;  * CHECK-SD-NOFP16: every lane is extracted, converted to single precision,
;    combined with a scalar fmadd, converted back and re-inserted into v6/v7,
;    which are then moved to v0/v1.
;  * CHECK-GI-NOFP16: the inputs are widened with fcvtl/fcvtl2, combined with
;    four fmla on .4s lanes, and narrowed with fcvtn/fcvtn2.
;  * CHECK-SD-FP16 / CHECK-GI-FP16: two fmla on .8h lanes.
define <16 x half> @fma_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fma_v16f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: mov h6, v4.h[1]
; CHECK-SD-NOFP16-NEXT: mov h7, v2.h[1]
; CHECK-SD-NOFP16-NEXT: mov h16, v0.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s17, h4
; CHECK-SD-NOFP16-NEXT: fcvt s18, h2
; CHECK-SD-NOFP16-NEXT: fcvt s19, h0
; CHECK-SD-NOFP16-NEXT: mov h20, v4.h[2]
; CHECK-SD-NOFP16-NEXT: mov h21, v2.h[2]
; CHECK-SD-NOFP16-NEXT: mov h22, v0.h[2]
; CHECK-SD-NOFP16-NEXT: mov h23, v4.h[3]
; CHECK-SD-NOFP16-NEXT: mov h24, v2.h[3]
; CHECK-SD-NOFP16-NEXT: mov h25, v0.h[3]
; CHECK-SD-NOFP16-NEXT: fcvt s6, h6
; CHECK-SD-NOFP16-NEXT: fcvt s7, h7
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fmadd s17, s19, s18, s17
; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt s27, h5
; CHECK-SD-NOFP16-NEXT: fcvt s18, h20
; CHECK-SD-NOFP16-NEXT: fcvt s19, h21
; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
; CHECK-SD-NOFP16-NEXT: fcvt s21, h23
; CHECK-SD-NOFP16-NEXT: fcvt s22, h24
; CHECK-SD-NOFP16-NEXT: fcvt s23, h25
; CHECK-SD-NOFP16-NEXT: fmadd s7, s16, s7, s6
; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[1]
; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[1]
; CHECK-SD-NOFP16-NEXT: fcvt h6, s17
; CHECK-SD-NOFP16-NEXT: fcvt s28, h3
; CHECK-SD-NOFP16-NEXT: fcvt s29, h1
; CHECK-SD-NOFP16-NEXT: fmadd s19, s20, s19, s18
; CHECK-SD-NOFP16-NEXT: fcvt s26, h26
; CHECK-SD-NOFP16-NEXT: mov h16, v4.h[4]
; CHECK-SD-NOFP16-NEXT: fmadd s21, s23, s22, s21
; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[2]
; CHECK-SD-NOFP16-NEXT: mov h23, v1.h[2]
; CHECK-SD-NOFP16-NEXT: fcvt h20, s7
; CHECK-SD-NOFP16-NEXT: fcvt s24, h24
; CHECK-SD-NOFP16-NEXT: fcvt s25, h25
; CHECK-SD-NOFP16-NEXT: mov h17, v2.h[4]
; CHECK-SD-NOFP16-NEXT: mov h18, v0.h[4]
; CHECK-SD-NOFP16-NEXT: mov h7, v4.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
; CHECK-SD-NOFP16-NEXT: mov h30, v2.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s16, h16
; CHECK-SD-NOFP16-NEXT: fcvt h21, s21
; CHECK-SD-NOFP16-NEXT: mov h31, v1.h[4]
; CHECK-SD-NOFP16-NEXT: fmadd s24, s26, s25, s24
; CHECK-SD-NOFP16-NEXT: fmadd s25, s29, s28, s27
; CHECK-SD-NOFP16-NEXT: mov v6.h[1], v20.h[0]
; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[2]
; CHECK-SD-NOFP16-NEXT: mov h26, v5.h[3]
; CHECK-SD-NOFP16-NEXT: mov h27, v3.h[3]
; CHECK-SD-NOFP16-NEXT: mov h28, v1.h[3]
; CHECK-SD-NOFP16-NEXT: fcvt s17, h17
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: fcvt s29, h7
; CHECK-SD-NOFP16-NEXT: fcvt s30, h30
; CHECK-SD-NOFP16-NEXT: mov v6.h[2], v19.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt h24, s24
; CHECK-SD-NOFP16-NEXT: fcvt h7, s25
; CHECK-SD-NOFP16-NEXT: fcvt s19, h20
; CHECK-SD-NOFP16-NEXT: fcvt s20, h22
; CHECK-SD-NOFP16-NEXT: fcvt s22, h23
; CHECK-SD-NOFP16-NEXT: fmadd s16, s18, s17, s16
; CHECK-SD-NOFP16-NEXT: mov h23, v0.h[5]
; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
; CHECK-SD-NOFP16-NEXT: fcvt s27, h28
; CHECK-SD-NOFP16-NEXT: mov h18, v4.h[6]
; CHECK-SD-NOFP16-NEXT: mov v6.h[3], v21.h[0]
; CHECK-SD-NOFP16-NEXT: mov v7.h[1], v24.h[0]
; CHECK-SD-NOFP16-NEXT: mov h24, v5.h[5]
; CHECK-SD-NOFP16-NEXT: fmadd s19, s22, s20, s19
; CHECK-SD-NOFP16-NEXT: mov h20, v5.h[4]
; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[4]
; CHECK-SD-NOFP16-NEXT: fcvt s23, h23
; CHECK-SD-NOFP16-NEXT: mov h28, v0.h[6]
; CHECK-SD-NOFP16-NEXT: fcvt h16, s16
; CHECK-SD-NOFP16-NEXT: fcvt s18, h18
; CHECK-SD-NOFP16-NEXT: mov h4, v4.h[7]
; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
; CHECK-SD-NOFP16-NEXT: fcvt s20, h20
; CHECK-SD-NOFP16-NEXT: fcvt s21, h22
; CHECK-SD-NOFP16-NEXT: fcvt s22, h31
; CHECK-SD-NOFP16-NEXT: fmadd s17, s23, s30, s29
; CHECK-SD-NOFP16-NEXT: fmadd s23, s27, s26, s25
; CHECK-SD-NOFP16-NEXT: fcvt h19, s19
; CHECK-SD-NOFP16-NEXT: mov h25, v3.h[5]
; CHECK-SD-NOFP16-NEXT: mov h26, v1.h[5]
; CHECK-SD-NOFP16-NEXT: mov h27, v2.h[6]
; CHECK-SD-NOFP16-NEXT: mov h29, v1.h[6]
; CHECK-SD-NOFP16-NEXT: mov h2, v2.h[7]
; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
; CHECK-SD-NOFP16-NEXT: fmadd s20, s22, s21, s20
; CHECK-SD-NOFP16-NEXT: mov h21, v5.h[6]
; CHECK-SD-NOFP16-NEXT: mov h22, v3.h[6]
; CHECK-SD-NOFP16-NEXT: mov v7.h[2], v19.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt h19, s23
; CHECK-SD-NOFP16-NEXT: fcvt s23, h24
; CHECK-SD-NOFP16-NEXT: fcvt s24, h25
; CHECK-SD-NOFP16-NEXT: fcvt s25, h26
; CHECK-SD-NOFP16-NEXT: fcvt s26, h27
; CHECK-SD-NOFP16-NEXT: fcvt s27, h28
; CHECK-SD-NOFP16-NEXT: fcvt s28, h29
; CHECK-SD-NOFP16-NEXT: mov h5, v5.h[7]
; CHECK-SD-NOFP16-NEXT: fcvt s21, h21
; CHECK-SD-NOFP16-NEXT: fcvt s22, h22
; CHECK-SD-NOFP16-NEXT: mov h3, v3.h[7]
; CHECK-SD-NOFP16-NEXT: mov v7.h[3], v19.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt h19, s20
; CHECK-SD-NOFP16-NEXT: mov v6.h[4], v16.h[0]
; CHECK-SD-NOFP16-NEXT: fmadd s20, s25, s24, s23
; CHECK-SD-NOFP16-NEXT: fcvt h16, s17
; CHECK-SD-NOFP16-NEXT: fcvt s4, h4
; CHECK-SD-NOFP16-NEXT: fmadd s18, s27, s26, s18
; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: fmadd s21, s28, s22, s21
; CHECK-SD-NOFP16-NEXT: fcvt s5, h5
; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
; CHECK-SD-NOFP16-NEXT: mov v7.h[4], v19.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt h17, s20
; CHECK-SD-NOFP16-NEXT: mov v6.h[5], v16.h[0]
; CHECK-SD-NOFP16-NEXT: fmadd s0, s0, s2, s4
; CHECK-SD-NOFP16-NEXT: fcvt h2, s18
; CHECK-SD-NOFP16-NEXT: fcvt h4, s21
; CHECK-SD-NOFP16-NEXT: fmadd s1, s1, s3, s5
; CHECK-SD-NOFP16-NEXT: mov v7.h[5], v17.h[0]
; CHECK-SD-NOFP16-NEXT: mov v6.h[6], v2.h[0]
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
; CHECK-SD-NOFP16-NEXT: mov v7.h[6], v4.h[0]
; CHECK-SD-NOFP16-NEXT: mov v6.h[7], v0.h[0]
; CHECK-SD-NOFP16-NEXT: mov v7.h[7], v1.h[0]
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v6.16b
; CHECK-SD-NOFP16-NEXT: mov v1.16b, v7.16b
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fma_v16f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b
; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fma_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v18.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v19.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h
; CHECK-GI-NOFP16-NEXT: fmla v18.4s, v16.4s, v6.4s
; CHECK-GI-NOFP16-NEXT: fmla v19.4s, v17.4s, v7.4s
; CHECK-GI-NOFP16-NEXT: fmla v4.4s, v2.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: fmla v5.4s, v3.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v18.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v19.4s
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fma_v16f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
; CHECK-GI-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b
; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
  ret <16 x half> %d
}

; Scalar f64 llvm.fmuladd. The shared CHECK prefix expects the multiply-add to
; be emitted as a single fmadd on d registers in every RUN configuration.
define double @fmuladd_f64(double %a, double %b, double %c) {
; CHECK-LABEL: fmuladd_f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmadd d0, d0, d1, d2
; CHECK-NEXT: ret
entry:
  %d = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
  ret double %d
}

; Scalar f32 llvm.fmuladd. The shared CHECK prefix expects the multiply-add to
; be emitted as a single fmadd on s registers in every RUN configuration.
define float @fmuladd_f32(float %a, float %b, float %c) {
; CHECK-LABEL: fmuladd_f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmadd s0, s0, s1, s2
; CHECK-NEXT: ret
entry:
  %d = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  ret float %d
}

; Scalar f16 llvm.fmuladd. The NOFP16 prefixes (RUN lines without +fullfp16)
; expect an unfused sequence: fmul in single precision, a round trip of the
; product through half (fcvt h0, s0 / fcvt s0, h0), then fadd and a final
; conversion to half. The FP16 prefixes expect one fmadd on h registers.
define half @fmuladd_f16(half %a, half %b, half %c) {
; CHECK-SD-NOFP16-LABEL: fmuladd_f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
; CHECK-SD-NOFP16-NEXT: fcvt s1, h2
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmuladd_f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmuladd_f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h2
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmuladd_f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
  ret half %d
}

; <2 x double> llvm.fmuladd. The shared CHECK prefix expects one fmla on .2d
; lanes that accumulates into v2, followed by a move of the result into v0.
define <2 x double> @fmuladd_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fmuladd_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v2.2d, v1.2d, v0.2d
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  ret <2 x double> %d
}

; <3 x double> llvm.fmuladd. The two selectors are checked separately:
;  * CHECK-SD packs d-register pairs into .2d vectors, issues an fmla for them,
;    loads one more operand from [sp], and issues a second .2d fmla.
;  * CHECK-GI packs the same pairs for one .2d fmla and uses a scalar fmadd,
;    with an operand loaded from [sp], for the remaining value.
define <3 x double> @fmuladd_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
; CHECK-SD-LABEL: fmuladd_v3f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d
; CHECK-SD-NEXT: ldr d3, [sp]
; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d
; CHECK-SD-NEXT: fmov d0, d6
; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-SD-NEXT: fmov d2, d3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fmuladd_v3f64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
; CHECK-GI-NEXT: fmla v6.2d, v3.2d, v0.2d
; CHECK-GI-NEXT: ldr d0, [sp]
; CHECK-GI-NEXT: fmadd d2, d2, d5, d0
; CHECK-GI-NEXT: mov d1, v6.d[1]
; CHECK-GI-NEXT: fmov d0, d6
; CHECK-GI-NEXT: ret
entry:
  %d = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c)
  ret <3 x double> %d
}

; <4 x double> llvm.fmuladd. The shared CHECK prefix expects two fmla
; instructions on .2d lanes (accumulating into v4 and v5) and two moves into
; v0 and v1.
define <4 x double> @fmuladd_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: fmuladd_v4f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v4.2d, v2.2d, v0.2d
; CHECK-NEXT: fmla v5.2d, v3.2d, v1.2d
; CHECK-NEXT: mov v0.16b, v4.16b
; CHECK-NEXT: mov v1.16b, v5.16b
; CHECK-NEXT: ret
entry:
  %d = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
  ret <4 x double> %d
}

; llvm.fmuladd on <2 x float>. All four RUN configurations expect a single 2s
; fmla accumulating into v2, followed by a move of the result into d0.
define <2 x float> @fmuladd_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: fmuladd_v2f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v2.2s, v1.2s, v0.2s
; CHECK-NEXT: fmov d0, d2
; CHECK-NEXT: ret
entry:
  %d = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
  ret <2 x float> %d
}
|
|
|
|
; llvm.fmuladd on <3 x float>. The expected code is the same single 4s fmla
; (accumulating into v2, then moved to v0) that the <4 x float> test expects.
define <3 x float> @fmuladd_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; CHECK-LABEL: fmuladd_v3f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %d = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
  ret <3 x float> %d
}
|
|
|
|
; llvm.fmuladd on <4 x float>. All four RUN configurations expect a single 4s
; fmla accumulating into v2, followed by a move of the result into v0.
define <4 x float> @fmuladd_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fmuladd_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v2.4s, v1.4s, v0.4s
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %d
}
|
|
|
|
; llvm.fmuladd on <8 x float>. All four RUN configurations expect two 4s fmla
; instructions accumulating into v4/v5, whose results are then moved to v0/v1.
define <8 x float> @fmuladd_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: fmuladd_v8f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmla v4.4s, v2.4s, v0.4s
; CHECK-NEXT: fmla v5.4s, v3.4s, v1.4s
; CHECK-NEXT: mov v0.16b, v4.16b
; CHECK-NEXT: mov v1.16b, v5.16b
; CHECK-NEXT: ret
entry:
  %d = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
  ret <8 x float> %d
}
|
|
|
|
; llvm.fmuladd on <7 x half>. With +fullfp16 both selectors expect a single 8h
; fmla. Without it the expected code widens to f32 and performs a separate fmul
; and fadd, narrowing to f16 between the two: the default selector converts
; whole halves with fcvtl/fcvtl2, while GlobalISel gathers lanes 4-6 with lane
; moves and reassembles the seven result lanes one by one.
define <7 x half> @fmuladd_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fmuladd_v7f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fmul v3.4s, v4.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v3.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmuladd_v7f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmuladd_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v0.h[4]
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v4.h[0], v1.h[4]
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v0.h[5]
; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v1.h[5]
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v0.h[6]
; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v1.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmuladd_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call <7 x half> @llvm.fmuladd.v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c)
  ret <7 x half> %d
}
|
|
|
|
; llvm.fmuladd on <4 x half>. With +fullfp16 both selectors expect a single 4h
; fmla. Without it the expected code widens the operands to f32 with fcvtl,
; does a separate fmul and fadd (narrowing to f16 and widening again between
; them), and narrows the result with fcvtn.
define <4 x half> @fmuladd_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fmuladd_v4f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmuladd_v4f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
; CHECK-SD-FP16-NEXT: fmov d0, d2
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmuladd_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmuladd_v4f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
; CHECK-GI-FP16-NEXT: fmov d0, d2
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
  ret <4 x half> %d
}
|
|
|
|
; llvm.fmuladd on <8 x half>. With +fullfp16 both selectors expect a single 8h
; fmla. Without it the expected code widens each half to f32 (fcvtl/fcvtl2),
; does separate fmul and fadd steps with a narrowing to f16 in between, and
; narrows the result back with fcvtn/fcvtn2.
define <8 x half> @fmuladd_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fmuladd_v8f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fmul v3.4s, v4.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v3.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmuladd_v8f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmuladd_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmuladd_v8f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  ret <8 x half> %d
}
|
|
|
|
; llvm.fmuladd on <16 x half>. With +fullfp16 both selectors expect two 8h
; fmla instructions accumulating into v4/v5. Without it the expected code
; widens every half-register to f32, does separate fmul and fadd steps with a
; narrowing to f16 in between, and narrows the two results with fcvtn/fcvtn2.
define <16 x half> @fmuladd_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fmuladd_v16f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v16.4s, v3.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v17.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fmul v6.4s, v7.4s, v6.4s
; CHECK-SD-NOFP16-NEXT: fmul v7.4s, v17.4s, v16.4s
; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
; CHECK-SD-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v2.4h, v6.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v5.4h
; CHECK-SD-NOFP16-NEXT: fcvtn v3.4h, v7.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v2.8h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v4.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
; CHECK-SD-NOFP16-NEXT: fcvtn2 v3.8h, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v3.4h
; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v3.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v5.8h
; CHECK-SD-NOFP16-NEXT: fadd v5.4s, v7.4s, v6.4s
; CHECK-SD-NOFP16-NEXT: fadd v2.4s, v2.4s, v4.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: fadd v3.4s, v1.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v5.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmuladd_v16f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b
; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmuladd_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
; CHECK-GI-NOFP16-NEXT: fmul v6.4s, v6.4s, v7.4s
; CHECK-GI-NOFP16-NEXT: fmul v7.4s, v16.4s, v17.4s
; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v6.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v7.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v6.4s
; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v3.4s, v7.4s
; CHECK-GI-NOFP16-NEXT: fadd v4.4s, v0.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fadd v5.4s, v1.4s, v5.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmuladd_v16f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
; CHECK-GI-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b
; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = call <16 x half> @llvm.fmuladd.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
  ret <16 x half> %d
}
|
|
|
|
; Scalar double "fmul fast" feeding "fadd fast" (no intrinsic call). All four
; RUN configurations expect the pair to be combined into a single fmadd.
define double @fmul_f64(double %a, double %b, double %c) {
; CHECK-LABEL: fmul_f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmadd d0, d0, d1, d2
; CHECK-NEXT: ret
entry:
  %d = fmul fast double %a, %b
  %e = fadd fast double %d, %c
  ret double %e
}
|
|
|
|
; Scalar float "fmul fast" feeding "fadd fast" (no intrinsic call). All four
; RUN configurations expect the pair to be combined into a single fmadd.
define float @fmul_f32(float %a, float %b, float %c) {
; CHECK-LABEL: fmul_f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmadd s0, s0, s1, s2
; CHECK-NEXT: ret
entry:
  %d = fmul fast float %a, %b
  %e = fadd fast float %d, %c
  ret float %e
}
|
|
|
|
; Scalar half "fmul fast" feeding "fadd fast". With +fullfp16 both selectors
; expect a single h-register fmadd. Without it the expected code converts the
; operands to f32 with fcvt, does a separate fmul and fadd (converting to f16
; and back between them), and converts the sum back to f16.
define half @fmul_f16(half %a, half %b, half %c) {
; CHECK-SD-NOFP16-LABEL: fmul_f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
; CHECK-SD-NOFP16-NEXT: fcvt s1, h2
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmul_f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmadd h0, h0, h1, h2
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmul_f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h2
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmul_f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmadd h0, h0, h1, h2
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = fmul fast half %a, %b
  %e = fadd fast half %d, %c
  ret half %e
}
|
|
|
|
; <2 x double> "fmul fast" feeding "fadd fast". Both selectors expect a single
; 2d fmla accumulating into v2; they differ only in the order of the two
; multiplicand operands.
define <2 x double> @fmul_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-SD-LABEL: fmul_v2f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmla v2.2d, v1.2d, v0.2d
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fmul_v2f64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmla v2.2d, v0.2d, v1.2d
; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
  %d = fmul fast <2 x double> %a, %b
  %e = fadd fast <2 x double> %d, %c
  ret <2 x double> %e
}
|
|
|
|
; <3 x double> "fmul fast" feeding "fadd fast". The expected code packs lanes
; 0-1 into a single 2d fmla and handles lane 2 separately, loading its addend
; from [sp]: the default selector uses a second vector fmla for that lane,
; while GlobalISel uses a scalar fmadd.
define <3 x double> @fmul_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
; CHECK-SD-LABEL: fmul_v3f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: mov v6.d[1], v7.d[0]
; CHECK-SD-NEXT: fmla v6.2d, v3.2d, v0.2d
; CHECK-SD-NEXT: ldr d3, [sp]
; CHECK-SD-NEXT: fmla v3.2d, v5.2d, v2.2d
; CHECK-SD-NEXT: fmov d0, d6
; CHECK-SD-NEXT: ext v1.16b, v6.16b, v6.16b, #8
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-SD-NEXT: fmov d2, d3
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fmul_v3f64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT: // kill: def $d6 killed $d6 def $q6
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT: // kill: def $d7 killed $d7 def $q7
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT: mov v6.d[1], v7.d[0]
; CHECK-GI-NEXT: fmla v6.2d, v0.2d, v3.2d
; CHECK-GI-NEXT: ldr d0, [sp]
; CHECK-GI-NEXT: fmadd d2, d2, d5, d0
; CHECK-GI-NEXT: mov d1, v6.d[1]
; CHECK-GI-NEXT: fmov d0, d6
; CHECK-GI-NEXT: ret
entry:
  %d = fmul fast <3 x double> %a, %b
  %e = fadd fast <3 x double> %d, %c
  ret <3 x double> %e
}
|
|
|
|
; <4 x double> "fmul fast" feeding "fadd fast". Both selectors expect two 2d
; fmla instructions accumulating into v4/v5; they differ only in the order of
; the multiplicand operands.
define <4 x double> @fmul_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-SD-LABEL: fmul_v4f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmla v4.2d, v2.2d, v0.2d
; CHECK-SD-NEXT: fmla v5.2d, v3.2d, v1.2d
; CHECK-SD-NEXT: mov v0.16b, v4.16b
; CHECK-SD-NEXT: mov v1.16b, v5.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fmul_v4f64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmla v4.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: fmla v5.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: mov v0.16b, v4.16b
; CHECK-GI-NEXT: mov v1.16b, v5.16b
; CHECK-GI-NEXT: ret
entry:
  %d = fmul fast <4 x double> %a, %b
  %e = fadd fast <4 x double> %d, %c
  ret <4 x double> %e
}
|
|
|
|
; <2 x float> "fmul fast" feeding "fadd fast". Both selectors expect a single
; 2s fmla accumulating into v2; they differ only in the order of the two
; multiplicand operands.
define <2 x float> @fmul_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-SD-LABEL: fmul_v2f32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmla v2.2s, v1.2s, v0.2s
; CHECK-SD-NEXT: fmov d0, d2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fmul_v2f32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmla v2.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: fmov d0, d2
; CHECK-GI-NEXT: ret
entry:
  %d = fmul fast <2 x float> %a, %b
  %e = fadd fast <2 x float> %d, %c
  ret <2 x float> %e
}
|
|
|
|
; <3 x float> "fmul fast" feeding "fadd fast". Both selectors expect a single
; 4s fmla accumulating into v2; they differ only in the order of the two
; multiplicand operands.
define <3 x float> @fmul_v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; CHECK-SD-LABEL: fmul_v3f32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmla v2.4s, v1.4s, v0.4s
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fmul_v3f32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmla v2.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
  %d = fmul fast <3 x float> %a, %b
  %e = fadd fast <3 x float> %d, %c
  ret <3 x float> %e
}
|
|
|
|
; <4 x float> "fmul fast" feeding "fadd fast". Both selectors expect a single
; 4s fmla accumulating into v2; they differ only in the order of the two
; multiplicand operands.
define <4 x float> @fmul_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-SD-LABEL: fmul_v4f32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmla v2.4s, v1.4s, v0.4s
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fmul_v4f32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmla v2.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
  %d = fmul fast <4 x float> %a, %b
  %e = fadd fast <4 x float> %d, %c
  ret <4 x float> %e
}
|
|
|
|
; <8 x float> "fmul fast" feeding "fadd fast". Both selectors expect two 4s
; fmla instructions accumulating into v4/v5; they differ only in the order of
; the multiplicand operands.
define <8 x float> @fmul_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-SD-LABEL: fmul_v8f32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fmla v4.4s, v2.4s, v0.4s
; CHECK-SD-NEXT: fmla v5.4s, v3.4s, v1.4s
; CHECK-SD-NEXT: mov v0.16b, v4.16b
; CHECK-SD-NEXT: mov v1.16b, v5.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fmul_v8f32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmla v4.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: fmla v5.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: mov v0.16b, v4.16b
; CHECK-GI-NEXT: mov v1.16b, v5.16b
; CHECK-GI-NEXT: ret
entry:
  %d = fmul fast <8 x float> %a, %b
  %e = fadd fast <8 x float> %d, %c
  ret <8 x float> %e
}
|
|
|
|
; <7 x half> "fmul fast" feeding "fadd fast". With +fullfp16 both selectors
; expect a single 8h fmla (multiplicand order differs between them). Without
; it the expected code widens to f32 and performs a separate fmul and fadd,
; narrowing to f16 between the two: the default selector converts whole halves
; with fcvtl/fcvtl2, while GlobalISel gathers lanes 4-6 with lane moves and
; reassembles the seven result lanes one by one.
define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b, <7 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fmul_v7f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fmul v3.4s, v4.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v3.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmul_v7f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmul_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v0.h[4]
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: mov v4.h[0], v1.h[4]
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v0.h[5]
; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v1.h[5]
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v0.h[6]
; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v1.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: mov v5.h[0], v2.h[4]
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[1], v2.h[5]
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: mov v5.h[2], v2.h[6]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v3.h[0]
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v3.h[1]
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[2]
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v3.h[3]
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmul_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v2.8h, v0.8h, v1.8h
; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = fmul fast <7 x half> %a, %b
  %e = fadd fast <7 x half> %d, %c
  ret <7 x half> %e
}
|
|
|
|
; <4 x half> "fmul fast" feeding "fadd fast". With +fullfp16 both selectors
; expect a single 4h fmla (multiplicand order differs between them). Without
; it the expected code widens the operands to f32 with fcvtl, does a separate
; fmul and fadd (narrowing to f16 and widening again between them), and
; narrows the result with fcvtn.
define <4 x half> @fmul_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fmul_v4f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmul_v4f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v2.4h, v1.4h, v0.4h
; CHECK-SD-FP16-NEXT: fmov d0, d2
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmul_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmul_v4f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v2.4h, v0.4h, v1.4h
; CHECK-GI-FP16-NEXT: fmov d0, d2
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = fmul fast <4 x half> %a, %b
  %e = fadd fast <4 x half> %d, %c
  ret <4 x half> %e
}
|
|
|
|
; <8 x half> "fmul fast" feeding "fadd fast". With +fullfp16 both selectors
; expect a single 8h fmla (multiplicand order differs between them). Without
; it the expected code widens each half to f32 (fcvtl/fcvtl2), does separate
; fmul and fadd steps with a narrowing to f16 in between, and narrows the
; result back with fcvtn/fcvtn2.
define <8 x half> @fmul_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fmul_v8f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fmul v3.4s, v4.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v3.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v1.4s, v2.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmul_v8f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v2.8h, v1.8h, v0.8h
; CHECK-SD-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmul_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v3.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmul_v8f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v2.8h, v0.8h, v1.8h
; CHECK-GI-FP16-NEXT: mov v0.16b, v2.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = fmul fast <8 x half> %a, %b
  %e = fadd fast <8 x half> %d, %c
  ret <8 x half> %e
}
|
|
|
|
; <16 x half> "fmul fast" feeding "fadd fast". With +fullfp16 both selectors
; expect two 8h fmla instructions accumulating into v4/v5 (multiplicand order
; differs between them). Without it the expected code widens every
; half-register to f32, does separate fmul and fadd steps with a narrowing to
; f16 in between, and narrows the two results with fcvtn/fcvtn2.
define <16 x half> @fmul_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) {
; CHECK-SD-NOFP16-LABEL: fmul_v16f16:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v0.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v16.4s, v3.4h
; CHECK-SD-NOFP16-NEXT: fcvtl v17.4s, v1.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-SD-NOFP16-NEXT: fmul v6.4s, v7.4s, v6.4s
; CHECK-SD-NOFP16-NEXT: fmul v7.4s, v17.4s, v16.4s
; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
; CHECK-SD-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v2.4h, v6.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v5.4h
; CHECK-SD-NOFP16-NEXT: fcvtn v3.4h, v7.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v2.8h, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v4.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
; CHECK-SD-NOFP16-NEXT: fcvtn2 v3.8h, v1.4s
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v2.4h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v3.4h
; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v3.8h
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v5.8h
; CHECK-SD-NOFP16-NEXT: fadd v5.4s, v7.4s, v6.4s
; CHECK-SD-NOFP16-NEXT: fadd v2.4s, v2.4s, v4.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NOFP16-NEXT: fadd v3.4s, v1.4s, v3.4s
; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v5.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
; CHECK-SD-NOFP16-NEXT: ret
;
; CHECK-SD-FP16-LABEL: fmul_v16f16:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: fmla v4.8h, v2.8h, v0.8h
; CHECK-SD-FP16-NEXT: fmla v5.8h, v3.8h, v1.8h
; CHECK-SD-FP16-NEXT: mov v0.16b, v4.16b
; CHECK-SD-FP16-NEXT: mov v1.16b, v5.16b
; CHECK-SD-FP16-NEXT: ret
;
; CHECK-GI-NOFP16-LABEL: fmul_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v16.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v17.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
; CHECK-GI-NOFP16-NEXT: fmul v6.4s, v6.4s, v7.4s
; CHECK-GI-NOFP16-NEXT: fmul v7.4s, v16.4s, v17.4s
; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v2.4s
; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v1.4s, v3.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v6.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v4.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v4.4s, v4.8h
; CHECK-GI-NOFP16-NEXT: fcvtn v3.4h, v7.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v5.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v5.4s, v5.8h
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v6.4s
; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v3.4s, v7.4s
; CHECK-GI-NOFP16-NEXT: fadd v4.4s, v0.4s, v4.4s
; CHECK-GI-NOFP16-NEXT: fadd v5.4s, v1.4s, v5.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fmul_v16f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmla v4.8h, v0.8h, v2.8h
; CHECK-GI-FP16-NEXT: fmla v5.8h, v1.8h, v3.8h
; CHECK-GI-FP16-NEXT: mov v0.16b, v4.16b
; CHECK-GI-FP16-NEXT: mov v1.16b, v5.16b
; CHECK-GI-FP16-NEXT: ret
entry:
  %d = fmul fast <16 x half> %a, %b
  %e = fadd fast <16 x half> %d, %c
  ret <16 x half> %e
}
|
|
|
|
; Declarations of the @llvm.fma.* and @llvm.fmuladd.* intrinsics. Every
; declaration takes three operands of a single type and returns that same type,
; and each type below has one fma and one fmuladd declaration.
;
; Vector forms, including the non-power-of-two <3 x ...> and <7 x half> widths.
declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
|
|
declare <16 x half> @llvm.fmuladd.v16f16(<16 x half>, <16 x half>, <16 x half>)
|
|
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
|
declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
|
|
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
|
|
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
|
|
declare <3 x double> @llvm.fma.v3f64(<3 x double>, <3 x double>, <3 x double>)
|
|
declare <3 x double> @llvm.fmuladd.v3f64(<3 x double>, <3 x double>, <3 x double>)
|
|
declare <3 x float> @llvm.fma.v3f32(<3 x float>, <3 x float>, <3 x float>)
|
|
declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>)
|
|
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
|
|
declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>)
|
|
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
|
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
|
declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
|
|
declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>)
|
|
declare <7 x half> @llvm.fma.v7f16(<7 x half>, <7 x half>, <7 x half>)
|
|
declare <7 x half> @llvm.fmuladd.v7f16(<7 x half>, <7 x half>, <7 x half>)
|
|
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
|
|
declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>)
|
|
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
|
|
declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
|
|
; Scalar forms: double, float and half.
declare double @llvm.fma.f64(double, double, double)
|
|
declare double @llvm.fmuladd.f64(double, double, double)
|
|
declare float @llvm.fma.f32(float, float, float)
|
|
declare float @llvm.fmuladd.f32(float, float, float)
|
|
declare half @llvm.fma.f16(half, half, half)
|
|
declare half @llvm.fmuladd.f16(half, half, half)
|