llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
Sander de Smalen 61510b51c3 Revert "[AArch64] Enable subreg liveness tracking by default."
This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7.

Some issues were discovered with the bootstrap builds, which
seem like they were caused by this commit. I'm reverting to investigate.
2024-12-12 17:22:15 +00:00

3915 lines
141 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
; Three llc configurations are exercised above. The +sve streaming-compatible
; run and the +sme streaming run are both matched against the default FileCheck
; prefix, so both must satisfy the same assertions. The streaming-compatible run
; with no -mattr is matched separately under the NONEON-NOSVE prefix.
target triple = "aarch64-unknown-linux-gnu"
;
; FADD
;
; fadd of two <2 x half> values passed and returned by value.
; With +sve or +sme the expected code is a single predicated fadd on .h lanes
; under "ptrue p0.h, vl4". With no -mattr the expected code stores both operands
; to the stack and adds four halfword lanes one at a time, converting each lane
; to single precision with fcvt before the add and back to half afterwards.
define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-LABEL: fadd_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fadd_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fadd <2 x half> %op1, %op2
ret <2 x half> %res
}
; fadd of two <4 x half> values passed and returned by value.
; With +sve or +sme the expected code is a single predicated fadd on .h lanes
; under "ptrue p0.h, vl4". With no -mattr the expected code goes through a
; 32-byte stack frame and performs four scalar single-precision adds, one per
; halfword lane, with fcvt conversions on either side.
define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-LABEL: fadd_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fadd_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fadd <4 x half> %op1, %op2
ret <4 x half> %res
}
; fadd of two <8 x half> values passed and returned by value.
; With +sve or +sme the expected code is a single predicated fadd on .h lanes
; under "ptrue p0.h, vl8", operating on the q0/q1 arguments viewed as z0/z1.
; With no -mattr the expected code pushes q0 and q1 onto a 48-byte stack frame
; and performs eight scalar adds, one per halfword lane, each wrapped in fcvt
; conversions to and from single precision.
define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-LABEL: fadd_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fadd_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #44]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #42]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #38]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #36]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #34]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = fadd <8 x half> %op1, %op2
ret <8 x half> %res
}
; fadd of two <16 x half> vectors loaded from %a and %b; the result is stored
; back to %a. With +sve or +sme the expected code loads each operand as a pair
; of q registers and issues two predicated .h fadds under "ptrue p0.h, vl8"
; (the second preceded by movprfx), then stores the pair to [x0].
; With no -mattr the expected code copies both operands into a 96-byte stack
; frame and performs sixteen scalar adds, one per halfword lane, each wrapped
; in fcvt conversions to and from single precision.
define void @fadd_v16f16(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fadd_v16f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #46]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #44]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #94]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #42]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #92]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #40]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #90]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #38]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #88]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #36]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #86]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #34]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #84]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #82]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #80]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #78]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #76]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #74]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #72]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #70]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #68]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #66]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = fadd <16 x half> %op1, %op2
store <16 x half> %res, ptr %a
ret void
}
; fadd of two <2 x float> values passed and returned by value.
; With +sve or +sme the expected code is a single predicated fadd on .s lanes
; under "ptrue p0.s, vl2". With no -mattr the expected code stores both operands
; to a 32-byte stack frame and performs two scalar single-precision adds,
; reloading the combined result as d0.
define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-LABEL: fadd_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fadd_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fadd s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fadd <2 x float> %op1, %op2
ret <2 x float> %res
}
; fadd of two <4 x float> values passed and returned by value.
; With +sve or +sme the expected code is a single predicated fadd on .s lanes
; under "ptrue p0.s, vl4". With no -mattr the expected code pushes q0 and q1
; onto a 48-byte stack frame and performs four scalar single-precision adds,
; storing the results in pairs before reloading them as q0.
define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-LABEL: fadd_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fadd_v4f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fadd s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fadd s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = fadd <4 x float> %op1, %op2
ret <4 x float> %res
}
; fadd of two <8 x float> vectors loaded from %a and %b; the result is stored
; back to %a. With +sve or +sme the expected code loads each operand as a pair
; of q registers and issues two predicated .s fadds under "ptrue p0.s, vl4"
; (the second preceded by movprfx), then stores the pair to [x0].
; With no -mattr the expected code copies both operands into a 96-byte stack
; frame and performs eight scalar single-precision adds.
define void @fadd_v8f32(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fadd_v8f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #60]
; NONEON-NOSVE-NEXT: fadd s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #56]
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #52]
; NONEON-NOSVE-NEXT: fadd s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fadd s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fadd s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fadd s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = fadd <8 x float> %op1, %op2
store <8 x float> %res, ptr %a
ret void
}
; fadd of two <2 x double> values passed and returned by value.
; With +sve or +sme the expected code is a single predicated fadd on .d lanes
; under "ptrue p0.d, vl2". With no -mattr the expected code pushes q0 and q1
; onto a 48-byte stack frame and performs two scalar double-precision adds,
; reloading the combined result as q0.
define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-LABEL: fadd_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fadd_v2f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fadd d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fadd d0, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = fadd <2 x double> %op1, %op2
ret <2 x double> %res
}
; fadd of two <4 x double> vectors loaded from %a and %b; the result is stored
; back to %a. With +sve or +sme the expected code loads each operand as a pair
; of q registers and issues two predicated .d fadds under "ptrue p0.d, vl2"
; (the second preceded by movprfx), then stores the pair to [x0].
; With no -mattr the expected code copies both operands into a 96-byte stack
; frame and performs four scalar double-precision adds.
define void @fadd_v4f64(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fadd_v4f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
; NONEON-NOSVE-NEXT: fadd d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
; NONEON-NOSVE-NEXT: fadd d0, d1, d0
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fadd d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fadd d0, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = fadd <4 x double> %op1, %op2
store <4 x double> %res, ptr %a
ret void
}
;
; FDIV
;
; fdiv of two <2 x half> values (%op1 / %op2) passed and returned by value.
; With +sve or +sme the expected code is a single predicated fdiv on .h lanes
; under "ptrue p0.h, vl4". With no -mattr the expected code stores both operands
; to the stack and divides four halfword lanes one at a time, converting each
; lane to single precision with fcvt before the divide and back to half after.
define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-LABEL: fdiv_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fdiv_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fdiv <2 x half> %op1, %op2
ret <2 x half> %res
}
; fdiv of two <4 x half> values (%op1 / %op2) passed and returned by value.
; With +sve or +sme the expected code is a single predicated fdiv on .h lanes
; under "ptrue p0.h, vl4". With no -mattr the expected code goes through a
; 32-byte stack frame and performs four scalar single-precision divides, one
; per halfword lane, with fcvt conversions on either side.
define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-LABEL: fdiv_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fdiv_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fdiv <4 x half> %op1, %op2
ret <4 x half> %res
}
; fdiv of two <8 x half> values (%op1 / %op2) passed and returned by value.
; With +sve or +sme the expected code is a single predicated fdiv on .h lanes
; under "ptrue p0.h, vl8", operating on the q0/q1 arguments viewed as z0/z1.
; With no -mattr the expected code pushes q0 and q1 onto a 48-byte stack frame
; and performs eight scalar divides, one per halfword lane, each wrapped in
; fcvt conversions to and from single precision.
define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-LABEL: fdiv_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fdiv_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #44]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #42]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #38]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #36]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #34]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = fdiv <8 x half> %op1, %op2
ret <8 x half> %res
}
; fdiv of two <16 x half> vectors loaded from %a and %b (%a / %b); the result
; is stored back to %a. With +sve or +sme the expected code loads each operand
; as a pair of q registers and issues two predicated .h divides under
; "ptrue p0.h, vl8": an fdivr for the half whose destination register was
; loaded from [x1], and a movprfx + fdiv for the other half.
; NOTE(review): fdivr is presumably chosen so the quotient can be written in
; place over the divisor register - confirm against the SVE instruction docs.
; With no -mattr the expected code copies both operands into a 96-byte stack
; frame and performs sixteen scalar divides, one per halfword lane, each
; wrapped in fcvt conversions to and from single precision.
define void @fdiv_v16f16(ptr %a, ptr %b) {
; CHECK-LABEL: fdiv_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fdivr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fdiv z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fdiv_v16f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #46]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #44]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #94]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #42]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #92]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #40]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #90]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #38]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #88]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #36]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #86]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #34]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #84]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #82]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #80]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #78]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #76]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #74]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #72]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #70]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #68]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #66]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = fdiv <16 x half> %op1, %op2
store <16 x half> %res, ptr %a
ret void
}
; fdiv of two <2 x float> values (%op1 / %op2) passed and returned by value.
; With +sve or +sme the expected code is a single predicated fdiv on .s lanes
; under "ptrue p0.s, vl2". With no -mattr the expected code stores both operands
; to a 32-byte stack frame and performs two scalar single-precision divides,
; reloading the combined result as d0.
define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-LABEL: fdiv_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fdiv_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fdiv <2 x float> %op1, %op2
ret <2 x float> %res
}
; fdiv of two <4 x float> values (%op1 / %op2) passed and returned by value.
; With +sve or +sme the expected code is a single predicated fdiv on .s lanes
; under "ptrue p0.s, vl4". With no -mattr the expected code pushes q0 and q1
; onto a 48-byte stack frame and performs four scalar single-precision divides,
; storing the results in pairs before reloading them as q0.
define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-LABEL: fdiv_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fdiv_v4f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = fdiv <4 x float> %op1, %op2
ret <4 x float> %res
}
; fdiv of two <8 x float> vectors loaded from %a and %b (%a / %b); the result
; is stored back to %a. With +sve or +sme the expected code loads each operand
; as a pair of q registers and issues two predicated .s divides under
; "ptrue p0.s, vl4": an fdivr for the half whose destination register was
; loaded from [x1], and a movprfx + fdiv for the other half.
; NOTE(review): fdivr is presumably chosen so the quotient can be written in
; place over the divisor register - confirm against the SVE instruction docs.
; With no -mattr the expected code copies both operands into a 96-byte stack
; frame and performs eight scalar single-precision divides.
define void @fdiv_v8f32(ptr %a, ptr %b) {
; CHECK-LABEL: fdiv_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fdiv z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fdiv_v8f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #60]
; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #56]
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #52]
; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fdiv s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fdiv s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = fdiv <8 x float> %op1, %op2
store <8 x float> %res, ptr %a
ret void
}
; fdiv_v2f64: lane-wise division of two <2 x double> arguments, returned by
; value. With SVE or SME the expected code is a single predicated fdiv under a
; vl2 .d predicate. Without NEON or SVE the operands are stored to the stack
; and the two lanes are divided with scalar fdiv on d registers.
define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-LABEL: fdiv_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fdiv_v2f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fdiv d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fdiv d0, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = fdiv <2 x double> %op1, %op2
ret <2 x double> %res
}
; fdiv_v4f64: loads <4 x double> from %a and %b, divides %a by %b lane-wise
; and stores the quotient back through %a. The SVE/SME expectation handles the
; vector as two 128-bit halves under a vl2 .d predicate: fdivr for the half
; whose destination register was loaded via x1, movprfx + fdiv for the other.
; Without NEON or SVE the expectation is four scalar fdiv instructions on
; stack copies of the operands.
define void @fdiv_v4f64(ptr %a, ptr %b) {
; CHECK-LABEL: fdiv_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fdiv z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fdiv_v4f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
; NONEON-NOSVE-NEXT: fdiv d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
; NONEON-NOSVE-NEXT: fdiv d0, d1, d0
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fdiv d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fdiv d0, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = fdiv <4 x double> %op1, %op2
store <4 x double> %res, ptr %a
ret void
}
;
; FMA
;
; fma_v2f16: calls llvm.fma on three <2 x half> arguments and returns the
; result. Both expected outputs operate on four half lanes (a vl4 .h predicate
; with fmad, or four scalar steps), matching the output expected for
; fma_v4f16. In the expectation without NEON or SVE each lane is widened to
; single precision with fcvt, combined with a scalar fmadd, and narrowed back
; to half before being stored to the result slot on the stack.
define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3) {
; CHECK-LABEL: fma_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fma_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8]
; NONEON-NOSVE-NEXT: str d0, [sp]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h2, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.fma.v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3)
ret <2 x half> %res
}
; fma_v4f16: calls llvm.fma on three <4 x half> arguments and returns the
; result. With SVE or SME the expected code is one predicated fmad under a
; vl4 .h predicate. Without NEON or SVE the three operands are stored to the
; stack and each of the four lanes is widened to single precision with fcvt,
; combined with a scalar fmadd, and narrowed back to half.
define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) {
; CHECK-LABEL: fma_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fma_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8]
; NONEON-NOSVE-NEXT: str d0, [sp]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h2, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.fma.v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
ret <4 x half> %res
}
; fma_v8f16: calls llvm.fma on three <8 x half> arguments and returns the
; result. With SVE or SME the expected code is one predicated fmad under a
; vl8 .h predicate. Without NEON or SVE the three operands are stored into a
; 64-byte frame and each of the eight lanes is widened to single precision
; with fcvt, combined with a scalar fmadd, and narrowed back to half.
define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) {
; CHECK-LABEL: fma_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fma_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #64
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16]
; NONEON-NOSVE-NEXT: str q0, [sp]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #62]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #44]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #60]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #42]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #24]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #58]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #40]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #56]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #38]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #20]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #54]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #36]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #18]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #52]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #34]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #16]
; NONEON-NOSVE-NEXT: ldr h2, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #50]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #48]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.fma.v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
ret <8 x half> %res
}
; fma_v16f16: loads <16 x half> from %a, %b and %c, computes
; llvm.fma(%a, %b, %c) and stores the result back through %a. The SVE/SME
; expectation processes the vector as two 128-bit halves under a vl8 .h
; predicate: fmad for one half, movprfx + fmla for the other. Without NEON or
; SVE the operands are copied into a 128-byte frame and each of the sixteen
; lanes is widened to single precision with fcvt, combined with a scalar
; fmadd, and narrowed back to half.
define void @fma_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: fma_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q4, [x1]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q5, [x2]
; CHECK-NEXT: ldp q2, q3, [x0]
; CHECK-NEXT: fmad z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: movprfx z1, z5
; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fma_v16f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #128
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
; NONEON-NOSVE-NEXT: ldp q1, q0, [x2]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
; NONEON-NOSVE-NEXT: ldp q4, q5, [x0]
; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64]
; NONEON-NOSVE-NEXT: stp q4, q2, [sp]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #94]
; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #78]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #62]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #76]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #60]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #126]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #92]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #74]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #58]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #124]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #90]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #72]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #56]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #122]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #88]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #70]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #54]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #120]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #86]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #68]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #52]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #118]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #84]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #66]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #50]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #116]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #82]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #64]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #48]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #114]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #80]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #112]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #46]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #110]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #44]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #108]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #42]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #24]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #106]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #40]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #104]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #38]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #20]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #102]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #36]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #18]
; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #100]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #34]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #16]
; NONEON-NOSVE-NEXT: ldr h2, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt s2, h2
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #98]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #96]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #128
; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%op3 = load <16 x half>, ptr %c
%res = call <16 x half> @llvm.fma.v16f16(<16 x half> %op1, <16 x half> %op2, <16 x half> %op3)
store <16 x half> %res, ptr %a
ret void
}
; fma_v2f32: calls llvm.fma on three <2 x float> arguments and returns the
; result. With SVE or SME the expected code is one predicated fmad under a
; vl2 .s predicate. Without NEON or SVE the operands are stored to the stack
; and the two lanes are computed with scalar fmadd, with no precision
; conversion.
define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) {
; CHECK-LABEL: fma_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fma_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d1, d2, [sp, #8]
; NONEON-NOSVE-NEXT: str d0, [sp]
; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #8]
; NONEON-NOSVE-NEXT: ldp s2, s4, [sp]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.fma.v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3)
ret <2 x float> %res
}
; fma_v4f32: calls llvm.fma on three <4 x float> arguments and returns the
; result. With SVE or SME the expected code is one predicated fmad under a
; vl4 .s predicate. Without NEON or SVE the operands are stored into a 64-byte
; frame and the four lanes are computed with scalar fmadd, two lanes at a
; time via paired loads and stores.
define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) {
; CHECK-LABEL: fma_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fma_v4f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #64
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16]
; NONEON-NOSVE-NEXT: str q0, [sp]
; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24]
; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #44]
; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #40]
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16]
; NONEON-NOSVE-NEXT: ldp s2, s4, [sp]
; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #56]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #36]
; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #32]
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #48]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.fma.v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3)
ret <4 x float> %res
}
; fma_v8f32: loads <8 x float> from %a, %b and %c, computes
; llvm.fma(%a, %b, %c) and stores the result back through %a. The SVE/SME
; expectation processes the vector as two 128-bit halves under a vl4 .s
; predicate: fmad for one half, movprfx + fmla for the other. Without NEON or
; SVE the operands are copied into a 128-byte frame and the eight lanes are
; computed with scalar fmadd.
define void @fma_v8f32(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: fma_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q4, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldp q1, q5, [x2]
; CHECK-NEXT: ldp q2, q3, [x0]
; CHECK-NEXT: fmad z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: movprfx z1, z5
; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fma_v8f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #128
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
; NONEON-NOSVE-NEXT: ldp q1, q0, [x2]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
; NONEON-NOSVE-NEXT: ldp q4, q5, [x0]
; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64]
; NONEON-NOSVE-NEXT: stp q4, q2, [sp]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #92]
; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32]
; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #72]
; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #56]
; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #88]
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #48]
; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #120]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #84]
; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #80]
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #24]
; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #112]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #44]
; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #40]
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s3, [sp, #16]
; NONEON-NOSVE-NEXT: ldp s2, s4, [sp]
; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #104]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #36]
; NONEON-NOSVE-NEXT: fmadd s5, s4, s3, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #32]
; NONEON-NOSVE-NEXT: fmadd s0, s2, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s5, [sp, #96]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #128
; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%op3 = load <8 x float>, ptr %c
%res = call <8 x float> @llvm.fma.v8f32(<8 x float> %op1, <8 x float> %op2, <8 x float> %op3)
store <8 x float> %res, ptr %a
ret void
}
; fma_v2f64: calls llvm.fma on three <2 x double> arguments and returns the
; result. With SVE or SME the expected code is one predicated fmad under a
; vl2 .d predicate. Without NEON or SVE the operands are stored into a 64-byte
; frame and the two lanes are computed with scalar fmadd on d registers.
define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) {
; CHECK-LABEL: fma_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fma_v2f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #64
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #16]
; NONEON-NOSVE-NEXT: str q0, [sp]
; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16]
; NONEON-NOSVE-NEXT: ldp d2, d4, [sp]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #48]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.fma.v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3)
ret <2 x double> %res
}
; fma_v4f64: loads <4 x double> from %a, %b and %c, computes
; llvm.fma(%a, %b, %c) and stores the result back through %a. The SVE/SME
; expectation processes the vector as two 128-bit halves under a vl2 .d
; predicate: fmad for one half, movprfx + fmla for the other. Without NEON or
; SVE the operands are copied into a 128-byte frame and the four lanes are
; computed with scalar fmadd on d registers.
define void @fma_v4f64(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: fma_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q4, [x1]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldp q1, q5, [x2]
; CHECK-NEXT: ldp q2, q3, [x0]
; CHECK-NEXT: fmad z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: movprfx z1, z5
; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fma_v4f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #128
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 128
; NONEON-NOSVE-NEXT: ldp q1, q0, [x2]
; NONEON-NOSVE-NEXT: ldp q2, q3, [x1]
; NONEON-NOSVE-NEXT: ldp q4, q5, [x0]
; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #64]
; NONEON-NOSVE-NEXT: stp q4, q2, [sp]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #88]
; NONEON-NOSVE-NEXT: stp q1, q5, [sp, #32]
; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp d2, d4, [sp, #48]
; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #80]
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
; NONEON-NOSVE-NEXT: ldp d1, d3, [sp, #16]
; NONEON-NOSVE-NEXT: ldp d2, d4, [sp]
; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #112]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT: fmadd d5, d4, d3, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
; NONEON-NOSVE-NEXT: fmadd d0, d2, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d5, [sp, #96]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #128
; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%op3 = load <4 x double>, ptr %c
%res = call <4 x double> @llvm.fma.v4f64(<4 x double> %op1, <4 x double> %op2, <4 x double> %op3)
store <4 x double> %res, ptr %a
ret void
}
;
; FMUL
;
; fmul_v2f16: lane-wise multiplication of two <2 x half> arguments, returned
; by value. Both expected outputs operate on four half lanes (a vl4 .h
; predicate with fmul, or four scalar steps), matching the output expected
; for fmul_v4f16. In the expectation without NEON or SVE each lane is widened
; to single precision with fcvt, multiplied with a scalar fmul, and narrowed
; back to half.
define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-LABEL: fmul_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fmul_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fmul <2 x half> %op1, %op2
ret <2 x half> %res
}
; fmul_v4f16: lane-wise multiplication of two <4 x half> arguments, returned
; by value. With SVE or SME the expected code is one predicated fmul under a
; vl4 .h predicate. Without NEON or SVE the operands are stored to the stack
; and each of the four lanes is widened to single precision with fcvt,
; multiplied with a scalar fmul, and narrowed back to half.
define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-LABEL: fmul_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fmul_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fmul <4 x half> %op1, %op2
ret <4 x half> %res
}
; fmul_v8f16: lane-wise multiplication of two <8 x half> arguments, returned
; by value. With SVE or SME the expected code is one predicated fmul under a
; vl8 .h predicate. Without NEON or SVE both operands are stored to the stack
; with a pre-indexed stp and each of the eight lanes is widened to single
; precision with fcvt, multiplied with a scalar fmul, and narrowed back to
; half.
define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-LABEL: fmul_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fmul_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #44]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #42]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #38]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #36]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #34]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = fmul <8 x half> %op1, %op2
ret <8 x half> %res
}
; fmul of two <16 x half> vectors loaded from %a and %b; the product is stored
; back to %a. The SVE/SME expectations use two predicated 8-lane fmul
; instructions; the expectations for the run without NEON or SVE scalarize
; through the stack, widening each half to single precision for the multiply
; and narrowing the result again.
define void @fmul_v16f16(ptr %a, ptr %b) {
; CHECK-LABEL: fmul_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fmul_v16f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #46]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #44]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #94]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #42]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #92]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #40]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #90]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #38]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #88]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #36]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #86]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #34]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #84]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #82]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #80]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #78]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #76]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #74]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #72]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #70]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #68]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #66]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = fmul <16 x half> %op1, %op2
store <16 x half> %res, ptr %a
ret void
}
; fmul of two <2 x float> register arguments. The SVE/SME expectations use a
; single predicated fmul limited to two 32-bit lanes; the expectations for the
; run without NEON or SVE spill both operands to the stack and multiply each
; lane with a scalar fmul.
define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-LABEL: fmul_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fmul_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fmul s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fmul <2 x float> %op1, %op2
ret <2 x float> %res
}
; fmul of two <4 x float> register arguments. The SVE/SME expectations use a
; single predicated fmul over four 32-bit lanes; the expectations for the run
; without NEON or SVE store both q registers to the stack and multiply the
; four lanes with scalar fmul instructions.
define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-LABEL: fmul_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fmul_v4f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fmul s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fmul s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = fmul <4 x float> %op1, %op2
ret <4 x float> %res
}
; fmul of two <8 x float> vectors loaded from %a and %b; the product is stored
; back to %a. The SVE/SME expectations use two predicated 4-lane fmul
; instructions; the expectations for the run without NEON or SVE copy the
; operands to the stack and multiply the eight lanes with scalar fmul.
define void @fmul_v8f32(ptr %a, ptr %b) {
; CHECK-LABEL: fmul_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fmul_v8f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #60]
; NONEON-NOSVE-NEXT: fmul s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #56]
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #52]
; NONEON-NOSVE-NEXT: fmul s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fmul s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fmul s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fmul s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = fmul <8 x float> %op1, %op2
store <8 x float> %res, ptr %a
ret void
}
; fmul of two <2 x double> register arguments. The SVE/SME expectations use a
; single predicated fmul over two 64-bit lanes; the expectations for the run
; without NEON or SVE store both q registers to the stack and multiply the two
; lanes with scalar fmul instructions.
define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-LABEL: fmul_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fmul_v2f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fmul d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fmul d0, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
%res = fmul <2 x double> %op1, %op2
ret <2 x double> %res
}
; fmul of two <4 x double> vectors loaded from %a and %b; the product is
; stored back to %a. The SVE/SME expectations use two predicated 2-lane fmul
; instructions; the expectations for the run without NEON or SVE copy the
; operands to the stack and multiply the four lanes with scalar fmul.
define void @fmul_v4f64(ptr %a, ptr %b) {
; CHECK-LABEL: fmul_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fmul z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fmul_v4f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
; NONEON-NOSVE-NEXT: fmul d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
; NONEON-NOSVE-NEXT: fmul d0, d1, d0
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fmul d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fmul d0, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = fmul <4 x double> %op1, %op2
store <4 x double> %res, ptr %a
ret void
}
;
; FNEG
;
; fneg of a <2 x half> register argument. The SVE/SME expectations use a
; predicated fneg with a four-lane (vl4) halfword predicate; the expectations
; for the run without NEON or SVE spill d0 to the stack and flip bit 15 of
; each of the four halfwords with an integer eor of 0x8000.
define <2 x half> @fneg_v2f16(<2 x half> %op) {
; CHECK-LABEL: fneg_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fneg_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #14]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #12]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #10]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = fneg <2 x half> %op
ret <2 x half> %res
}
; fneg of a <4 x half> register argument. The SVE/SME expectations use a
; single predicated fneg over four 16-bit lanes; the expectations for the run
; without NEON or SVE spill d0 to the stack and flip bit 15 of each halfword
; with an integer eor of 0x8000.
define <4 x half> @fneg_v4f16(<4 x half> %op) {
; CHECK-LABEL: fneg_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fneg_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #14]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #12]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #10]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = fneg <4 x half> %op
ret <4 x half> %res
}
; fneg of an <8 x half> register argument. The SVE/SME expectations use a
; single predicated fneg over eight 16-bit lanes; the expectations for the run
; without NEON or SVE spill q0 to the stack and flip bit 15 of each halfword
; with an integer eor of 0x8000.
define <8 x half> @fneg_v8f16(<8 x half> %op) {
; CHECK-LABEL: fneg_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fneg_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #20]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #18]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fneg <8 x half> %op
ret <8 x half> %res
}
; fneg of a <16 x half> vector loaded from %a; the result is stored back to
; %a. The SVE/SME expectations use two predicated 8-lane fneg instructions;
; the expectations for the run without NEON or SVE copy the vector to the
; stack and flip bit 15 of each halfword with an integer eor of 0x8000.
; NOTE(review): %b is declared but never referenced in the body; the other
; fneg load/store tests in this file take only %a.
define void @fneg_v16f16(ptr %a, ptr %b) {
; CHECK-LABEL: fneg_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: fneg z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fneg_v16f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #62]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #60]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #58]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #56]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #54]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #52]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #50]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #48]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #44]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #42]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #38]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #36]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #34]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: eor w8, w8, #0x8000
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = fneg <16 x half> %op
store <16 x half> %res, ptr %a
ret void
}
; fneg of a <2 x float> register argument. The SVE/SME expectations use a
; single predicated fneg over two 32-bit lanes; the expectations for the run
; without NEON or SVE spill d0 to the stack and negate each lane with a
; scalar fneg.
define <2 x float> @fneg_v2f32(<2 x float> %op) {
; CHECK-LABEL: fneg_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fneg_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT: fneg s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp]
; NONEON-NOSVE-NEXT: fneg s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = fneg <2 x float> %op
ret <2 x float> %res
}
; fneg of a <4 x float> register argument. The SVE/SME expectations use a
; single predicated fneg over four 32-bit lanes; the expectations for the run
; without NEON or SVE spill q0 to the stack and negate each lane with a
; scalar fneg.
define <4 x float> @fneg_v4f32(<4 x float> %op) {
; CHECK-LABEL: fneg_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fneg_v4f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT: fneg s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT: fneg s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT: fneg s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp]
; NONEON-NOSVE-NEXT: fneg s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fneg <4 x float> %op
ret <4 x float> %res
}
; fneg of an <8 x float> vector loaded from %a; the result is stored back to
; %a. The SVE/SME expectations use two predicated 4-lane fneg instructions;
; the expectations for the run without NEON or SVE copy the vector to the
; stack and negate each lane with a scalar fneg.
define void @fneg_v8f32(ptr %a) {
; CHECK-LABEL: fneg_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
; CHECK-NEXT: fneg z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fneg_v8f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fneg s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fneg s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fneg s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fneg s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT: fneg s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT: fneg s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT: fneg s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp]
; NONEON-NOSVE-NEXT: fneg s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = fneg <8 x float> %op
store <8 x float> %res, ptr %a
ret void
}
; fneg of a <2 x double> register argument. The SVE/SME expectations use a
; single predicated fneg over two 64-bit lanes; the expectations for the run
; without NEON or SVE spill q0 to the stack and negate each lane with a
; scalar fneg.
define <2 x double> @fneg_v2f64(<2 x double> %op) {
; CHECK-LABEL: fneg_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fneg z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fneg_v2f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: fneg d1, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: fneg d0, d0
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = fneg <2 x double> %op
ret <2 x double> %res
}
; fneg of a <4 x double> vector loaded from %a; the result is stored back to
; %a. The SVE/SME expectations use two predicated 2-lane fneg instructions;
; the expectations for the run without NEON or SVE copy the vector to the
; stack and negate each lane with a scalar fneg.
define void @fneg_v4f64(ptr %a) {
; CHECK-LABEL: fneg_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: fneg z0.d, p0/m, z0.d
; CHECK-NEXT: fneg z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fneg_v4f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fneg d1, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fneg d0, d0
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: fneg d1, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: fneg d0, d0
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = fneg <4 x double> %op
store <4 x double> %res, ptr %a
ret void
}
;
; FSQRT
;
; llvm.sqrt of a <2 x half> register argument. The SVE/SME expectations use a
; predicated fsqrt with a four-lane (vl4) halfword predicate; the expectations
; for the run without NEON or SVE spill d0 to the stack and, for each of the
; four halfwords, widen to single precision, take a scalar fsqrt, and narrow
; the result again.
define <2 x half> @fsqrt_v2f16(<2 x half> %op) {
; CHECK-LABEL: fsqrt_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsqrt_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #14]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #12]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #10]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %op)
ret <2 x half> %res
}
; llvm.sqrt of a <4 x half> register argument. The SVE/SME expectations use a
; single predicated fsqrt over four 16-bit lanes; the expectations for the run
; without NEON or SVE spill d0 to the stack and, per halfword, widen to single
; precision, take a scalar fsqrt, and narrow the result again.
define <4 x half> @fsqrt_v4f16(<4 x half> %op) {
; CHECK-LABEL: fsqrt_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsqrt_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #14]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #12]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #10]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %op)
ret <4 x half> %res
}
; llvm.sqrt of an <8 x half> register argument. The SVE/SME expectations use a
; single predicated fsqrt over eight 16-bit lanes; the expectations for the
; run without NEON or SVE spill q0 to the stack and, per halfword, widen to
; single precision, take a scalar fsqrt, and narrow the result again.
define <8 x half> @fsqrt_v8f16(<8 x half> %op) {
; CHECK-LABEL: fsqrt_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsqrt_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #20]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #18]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %op)
ret <8 x half> %res
}
; llvm.sqrt of a <16 x half> vector loaded from %a; the result is stored back
; to %a. The SVE/SME expectations use two predicated 8-lane fsqrt
; instructions; the expectations for the run without NEON or SVE copy the
; vector to the stack and, per halfword, widen to single precision, take a
; scalar fsqrt, and narrow the result again.
; NOTE(review): %b is declared but never referenced in the body.
define void @fsqrt_v16f16(ptr %a, ptr %b) {
; CHECK-LABEL: fsqrt_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: fsqrt z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsqrt_v16f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #62]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #60]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #58]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #56]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #54]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #52]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #50]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #48]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #44]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #42]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #38]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #36]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #34]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
ret void
}
; Square root of a <2 x float> passed and returned by value, through
; @llvm.sqrt.v2f32.
; With SVE or SME enabled the expected code is a single predicated fsqrt on
; 32-bit elements under a vl2 predicate. With no vector feature enabled the
; expected code stores the argument to the stack, applies scalar fsqrt to each
; of the two lanes, and reloads the assembled result into d0.
define <2 x float> @fsqrt_v2f32(<2 x float> %op) {
; CHECK-LABEL: fsqrt_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsqrt_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT: fsqrt s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp]
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %op)
  ret <2 x float> %res
}
; Square root of a <4 x float> passed and returned by value, through
; @llvm.sqrt.v4f32.
; With SVE or SME enabled the expected code is a single predicated fsqrt on
; 32-bit elements under a vl4 predicate. With no vector feature enabled the
; expected code stores q0 to the stack and handles the four lanes with scalar
; fsqrt, writing the results back in pairs before reloading them as q0.
define <4 x float> @fsqrt_v4f32(<4 x float> %op) {
; CHECK-LABEL: fsqrt_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsqrt_v4f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT: fsqrt s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT: fsqrt s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp]
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %op)
  ret <4 x float> %res
}
; In-place square root of the <8 x float> stored at %a, through
; @llvm.sqrt.v8f32; the result is stored back to %a.
; With SVE or SME enabled the expected code loads the value as two q
; registers, applies one predicated fsqrt (32-bit elements, vl4 predicate) to
; each, and stores the pair back. With no vector feature enabled the expected
; code copies both q registers to the stack and computes all eight lanes with
; scalar fsqrt before storing the reassembled pair to %a.
define void @fsqrt_v8f32(ptr %a) {
; CHECK-LABEL: fsqrt_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT: fsqrt z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsqrt_v8f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fsqrt s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #56]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fsqrt s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #48]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT: fsqrt s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT: fsqrt s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp]
; NONEON-NOSVE-NEXT: fsqrt s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}
; Square root of a <2 x double> passed and returned by value, through
; @llvm.sqrt.v2f64.
; With SVE or SME enabled the expected code is a single predicated fsqrt on
; 64-bit elements under a vl2 predicate. With no vector feature enabled the
; expected code stores q0 to the stack, applies scalar fsqrt to each of the
; two double lanes, and reloads the stored pair as q0.
define <2 x double> @fsqrt_v2f64(<2 x double> %op) {
; CHECK-LABEL: fsqrt_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsqrt_v2f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: fsqrt d1, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: fsqrt d0, d0
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %op)
  ret <2 x double> %res
}
; In-place square root of the <4 x double> stored at %a, through
; @llvm.sqrt.v4f64; the result is stored back to %a.
; With SVE or SME enabled the expected code loads the value as two q
; registers and applies one predicated fsqrt (64-bit elements, vl2 predicate)
; to each. With no vector feature enabled the expected code copies both q
; registers to the stack and computes the four lanes with scalar fsqrt before
; storing the reassembled pair to %a.
define void @fsqrt_v4f64(ptr %a) {
; CHECK-LABEL: fsqrt_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
; CHECK-NEXT: fsqrt z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsqrt_v4f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fsqrt d1, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fsqrt d0, d0
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: fsqrt d1, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: fsqrt d0, d0
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}
;
; FSUB
;
; Element-wise %op1 - %op2 on <2 x half> values passed and returned by value.
; With SVE or SME enabled the expected code is a single predicated fsub on
; 16-bit elements under a vl4 predicate. With no vector feature enabled the
; expected code stores both arguments to the stack and, per lane, converts the
; two halves to single precision with fcvt, subtracts with scalar fsub, and
; converts the difference back to half.
; Both expected sequences cover four half lanes although the IR type has two;
; presumably the type is widened to <4 x half> during legalization (to be
; confirmed against the backend).
define <2 x half> @fsub_v2f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-LABEL: fsub_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsub_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = fsub <2 x half> %op1, %op2
  ret <2 x half> %res
}
; Element-wise %op1 - %op2 on <4 x half> values passed and returned by value.
; With SVE or SME enabled the expected code is a single predicated fsub on
; 16-bit elements under a vl4 predicate. With no vector feature enabled the
; expected code stores both arguments to the stack and, for each of the four
; lanes, converts the two halves to single precision with fcvt, subtracts
; with scalar fsub, and converts the difference back to half.
define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-LABEL: fsub_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsub_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = fsub <4 x half> %op1, %op2
  ret <4 x half> %res
}
; Element-wise %op1 - %op2 on <8 x half> values passed and returned by value.
; With SVE or SME enabled the expected code is a single predicated fsub on
; 16-bit elements under a vl8 predicate. With no vector feature enabled the
; expected code stores q0 and q1 to the stack and, for each of the eight
; lanes, converts the two halves to single precision with fcvt, subtracts
; with scalar fsub, and converts the difference back to half.
define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-LABEL: fsub_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsub_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #44]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #42]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #38]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #36]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #34]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %res = fsub <8 x half> %op1, %op2
  ret <8 x half> %res
}
; Loads <16 x half> values from %a and %b, subtracts the second from the first
; element-wise, and stores the difference back to %a.
; With SVE or SME enabled the expected code loads each operand as two q
; registers and uses a vl8 predicate on 16-bit elements: the first pair of
; registers is combined with the reversed form fsubr, the second with movprfx
; followed by fsub. With no vector feature enabled the expected code copies
; all four q registers to the stack and, for each of the sixteen lanes,
; converts both halves to single precision with fcvt, subtracts with scalar
; fsub, and converts the difference back to half.
define void @fsub_v16f16(ptr %a, ptr %b) {
; CHECK-LABEL: fsub_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fsub z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsub_v16f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
; NONEON-NOSVE-NEXT: ldr h1, [sp, #46]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #44]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #94]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #42]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #92]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #40]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #90]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #38]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #88]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #36]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #86]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #34]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #84]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #32]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #82]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #80]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #78]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #76]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #74]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #72]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #70]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #68]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldr h1, [sp]
; NONEON-NOSVE-NEXT: fcvt s1, h1
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #66]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, h0
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = fsub <16 x half> %op1, %op2
  store <16 x half> %res, ptr %a
  ret void
}
; Element-wise %op1 - %op2 on <2 x float> values passed and returned by value.
; With SVE or SME enabled the expected code is a single predicated fsub on
; 32-bit elements under a vl2 predicate. With no vector feature enabled the
; expected code stores both arguments to the stack, subtracts the two lanes
; with scalar fsub, and reloads the stored pair of results as d0.
define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-LABEL: fsub_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsub_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fsub s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = fsub <2 x float> %op1, %op2
  ret <2 x float> %res
}
; Element-wise %op1 - %op2 on <4 x float> values passed and returned by value.
; With SVE or SME enabled the expected code is a single predicated fsub on
; 32-bit elements under a vl4 predicate. With no vector feature enabled the
; expected code stores q0 and q1 to the stack, subtracts the four lanes with
; scalar fsub two at a time, and reloads the stored results as q0.
define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-LABEL: fsub_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsub_v4f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fsub s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fsub s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %res = fsub <4 x float> %op1, %op2
  ret <4 x float> %res
}
; Loads <8 x float> values from %a and %b, subtracts the second from the first
; element-wise, and stores the difference back to %a.
; With SVE or SME enabled the expected code loads each operand as two q
; registers and uses a vl4 predicate on 32-bit elements: the first pair of
; registers is combined with the reversed form fsubr, the second with movprfx
; followed by fsub. With no vector feature enabled the expected code copies
; all four q registers to the stack and subtracts the eight lanes with scalar
; fsub two at a time before storing the reassembled result to %a.
define void @fsub_v8f32(ptr %a, ptr %b) {
; CHECK-LABEL: fsub_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fsub z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsub_v8f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #60]
; NONEON-NOSVE-NEXT: fsub s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #56]
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #32]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #88]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #52]
; NONEON-NOSVE-NEXT: fsub s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #48]
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp, #8]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #80]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fsub s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: ldp s1, s2, [sp]
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #72]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fsub s3, s2, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fsub s0, s1, s0
; NONEON-NOSVE-NEXT: stp s0, s3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = fsub <8 x float> %op1, %op2
  store <8 x float> %res, ptr %a
  ret void
}
; Element-wise %op1 - %op2 on <2 x double> values passed and returned by
; value.
; With SVE or SME enabled the expected code is a single predicated fsub on
; 64-bit elements under a vl2 predicate. With no vector feature enabled the
; expected code stores q0 and q1 to the stack, subtracts the two double lanes
; with scalar fsub, and reloads the stored pair of results as q0.
define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-LABEL: fsub_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsub_v2f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fsub d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fsub d0, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %res = fsub <2 x double> %op1, %op2
  ret <2 x double> %res
}
; Loads <4 x double> values from %a and %b, subtracts the second from the
; first element-wise, and stores the difference back to %a.
; With SVE or SME enabled the expected code loads each operand as two q
; registers and uses a vl2 predicate on 64-bit elements: the first pair of
; registers is combined with the reversed form fsubr, the second with movprfx
; followed by fsub. With no vector feature enabled the expected code copies
; all four q registers to the stack and subtracts the four lanes with scalar
; fsub two at a time before storing the reassembled result to %a.
define void @fsub_v4f64(ptr %a, ptr %b) {
; CHECK-LABEL: fsub_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fsub z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fsub_v4f64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT: ldp q3, q0, [x1]
; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp, #32]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
; NONEON-NOSVE-NEXT: fsub d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #48]
; NONEON-NOSVE-NEXT: fsub d0, d1, d0
; NONEON-NOSVE-NEXT: ldp d1, d2, [sp]
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #80]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fsub d3, d2, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fsub d0, d1, d0
; NONEON-NOSVE-NEXT: stp d0, d3, [sp, #64]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = fsub <4 x double> %op1, %op2
  store <4 x double> %res, ptr %a
  ret void
}
;
; FABS
;
; Absolute value of a <2 x half> passed and returned by value, through
; @llvm.fabs.v2f16.
; With SVE or SME enabled the expected code is a single predicated fabs on
; 16-bit elements under a vl4 predicate. With no vector feature enabled the
; expected code stores the argument to the stack and, per lane, moves the
; half into w8, masks it with 0x7fff, and moves it back before storing; no
; floating-point arithmetic instruction is expected on that path.
; Both expected sequences cover four half lanes although the IR type has two;
; presumably the type is widened to <4 x half> during legalization (to be
; confirmed against the backend).
define <2 x half> @fabs_v2f16(<2 x half> %op) {
; CHECK-LABEL: fabs_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fabs_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #14]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #12]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #10]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x half> @llvm.fabs.v2f16(<2 x half> %op)
  ret <2 x half> %res
}
; Absolute value of a <4 x half> passed and returned by value, through
; @llvm.fabs.v4f16.
; With SVE or SME enabled the expected code is a single predicated fabs on
; 16-bit elements under a vl4 predicate. With no vector feature enabled the
; expected code stores the argument to the stack and, for each of the four
; lanes, moves the half into w8, masks it with 0x7fff, and moves it back
; before storing; the results are then reloaded as d0.
define <4 x half> @fabs_v4f16(<4 x half> %op) {
; CHECK-LABEL: fabs_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fabs_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #14]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #12]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #10]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <4 x half> @llvm.fabs.v4f16(<4 x half> %op)
  ret <4 x half> %res
}
; Absolute value of an <8 x half> passed and returned by value, through
; @llvm.fabs.v8f16.
; With SVE or SME enabled the expected code is a single predicated fabs on
; 16-bit elements under a vl8 predicate. With no vector feature enabled the
; expected code stores q0 to the stack and, for each of the eight lanes,
; moves the half into w8, masks it with 0x7fff, and moves it back before
; storing; the results are then reloaded as q0.
define <8 x half> @fabs_v8f16(<8 x half> %op) {
; CHECK-LABEL: fabs_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fabs_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #22]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #20]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #18]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #16]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = call <8 x half> @llvm.fabs.v8f16(<8 x half> %op)
  ret <8 x half> %res
}
; In-place absolute value of the <16 x half> stored at %a, through
; @llvm.fabs.v16f16; the result is stored back to %a.
; With SVE or SME enabled the expected code loads the value as two q
; registers and applies one predicated fabs (16-bit elements, vl8 predicate)
; to each. With no vector feature enabled the expected code copies both q
; registers to the stack and, for each of the sixteen lanes, moves the half
; into w8, masks it with 0x7fff, and moves it back before storing; the
; reassembled pair is then stored to %a.
define void @fabs_v16f16(ptr %a) {
; CHECK-LABEL: fabs_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: fabs z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fabs_v16f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #62]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #60]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #58]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #56]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #54]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #52]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #50]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #48]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #44]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #42]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #38]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #36]
; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #34]
; NONEON-NOSVE-NEXT: ldr h0, [sp]
; NONEON-NOSVE-NEXT: fmov w8, s0
; NONEON-NOSVE-NEXT: and w8, w8, #0x7fff
; NONEON-NOSVE-NEXT: fmov s0, w8
; NONEON-NOSVE-NEXT: str h0, [sp, #32]
; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
  %op = load <16 x half>, ptr %a
  %res = call <16 x half> @llvm.fabs.v16f16(<16 x half> %op)
  store <16 x half> %res, ptr %a
  ret void
}
; Absolute value of a <2 x float> passed and returned by value, through
; @llvm.fabs.v2f32.
; With SVE or SME enabled the expected code is a single predicated fabs on
; 32-bit elements under a vl2 predicate. With no vector feature enabled the
; expected code stores the argument to the stack, applies scalar fabs to each
; of the two lanes, and reloads the stored pair of results as d0.
define <2 x float> @fabs_v2f32(<2 x float> %op) {
; CHECK-LABEL: fabs_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fabs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fabs_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT: fabs s1, s0
; NONEON-NOSVE-NEXT: ldr s0, [sp]
; NONEON-NOSVE-NEXT: fabs s0, s0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #8]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call <2 x float> @llvm.fabs.v2f32(<2 x float> %op)
  ret <2 x float> %res
}
; fabs on a 128-bit <4 x float> vector that is passed and returned by value.
; With the CHECK prefix the expected lowering is one SVE fabs on z0.s,
; predicated by ptrue p0.s with vl4. With the NONEON-NOSVE prefix the vector
; is spilled to the stack, the four lanes are processed by scalar fabs in
; pairs, and the result is reloaded into q0.
define <4 x float> @fabs_v4f32(<4 x float> %op) {
; CHECK-LABEL: fabs_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fabs_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT:    fabs s1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT:    fabs s1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %res = call <4 x float> @llvm.fabs.v4f32(<4 x float> %op)
  ret <4 x float> %res
}
; In-place fabs on a 256-bit <8 x float> vector loaded from and stored back
; to %a. With the CHECK prefix the expected lowering loads the data as two
; q registers and applies a predicated SVE fabs (ptrue p0.s, vl4) to each
; 128-bit half. With the NONEON-NOSVE prefix both halves are copied to the
; stack and all eight lanes go through scalar fabs before the result is
; stored back through x0.
define void @fabs_v8f32(ptr %a) {
; CHECK-LABEL: fabs_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
; CHECK-NEXT:    fabs z1.s, p0/m, z1.s
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fabs_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT:    fabs s1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT:    fabs s1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT:    fabs s1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT:    fabs s1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %op = load <8 x float>, ptr %a
  %res = call <8 x float> @llvm.fabs.v8f32(<8 x float> %op)
  store <8 x float> %res, ptr %a
  ret void
}
; fabs on a 128-bit <2 x double> vector that is passed and returned by value.
; With the CHECK prefix the expected lowering is one SVE fabs on z0.d,
; predicated by ptrue p0.d with vl2. With the NONEON-NOSVE prefix the vector
; is spilled to the stack and each of the two lanes goes through a scalar
; fabs before the result is reloaded into q0.
define <2 x double> @fabs_v2f64(<2 x double> %op) {
; CHECK-LABEL: fabs_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    fabs z0.d, p0/m, z0.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fabs_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    fabs d1, d0
; NONEON-NOSVE-NEXT:    ldr d0, [sp]
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %res = call <2 x double> @llvm.fabs.v2f64(<2 x double> %op)
  ret <2 x double> %res
}
; In-place fabs on a 256-bit <4 x double> vector loaded from and stored back
; to %a. With the CHECK prefix the expected lowering loads the data as two
; q registers and applies a predicated SVE fabs (ptrue p0.d, vl2) to each
; 128-bit half. With the NONEON-NOSVE prefix both halves are copied to the
; stack and all four lanes go through scalar fabs before the result is
; stored back through x0.
define void @fabs_v4f64(ptr %a) {
; CHECK-LABEL: fabs_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    fabs z0.d, p0/m, z0.d
; CHECK-NEXT:    fabs z1.d, p0/m, z1.d
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fabs_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    fabs d1, d0
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    fabs d1, d0
; NONEON-NOSVE-NEXT:    ldr d0, [sp]
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %op = load <4 x double>, ptr %a
  %res = call <4 x double> @llvm.fabs.v4f64(<4 x double> %op)
  store <4 x double> %res, ptr %a
  ret void
}
; Declarations of the fixed-length vector intrinsics, one per element type
; and vector width (half x 2/4/8/16, float x 2/4/8, double x 2/4).

; llvm.fma takes three vector operands of the same type.
declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)

; llvm.sqrt takes a single vector operand.
declare <2 x half> @llvm.sqrt.v2f16(<2 x half>)
declare <4 x half> @llvm.sqrt.v4f16(<4 x half>)
declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
declare <16 x half> @llvm.sqrt.v16f16(<16 x half>)
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)

; llvm.fabs takes a single vector operand.
declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
declare <4 x double> @llvm.fabs.v4f64(<4 x double>)