
This patch adds patterns that select the EXT_ZZI_CONSTRUCTIVE pseudo instead of the destructive EXT_ZZI instruction for vector_splice. This only works when both inputs to vector_splice are identical. Because the source and destination registers are no longer tied, the register allocator has more freedom and many MOVs are replaced with MOVPRFX. In some cases, however, the allocator could have assigned the same register to the input and the output but chose not to; those test cases now contain one more instruction, because a MOVPRFX is emitted where previously no MOV was needed.
1152 lines
39 KiB
LLVM
1152 lines
39 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; The first two invocations (SVE streaming-compatible, SME streaming) share the
; default FileCheck prefix. The third has neither feature enabled and uses its
; own prefix because its expected output is element-by-element scalar code.

target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
;
; FCVT H -> S, operand passed in registers (no load instruction)
;
|
|
|
|
; Extends a <2 x half> register argument to <2 x float> and stores it to %b.
define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) {
; CHECK-LABEL: fcvt_v2f16_to_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v2f16_to_v2f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    str d0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = fpext <2 x half> %a to <2 x float>
  store <2 x float> %res, ptr %b
  ret void
}
|
|
|
|
; Extends a <4 x half> register argument to <4 x float> and stores it to %b.
define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
; CHECK-LABEL: fcvt_v4f16_to_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v4f16_to_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT:    str q0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %res = fpext <4 x half> %a to <4 x float>
  store <4 x float> %res, ptr %b
  ret void
}
|
|
|
|
; Extends a <8 x half> register argument to <8 x float> and stores it to %b.
; The SVE/SME output obtains the upper four halves with a MOVPRFX + EXT pair
; before unpacking and converting each part.
define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
; CHECK-LABEL: fcvt_v8f16_to_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    movprfx z1, z0
; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z1.s, z1.h
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v8f16_to_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %res = fpext <8 x half> %a to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}
|
|
|
|
; Extends a <16 x half> register argument (two Q registers) to <16 x float>
; and stores it to %b. The SVE/SME output uses one MOVPRFX + EXT pair per
; input register to obtain the upper halves.
define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
; CHECK-LABEL: fcvt_v16f16_to_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    movprfx z2, z1
; CHECK-NEXT:    ext z2.b, z2.b, z1.b, #8
; CHECK-NEXT:    movprfx z3, z0
; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    uunpklo z1.s, z1.h
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    uunpklo z2.s, z2.h
; CHECK-NEXT:    uunpklo z3.s, z3.h
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    fcvt z2.s, p0/m, z2.h
; CHECK-NEXT:    fcvt z3.s, p0/m, z3.h
; CHECK-NEXT:    stp q0, q3, [x0]
; CHECK-NEXT:    stp q1, q2, [x0, #32]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-128]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #46]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #88]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #42]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #40]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #120]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #112]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #104]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT:    stp q2, q3, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x0, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #128
; NONEON-NOSVE-NEXT:    ret
  %res = fpext <16 x half> %a to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}
|
|
|
|
;
; FCVT H -> S, operand loaded from memory
;
|
|
|
|
; Loads <2 x half> from %a, extends it to <2 x float> and stores it to %b.
define void @fcvt_v2f16_v2f32(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v2f16_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr w8, [x0]
; NONEON-NOSVE-NEXT:    str w8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    str d0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    str d0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <2 x half>, ptr %a
  %res = fpext <2 x half> %op1 to <2 x float>
  store <2 x float> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <4 x half> from %a, extends it to <4 x float> and stores it to %b.
define void @fcvt_v4f16_v4f32(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v4f16_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT:    str q0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <4 x half>, ptr %a
  %res = fpext <4 x half> %op1 to <4 x float>
  store <4 x float> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <8 x half> from %a, extends it to <8 x float> and stores it to %b.
; The SVE/SME output performs two extending loads of four elements each.
define void @fcvt_v8f16_v8f32(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v8f16_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    mov x8, #4 // =0x4
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    ld1h { z1.s }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
; CHECK-NEXT:    stp q1, q0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fpext <8 x half> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <16 x half> from %a, extends it to <16 x float> and stores it to %b.
; The SVE/SME output performs four extending loads of four elements each.
define void @fcvt_v16f16_v16f32(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v16f16_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    mov x8, #8 // =0x8
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    mov x8, #12 // =0xc
; CHECK-NEXT:    ld1h { z2.s }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.s }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    mov x8, #4 // =0x4
; CHECK-NEXT:    ld1h { z3.s }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
; CHECK-NEXT:    fcvt z2.s, p0/m, z2.h
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
; CHECK-NEXT:    fcvt z3.s, p0/m, z3.h
; CHECK-NEXT:    stp q0, q1, [x1, #32]
; CHECK-NEXT:    stp q2, q3, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-128]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #46]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #88]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #42]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #40]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #120]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #112]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #104]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
; NONEON-NOSVE-NEXT:    fcvt s1, h0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #128
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fpext <16 x half> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}
|
|
|
|
;
; FCVT H -> D
;
|
|
|
|
; Loads <1 x half> from %a, extends it to <1 x double> and stores it to %b.
; Every configuration converts with a single scalar FCVT.
define void @fcvt_v1f16_v1f64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v1f16_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    fcvt d0, h0
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v1f16_v1f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldr h0, [x0]
; NONEON-NOSVE-NEXT:    fcvt d0, h0
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    str d0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <1 x half>, ptr %a
  %res = fpext <1 x half> %op1 to <1 x double>
  store <1 x double> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <2 x half> from %a, extends it to <2 x double> and stores it to %b.
; The scalar fallback converts each element in two steps (half -> float ->
; double).
define void @fcvt_v2f16_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v2f16_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr w8, [x0]
; NONEON-NOSVE-NEXT:    str w8, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr d0, [sp]
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT:    str q0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <2 x half>, ptr %a
  %res = fpext <2 x half> %op1 to <2 x double>
  store <2 x double> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <4 x half> from %a, extends it to <4 x double> and stores it to %b.
; The SVE/SME output performs two extending loads of two elements each.
define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v4f16_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov x8, #2 // =0x2
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
; CHECK-NEXT:    stp q1, q0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #48
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #16]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <4 x half>, ptr %a
  %res = fpext <4 x half> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <8 x half> from %a, extends it to <8 x double> and stores it to %b.
; The SVE/SME output performs four extending loads of two elements each.
define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v8f16_v8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov x8, #4 // =0x4
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    mov x8, #6 // =0x6
; CHECK-NEXT:    ld1h { z2.d }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    mov x8, #2 // =0x2
; CHECK-NEXT:    ld1h { z3.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
; CHECK-NEXT:    fcvt z2.d, p0/m, z2.h
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
; CHECK-NEXT:    fcvt z3.d, p0/m, z3.h
; CHECK-NEXT:    stp q0, q1, [x1, #32]
; CHECK-NEXT:    stp q2, q3, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    str q0, [sp, #-96]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <8 x half>, ptr %a
  %res = fpext <8 x half> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <16 x half> from %a, extends it to <16 x double> and stores it to %b.
; The SVE/SME output performs eight extending loads of two elements each;
; three of the conversions write a different destination register and are
; therefore preceded by MOVPRFX.
define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v16f16_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov x8, #12 // =0xc
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    mov x8, #14 // =0xe
; CHECK-NEXT:    ld1h { z6.d }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    mov x8, #8 // =0x8
; CHECK-NEXT:    ld1h { z2.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    mov x8, #10 // =0xa
; CHECK-NEXT:    ld1h { z3.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    mov x8, #4 // =0x4
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
; CHECK-NEXT:    ld1h { z4.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    mov x8, #6 // =0x6
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
; CHECK-NEXT:    ld1h { z5.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    fcvt z2.d, p0/m, z2.h
; CHECK-NEXT:    mov x8, #2 // =0x2
; CHECK-NEXT:    fcvt z3.d, p0/m, z3.h
; CHECK-NEXT:    ld1h { z7.d }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT:    fcvt z4.d, p0/m, z4.h
; CHECK-NEXT:    stp q0, q1, [x1, #96]
; CHECK-NEXT:    movprfx z0, z5
; CHECK-NEXT:    fcvt z0.d, p0/m, z5.h
; CHECK-NEXT:    movprfx z1, z6
; CHECK-NEXT:    fcvt z1.d, p0/m, z6.h
; CHECK-NEXT:    stp q2, q3, [x1, #64]
; CHECK-NEXT:    movprfx z2, z7
; CHECK-NEXT:    fcvt z2.d, p0/m, z7.h
; CHECK-NEXT:    stp q4, q0, [x1, #32]
; CHECK-NEXT:    stp q1, q2, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #192
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 192
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #42]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #40]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #46]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #64]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #176]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #160]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #160]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #144]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #128]
; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp q2, q5, [sp, #128]
; NONEON-NOSVE-NEXT:    stp q3, q4, [x1, #32]
; NONEON-NOSVE-NEXT:    stp q6, q7, [x1, #64]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x1]
; NONEON-NOSVE-NEXT:    stp q5, q2, [x1, #96]
; NONEON-NOSVE-NEXT:    add sp, sp, #192
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %res = fpext <16 x half> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}
|
|
|
|
;
; FCVT S -> D
;
|
|
|
|
; Loads <1 x float> from %a, extends it to <1 x double> and stores it to %b.
; All three configurations expect the same scalar load/FCVT/store sequence.
define void @fcvt_v1f32_v1f64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v1f32_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    fcvt d0, s0
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v1f32_v1f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr s0, [x0]
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    str d0, [x1]
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <1 x float>, ptr %a
  %res = fpext <1 x float> %op1 to <1 x double>
  store <1 x double> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <2 x float> from %a, extends it to <2 x double> and stores it to %b.
define void @fcvt_v2f32_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v2f32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v2f32_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
; NONEON-NOSVE-NEXT:    str q0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <2 x float>, ptr %a
  %res = fpext <2 x float> %op1 to <2 x double>
  store <2 x double> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <4 x float> from %a, extends it to <4 x double> and stores it to %b.
; The SVE/SME output performs two extending loads of two elements each.
define void @fcvt_v4f32_v4f64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v4f32_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov x8, #2 // =0x2
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT:    ld1w { z1.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.s
; CHECK-NEXT:    stp q1, q0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v4f32_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    str q0, [sp, #-64]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <4 x float>, ptr %a
  %res = fpext <4 x float> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}
|
|
|
|
; Loads <8 x float> from %a, extends it to <8 x double> and stores it to %b.
; The SVE/SME output performs four extending loads of two elements each.
define void @fcvt_v8f32_v8f64(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v8f32_v8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov x8, #4 // =0x4
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT:    mov x8, #6 // =0x6
; CHECK-NEXT:    ld1w { z2.d }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.d }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT:    mov x8, #2 // =0x2
; CHECK-NEXT:    ld1w { z3.d }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK-NEXT:    fcvt z2.d, p0/m, z2.s
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.s
; CHECK-NEXT:    fcvt z3.d, p0/m, z3.s
; CHECK-NEXT:    stp q0, q1, [x1, #32]
; CHECK-NEXT:    stp q2, q3, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: fcvt_v8f32_v8f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-128]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #60]
; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #64]
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #56]
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #52]
; NONEON-NOSVE-NEXT:    fcvt d1, s0
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #48]
; NONEON-NOSVE-NEXT:    fcvt d0, s0
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #128
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %res = fpext <8 x float> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}
|
|
|
|
;
; FCVT S -> H
;
|
|
|
|
; Truncates the <2 x float> loaded from %a to <2 x half> and stores it to %b.
; The SVE/SME runs expect a single predicated fcvt (.s -> .h) under a vl2
; predicate; the run without SVE expects one scalar fcvt per lane, staged
; through stack slots.
define void @fcvt_v2f32_v2f16(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v2f32_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v2f32_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr d0, [x0]
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #18]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #16]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: str d0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT: str w8, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op1 = load <2 x float>, ptr %a
  %res = fptrunc <2 x float> %op1 to <2 x half>
  store <2 x half> %res, ptr %b
  ret void
}
|
|
|
|
; Truncates the <4 x float> loaded from %a to <4 x half> and stores it to %b.
; The SVE/SME runs expect a single predicated fcvt (.s -> .h) under a vl4
; predicate; the run without SVE expects four scalar fcvt conversions, staged
; through stack slots.
define void @fcvt_v4f32_v4f16(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v4f32_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v4f32_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldr q0, [x0]
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #30]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #28]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #26]
; NONEON-NOSVE-NEXT: ldr s0, [sp]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: str d0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op1 = load <4 x float>, ptr %a
  %res = fptrunc <4 x float> %op1 to <4 x half>
  store <4 x half> %res, ptr %b
  ret void
}
|
|
|
|
; Truncates the <8 x float> loaded from %a to <8 x half> and stores it to %b.
; The SVE/SME runs expect the input to be loaded as a q-register pair and
; converted with two predicated fcvt (.s -> .h) instructions under a vl4
; predicate, followed by two st1h stores; the run without SVE expects eight
; scalar fcvt conversions, staged through stack slots.
define void @fcvt_v8f32_v8f16(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v8f32_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: mov x8, #4 // =0x4
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: fcvt z1.h, p0/m, z1.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1, x8, lsl #1]
; CHECK-NEXT: st1h { z1.s }, p0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v8f32_v8f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr s0, [sp, #28]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #44]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #20]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #42]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #38]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #36]
; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #34]
; NONEON-NOSVE-NEXT: ldr s0, [sp]
; NONEON-NOSVE-NEXT: fcvt h0, s0
; NONEON-NOSVE-NEXT: str h0, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: str q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %op1 = load <8 x float>, ptr %a
  %res = fptrunc <8 x float> %op1 to <8 x half>
  store <8 x half> %res, ptr %b
  ret void
}
|
|
|
|
;
; FCVT D -> H
;
|
|
|
|
; Truncates the <1 x double> loaded from %a to <1 x half> and stores it to %b.
; The SVE/SME runs expect a single predicated fcvt (.d -> .h) under a vl1
; predicate; the run without SVE expects one scalar fcvt with no stack
; traffic.
define void @fcvt_v1f64_v1f16(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v1f64_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v1f64_v1f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldr d0, [x0]
; NONEON-NOSVE-NEXT: fcvt h0, d0
; NONEON-NOSVE-NEXT: str h0, [x1]
; NONEON-NOSVE-NEXT: ret
  %op1 = load <1 x double>, ptr %a
  %res = fptrunc <1 x double> %op1 to <1 x half>
  store <1 x half> %res, ptr %b
  ret void
}
|
|
|
|
; Truncates the <2 x double> loaded from %a to <2 x half> and stores it to %b.
; The SVE/SME runs expect a single predicated fcvt (.d -> .h) under a vl2
; predicate; the run without SVE expects one scalar fcvt per lane, staged
; through stack slots.
define void @fcvt_v2f64_v2f16(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v2f64_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v2f64_v2f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldr q0, [x0]
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt h0, d0
; NONEON-NOSVE-NEXT: str h0, [sp, #18]
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: fcvt h0, d0
; NONEON-NOSVE-NEXT: str h0, [sp, #16]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: str d0, [sp, #24]
; NONEON-NOSVE-NEXT: ldr w8, [sp, #24]
; NONEON-NOSVE-NEXT: str w8, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op1 = load <2 x double>, ptr %a
  %res = fptrunc <2 x double> %op1 to <2 x half>
  store <2 x half> %res, ptr %b
  ret void
}
|
|
|
|
; Truncates the <4 x double> loaded from %a to <4 x half> and stores it to %b.
; The SVE/SME runs expect the input to be loaded as a q-register pair and
; converted with two predicated fcvt (.d -> .h) instructions under a vl2
; predicate, followed by two st1h stores; the run without SVE expects four
; scalar fcvt conversions, staged through stack slots.
define void @fcvt_v4f64_v4f16(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v4f64_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: mov x8, #2 // =0x2
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
; CHECK-NEXT: fcvt z1.h, p0/m, z1.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1, x8, lsl #1]
; CHECK-NEXT: st1h { z1.d }, p0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v4f64_v4f16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt h0, d0
; NONEON-NOSVE-NEXT: str h0, [sp, #46]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt h0, d0
; NONEON-NOSVE-NEXT: str h0, [sp, #44]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt h0, d0
; NONEON-NOSVE-NEXT: str h0, [sp, #42]
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: fcvt h0, d0
; NONEON-NOSVE-NEXT: str h0, [sp, #40]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT: str d0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %op1 = load <4 x double>, ptr %a
  %res = fptrunc <4 x double> %op1 to <4 x half>
  store <4 x half> %res, ptr %b
  ret void
}
|
|
|
|
;
; FCVT D -> S
;
|
|
|
|
; Truncates the by-value <1 x double> argument %op1 to <1 x float> and stores
; it to %b (no load is involved). The SVE/SME runs expect a single predicated
; fcvt (.d -> .s) under a vl1 predicate; the run without SVE expects one
; scalar fcvt with no stack traffic.
define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) {
; CHECK-LABEL: fcvt_v1f64_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v1f64_v1f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: fcvt s0, d0
; NONEON-NOSVE-NEXT: str s0, [x0]
; NONEON-NOSVE-NEXT: ret
  %res = fptrunc <1 x double> %op1 to <1 x float>
  store <1 x float> %res, ptr %b
  ret void
}
|
|
|
|
; Truncates the by-value <2 x double> argument %op1 to <2 x float> and stores
; it to %b (no load is involved). The SVE/SME runs expect a single predicated
; fcvt (.d -> .s) under a vl2 predicate; the run without SVE expects one
; scalar fcvt per lane, staged through stack slots.
define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) {
; CHECK-LABEL: fcvt_v2f64_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v2f64_v2f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: fcvt s0, d0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #24]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: str d0, [x0]
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %res = fptrunc <2 x double> %op1 to <2 x float>
  store <2 x float> %res, ptr %b
  ret void
}
|
|
|
|
; Truncates the <4 x double> loaded from %a to <4 x float> and stores it to %b.
; The SVE/SME runs expect the input to be loaded as a q-register pair and
; converted with two predicated fcvt (.d -> .s) instructions under a vl2
; predicate, followed by two st1w stores; the run without SVE expects four
; scalar fcvt conversions, staged through stack slots.
define void @fcvt_v4f64_v4f32(ptr %a, ptr %b) {
; CHECK-LABEL: fcvt_v4f64_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: mov x8, #2 // =0x2
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: fcvt z1.s, p0/m, z1.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
; CHECK-NEXT: st1w { z1.d }, p0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fcvt_v4f64_v4f32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT: fcvt s1, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp, #16]
; NONEON-NOSVE-NEXT: fcvt s0, d0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #40]
; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT: fcvt s1, d0
; NONEON-NOSVE-NEXT: ldr d0, [sp]
; NONEON-NOSVE-NEXT: fcvt s0, d0
; NONEON-NOSVE-NEXT: stp s0, s1, [sp, #32]
; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: str q0, [x1]
; NONEON-NOSVE-NEXT: add sp, sp, #48
; NONEON-NOSVE-NEXT: ret
  %op1 = load <4 x double>, ptr %a
  %res = fptrunc <4 x double> %op1 to <4 x float>
  store <4 x float> %res, ptr %b
  ret void
}
|