
The patch adds patterns to select the EXT_ZZI_CONSTRUCTIVE pseudo instead of the EXT_ZZI destructive instruction for vector_splice. This only works when the two inputs to vector_splice are identical. Given that registers aren't tied anymore, this gives the register allocator more freedom and a lot of MOVs get replaced with MOVPRFX. In some cases however, we could have just chosen the same input and output register, but regalloc preferred not to. This means we end up with some test cases now having more instructions: there is now a MOVPRFX while no MOV was previously needed.
1857 lines
59 KiB
LLVM
1857 lines
59 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
;
; UCVTF H -> H
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i16_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = uitofp <4 x i16> %op1 to <4 x half>
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i16_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ucvtf v0.8h, v0.8h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x i16>, ptr %a
  %res = uitofp <8 x i16> %op1 to <8 x half>
  store <8 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v16i16_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, ptr %a
  %res = uitofp <16 x i16> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v32i16_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v32i16_v32f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT: ucvtf z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ucvtf_v32i16_v32f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: ucvtf z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <32 x i16>, ptr %a
  %res = uitofp <32 x i16> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v64i16_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v64i16_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x i16>, ptr %a
  %res = uitofp <64 x i16> %op1 to <64 x half>
  store <64 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v128i16_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v128i16_v128f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <128 x i16>, ptr %a
  %res = uitofp <128 x i16> %op1 to <128 x half>
  store <128 x half> %res, ptr %b
  ret void
}
|
|
|
|
;
; UCVTF H -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i16_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = uitofp <2 x i16> %op1 to <2 x float>
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i16_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ucvtf v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = uitofp <4 x i16> %op1 to <4 x float>
  ret <4 x float> %res
}

define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i16_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x i16>, ptr %a
  %res = uitofp <8 x i16> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v16i16_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: ucvtf z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: ucvtf z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ucvtf_v16i16_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ucvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x i16>, ptr %a
  %res = uitofp <16 x i16> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v32i16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v32i16_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x i16>, ptr %a
  %res = uitofp <32 x i16> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v64i16_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x i16>, ptr %a
  %res = uitofp <64 x i16> %op1 to <64 x float>
  store <64 x float> %res, ptr %b
  ret void
}
|
|
|
|
;
; UCVTF H -> D
;

; v1i16 is preferred to be widened to v4i16, which pushes the output into SVE types, so use SVE
define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i16_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %res = uitofp <1 x i16> %op1 to <1 x double>
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i16_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = uitofp <2 x i16> %op1 to <2 x double>
  ret <2 x double> %res
}

define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i16_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <4 x i16>, ptr %a
  %res = uitofp <4 x i16> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i16_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: ucvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: ucvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ucvtf_v8i16_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: ucvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x i16>, ptr %a
  %res = uitofp <8 x i16> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i16_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x i16>, ptr %a
  %res = uitofp <16 x i16> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i16_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x i16>, ptr %a
  %res = uitofp <32 x i16> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}
|
|
|
|
;
; UCVTF S -> H
;

; Don't use SVE for 64-bit vectors.
define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i32_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ucvtf v0.4s, v0.4s
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
  %res = uitofp <2 x i32> %op1 to <2 x half>
  ret <2 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i32_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf v0.4s, v0.4s
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
  %res = uitofp <4 x i32> %op1 to <4 x half>
  ret <4 x half> %res
}

define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i32_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, ptr %a
  %res = uitofp <8 x i32> %op1 to <8 x half>
  ret <8 x half> %res
}

define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.s
; VBITS_GE_256-NEXT: ucvtf z1.h, p0/m, z1.s
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ucvtf_v16i32_v16f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ucvtf z0.h, p0/m, z0.s
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x i32>, ptr %a
  %res = uitofp <16 x i32> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v32i32_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT: ptrue p0.h, vl32
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x i32>, ptr %a
  %res = uitofp <32 x i32> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v64i32_v64f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x i32>, ptr %a
  %res = uitofp <64 x i32> %op1 to <64 x half>
  store <64 x half> %res, ptr %b
  ret void
}
|
|
|
|
;
; UCVTF S -> S
;

; Don't use SVE for 64-bit vectors.
define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i32_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = uitofp <2 x i32> %op1 to <2 x float>
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i32_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = uitofp <4 x i32> %op1 to <4 x float>
  ret <4 x float> %res
}

define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v8i32_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x i32>, ptr %a
  %res = uitofp <8 x i32> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v16i32_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ucvtf z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: ucvtf z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ucvtf_v16i32_v16f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ucvtf z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x i32>, ptr %a
  %res = uitofp <16 x i32> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v32i32_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v32i32_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x i32>, ptr %a
  %res = uitofp <32 x i32> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v64i32_v64f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x i32>, ptr %a
  %res = uitofp <64 x i32> %op1 to <64 x float>
  store <64 x float> %res, ptr %b
  ret void
}
|
|
|
|
;
; UCVTF S -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x double> @ucvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i32_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %res = uitofp <1 x i32> %op1 to <1 x double>
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i32_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = uitofp <2 x i32> %op1 to <2 x double>
  ret <2 x double> %res
}

define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i32_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <4 x i32>, ptr %a
  %res = uitofp <4 x i32> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i32_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: ucvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: ucvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ucvtf_v8i32_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ucvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x i32>, ptr %a
  %res = uitofp <8 x i32> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v16i32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i32_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x i32>, ptr %a
  %res = uitofp <16 x i32> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i32_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x i32>, ptr %a
  %res = uitofp <32 x i32> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}
|
|
|
|
;
; UCVTF D -> H
;

; Don't use SVE for 64-bit vectors.
define <1 x half> @ucvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i64_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: ucvtf h0, x8
; CHECK-NEXT: ret
  %res = uitofp <1 x i64> %op1 to <1 x half>
  ret <1 x half> %res
}

; v2f16 is not legal for NEON, so use SVE
define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i64_v2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %res = uitofp <2 x i64> %op1 to <2 x half>
  ret <2 x half> %res
}

define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i64_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, ptr %a
  %res = uitofp <4 x i64> %op1 to <4 x half>
  ret <4 x half> %res
}

define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.d
; VBITS_GE_256-NEXT: ucvtf z1.h, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ucvtf z0.h, p0/m, z0.d
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x i64>, ptr %a
  %res = uitofp <8 x i64> %op1 to <8 x half>
  ret <8 x half> %res
}

define void @ucvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i64_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x i64>, ptr %a
  %res = uitofp <16 x i64> %op1 to <16 x half>
  store <16 x half> %res, ptr %b
  ret void
}

define void @ucvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i64_v32f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x i64>, ptr %a
  %res = uitofp <32 x i64> %op1 to <32 x half>
  store <32 x half> %res, ptr %b
  ret void
}
|
|
|
|
;
; UCVTF D -> S
;

; Don't use SVE for 64-bit vectors.
define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i64_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: ucvtf s0, x8
; CHECK-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
  %res = uitofp <1 x i64> %op1 to <1 x float>
  ret <1 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i64_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: ucvtf s0, x9
; CHECK-NEXT: ucvtf s1, x8
; CHECK-NEXT: mov v0.s[1], v1.s[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %res = uitofp <2 x i64> %op1 to <2 x float>
  ret <2 x float> %res
}

define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i64_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, ptr %a
  %res = uitofp <4 x i64> %op1 to <4 x float>
  ret <4 x float> %res
}

define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ucvtf z0.s, p0/m, z0.d
; VBITS_GE_256-NEXT: ucvtf z1.s, p0/m, z1.d
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ucvtf z0.s, p0/m, z0.d
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x i64>, ptr %a
  %res = uitofp <8 x i64> %op1 to <8 x float>
  store <8 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i64_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl16
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x i64>, ptr %a
  %res = uitofp <16 x i64> %op1 to <16 x float>
  store <16 x float> %res, ptr %b
  ret void
}

define void @ucvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i64_v32f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x i64>, ptr %a
  %res = uitofp <32 x i64> %op1 to <32 x float>
  store <32 x float> %res, ptr %b
  ret void
}
|
|
|
|
;
; UCVTF D -> D
;

; Don't use SVE for 64-bit vectors.
define <1 x double> @ucvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v1i64_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
  %res = uitofp <1 x i64> %op1 to <1 x double>
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v2i64_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = uitofp <2 x i64> %op1 to <2 x double>
  ret <2 x double> %res
}

define void @ucvtf_v4i64_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: ucvtf_v4i64_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <4 x i64>, ptr %a
  %res = uitofp <4 x i64> %op1 to <4 x double>
  store <4 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v8i64_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: ucvtf z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: ucvtf z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ucvtf z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x i64>, ptr %a
  %res = uitofp <8 x i64> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v16i64_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: ucvtf_v16i64_v16f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x i64>, ptr %a
  %res = uitofp <16 x i64> %op1 to <16 x double>
  store <16 x double> %res, ptr %b
  ret void
}

define void @ucvtf_v32i64_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: ucvtf_v32i64_v32f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x i64>, ptr %a
  %res = uitofp <32 x i64> %op1 to <32 x double>
  store <32 x double> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; SCVTF H -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v4i16_v4f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: scvtf v0.4h, v0.4h
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <4 x i16> %op1 to <4 x half>
|
|
ret <4 x half> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
define void @scvtf_v8i16_v8f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v8i16_v8f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr q0, [x0]
|
|
; CHECK-NEXT: scvtf v0.8h, v0.8h
|
|
; CHECK-NEXT: str q0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x i16>, ptr %a
|
|
%res = sitofp <8 x i16> %op1 to <8 x half>
|
|
store <8 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v16i16_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v16i16_v16f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl16
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x i16>, ptr %a
|
|
%res = sitofp <16 x i16> %op1 to <16 x half>
|
|
store <16 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v32i16_v32f16(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: scvtf_v32i16_v32f16:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
|
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: scvtf z0.h, p0/m, z0.h
|
|
; VBITS_GE_256-NEXT: scvtf z1.h, p0/m, z1.h
|
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
|
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: scvtf_v32i16_v32f16:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: scvtf z0.h, p0/m, z0.h
|
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <32 x i16>, ptr %a
|
|
%res = sitofp <32 x i16> %op1 to <32 x half>
|
|
store <32 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v64i16_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: scvtf_v64i16_v64f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl64
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x i16>, ptr %a
|
|
%res = sitofp <64 x i16> %op1 to <64 x half>
|
|
store <64 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v128i16_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: scvtf_v128i16_v128f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl128
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <128 x i16>, ptr %a
|
|
%res = sitofp <128 x i16> %op1 to <128 x half>
|
|
store <128 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SCVTF H -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v2i16_v2f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: shl v0.2s, v0.2s, #16
|
|
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
|
|
; CHECK-NEXT: scvtf v0.2s, v0.2s
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <2 x i16> %op1 to <2 x float>
|
|
ret <2 x float> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v4i16_v4f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
|
|
; CHECK-NEXT: scvtf v0.4s, v0.4s
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <4 x i16> %op1 to <4 x float>
|
|
ret <4 x float> %res
|
|
}
|
|
|
|
define void @scvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v8i16_v8f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr q0, [x0]
|
|
; CHECK-NEXT: ptrue p0.s, vl8
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x i16>, ptr %a
|
|
%res = sitofp <8 x i16> %op1 to <8 x float>
|
|
store <8 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v16i16_v16f32(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: scvtf_v16i16_v16f32:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
|
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: movprfx z1, z0
|
|
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
|
|
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
|
|
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
|
|
; VBITS_GE_256-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; VBITS_GE_256-NEXT: scvtf z1.s, p0/m, z1.s
|
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: scvtf_v16i16_v16f32:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
|
|
; VBITS_GE_512-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <16 x i16>, ptr %a
|
|
%res = sitofp <16 x i16> %op1 to <16 x float>
|
|
store <16 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v32i16_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: scvtf_v32i16_v32f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl32
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x i16>, ptr %a
|
|
%res = sitofp <32 x i16> %op1 to <32 x float>
|
|
store <32 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: scvtf_v64i16_v64f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl64
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: ptrue p0.s, vl64
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x i16>, ptr %a
|
|
%res = sitofp <64 x i16> %op1 to <64 x float>
|
|
store <64 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SCVTF H -> D
|
|
;
|
|
|
|
; v1i16 is perfered to be widened to v4i16, which pushes the output into SVE types, so use SVE
|
|
define <1 x double> @scvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v1i16_v1f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <1 x i16> %op1 to <1 x double>
|
|
ret <1 x double> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v2i16_v2f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: shl v0.2s, v0.2s, #16
|
|
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
|
|
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
|
|
; CHECK-NEXT: scvtf v0.2d, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <2 x i16> %op1 to <2 x double>
|
|
ret <2 x double> %res
|
|
}
|
|
|
|
define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v4i16_v4f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr d0, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x i16>, ptr %a
|
|
%res = sitofp <4 x i16> %op1 to <4 x double>
|
|
store <4 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: scvtf_v8i16_v8f64:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ldr q0, [x0]
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
|
; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
|
|
; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
|
|
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
|
|
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
|
|
; VBITS_GE_256-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: scvtf z1.d, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: scvtf_v8i16_v8f64:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ldr q0, [x0]
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
|
|
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
|
|
; VBITS_GE_512-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x i16>, ptr %a
|
|
%res = sitofp <8 x i16> %op1 to <8 x double>
|
|
store <8 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: scvtf_v16i16_v16f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl16
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x i16>, ptr %a
|
|
%res = sitofp <16 x i16> %op1 to <16 x double>
|
|
store <16 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: scvtf_v32i16_v32f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl32
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: sunpklo z0.s, z0.h
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x i16>, ptr %a
|
|
%res = sitofp <32 x i16> %op1 to <32 x double>
|
|
store <32 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SCVTF S -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v2i32_v2f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
; CHECK-NEXT: scvtf v0.4s, v0.4s
|
|
; CHECK-NEXT: fcvtn v0.4h, v0.4s
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <2 x i32> %op1 to <2 x half>
|
|
ret <2 x half> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v4i32_v4f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: scvtf v0.4s, v0.4s
|
|
; CHECK-NEXT: fcvtn v0.4h, v0.4s
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <4 x i32> %op1 to <4 x half>
|
|
ret <4 x half> %res
|
|
}
|
|
|
|
define <8 x half> @scvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v8i32_v8f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl8
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x i32>, ptr %a
|
|
%res = sitofp <8 x i32> %op1 to <8 x half>
|
|
ret <8 x half> %res
|
|
}
|
|
|
|
define void @scvtf_v16i32_v16f16(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: scvtf_v16i32_v16f16:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
|
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: scvtf z0.h, p0/m, z0.s
|
|
; VBITS_GE_256-NEXT: scvtf z1.h, p0/m, z1.s
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
|
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
|
; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: scvtf_v16i32_v16f16:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: scvtf z0.h, p0/m, z0.s
|
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <16 x i32>, ptr %a
|
|
%res = sitofp <16 x i32> %op1 to <16 x half>
|
|
store <16 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v32i32_v32f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: scvtf_v32i32_v32f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
|
|
; CHECK-NEXT: ptrue p0.h, vl32
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x i32>, ptr %a
|
|
%res = sitofp <32 x i32> %op1 to <32 x half>
|
|
store <32 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v64i32_v64f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: scvtf_v64i32_v64f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl64
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
|
|
; CHECK-NEXT: ptrue p0.h, vl64
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x i32>, ptr %a
|
|
%res = sitofp <64 x i32> %op1 to <64 x half>
|
|
store <64 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SCVTF S -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v2i32_v2f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: scvtf v0.2s, v0.2s
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <2 x i32> %op1 to <2 x float>
|
|
ret <2 x float> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v4i32_v4f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: scvtf v0.4s, v0.4s
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <4 x i32> %op1 to <4 x float>
|
|
ret <4 x float> %res
|
|
}
|
|
|
|
define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v8i32_v8f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl8
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x i32>, ptr %a
|
|
%res = sitofp <8 x i32> %op1 to <8 x float>
|
|
store <8 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v16i32_v16f32(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: scvtf_v16i32_v16f32:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
|
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; VBITS_GE_256-NEXT: scvtf z1.s, p0/m, z1.s
|
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: scvtf_v16i32_v16f32:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <16 x i32>, ptr %a
|
|
%res = sitofp <16 x i32> %op1 to <16 x float>
|
|
store <16 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v32i32_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: scvtf_v32i32_v32f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x i32>, ptr %a
|
|
%res = sitofp <32 x i32> %op1 to <32 x float>
|
|
store <32 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v64i32_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: scvtf_v64i32_v64f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl64
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x i32>, ptr %a
|
|
%res = sitofp <64 x i32> %op1 to <64 x float>
|
|
store <64 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SCVTF S -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
define <1 x double> @scvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v1i32_v1f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
|
|
; CHECK-NEXT: scvtf v0.2d, v0.2d
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <1 x i32> %op1 to <1 x double>
|
|
ret <1 x double> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v2i32_v2f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
|
|
; CHECK-NEXT: scvtf v0.2d, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <2 x i32> %op1 to <2 x double>
|
|
ret <2 x double> %res
|
|
}
|
|
|
|
define void @scvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v4i32_v4f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr q0, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x i32>, ptr %a
|
|
%res = sitofp <4 x i32> %op1 to <4 x double>
|
|
store <4 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v8i32_v8f64(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: scvtf_v8i32_v8f64:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: movprfx z1, z0
|
|
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
|
|
; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
|
|
; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
|
|
; VBITS_GE_256-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: scvtf z1.d, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: scvtf_v8i32_v8f64:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s
|
|
; VBITS_GE_512-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x i32>, ptr %a
|
|
%res = sitofp <8 x i32> %op1 to <8 x double>
|
|
store <8 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: scvtf_v16i32_v16f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl16
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x i32>, ptr %a
|
|
%res = sitofp <16 x i32> %op1 to <16 x double>
|
|
store <16 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: scvtf_v32i32_v32f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: sunpklo z0.d, z0.s
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x i32>, ptr %a
|
|
%res = sitofp <32 x i32> %op1 to <32 x double>
|
|
store <32 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SCVTF D -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
define <1 x half> @scvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v1i64_v1f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
; CHECK-NEXT: fmov x8, d0
|
|
; CHECK-NEXT: scvtf h0, x8
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <1 x i64> %op1 to <1 x half>
|
|
ret <1 x half> %res
|
|
}
|
|
|
|
; v2f16 is not legal for NEON, so use SVE
|
|
define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v2i64_v2f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <2 x i64> %op1 to <2 x half>
|
|
ret <2 x half> %res
|
|
}
|
|
|
|
define <4 x half> @scvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v4i64_v4f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x i64>, ptr %a
|
|
%res = sitofp <4 x i64> %op1 to <4 x half>
|
|
ret <4 x half> %res
|
|
}
|
|
|
|
define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
|
|
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f16:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: scvtf z0.h, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: scvtf z1.h, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
|
; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h
|
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
|
|
; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
|
|
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: scvtf_v8i64_v8f16:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: scvtf z0.h, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x i64>, ptr %a
|
|
%res = sitofp <8 x i64> %op1 to <8 x half>
|
|
ret <8 x half> %res
|
|
}
|
|
|
|
define void @scvtf_v16i64_v16f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: scvtf_v16i64_v16f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
|
|
; CHECK-NEXT: ptrue p0.s, vl16
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x i64>, ptr %a
|
|
%res = sitofp <16 x i64> %op1 to <16 x half>
|
|
store <16 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: scvtf_v32i64_v32f16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x i64>, ptr %a
|
|
%res = sitofp <32 x i64> %op1 to <32 x half>
|
|
store <32 x half> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SCVTF D -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v1i64_v1f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
; CHECK-NEXT: fmov x8, d0
|
|
; CHECK-NEXT: movi d1, #0000000000000000
|
|
; CHECK-NEXT: scvtf s0, x8
|
|
; CHECK-NEXT: mov v1.s[0], v0.s[0]
|
|
; CHECK-NEXT: fmov d0, d1
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <1 x i64> %op1 to <1 x float>
|
|
ret <1 x float> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v2i64_v2f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: mov x8, v0.d[1]
|
|
; CHECK-NEXT: fmov x9, d0
|
|
; CHECK-NEXT: scvtf s0, x9
|
|
; CHECK-NEXT: scvtf s1, x8
|
|
; CHECK-NEXT: mov v0.s[1], v1.s[0]
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <2 x i64> %op1 to <2 x float>
|
|
ret <2 x float> %res
|
|
}
|
|
|
|
define <4 x float> @scvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v4i64_v4f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x i64>, ptr %a
|
|
%res = sitofp <4 x i64> %op1 to <4 x float>
|
|
ret <4 x float> %res
|
|
}
|
|
|
|
define void @scvtf_v8i64_v8f32(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f32:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: scvtf z0.s, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: scvtf z1.s, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
|
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
|
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: scvtf_v8i64_v8f32:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: scvtf z0.s, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x i64>, ptr %a
|
|
%res = sitofp <8 x i64> %op1 to <8 x float>
|
|
store <8 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v16i64_v16f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: scvtf_v16i64_v16f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
|
|
; CHECK-NEXT: ptrue p0.s, vl16
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x i64>, ptr %a
|
|
%res = sitofp <16 x i64> %op1 to <16 x float>
|
|
store <16 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: scvtf_v32i64_v32f32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x i64>, ptr %a
|
|
%res = sitofp <32 x i64> %op1 to <32 x float>
|
|
store <32 x float> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; SCVTF D -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
define <1 x double> @scvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v1i64_v1f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
; CHECK-NEXT: scvtf d0, d0
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <1 x i64> %op1 to <1 x double>
|
|
ret <1 x double> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v2i64_v2f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: scvtf v0.2d, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = sitofp <2 x i64> %op1 to <2 x double>
|
|
ret <2 x double> %res
|
|
}
|
|
|
|
define void @scvtf_v4i64_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: scvtf_v4i64_v4f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x i64>, ptr %a
|
|
%res = sitofp <4 x i64> %op1 to <4 x double>
|
|
store <4 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v8i64_v8f64(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: scvtf_v8i64_v8f64:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: scvtf z1.d, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: scvtf_v8i64_v8f64:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x i64>, ptr %a
|
|
%res = sitofp <8 x i64> %op1 to <8 x double>
|
|
store <8 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v16i64_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: scvtf_v16i64_v16f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x i64>, ptr %a
|
|
%res = sitofp <16 x i64> %op1 to <16 x double>
|
|
store <16 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
define void @scvtf_v32i64_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: scvtf_v32i64_v32f64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x i64>, ptr %a
|
|
%res = sitofp <32 x i64> %op1 to <32 x double>
|
|
store <32 x double> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { "target-features"="+sve" }
|