
The patch adds patterns to select the EXT_ZZI_CONSTRUCTIVE pseudo instead of the EXT_ZZI destructive instruction for vector_splice. This only works when the two inputs to vector_splice are identical. Given that registers aren't tied anymore, this gives the register allocator more freedom and a lot of MOVs get replaced with MOVPRFX. In some cases however, we could have just chosen the same input and output register, but regalloc preferred not to. This means we end up with some test cases now having more instructions: there is now a MOVPRFX while no MOV was previously needed.
1799 lines
57 KiB
LLVM
1799 lines
57 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
|
|
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
|
|
|
|
target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
;
|
|
; FCVTZU H -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <4 x half> -> <4 x i16> on a register argument; the expected code is
; a single NEON fcvtzu on .4h lanes, with no SVE instructions.
define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = fptoui <4 x half> %op1 to <4 x i16>
  ret <4 x i16> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <8 x half> -> <8 x i16> through memory (%a -> %b); expected as NEON
; ldr / fcvtzu / str on a q register, with no SVE instructions.
define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtzu v0.8h, v0.8h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i16>
  store <8 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x half> -> <16 x i16> through memory with vscale_range(2,0);
; expected as one predicated SVE fcvtzu under a vl16 predicate.
define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x half> -> <32 x i16> with no vscale_range, so the expected code
; depends on the run: with a 256-bit minimum the two vl16 halves are loaded,
; converted and stored separately; with a 512-bit or larger minimum a single
; vl32 operation is used.
define void @fcvtzu_v32f16_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v32f16_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT: fcvtzu z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v32f16_v32i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <64 x half> -> <64 x i16> through memory with vscale_range(8,0);
; expected as one predicated SVE fcvtzu under a vl64 predicate.
define void @fcvtzu_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x half>, ptr %a
  %res = fptoui <64 x half> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <128 x half> -> <128 x i16> through memory with vscale_range(16,0);
; expected as one predicated SVE fcvtzu under a vl128 predicate.
define void @fcvtzu_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v128f16_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <128 x half>, ptr %a
  %res = fptoui <128 x half> %op1 to <128 x i16>
  store <128 x i16> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU H -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <2 x half> -> <2 x i32> on a register argument; expected as NEON
; fcvtl (.4h -> .4s) followed by fcvtzu on .4s, with no SVE instructions.
define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f16_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl v0.4s, v0.4h
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %res = fptoui <2 x half> %op1 to <2 x i32>
  ret <2 x i32> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <4 x half> -> <4 x i32> on a register argument; expected as NEON
; fcvtl (.4h -> .4s) followed by fcvtzu on .4s, with no SVE instructions.
define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl v0.4s, v0.4h
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = fptoui <4 x half> %op1 to <4 x i32>
  ret <4 x i32> %res
}
|
|
|
|
; fptoui <8 x half> -> <8 x i32> through memory with vscale_range(2,0); the
; input is loaded with ldr q0, unpacked to .s lanes with uunpklo, converted by
; one SVE fcvtzu (.h source, .s result) under vl8 and stored with st1w.
define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x half> -> <16 x i32> with no vscale_range, so the expected code
; depends on the run. With a 256-bit minimum the input is loaded once, the
; upper half is extracted with movprfx + ext #16, and both halves are unpacked,
; converted and stored separately. With a 512-bit or larger minimum an
; extending ld1h into .s lanes feeds a single fcvtzu.
define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f16_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.h
; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.h
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f16_v16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x half> -> <32 x i32> through memory with vscale_range(8,0);
; expected as an extending ld1h into .s lanes, one SVE fcvtzu and an st1w, all
; under a vl32 predicate.
define void @fcvtzu_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <64 x half> -> <64 x i32> through memory with vscale_range(16,0);
; expected as an extending ld1h into .s lanes, one SVE fcvtzu and an st1w, all
; under a vl64 predicate.
define void @fcvtzu_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x half>, ptr %a
  %res = fptoui <64 x half> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU H -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <1 x half> -> <1 x i64> on a register argument; expected as a scalar
; fcvtzu from h0 into x8, moved back to d0 with fmov.
define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f16_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu x8, h0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
  %res = fptoui <1 x half> %op1 to <1 x i64>
  ret <1 x i64> %res
}
|
|
|
|
; v2f16 is not legal for NEON, so use SVE
|
|
; fptoui <2 x half> -> <2 x i64> on a register argument; the input is unpacked
; in two uunpklo steps (.h -> .s -> .d) and converted by one SVE fcvtzu
; (.h source, .d result) under a vl4 predicate.
define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f16_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %res = fptoui <2 x half> %op1 to <2 x i64>
  ret <2 x i64> %res
}
|
|
|
|
; fptoui <4 x half> -> <4 x i64> through memory with vscale_range(2,0); the
; input is loaded with ldr d0, unpacked in two uunpklo steps, converted by one
; SVE fcvtzu (.h source, .d result) under vl4 and stored with st1d.
define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <4 x half>, ptr %a
  %res = fptoui <4 x half> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <8 x half> -> <8 x i64> with no vscale_range, so the expected code
; depends on the run. With a 256-bit minimum the q register is split with a
; NEON ext #8, and each half is unpacked twice, converted and stored
; separately. With a 512-bit or larger minimum the whole input is unpacked
; twice and converted by a single fcvtzu under vl8.
define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f16_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.h
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.h
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f16_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.h
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x half> -> <16 x i64> through memory with vscale_range(8,0);
; expected as an extending ld1h into .d lanes, one SVE fcvtzu and an st1d, all
; under a vl16 predicate.
define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x half> -> <32 x i64> through memory with vscale_range(16,0);
; expected as an extending ld1h into .d lanes, one SVE fcvtzu and an st1d, all
; under a vl32 predicate.
define void @fcvtzu_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU S -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <2 x float> -> <2 x i16> on a register argument; expected as a single
; NEON instruction. Note that the autogenerated checks expect the signed
; fcvtzs form here, presumably because the i16 results are held in 32-bit
; lanes - confirm before changing.
define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = fptoui <2 x float> %op1 to <2 x i16>
  ret <2 x i16> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <4 x float> -> <4 x i16> on a register argument; expected as a NEON
; fcvtzu on .4s followed by lane-by-lane moves (through w8/w9) that pack the
; four results into .h lanes, with no SVE instructions.
define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v1.4s, v0.4s
; CHECK-NEXT: mov w8, v1.s[1]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: mov w9, v1.s[2]
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: mov w8, v1.s[3]
; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %res = fptoui <4 x float> %op1 to <4 x i16>
  ret <4 x i16> %res
}
|
|
|
|
; fptoui <8 x float> (loaded from %a) -> <8 x i16>, returned in a register;
; expected as one SVE fcvtzu on .s lanes under vl8, narrowed to .h lanes with
; uzp1.
define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i16>
  ret <8 x i16> %res
}
|
|
|
|
; fptoui <16 x float> -> <16 x i16> with no vscale_range, so the expected code
; depends on the run. With a 256-bit minimum the two vl8 halves are converted
; separately, each narrowed with uzp1, joined with splice and stored by one
; st1h under vl16. With a 512-bit or larger minimum a single fcvtzu feeds a
; truncating st1h from .s lanes.
define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x float> -> <32 x i16> through memory with vscale_range(8,0);
; expected as one SVE fcvtzu on .s lanes followed by a truncating st1h from .s
; lanes, under a vl32 predicate.
define void @fcvtzu_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <64 x float> -> <64 x i16> through memory with vscale_range(16,0);
; expected as one SVE fcvtzu on .s lanes followed by a truncating st1h from .s
; lanes, under a vl64 predicate.
define void @fcvtzu_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x float>, ptr %a
  %res = fptoui <64 x float> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU S -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <2 x float> -> <2 x i32> on a register argument; the expected code is
; a single NEON fcvtzu on .2s lanes, with no SVE instructions.
define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = fptoui <2 x float> %op1 to <2 x i32>
  ret <2 x i32> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <4 x float> -> <4 x i32> on a register argument; the expected code is
; a single NEON fcvtzu on .4s lanes, with no SVE instructions.
define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = fptoui <4 x float> %op1 to <4 x i32>
  ret <4 x i32> %res
}
|
|
|
|
; fptoui <8 x float> -> <8 x i32> through memory with vscale_range(2,0);
; expected as one predicated SVE fcvtzu under a vl8 predicate.
define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x float> -> <16 x i32> with no vscale_range, so the expected code
; depends on the run: with a 256-bit minimum the two vl8 halves are loaded,
; converted and stored separately; with a 512-bit or larger minimum a single
; vl16 operation is used.
define void @fcvtzu_v16f32_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x float> -> <32 x i32> through memory with vscale_range(8,0);
; expected as one predicated SVE fcvtzu under a vl32 predicate.
define void @fcvtzu_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <64 x float> -> <64 x i32> through memory with vscale_range(16,0);
; expected as one predicated SVE fcvtzu under a vl64 predicate.
define void @fcvtzu_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x float>, ptr %a
  %res = fptoui <64 x float> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU S -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <1 x float> -> <1 x i64> on a register argument; expected as NEON
; fcvtl (.2s -> .2d) followed by fcvtzu on .2d, with no SVE instructions.
define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f32_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %res = fptoui <1 x float> %op1 to <1 x i64>
  ret <1 x i64> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <2 x float> -> <2 x i64> on a register argument; expected as NEON
; fcvtl (.2s -> .2d) followed by fcvtzu on .2d, with no SVE instructions.
define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <2 x float> %op1 to <2 x i64>
  ret <2 x i64> %res
}
|
|
|
|
; fptoui <4 x float> -> <4 x i64> through memory with vscale_range(2,0); the
; input is loaded with ldr q0, unpacked to .d lanes with uunpklo, converted by
; one SVE fcvtzu (.s source, .d result) under vl4 and stored with st1d.
define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <4 x float>, ptr %a
  %res = fptoui <4 x float> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <8 x float> -> <8 x i64> with no vscale_range, so the expected code
; depends on the run. With a 256-bit minimum the input is loaded once, the
; upper half is extracted with movprfx + ext #16, and both halves are unpacked,
; converted and stored separately. With a 512-bit or larger minimum an
; extending ld1w into .d lanes feeds a single fcvtzu.
define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f32_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: movprfx z1, z0
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.s
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.s
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f32_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x float> -> <16 x i64> through memory with vscale_range(8,0);
; expected as an extending ld1w into .d lanes, one SVE fcvtzu and an st1d, all
; under a vl16 predicate.
define void @fcvtzu_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f32_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x float> -> <32 x i64> through memory with vscale_range(16,0);
; expected as an extending ld1w into .d lanes, one SVE fcvtzu and an st1d, all
; under a vl32 predicate.
define void @fcvtzu_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}
|
|
|
|
|
|
;
|
|
; FCVTZU D -> H
|
|
;
|
|
|
|
; v1f64 is preferred to be widened to v4f64, so use SVE
|
|
; fptoui <1 x double> -> <1 x i16> on a register argument; expected as one SVE
; fcvtzu on .d lanes under vl4, narrowed with two uzp1 steps (.s, then .h).
define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %res = fptoui <1 x double> %op1 to <1 x i16>
  ret <1 x i16> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <2 x double> -> <2 x i16> on a register argument; expected as a NEON
; conversion on .2d followed by xtn to .2s. Note that the autogenerated checks
; expect the signed fcvtzs form here, presumably because the i16 results are
; held in wider lanes - confirm before changing.
define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <2 x double> %op1 to <2 x i16>
  ret <2 x i16> %res
}
|
|
|
|
; fptoui <4 x double> (loaded from %a) -> <4 x i16>, returned in a register;
; expected as one SVE fcvtzu on .d lanes under vl4, narrowed with two uzp1
; steps (.s, then .h).
define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i16>
  ret <4 x i16> %res
}
|
|
|
|
; fptoui <8 x double> (loaded from %a) -> <8 x i16>, returned in a register,
; with no vscale_range, so the expected code depends on the run. With a
; 256-bit minimum the two vl4 halves are converted separately, each narrowed
; with two uzp1 steps, and combined with a NEON lane move into v0.d[1]. With a
; 512-bit or larger minimum one fcvtzu is followed by two uzp1 steps.
define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i16>
  ret <8 x i16> %res
}
|
|
|
|
; fptoui <16 x double> -> <16 x i16> through memory with vscale_range(8,0);
; expected as one SVE fcvtzu on .d lanes followed by a truncating st1h from .d
; lanes, under a vl16 predicate.
define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x double> -> <32 x i16> through memory with vscale_range(16,0);
; expected as one SVE fcvtzu on .d lanes followed by a truncating st1h from .d
; lanes, under a vl32 predicate.
define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU D -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <1 x double> -> <1 x i32> on a register argument; expected as a NEON
; fcvtzu on .2d followed by xtn to .2s, with no SVE instructions.
define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <1 x double> %op1 to <1 x i32>
  ret <1 x i32> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <2 x double> -> <2 x i32> on a register argument; expected as a NEON
; fcvtzu on .2d followed by xtn to .2s, with no SVE instructions.
define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <2 x double> %op1 to <2 x i32>
  ret <2 x i32> %res
}
|
|
|
|
; fptoui <4 x double> (loaded from %a) -> <4 x i32>, returned in a register;
; expected as one SVE fcvtzu on .d lanes under vl4, narrowed to .s lanes with
; uzp1.
define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i32>
  ret <4 x i32> %res
}
|
|
|
|
; fptoui <8 x double> -> <8 x i32> with no vscale_range, so the expected code
; depends on the run. With a 256-bit minimum the two vl4 halves are converted
; separately, each narrowed with uzp1, joined with splice and stored by one
; st1w under vl8. With a 512-bit or larger minimum a single fcvtzu feeds a
; truncating st1w from .d lanes.
define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x double> -> <16 x i32> through memory with vscale_range(8,0);
; expected as one SVE fcvtzu on .d lanes followed by a truncating st1w from .d
; lanes, under a vl16 predicate.
define void @fcvtzu_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x double> -> <32 x i32> through memory with vscale_range(16,0);
; expected as one SVE fcvtzu on .d lanes followed by a truncating st1w from .d
; lanes, under a vl32 predicate.
define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU D -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <1 x double> -> <1 x i64> on a register argument; expected as a
; scalar fcvtzu from d0 into x8, moved back to d0 with fmov.
define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu x8, d0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
  %res = fptoui <1 x double> %op1 to <1 x i64>
  ret <1 x i64> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <2 x double> -> <2 x i64> on a register argument; the expected code
; is a single NEON fcvtzu on .2d lanes, with no SVE instructions.
define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <2 x double> %op1 to <2 x i64>
  ret <2 x i64> %res
}
|
|
|
|
; fptoui <4 x double> -> <4 x i64> through memory with vscale_range(2,0);
; expected as one predicated SVE fcvtzu under a vl4 predicate.
define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <8 x double> -> <8 x i64> with no vscale_range, so the expected code
; depends on the run: with a 256-bit minimum the two vl4 halves are loaded,
; converted and stored separately; with a 512-bit or larger minimum a single
; vl8 operation is used.
define void @fcvtzu_v8f64_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x double> -> <16 x i64> through memory with vscale_range(8,0);
; expected as one predicated SVE fcvtzu under a vl16 predicate.
define void @fcvtzu_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x double> -> <32 x i64> through memory with vscale_range(16,0);
; expected as one predicated SVE fcvtzu under a vl32 predicate.
define void @fcvtzu_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZS H -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptosi <4 x half> -> <4 x i16> on a register argument; the expected code is
; a single NEON fcvtzs on .4h lanes, with no SVE instructions.
define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v4f16_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = fptosi <4 x half> %op1 to <4 x i16>
  ret <4 x i16> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Loads <8 x half> from %a, converts it to <8 x i16> with fptosi and stores the
; result to %b. The expected output uses q-register load/store around a NEON
; fcvtzs on the 8h arrangement.
define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v8f16_v8i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr q0, [x0]
|
|
; CHECK-NEXT: fcvtzs v0.8h, v0.8h
|
|
; CHECK-NEXT: str q0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x half>, ptr %a
|
|
%res = fptosi <8 x half> %op1 to <8 x i16>
|
|
store <8 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <16 x half> from %a, converts it to <16 x i16> with fptosi and stores
; the result to %b. vscale_range(2,0) sets a minimum vscale of 2, and every RUN
; configuration expects one predicated SVE fcvtzs under a vl16 predicate.
define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f16_v16i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl16
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x half>, ptr %a
|
|
%res = fptosi <16 x half> %op1 to <16 x i16>
|
|
store <16 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <32 x half> from %a, converts it to <32 x i16> with fptosi and stores
; the result to %b. No vscale_range is given, so the expectations differ per
; prefix. VBITS_GE_256 handles two vl16 halves (the second at element offset 16
; held in x8), while VBITS_GE_512 uses a single vl32 operation.
define void @fcvtzs_v32f16_v32i16(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v32f16_v32i16:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
|
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.h, p0/m, z1.h
|
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
|
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v32f16_v32i16:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <32 x half>, ptr %a
|
|
%res = fptosi <32 x half> %op1 to <32 x i16>
|
|
store <32 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <64 x half> from %a, converts it to <64 x i16> with fptosi and stores
; the result to %b. vscale_range(8,0) sets a minimum vscale of 8, and every RUN
; configuration expects one predicated SVE fcvtzs under a vl64 predicate.
define void @fcvtzs_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v64f16_v64i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl64
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x half>, ptr %a
|
|
%res = fptosi <64 x half> %op1 to <64 x i16>
|
|
store <64 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <128 x half> from %a, converts it to <128 x i16> with fptosi and stores
; the result to %b. vscale_range(16,0) sets a minimum vscale of 16, and every
; RUN configuration expects one predicated SVE fcvtzs under a vl128 predicate.
define void @fcvtzs_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v128f16_v128i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl128
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <128 x half>, ptr %a
|
|
%res = fptosi <128 x half> %op1 to <128 x i16>
|
|
store <128 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS H -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Converts the <2 x half> argument to <2 x i32> with fptosi and returns it.
; The expected NEON sequence widens with fcvtl to 4s and then converts with
; fcvtzs, returning the low d register of the result.
define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f16_v2i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtl v0.4s, v0.4h
|
|
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x half> %op1 to <2 x i32>
|
|
ret <2 x i32> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Converts the <4 x half> argument to <4 x i32> with fptosi and returns it.
; The expected NEON sequence widens with fcvtl and then converts with fcvtzs.
define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f16_v4i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtl v0.4s, v0.4h
|
|
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <4 x half> %op1 to <4 x i32>
|
|
ret <4 x i32> %res
|
|
}
|
|
|
|
; Loads <8 x half> from %a, converts it to <8 x i32> with fptosi and stores the
; result to %b. The expected code loads a q register, widens the lanes with
; uunpklo and converts with a predicated SVE fcvtzs under a vl8 predicate.
define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v8f16_v8i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr q0, [x0]
|
|
; CHECK-NEXT: ptrue p0.s, vl8
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x half>, ptr %a
|
|
%res = fptosi <8 x half> %op1 to <8 x i32>
|
|
store <8 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <16 x half> from %a, converts it to <16 x i32> with fptosi and stores
; the result to %b. No vscale_range is given, so the expectations differ per
; prefix. VBITS_GE_256 extracts the upper half of the loaded vector with
; movprfx + ext #16, widens both halves with uunpklo and converts and stores
; them separately. VBITS_GE_512 uses an extending ld1h and a single fcvtzs.
define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v16f16_v16i32:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
|
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: movprfx z1, z0
|
|
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
|
|
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
|
|
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.h
|
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v16f16_v16i32:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <16 x half>, ptr %a
|
|
%res = fptosi <16 x half> %op1 to <16 x i32>
|
|
store <16 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <32 x half> from %a, converts it to <32 x i32> with fptosi and stores
; the result to %b. vscale_range(8,0) sets a minimum vscale of 8, and every RUN
; configuration expects an extending ld1h into s lanes followed by one
; predicated SVE fcvtzs under a vl32 predicate.
define void @fcvtzs_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f16_v32i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x half>, ptr %a
|
|
%res = fptosi <32 x half> %op1 to <32 x i32>
|
|
store <32 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <64 x half> from %a, converts it to <64 x i32> with fptosi and stores
; the result to %b. vscale_range(16,0) sets a minimum vscale of 16, and every
; RUN configuration expects an extending ld1h into s lanes followed by one
; predicated SVE fcvtzs under a vl64 predicate.
define void @fcvtzs_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v64f16_v64i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl64
|
|
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x half>, ptr %a
|
|
%res = fptosi <64 x half> %op1 to <64 x i32>
|
|
store <64 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS H -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Converts the <1 x half> argument to <1 x i64> with fptosi and returns it.
; The expected code is a scalar fcvtzs from h0 into x8 followed by an fmov
; back into d0.
define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f16_v1i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs x8, h0
|
|
; CHECK-NEXT: fmov d0, x8
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x half> %op1 to <1 x i64>
|
|
ret <1 x i64> %res
|
|
}
|
|
|
|
; v2f16 is not legal for NEON, so use SVE
|
|
; Converts the <2 x half> argument to <2 x i64> with fptosi and returns it.
; The expected SVE sequence widens the lanes twice with uunpklo (h to s, then
; s to d) and converts with a predicated fcvtzs under a vl4 predicate.
define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f16_v2i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x half> %op1 to <2 x i64>
|
|
ret <2 x i64> %res
|
|
}
|
|
|
|
; Loads <4 x half> from %a, converts it to <4 x i64> with fptosi and stores the
; result to %b. The expected code loads a d register, widens the lanes twice
; with uunpklo and converts with a predicated SVE fcvtzs under a vl4 predicate.
define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f16_v4i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr d0, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x half>, ptr %a
|
|
%res = fptosi <4 x half> %op1 to <4 x i64>
|
|
store <4 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <8 x half> from %a, converts it to <8 x i64> with fptosi and stores the
; result to %b. No vscale_range is given, so the expectations differ per
; prefix. VBITS_GE_256 splits the q register with a NEON ext #8, widens each
; half twice with uunpklo and converts and stores the halves separately.
; VBITS_GE_512 widens the whole vector and uses a single vl8 fcvtzs.
define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f16_v8i64:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ldr q0, [x0]
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
|
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
|
|
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
|
|
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
|
|
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.h
|
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f16_v8i64:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ldr q0, [x0]
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
|
|
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x half>, ptr %a
|
|
%res = fptosi <8 x half> %op1 to <8 x i64>
|
|
store <8 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <16 x half> from %a, converts it to <16 x i64> with fptosi and stores
; the result to %b. vscale_range(8,0) sets a minimum vscale of 8, and every RUN
; configuration expects an extending ld1h into d lanes followed by one
; predicated SVE fcvtzs under a vl16 predicate.
define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f16_v16i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x half>, ptr %a
|
|
%res = fptosi <16 x half> %op1 to <16 x i64>
|
|
store <16 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <32 x half> from %a, converts it to <32 x i64> with fptosi and stores
; the result to %b. vscale_range(16,0) sets a minimum vscale of 16, and every
; RUN configuration expects an extending ld1h into d lanes followed by one
; predicated SVE fcvtzs under a vl32 predicate.
define void @fcvtzs_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f16_v32i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x half>, ptr %a
|
|
%res = fptosi <32 x half> %op1 to <32 x i64>
|
|
store <32 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS S -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Converts the <2 x float> argument to <2 x i16> with fptosi and returns it.
; The expected output is a single NEON fcvtzs on the 2s arrangement with no
; separate narrowing instruction.
define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f32_v2i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x float> %op1 to <2 x i16>
|
|
ret <2 x i16> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Converts the <4 x float> argument to <4 x i16> with fptosi and returns it.
; The expected code converts with a NEON fcvtzs into v1 and then narrows by
; moving s lanes 1 to 3 through w registers into h lanes 1 to 3 of v0.
define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f32_v4i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v1.4s, v0.4s
|
|
; CHECK-NEXT: mov w8, v1.s[1]
|
|
; CHECK-NEXT: mov v0.16b, v1.16b
|
|
; CHECK-NEXT: mov w9, v1.s[2]
|
|
; CHECK-NEXT: mov v0.h[1], w8
|
|
; CHECK-NEXT: mov w8, v1.s[3]
|
|
; CHECK-NEXT: mov v0.h[2], w9
|
|
; CHECK-NEXT: mov v0.h[3], w8
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <4 x float> %op1 to <4 x i16>
|
|
ret <4 x i16> %res
|
|
}
|
|
|
|
; Loads <8 x float> from %a, converts it to <8 x i16> with fptosi and returns
; the result. The expected code converts with a predicated SVE fcvtzs under a
; vl8 predicate and narrows the lanes with uzp1 on h elements.
define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v8f32_v8i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl8
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x float>, ptr %a
|
|
%res = fptosi <8 x float> %op1 to <8 x i16>
|
|
ret <8 x i16> %res
|
|
}
|
|
|
|
; Loads <16 x float> from %a, converts it to <16 x i16> with fptosi and stores
; the result to %b. No vscale_range is given, so the expectations differ per
; prefix. VBITS_GE_256 converts two vl8 halves, narrows each with uzp1, joins
; them with splice and stores once under a vl16 predicate. VBITS_GE_512
; converts once and narrows through a truncating st1h of s lanes.
define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i16:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
|
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
|
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
|
; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i16:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <16 x float>, ptr %a
|
|
%res = fptosi <16 x float> %op1 to <16 x i16>
|
|
store <16 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <32 x float> from %a, converts it to <32 x i16> with fptosi and stores
; the result to %b. vscale_range(8,0) sets a minimum vscale of 8, and every RUN
; configuration expects one predicated SVE fcvtzs under a vl32 predicate
; followed by a truncating st1h of s lanes.
define void @fcvtzs_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f32_v32i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x float>, ptr %a
|
|
%res = fptosi <32 x float> %op1 to <32 x i16>
|
|
store <32 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <64 x float> from %a, converts it to <64 x i16> with fptosi and stores
; the result to %b. vscale_range(16,0) sets a minimum vscale of 16, and every
; RUN configuration expects one predicated SVE fcvtzs under a vl64 predicate
; followed by a truncating st1h of s lanes.
define void @fcvtzs_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v64f32_v64i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl64
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x float>, ptr %a
|
|
%res = fptosi <64 x float> %op1 to <64 x i16>
|
|
store <64 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS S -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Converts the <2 x float> argument to <2 x i32> with fptosi and returns it.
; The expected output is a single NEON fcvtzs on the 2s arrangement.
define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f32_v2i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x float> %op1 to <2 x i32>
|
|
ret <2 x i32> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Converts the <4 x float> argument to <4 x i32> with fptosi and returns it.
; The expected output is a single NEON fcvtzs on the 4s arrangement.
define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f32_v4i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <4 x float> %op1 to <4 x i32>
|
|
ret <4 x i32> %res
|
|
}
|
|
|
|
; Loads <8 x float> from %a, converts it to <8 x i32> with fptosi and stores
; the result to %b. vscale_range(2,0) sets a minimum vscale of 2, and every RUN
; configuration expects one predicated SVE fcvtzs under a vl8 predicate.
define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v8f32_v8i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl8
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x float>, ptr %a
|
|
%res = fptosi <8 x float> %op1 to <8 x i32>
|
|
store <8 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <16 x float> from %a, converts it to <16 x i32> with fptosi and stores
; the result to %b. No vscale_range is given, so the expectations differ per
; prefix. VBITS_GE_256 handles two vl8 halves (the second at element offset 8
; held in x8), while VBITS_GE_512 uses a single vl16 operation.
define void @fcvtzs_v16f32_v16i32(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i32:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
|
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s
|
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i32:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <16 x float>, ptr %a
|
|
%res = fptosi <16 x float> %op1 to <16 x i32>
|
|
store <16 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <32 x float> from %a, converts it to <32 x i32> with fptosi and stores
; the result to %b. vscale_range(8,0) sets a minimum vscale of 8, and every RUN
; configuration expects one predicated SVE fcvtzs under a vl32 predicate.
define void @fcvtzs_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f32_v32i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x float>, ptr %a
|
|
%res = fptosi <32 x float> %op1 to <32 x i32>
|
|
store <32 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <64 x float> from %a, converts it to <64 x i32> with fptosi and stores
; the result to %b. vscale_range(16,0) sets a minimum vscale of 16, and every
; RUN configuration expects one predicated SVE fcvtzs under a vl64 predicate.
define void @fcvtzs_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v64f32_v64i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl64
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x float>, ptr %a
|
|
%res = fptosi <64 x float> %op1 to <64 x i32>
|
|
store <64 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS S -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Converts the <1 x float> argument to <1 x i64> with fptosi and returns it.
; The expected NEON sequence widens with fcvtl to 2d and then converts with
; fcvtzs, returning the low d register of the result.
define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f32_v1i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtl v0.2d, v0.2s
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x float> %op1 to <1 x i64>
|
|
ret <1 x i64> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Converts the <2 x float> argument to <2 x i64> with fptosi and returns it.
; The expected NEON sequence widens with fcvtl and then converts with fcvtzs.
define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f32_v2i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtl v0.2d, v0.2s
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x float> %op1 to <2 x i64>
|
|
ret <2 x i64> %res
|
|
}
|
|
|
|
; Loads <4 x float> from %a, converts it to <4 x i64> with fptosi and stores
; the result to %b. The expected code loads a q register, widens the lanes with
; uunpklo and converts with a predicated SVE fcvtzs under a vl4 predicate.
define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f32_v4i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr q0, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x float>, ptr %a
|
|
%res = fptosi <4 x float> %op1 to <4 x i64>
|
|
store <4 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <8 x float> from %a, converts it to <8 x i64> with fptosi and stores
; the result to %b. No vscale_range is given, so the expectations differ per
; prefix. VBITS_GE_256 extracts the upper half of the loaded vector with
; movprfx + ext #16, widens both halves with uunpklo and converts and stores
; them separately. VBITS_GE_512 uses an extending ld1w and a single fcvtzs.
define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f32_v8i64:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: movprfx z1, z0
|
|
; VBITS_GE_256-NEXT: ext z1.b, z1.b, z0.b, #16
|
|
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
|
|
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.s
|
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f32_v8i64:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x float>, ptr %a
|
|
%res = fptosi <8 x float> %op1 to <8 x i64>
|
|
store <8 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <16 x float> from %a, converts it to <16 x i64> with fptosi and stores
; the result to %b. vscale_range(8,0) sets a minimum vscale of 8, and every RUN
; configuration expects an extending ld1w into d lanes followed by one
; predicated SVE fcvtzs under a vl16 predicate.
define void @fcvtzs_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f32_v16i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x float>, ptr %a
|
|
%res = fptosi <16 x float> %op1 to <16 x i64>
|
|
store <16 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <32 x float> from %a, converts it to <32 x i64> with fptosi and stores
; the result to %b. vscale_range(16,0) sets a minimum vscale of 16, and every
; RUN configuration expects an extending ld1w into d lanes followed by one
; predicated SVE fcvtzs under a vl32 predicate.
define void @fcvtzs_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f32_v32i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x float>, ptr %a
|
|
%res = fptosi <32 x float> %op1 to <32 x i64>
|
|
store <32 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
|
|
;
|
|
; FCVTZS D -> H
|
|
;
|
|
|
|
; v1f64 is preferred to be widened to v4f64, so use SVE
|
|
; Converts the <1 x double> argument to <1 x i16> with fptosi and returns it.
; The expected code converts with a predicated SVE fcvtzs under a vl4 predicate
; and then narrows with two uzp1 steps (s lanes, then h lanes).
define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f64_v1i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x double> %op1 to <1 x i16>
|
|
ret <1 x i16> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Converts the <2 x double> argument to <2 x i16> with fptosi and returns it.
; The expected NEON sequence converts with fcvtzs on 2d and narrows with xtn.
define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f64_v2i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: xtn v0.2s, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x double> %op1 to <2 x i16>
|
|
ret <2 x i16> %res
|
|
}
|
|
|
|
; Loads <4 x double> from %a, converts it to <4 x i16> with fptosi and returns
; the result. The expected code converts with a predicated SVE fcvtzs under a
; vl4 predicate and then narrows with two uzp1 steps (s lanes, then h lanes).
define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f64_v4i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x double>, ptr %a
|
|
%res = fptosi <4 x double> %op1 to <4 x i16>
|
|
ret <4 x i16> %res
|
|
}
|
|
|
|
; Loads <8 x double> from %a, converts it to <8 x i16> with fptosi and returns
; the result. No vscale_range is given, so the expectations differ per prefix.
; VBITS_GE_256 converts two vl4 halves, narrows each with two uzp1 steps and
; joins them by inserting d lane 0 of z2 into d lane 1 of v0. VBITS_GE_512
; converts once under a vl8 predicate and narrows with two uzp1 steps.
define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i16:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
|
; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h
|
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
|
|
; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
|
|
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i16:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x double>, ptr %a
|
|
%res = fptosi <8 x double> %op1 to <8 x i16>
|
|
ret <8 x i16> %res
|
|
}
|
|
|
|
; Loads <16 x double> from %a, converts it to <16 x i16> with fptosi and stores
; the result to %b. vscale_range(8,0) sets a minimum vscale of 8, and every RUN
; configuration expects one predicated SVE fcvtzs under a vl16 predicate
; followed by a truncating st1h of d lanes.
define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f64_v16i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x double>, ptr %a
|
|
%res = fptosi <16 x double> %op1 to <16 x i16>
|
|
store <16 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <32 x double> from %a, converts it to <32 x i16> with fptosi and stores
; the result to %b. vscale_range(16,0) sets a minimum vscale of 16, and every
; RUN configuration expects one predicated SVE fcvtzs under a vl32 predicate
; followed by a truncating st1h of d lanes.
define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f64_v32i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x double>, ptr %a
|
|
%res = fptosi <32 x double> %op1 to <32 x i16>
|
|
store <32 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS D -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Converts the <1 x double> argument to <1 x i32> with fptosi and returns it.
; The expected NEON sequence converts with fcvtzs on 2d and narrows with xtn.
define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f64_v1i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: xtn v0.2s, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x double> %op1 to <1 x i32>
|
|
ret <1 x i32> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Converts the <2 x double> argument to <2 x i32> with fptosi and returns it.
; The expected NEON sequence converts with fcvtzs on 2d and narrows with xtn.
define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f64_v2i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: xtn v0.2s, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x double> %op1 to <2 x i32>
|
|
ret <2 x i32> %res
|
|
}
|
|
|
|
; Loads <4 x double> from %a, converts it to <4 x i32> with fptosi and returns
; the result. The expected code converts with a predicated SVE fcvtzs under a
; vl4 predicate and narrows with a single uzp1 on s lanes.
define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f64_v4i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x double>, ptr %a
|
|
%res = fptosi <4 x double> %op1 to <4 x i32>
|
|
ret <4 x i32> %res
|
|
}
|
|
|
|
; Loads <8 x double> from %a, converts it to <8 x i32> with fptosi and stores
; the result to %b. No vscale_range is given, so the expectations differ per
; prefix. VBITS_GE_256 converts two vl4 halves, narrows each with uzp1, joins
; them with splice and stores once under a vl8 predicate. VBITS_GE_512
; converts once and narrows through a truncating st1w of d lanes.
define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i32:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
|
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
|
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i32:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x double>, ptr %a
|
|
%res = fptosi <8 x double> %op1 to <8 x i32>
|
|
store <8 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <16 x double> from %a, converts it to <16 x i32> with fptosi and stores
; the result to %b. vscale_range(8,0) sets a minimum vscale of 8, and every RUN
; configuration expects one predicated SVE fcvtzs under a vl16 predicate
; followed by a truncating st1w of d lanes.
define void @fcvtzs_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f64_v16i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x double>, ptr %a
|
|
%res = fptosi <16 x double> %op1 to <16 x i32>
|
|
store <16 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <32 x double> from %a, converts it to <32 x i32> with fptosi and stores
; the result to %b. vscale_range(16,0) sets a minimum vscale of 16, and every
; RUN configuration expects one predicated SVE fcvtzs under a vl32 predicate
; followed by a truncating st1w of d lanes.
define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f64_v32i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x double>, ptr %a
|
|
%res = fptosi <32 x double> %op1 to <32 x i32>
|
|
store <32 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS D -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Converts the <1 x double> argument to <1 x i64> with fptosi and returns it.
; The expected code is a scalar fcvtzs from d0 into x8 followed by an fmov
; back into d0.
define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f64_v1i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs x8, d0
|
|
; CHECK-NEXT: fmov d0, x8
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x double> %op1 to <1 x i64>
|
|
ret <1 x i64> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Converts the <2 x double> argument to <2 x i64> with fptosi and returns it.
; The expected output is a single NEON fcvtzs on the 2d arrangement.
define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f64_v2i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x double> %op1 to <2 x i64>
|
|
ret <2 x i64> %res
|
|
}
|
|
|
|
; Loads <4 x double> from %a, converts it to <4 x i64> with fptosi and stores
; the result to %b. vscale_range(2,0) sets a minimum vscale of 2, and every RUN
; configuration expects one predicated SVE fcvtzs under a vl4 predicate.
define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f64_v4i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x double>, ptr %a
|
|
%res = fptosi <4 x double> %op1 to <4 x i64>
|
|
store <4 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <8 x double> from %a, converts it to <8 x i64> with fptosi and stores
; the result to %b. No vscale_range is given, so the expectations differ per
; prefix. VBITS_GE_256 handles two vl4 halves (the second at element offset 4
; held in x8), while VBITS_GE_512 uses a single vl8 operation.
define void @fcvtzs_v8f64_v8i64(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i64:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i64:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x double>, ptr %a
|
|
%res = fptosi <8 x double> %op1 to <8 x i64>
|
|
store <8 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <16 x double> from %a, converts it to <16 x i64> with fptosi and stores
; the result to %b. vscale_range(8,0) sets a minimum vscale of 8, and every RUN
; configuration expects one predicated SVE fcvtzs under a vl16 predicate.
define void @fcvtzs_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f64_v16i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x double>, ptr %a
|
|
%res = fptosi <16 x double> %op1 to <16 x i64>
|
|
store <16 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Loads <32 x double> from %a, converts it to <32 x i64> with fptosi and stores
; the result to %b. vscale_range(16,0) sets a minimum vscale of 16, and every
; RUN configuration expects one predicated SVE fcvtzs under a vl32 predicate.
define void @fcvtzs_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f64_v32i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x double>, ptr %a
|
|
%res = fptosi <32 x double> %op1 to <32 x i64>
|
|
store <32 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { "target-features"="+sve" }
|