
This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7. Some issues were discovered with the bootstrap builds, which seem like they were caused by this commit. I'm reverting to investigate.
1795 lines
57 KiB
LLVM
1795 lines
57 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

; Three llc configurations are exercised (256-, 512- and 2048-bit minimum SVE
; vector length). The 2048-bit configuration deliberately reuses the 512-bit
; prefix, so only two distinct sets of per-width expectations exist below.
target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
;
|
|
; FCVTZU H -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <4 x half> -> <4 x i16>, argument and result in registers.
; Every llc configuration expects a single NEON fcvtzu on the .4h arrangement.
define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = fptoui <4 x half> %op1 to <4 x i16>
  ret <4 x i16> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Loads <8 x half> from %a, converts with fptoui to <8 x i16>, stores to %b.
; Expected code stays on NEON: ldr q0 / fcvtzu v0.8h / str q0.
define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtzu v0.8h, v0.8h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i16>
  store <8 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; 256-bit case: <16 x half> -> <16 x i16> through memory with fptoui.
; Expected code is one SVE load/convert/store sequence under ptrue p0.h, vl16.
define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl16
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; 512-bit case (<32 x half> -> <32 x i16>) with no vscale_range attribute, so
; the expectation depends on the llc configuration: the 256-bit run converts two
; vl16 halves (upper half at element offset x8 = 16); wider runs use one vl32 pass.
define void @fcvtzu_v32f16_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v32f16_v32i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.h, p0/m, z0.h
; VBITS_GE_256-NEXT: fcvtzu z1.h, p0/m, z1.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v32f16_v32i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.h, p0/m, z0.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; 1024-bit case: <64 x half> -> <64 x i16> through memory with fptoui.
; Expected code is one SVE load/convert/store sequence under ptrue p0.h, vl64.
define void @fcvtzu_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl64
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x half>, ptr %a
  %res = fptoui <64 x half> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; 2048-bit case: <128 x half> -> <128 x i16> through memory with fptoui.
; Expected code is one SVE load/convert/store sequence under ptrue p0.h, vl128.
define void @fcvtzu_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v128f16_v128i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl128
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <128 x half>, ptr %a
  %res = fptoui <128 x half> %op1 to <128 x i16>
  store <128 x i16> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU H -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <2 x half> -> <2 x i32> in registers. Expected code widens with NEON
; fcvtl to .4s, converts with fcvtzu v0.4s, and returns only the low half (d0).
define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f16_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl v0.4s, v0.4h
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %res = fptoui <2 x half> %op1 to <2 x i32>
  ret <2 x i32> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <4 x half> -> <4 x i32> in registers. Expected code is NEON only:
; fcvtl widens the halves to .4s, then fcvtzu v0.4s converts them.
define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl v0.4s, v0.4h
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = fptoui <4 x half> %op1 to <4 x i32>
  ret <4 x i32> %res
}
|
|
|
|
; fptoui <8 x half> -> <8 x i32> through memory. Expected code loads the 128-bit
; source with ldr q0, unpacks the halfword lanes into .s containers (uunpklo),
; then converts .h -> .s with SVE fcvtzu and stores with st1w, both under vl8.
define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f16_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x half> -> <16 x i32> with no vscale_range attribute. The 256-bit
; run loads all 16 halves, unpacks the low and (via ext #16) the high 8 lanes and
; converts/stores each separately (second store at element offset x8 = 8); wider
; runs load halfwords straight into .s lanes and convert once under vl16.
define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f16_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.h
; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.h
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f16_v16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.h
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x half> -> <32 x i32> through memory. Expected code loads the
; halfwords directly into .s lanes (ld1h { z0.s }) and converts once under vl32.
define void @fcvtzu_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <64 x half> -> <64 x i32> through memory. Expected code loads the
; halfwords directly into .s lanes (ld1h { z0.s }) and converts once under vl64.
define void @fcvtzu_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f16_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x half>, ptr %a
  %res = fptoui <64 x half> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU H -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <1 x half> -> <1 x i64> in registers. Expected code is the scalar
; form: fcvtzu x8, h0 followed by fmov d0, x8 to return the value in d0.
define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f16_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu x8, h0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
  %res = fptoui <1 x half> %op1 to <1 x i64>
  ret <1 x i64> %res
}
|
|
|
|
; v2f16 is not legal for NEON, so use SVE
|
|
; fptoui <2 x half> -> <2 x i64> in registers. Expected code unpacks the lanes
; twice (.h -> .s -> .d containers) and converts with SVE fcvtzu z0.d <- z0.h
; under ptrue p0.d, vl4; the result is returned in the low 128 bits (q0).
define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f16_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %res = fptoui <2 x half> %op1 to <2 x i64>
  ret <2 x i64> %res
}
|
|
|
|
; fptoui <4 x half> -> <4 x i64> through memory. Expected code loads the 64-bit
; source with ldr d0, unpacks twice (.h -> .s -> .d), converts with SVE fcvtzu
; z0.d <- z0.h under vl4 and stores with st1d.
define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f16_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <4 x half>, ptr %a
  %res = fptoui <4 x half> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <8 x half> -> <8 x i64> with no vscale_range attribute. The 256-bit run
; splits the q0 source with a NEON ext #8, unpacks each half twice and converts
; and stores them separately (second store at element offset x8 = 4); wider runs
; unpack twice and convert once under vl8.
define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f16_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ldr q0, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.h
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.h
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f16_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ldr q0, [x0]
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.h
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x half>, ptr %a
  %res = fptoui <8 x half> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x half> -> <16 x i64> through memory. Expected code loads the
; halfwords directly into .d lanes (ld1h { z0.d }) and converts once under vl16.
define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f16_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x half>, ptr %a
  %res = fptoui <16 x half> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x half> -> <32 x i64> through memory. Expected code loads the
; halfwords directly into .d lanes (ld1h { z0.d }) and converts once under vl32.
define void @fcvtzu_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f16_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x half>, ptr %a
  %res = fptoui <32 x half> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU S -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <2 x float> -> <2 x i16> in registers. The expected instruction is the
; signed NEON fcvtzs v0.2s, not fcvtzu.
; NOTE(review): presumably fine because every in-range i16 result also fits a
; signed 32-bit lane and out-of-range inputs are poison for fptoui - confirm.
define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = fptoui <2 x float> %op1 to <2 x i16>
  ret <2 x i16> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <4 x float> -> <4 x i16> in registers. Expected code converts with NEON
; fcvtzu v1.4s, then narrows lane by lane: each .s lane is moved through a
; general register into the matching .h lane of v0; the result is the low d0.
define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v1.4s, v0.4s
; CHECK-NEXT: mov w8, v1.s[1]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: mov w9, v1.s[2]
; CHECK-NEXT: mov v0.h[1], w8
; CHECK-NEXT: mov w8, v1.s[3]
; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %res = fptoui <4 x float> %op1 to <4 x i16>
  ret <4 x i16> %res
}
|
|
|
|
; Loads <8 x float> from %a and returns fptoui to <8 x i16> in a register.
; Expected code converts with SVE fcvtzu on .s lanes under vl8, narrows with
; uzp1 on .h lanes, and returns the low 128 bits (q0).
define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i16>
  ret <8 x i16> %res
}
|
|
|
|
; fptoui <16 x float> -> <16 x i16> with no vscale_range attribute. The 256-bit
; run converts two vl8 halves, narrows each with uzp1, joins them with splice
; (predicate vl8 on .h) and stores 16 halfwords; wider runs convert once under
; vl16 and narrow in the store itself (st1h { z0.s }).
define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x float> -> <32 x i16> through memory. Expected code converts on .s
; lanes under vl32 and narrows in the store itself (st1h { z0.s }).
define void @fcvtzu_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <64 x float> -> <64 x i16> through memory. Expected code converts on .s
; lanes under vl64 and narrows in the store itself (st1h { z0.s }).
define void @fcvtzu_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x float>, ptr %a
  %res = fptoui <64 x float> %op1 to <64 x i16>
  store <64 x i16> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU S -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <2 x float> -> <2 x i32> in registers.
; Every llc configuration expects a single NEON fcvtzu on the .2s arrangement.
define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = fptoui <2 x float> %op1 to <2 x i32>
  ret <2 x i32> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <4 x float> -> <4 x i32> in registers.
; Every llc configuration expects a single NEON fcvtzu on the .4s arrangement.
define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = fptoui <4 x float> %op1 to <4 x i32>
  ret <4 x i32> %res
}
|
|
|
|
; 256-bit case: <8 x float> -> <8 x i32> through memory with fptoui.
; Expected code is one SVE load/convert/store sequence under ptrue p0.s, vl8.
define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v8f32_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl8
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; 512-bit case (<16 x float> -> <16 x i32>) with no vscale_range attribute, so
; the expectation depends on the llc configuration: the 256-bit run converts two
; vl8 halves (upper half at element offset x8 = 8); wider runs use one vl16 pass.
define void @fcvtzu_v16f32_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.s, p0/m, z0.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; 1024-bit case: <32 x float> -> <32 x i32> through memory with fptoui.
; Expected code is one SVE load/convert/store sequence under ptrue p0.s, vl32.
define void @fcvtzu_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; 2048-bit case: <64 x float> -> <64 x i32> through memory with fptoui.
; Expected code is one SVE load/convert/store sequence under ptrue p0.s, vl64.
define void @fcvtzu_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v64f32_v64i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl64
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <64 x float>, ptr %a
  %res = fptoui <64 x float> %op1 to <64 x i32>
  store <64 x i32> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU S -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <1 x float> -> <1 x i64> in registers. Expected code widens with NEON
; fcvtl to .2d, converts with fcvtzu v0.2d, and returns only the low lane (d0).
define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f32_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %res = fptoui <1 x float> %op1 to <1 x i64>
  ret <1 x i64> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <2 x float> -> <2 x i64> in registers. Expected code is NEON only:
; fcvtl widens the floats to .2d, then fcvtzu v0.2d converts them.
define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f32_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtl v0.2d, v0.2s
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <2 x float> %op1 to <2 x i64>
  ret <2 x i64> %res
}
|
|
|
|
; fptoui <4 x float> -> <4 x i64> through memory. Expected code loads the 128-bit
; source with ldr q0, unpacks the word lanes into .d containers (uunpklo), then
; converts .s -> .d with SVE fcvtzu and stores with st1d, both under vl4.
define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <4 x float>, ptr %a
  %res = fptoui <4 x float> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <8 x float> -> <8 x i64> with no vscale_range attribute. The 256-bit run
; loads all 8 floats, unpacks the low and (via ext #16) the high 4 lanes and
; converts/stores each separately (second store at element offset x8 = 4); wider
; runs load words straight into .d lanes and convert once under vl8.
define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f32_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.s
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.s
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f32_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.s
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x float>, ptr %a
  %res = fptoui <8 x float> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x float> -> <16 x i64> through memory. Expected code loads the
; words directly into .d lanes (ld1w { z0.d }) and converts once under vl16.
define void @fcvtzu_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f32_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x float>, ptr %a
  %res = fptoui <16 x float> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x float> -> <32 x i64> through memory. Expected code loads the
; words directly into .d lanes (ld1w { z0.d }) and converts once under vl32.
define void @fcvtzu_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f32_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x float>, ptr %a
  %res = fptoui <32 x float> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}
|
|
|
|
|
|
;
|
|
; FCVTZU D -> H
|
|
;
|
|
|
|
; v1f64 is preferred to be widened to v4f64, so use SVE
|
|
; fptoui <1 x double> -> <1 x i16> in registers. Expected code converts with SVE
; fcvtzu on .d lanes under vl4, then narrows with two uzp1 steps (.s, then .h);
; the result is returned in the low 64 bits (d0).
define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %res = fptoui <1 x double> %op1 to <1 x i16>
  ret <1 x i16> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <2 x double> -> <2 x i16> in registers. The expected conversion is the
; signed NEON fcvtzs v0.2d (not fcvtzu), followed by xtn narrowing to .2s.
; NOTE(review): presumably fine because every in-range i16 result also fits a
; signed 64-bit lane and out-of-range inputs are poison for fptoui - confirm.
define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <2 x double> %op1 to <2 x i16>
  ret <2 x i16> %res
}
|
|
|
|
; Loads <4 x double> from %a and returns fptoui to <4 x i16> in a register.
; Expected code converts with SVE fcvtzu on .d lanes under vl4, narrows with two
; uzp1 steps (.s, then .h), and returns the low 64 bits (d0).
define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i16>
  ret <4 x i16> %res
}
|
|
|
|
; Loads <8 x double> and returns fptoui to <8 x i16>; no vscale_range attribute.
; The 256-bit run converts two vl4 halves, narrows each with two uzp1 steps and
; joins them with a NEON lane move (mov v0.d[1], v2.d[0]); wider runs convert
; once under vl8 and narrow with two uzp1 steps. The result is the low q0.
define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i16:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i16:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i16>
  ret <8 x i16> %res
}
|
|
|
|
; fptoui <16 x double> -> <16 x i16> through memory. Expected code converts on
; .d lanes under vl16 and narrows in the store itself (st1h { z0.d }).
define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i16>
  store <16 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x double> -> <32 x i16> through memory. Expected code converts on
; .d lanes under vl32 and narrows in the store itself (st1h { z0.d }).
define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i16>
  store <32 x i16> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU D -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <1 x double> -> <1 x i32> in registers. Expected code treats d0 as part
; of q0, converts with NEON fcvtzu v0.2d and narrows with xtn to .2s.
define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <1 x double> %op1 to <1 x i32>
  ret <1 x i32> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <2 x double> -> <2 x i32> in registers. Expected code is NEON only:
; fcvtzu v0.2d converts, then xtn narrows the result to .2s.
define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <2 x double> %op1 to <2 x i32>
  ret <2 x i32> %res
}
|
|
|
|
; Loads <4 x double> from %a and returns fptoui to <4 x i32> in a register.
; Expected code converts with SVE fcvtzu on .d lanes under vl4, narrows with
; uzp1 on .s lanes, and returns the low 128 bits (q0).
define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i32>
  ret <4 x i32> %res
}
|
|
|
|
; fptoui <8 x double> -> <8 x i32> with no vscale_range attribute. The 256-bit
; run converts two vl4 halves, narrows each with uzp1, joins them with splice
; (predicate vl4 on .s) and stores 8 words; wider runs convert once under vl8
; and narrow in the store itself (st1w { z0.d }).
define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i32:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i32>
  store <8 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <16 x double> -> <16 x i32> through memory. Expected code converts on
; .d lanes under vl16 and narrows in the store itself (st1w { z0.d }).
define void @fcvtzu_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i32>
  store <16 x i32> %res, ptr %b
  ret void
}
|
|
|
|
; fptoui <32 x double> -> <32 x i32> through memory. Expected code converts on
; .d lanes under vl32 and narrows in the store itself (st1w { z0.d }).
define void @fcvtzu_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i32>
  store <32 x i32> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZU D -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptoui <1 x double> -> <1 x i64> in registers. Expected code is the scalar
; form: fcvtzu x8, d0 followed by fmov d0, x8 to return the value in d0.
define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v1f64_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu x8, d0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
  %res = fptoui <1 x double> %op1 to <1 x i64>
  ret <1 x i64> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; fptoui <2 x double> -> <2 x i64> in registers.
; Every llc configuration expects a single NEON fcvtzu on the .2d arrangement.
define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v2f64_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = fptoui <2 x double> %op1 to <2 x i64>
  ret <2 x i64> %res
}
|
|
|
|
; 256-bit case: <4 x double> -> <4 x i64> through memory with fptoui.
; Expected code is one SVE load/convert/store sequence under ptrue p0.d, vl4.
define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzu_v4f64_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <4 x double>, ptr %a
  %res = fptoui <4 x double> %op1 to <4 x i64>
  store <4 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; 512-bit case (<8 x double> -> <8 x i64>) with no vscale_range attribute, so
; the expectation depends on the llc configuration: the 256-bit run converts two
; vl4 halves (upper half at element offset x8 = 4); wider runs use one vl8 pass.
define void @fcvtzu_v8f64_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i64:
; VBITS_GE_256: // %bb.0:
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
; VBITS_GE_256-NEXT: ret
;
; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i64:
; VBITS_GE_512: // %bb.0:
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: fcvtzu z0.d, p0/m, z0.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT: ret
  %op1 = load <8 x double>, ptr %a
  %res = fptoui <8 x double> %op1 to <8 x i64>
  store <8 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; 1024-bit case: <16 x double> -> <16 x i64> through memory with fptoui.
; Expected code is one SVE load/convert/store sequence under ptrue p0.d, vl16.
define void @fcvtzu_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fcvtzu_v16f64_v16i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl16
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <16 x double>, ptr %a
  %res = fptoui <16 x double> %op1 to <16 x i64>
  store <16 x i64> %res, ptr %b
  ret void
}
|
|
|
|
; 2048-bit case: <32 x double> -> <32 x i64> through memory with fptoui.
; Expected code is one SVE load/convert/store sequence under ptrue p0.d, vl32.
define void @fcvtzu_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fcvtzu_v32f64_v32i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl32
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: ret
  %op1 = load <32 x double>, ptr %a
  %res = fptoui <32 x double> %op1 to <32 x i64>
  store <32 x i64> %res, ptr %b
  ret void
}
|
|
|
|
;
|
|
; FCVTZS H -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; fptosi <4 x half> -> <4 x i16>, argument and result in registers.
; Every llc configuration expects a single NEON fcvtzs on the .4h arrangement.
define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v4f16_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = fptosi <4 x half> %op1 to <4 x i16>
  ret <4 x i16> %res
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Loads <8 x half> from %a, converts with fptosi to <8 x i16>, stores to %b.
; Expected code stays on NEON: ldr q0 / fcvtzs v0.8h / str q0.
define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvtzs_v8f16_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fcvtzs v0.8h, v0.8h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
  %op1 = load <8 x half>, ptr %a
  %res = fptosi <8 x half> %op1 to <8 x i16>
  store <8 x i16> %res, ptr %b
  ret void
}
|
|
|
|
; Signed convert <16 x half> -> <16 x i16>, loaded from %a and stored to %b.
; Expected output: one predicated ld1h / fcvtzs / st1h sequence over 16 .h lanes.
define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f16_v16i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl16
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x half>, ptr %a
|
|
%res = fptosi <16 x half> %op1 to <16 x i16>
|
|
store <16 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <32 x half> -> <32 x i16>, loaded from %a and stored to %b.
; No vscale_range attribute here, so the expected code differs per run:
; the min-256-bit run expects two vl16 load/convert/store halves (second half
; addressed through x8 = 16), the min-512-bit runs expect one vl32 sequence.
define void @fcvtzs_v32f16_v32i16(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v32f16_v32i16:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
|
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
|
|
; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.h, p0/m, z1.h
|
|
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
|
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v32f16_v32i16:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.h, vl32
|
|
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <32 x half>, ptr %a
|
|
%res = fptosi <32 x half> %op1 to <32 x i16>
|
|
store <32 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <64 x half> -> <64 x i16>, loaded from %a and stored to %b.
; Expected output: one predicated ld1h / fcvtzs / st1h sequence over 64 .h lanes.
define void @fcvtzs_v64f16_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v64f16_v64i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl64
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x half>, ptr %a
|
|
%res = fptosi <64 x half> %op1 to <64 x i16>
|
|
store <64 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <128 x half> -> <128 x i16>, loaded from %a and stored to %b.
; Expected output: one predicated ld1h / fcvtzs / st1h sequence over 128 .h lanes.
define void @fcvtzs_v128f16_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v128f16_v128i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.h, vl128
|
|
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
|
|
; CHECK-NEXT: st1h { z0.h }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <128 x half>, ptr %a
|
|
%res = fptosi <128 x half> %op1 to <128 x i16>
|
|
store <128 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS H -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Signed convert <2 x half> -> <2 x i32>, argument and result passed by value.
; Expected output: NEON fcvtl (.4h -> .4s lanes) followed by fcvtzs on .4s;
; no SVE instructions.
define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f16_v2i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtl v0.4s, v0.4h
|
|
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x half> %op1 to <2 x i32>
|
|
ret <2 x i32> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Signed convert <4 x half> -> <4 x i32>, argument and result passed by value.
; Expected output: NEON fcvtl (.4h -> .4s lanes) followed by fcvtzs on .4s.
define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f16_v4i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtl v0.4s, v0.4h
|
|
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <4 x half> %op1 to <4 x i32>
|
|
ret <4 x i32> %res
|
|
}
|
|
|
|
; Signed convert <8 x half> -> <8 x i32>, loaded from %a and stored to %b.
; Expected output: ldr q0, uunpklo to place the halves in .s lanes, then SVE
; fcvtzs (.h source, .s result) and st1w under a vl8 predicate.
define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v8f16_v8i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr q0, [x0]
|
|
; CHECK-NEXT: ptrue p0.s, vl8
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x half>, ptr %a
|
|
%res = fptosi <8 x half> %op1 to <8 x i32>
|
|
store <8 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <16 x half> -> <16 x i32>, loaded from %a and stored to %b.
; No vscale_range attribute here, so the expected code differs per run:
; the min-256-bit run expects one vl16 ld1h whose data is split with
; uunpklo/ext into two vl8 .s halves that are converted and stored separately;
; the min-512-bit runs expect a single ld1h into .s lanes, one fcvtzs and one
; st1w under a vl16 predicate.
define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v16f16_v16i32:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
|
|
; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h
|
|
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.h
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v16f16_v16i32:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
; VBITS_GE_512-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <16 x half>, ptr %a
|
|
%res = fptosi <16 x half> %op1 to <16 x i32>
|
|
store <16 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <32 x half> -> <32 x i32>, loaded from %a and stored to %b.
; Expected output: ld1h into .s lanes, fcvtzs (.h source, .s result) and st1w,
; all under a vl32 predicate.
define void @fcvtzs_v32f16_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f16_v32i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x half>, ptr %a
|
|
%res = fptosi <32 x half> %op1 to <32 x i32>
|
|
store <32 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <64 x half> -> <64 x i32>, loaded from %a and stored to %b.
; Expected output: ld1h into .s lanes, fcvtzs (.h source, .s result) and st1w,
; all under a vl64 predicate.
define void @fcvtzs_v64f16_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v64f16_v64i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl64
|
|
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x half>, ptr %a
|
|
%res = fptosi <64 x half> %op1 to <64 x i32>
|
|
store <64 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS H -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Signed convert <1 x half> -> <1 x i64>, argument and result passed by value.
; Expected output: scalar fcvtzs from h0 into x8, then fmov of x8 into d0.
define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f16_v1i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs x8, h0
|
|
; CHECK-NEXT: fmov d0, x8
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x half> %op1 to <1 x i64>
|
|
ret <1 x i64> %res
|
|
}
|
|
|
|
; v2f16 is not legal for NEON, so use SVE
|
|
; Signed convert <2 x half> -> <2 x i64>, argument and result passed by value.
; Expected output: two uunpklo steps (.h -> .s -> .d lanes) followed by SVE
; fcvtzs (.h source, .d result) under a vl4 predicate.
define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f16_v2i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x half> %op1 to <2 x i64>
|
|
ret <2 x i64> %res
|
|
}
|
|
|
|
; Signed convert <4 x half> -> <4 x i64>, loaded from %a and stored to %b.
; Expected output: ldr d0, two uunpklo steps (.h -> .s -> .d lanes), then SVE
; fcvtzs (.h source, .d result) and st1d under a vl4 predicate.
define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f16_v4i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr d0, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: uunpklo z0.s, z0.h
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x half>, ptr %a
|
|
%res = fptosi <4 x half> %op1 to <4 x i64>
|
|
store <4 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <8 x half> -> <8 x i64>, loaded from %a and stored to %b.
; No vscale_range attribute here, so the expected code differs per run:
; the min-256-bit run expects ldr q0, a NEON ext to obtain the upper 8 bytes,
; both halves unpacked to .d lanes, then two vl4 converts and two st1d stores;
; the min-512-bit runs expect a single unpack/convert/store chain under vl8.
define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f16_v8i64:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ldr q0, [x0]
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
|
; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
|
|
; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
|
|
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
|
|
; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.h
|
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f16_v8i64:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ldr q0, [x0]
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
|
|
; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x half>, ptr %a
|
|
%res = fptosi <8 x half> %op1 to <8 x i64>
|
|
store <8 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <16 x half> -> <16 x i64>, loaded from %a and stored to %b.
; Expected output: ld1h into .d lanes, fcvtzs (.h source, .d result) and st1d,
; all under a vl16 predicate.
define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f16_v16i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x half>, ptr %a
|
|
%res = fptosi <16 x half> %op1 to <16 x i64>
|
|
store <16 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <32 x half> -> <32 x i64>, loaded from %a and stored to %b.
; Expected output: ld1h into .d lanes, fcvtzs (.h source, .d result) and st1d,
; all under a vl32 predicate.
define void @fcvtzs_v32f16_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f16_v32i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.h
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x half>, ptr %a
|
|
%res = fptosi <32 x half> %op1 to <32 x i64>
|
|
store <32 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS S -> H
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Signed convert <2 x float> -> <2 x i16>, argument and result passed by value.
; Expected output: a single NEON fcvtzs on v0.2s with no separate narrowing
; instruction.
define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f32_v2i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x float> %op1 to <2 x i16>
|
|
ret <2 x i16> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Signed convert <4 x float> -> <4 x i16>, argument and result passed by value.
; Expected output: NEON fcvtzs on .4s into v1, then .s lanes 1-3 of v1 are moved
; through w8/w9 into .h lanes 1-3 of v0 (lane 0 comes from the full-register copy).
define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f32_v4i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v1.4s, v0.4s
|
|
; CHECK-NEXT: mov w8, v1.s[1]
|
|
; CHECK-NEXT: mov v0.16b, v1.16b
|
|
; CHECK-NEXT: mov w9, v1.s[2]
|
|
; CHECK-NEXT: mov v0.h[1], w8
|
|
; CHECK-NEXT: mov w8, v1.s[3]
|
|
; CHECK-NEXT: mov v0.h[2], w9
|
|
; CHECK-NEXT: mov v0.h[3], w8
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <4 x float> %op1 to <4 x i16>
|
|
ret <4 x i16> %res
|
|
}
|
|
|
|
; Signed convert <8 x float> -> <8 x i16>, loaded from %a and returned by value.
; Expected output: vl8 ld1w and fcvtzs on .s lanes, then uzp1 on .h lanes to
; pack the results into the low 128 bits (q0).
define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v8f32_v8i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl8
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x float>, ptr %a
|
|
%res = fptosi <8 x float> %op1 to <8 x i16>
|
|
ret <8 x i16> %res
|
|
}
|
|
|
|
; Signed convert <16 x float> -> <16 x i16>, loaded from %a and stored to %b.
; No vscale_range attribute here, so the expected code differs per run:
; the min-256-bit run expects two vl8 loads/converts, each result packed with
; uzp1 on .h lanes, joined with splice and written by one vl16 st1h;
; the min-512-bit runs expect ld1w / fcvtzs / st1h from .s lanes under vl16.
define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i16:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
|
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl8
|
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
|
|
; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
|
|
; VBITS_GE_256-NEXT: ptrue p0.h, vl16
|
|
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i16:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; VBITS_GE_512-NEXT: st1h { z0.s }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <16 x float>, ptr %a
|
|
%res = fptosi <16 x float> %op1 to <16 x i16>
|
|
store <16 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <32 x float> -> <32 x i16>, loaded from %a and stored to %b.
; Expected output: ld1w, fcvtzs on .s lanes and st1h from .s lanes, all under a
; vl32 predicate.
define void @fcvtzs_v32f32_v32i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f32_v32i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x float>, ptr %a
|
|
%res = fptosi <32 x float> %op1 to <32 x i16>
|
|
store <32 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <64 x float> -> <64 x i16>, loaded from %a and stored to %b.
; Expected output: ld1w, fcvtzs on .s lanes and st1h from .s lanes, all under a
; vl64 predicate.
define void @fcvtzs_v64f32_v64i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v64f32_v64i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl64
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x float>, ptr %a
|
|
%res = fptosi <64 x float> %op1 to <64 x i16>
|
|
store <64 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS S -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Signed convert <2 x float> -> <2 x i32>, argument and result passed by value.
; Expected output: a single NEON fcvtzs on v0.2s and no SVE instructions.
define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f32_v2i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x float> %op1 to <2 x i32>
|
|
ret <2 x i32> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Signed convert <4 x float> -> <4 x i32>, argument and result passed by value.
; Expected output: a single NEON fcvtzs on v0.4s and no SVE instructions.
define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f32_v4i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <4 x float> %op1 to <4 x i32>
|
|
ret <4 x i32> %res
|
|
}
|
|
|
|
; Signed convert <8 x float> -> <8 x i32>, loaded from %a and stored to %b.
; Expected output: one predicated ld1w / fcvtzs / st1w sequence over 8 .s lanes.
define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v8f32_v8i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl8
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <8 x float>, ptr %a
|
|
%res = fptosi <8 x float> %op1 to <8 x i32>
|
|
store <8 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <16 x float> -> <16 x i32>, loaded from %a and stored to %b.
; No vscale_range attribute here, so the expected code differs per run:
; the min-256-bit run expects two vl8 load/convert/store halves (second half
; addressed through x8 = 8), the min-512-bit runs expect one vl16 sequence.
define void @fcvtzs_v16f32_v16i32(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i32:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
|
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s
|
|
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
|
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i32:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.s, vl16
|
|
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <16 x float>, ptr %a
|
|
%res = fptosi <16 x float> %op1 to <16 x i32>
|
|
store <16 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <32 x float> -> <32 x i32>, loaded from %a and stored to %b.
; Expected output: one predicated ld1w / fcvtzs / st1w sequence over 32 .s lanes.
define void @fcvtzs_v32f32_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f32_v32i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl32
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x float>, ptr %a
|
|
%res = fptosi <32 x float> %op1 to <32 x i32>
|
|
store <32 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <64 x float> -> <64 x i32>, loaded from %a and stored to %b.
; Expected output: one predicated ld1w / fcvtzs / st1w sequence over 64 .s lanes.
define void @fcvtzs_v64f32_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v64f32_v64i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.s, vl64
|
|
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
|
|
; CHECK-NEXT: st1w { z0.s }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <64 x float>, ptr %a
|
|
%res = fptosi <64 x float> %op1 to <64 x i32>
|
|
store <64 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS S -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Signed convert <1 x float> -> <1 x i64>, argument and result passed by value.
; Expected output: NEON fcvtl (.2s -> .2d lanes) followed by fcvtzs on .2d;
; no SVE instructions.
define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f32_v1i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtl v0.2d, v0.2s
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x float> %op1 to <1 x i64>
|
|
ret <1 x i64> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Signed convert <2 x float> -> <2 x i64>, argument and result passed by value.
; Expected output: NEON fcvtl (.2s -> .2d lanes) followed by fcvtzs on .2d.
define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f32_v2i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtl v0.2d, v0.2s
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x float> %op1 to <2 x i64>
|
|
ret <2 x i64> %res
|
|
}
|
|
|
|
; Signed convert <4 x float> -> <4 x i64>, loaded from %a and stored to %b.
; Expected output: ldr q0, uunpklo to place the floats in .d lanes, then SVE
; fcvtzs (.s source, .d result) and st1d under a vl4 predicate.
define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f32_v4i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ldr q0, [x0]
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: uunpklo z0.d, z0.s
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x float>, ptr %a
|
|
%res = fptosi <4 x float> %op1 to <4 x i64>
|
|
store <4 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <8 x float> -> <8 x i64>, loaded from %a and stored to %b.
; No vscale_range attribute here, so the expected code differs per run:
; the min-256-bit run expects one vl8 ld1w whose data is split with
; uunpklo/ext into two vl4 .d halves that are converted and stored separately;
; the min-512-bit runs expect a single ld1w into .d lanes, one fcvtzs and one
; st1d under a vl8 predicate.
define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f32_v8i64:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s
|
|
; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
|
|
; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.s
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f32_v8i64:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x float>, ptr %a
|
|
%res = fptosi <8 x float> %op1 to <8 x i64>
|
|
store <8 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <16 x float> -> <16 x i64>, loaded from %a and stored to %b.
; Expected output: ld1w into .d lanes, fcvtzs (.s source, .d result) and st1d,
; all under a vl16 predicate.
define void @fcvtzs_v16f32_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f32_v16i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x float>, ptr %a
|
|
%res = fptosi <16 x float> %op1 to <16 x i64>
|
|
store <16 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <32 x float> -> <32 x i64>, loaded from %a and stored to %b.
; Expected output: ld1w into .d lanes, fcvtzs (.s source, .d result) and st1d,
; all under a vl32 predicate.
define void @fcvtzs_v32f32_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f32_v32i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x float>, ptr %a
|
|
%res = fptosi <32 x float> %op1 to <32 x i64>
|
|
store <32 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
|
|
;
|
|
; FCVTZS D -> H
|
|
;
|
|
|
|
; v1f64 is preferred to be widened to v4f64, so use SVE
|
|
; Signed convert <1 x double> -> <1 x i16>, argument and result passed by value.
; Expected output: SVE fcvtzs on .d lanes under a vl4 predicate, then two uzp1
; steps (.s, then .h) to pack the result into d0.
define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f64_v1i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x double> %op1 to <1 x i16>
|
|
ret <1 x i16> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Signed convert <2 x double> -> <2 x i16>, argument and result passed by value.
; Expected output: NEON fcvtzs on .2d followed by xtn to .2s; no SVE instructions.
define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f64_v2i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: xtn v0.2s, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x double> %op1 to <2 x i16>
|
|
ret <2 x i16> %res
|
|
}
|
|
|
|
; Signed convert <4 x double> -> <4 x i16>, loaded from %a and returned by value.
; Expected output: vl4 ld1d and fcvtzs on .d lanes, then two uzp1 steps
; (.s, then .h) to pack the result into d0.
define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f64_v4i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x double>, ptr %a
|
|
%res = fptosi <4 x double> %op1 to <4 x i16>
|
|
ret <4 x i16> %res
|
|
}
|
|
|
|
; Signed convert <8 x double> -> <8 x i16>, loaded from %a and returned by value.
; No vscale_range attribute here, so the expected code differs per run:
; the min-256-bit run expects two vl4 loads/converts, each packed with two uzp1
; steps, then combined with a NEON mov of one .d lane into v0;
; the min-512-bit runs expect one vl8 ld1d / fcvtzs followed by two uzp1 steps.
define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i16:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
|
; VBITS_GE_256-NEXT: uzp1 z2.h, z0.h, z0.h
|
|
; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
|
|
; VBITS_GE_256-NEXT: mov v0.d[1], v2.d[0]
|
|
; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i16:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
|
|
; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x double>, ptr %a
|
|
%res = fptosi <8 x double> %op1 to <8 x i16>
|
|
ret <8 x i16> %res
|
|
}
|
|
|
|
; Signed convert <16 x double> -> <16 x i16>, loaded from %a and stored to %b.
; Expected output: ld1d, fcvtzs on .d lanes and st1h from .d lanes, all under a
; vl16 predicate.
define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f64_v16i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x double>, ptr %a
|
|
%res = fptosi <16 x double> %op1 to <16 x i16>
|
|
store <16 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <32 x double> -> <32 x i16>, loaded from %a and stored to %b.
; Expected output: ld1d, fcvtzs on .d lanes and st1h from .d lanes, all under a
; vl32 predicate.
define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f64_v32i16:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x double>, ptr %a
|
|
%res = fptosi <32 x double> %op1 to <32 x i16>
|
|
store <32 x i16> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS D -> S
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Signed convert <1 x double> -> <1 x i32>, argument and result passed by value.
; Expected output: NEON fcvtzs on .2d followed by xtn to .2s; no SVE instructions.
define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f64_v1i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: xtn v0.2s, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x double> %op1 to <1 x i32>
|
|
ret <1 x i32> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Signed convert <2 x double> -> <2 x i32>, argument and result passed by value.
; Expected output: NEON fcvtzs on .2d followed by xtn to .2s; no SVE instructions.
define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f64_v2i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: xtn v0.2s, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x double> %op1 to <2 x i32>
|
|
ret <2 x i32> %res
|
|
}
|
|
|
|
; Signed convert <4 x double> -> <4 x i32>, loaded from %a and returned by value.
; Expected output: vl4 ld1d and fcvtzs on .d lanes, then one uzp1 on .s lanes to
; pack the result into q0.
define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f64_v4i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x double>, ptr %a
|
|
%res = fptosi <4 x double> %op1 to <4 x i32>
|
|
ret <4 x i32> %res
|
|
}
|
|
|
|
; Signed convert <8 x double> -> <8 x i32>, loaded from %a and stored to %b.
; No vscale_range attribute here, so the expected code differs per run:
; the min-256-bit run expects two vl4 loads/converts, each result packed with
; uzp1 on .s lanes, joined with splice and written by one vl8 st1w;
; the min-512-bit runs expect ld1d / fcvtzs / st1w from .d lanes under vl8.
define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i32:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl4
|
|
; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
|
|
; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
|
|
; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
|
|
; VBITS_GE_256-NEXT: ptrue p0.s, vl8
|
|
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i32:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x double>, ptr %a
|
|
%res = fptosi <8 x double> %op1 to <8 x i32>
|
|
store <8 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <16 x double> -> <16 x i32>, loaded from %a and stored to %b.
; Expected output: ld1d, fcvtzs on .d lanes and st1w from .d lanes, all under a
; vl16 predicate.
define void @fcvtzs_v16f64_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f64_v16i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x double>, ptr %a
|
|
%res = fptosi <16 x double> %op1 to <16 x i32>
|
|
store <16 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <32 x double> -> <32 x i32>, loaded from %a and stored to %b.
; Expected output: ld1d, fcvtzs on .d lanes and st1w from .d lanes, all under a
; vl32 predicate.
define void @fcvtzs_v32f64_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f64_v32i32:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x double>, ptr %a
|
|
%res = fptosi <32 x double> %op1 to <32 x i32>
|
|
store <32 x i32> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
;
|
|
; FCVTZS D -> D
|
|
;
|
|
|
|
; Don't use SVE for 64-bit vectors.
|
|
; Signed convert <1 x double> -> <1 x i64>, argument and result passed by value.
; Expected output: scalar fcvtzs from d0 into x8, then fmov of x8 into d0.
define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v1f64_v1i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs x8, d0
|
|
; CHECK-NEXT: fmov d0, x8
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <1 x double> %op1 to <1 x i64>
|
|
ret <1 x i64> %res
|
|
}
|
|
|
|
; Don't use SVE for 128-bit vectors.
|
|
; Signed convert <2 x double> -> <2 x i64>, argument and result passed by value.
; Expected output: a single NEON fcvtzs on v0.2d and no SVE instructions.
define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v2f64_v2i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
|
|
; CHECK-NEXT: ret
|
|
%res = fptosi <2 x double> %op1 to <2 x i64>
|
|
ret <2 x i64> %res
|
|
}
|
|
|
|
; Signed convert <4 x double> -> <4 x i64>, loaded from %a and stored to %b.
; Expected output: one predicated ld1d / fcvtzs / st1d sequence over 4 .d lanes.
define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v4f64_v4i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl4
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <4 x double>, ptr %a
|
|
%res = fptosi <4 x double> %op1 to <4 x i64>
|
|
store <4 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <8 x double> -> <8 x i64>, loaded from %a and stored to %b.
; No vscale_range attribute here, so the expected code differs per run:
; the min-256-bit run expects two vl4 load/convert/store halves (second half
; addressed through x8 = 4), the min-512-bit runs expect one vl8 sequence.
define void @fcvtzs_v8f64_v8i64(ptr %a, ptr %b) #0 {
|
|
; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i64:
|
|
; VBITS_GE_256: // %bb.0:
|
|
; VBITS_GE_256-NEXT: ptrue p0.d, vl4
|
|
; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
|
|
; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
|
|
; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
|
|
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
|
|
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
|
|
; VBITS_GE_256-NEXT: ret
|
|
;
|
|
; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i64:
|
|
; VBITS_GE_512: // %bb.0:
|
|
; VBITS_GE_512-NEXT: ptrue p0.d, vl8
|
|
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; VBITS_GE_512-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x1]
|
|
; VBITS_GE_512-NEXT: ret
|
|
%op1 = load <8 x double>, ptr %a
|
|
%res = fptosi <8 x double> %op1 to <8 x i64>
|
|
store <8 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <16 x double> -> <16 x i64>, loaded from %a and stored to %b.
; Expected output: one predicated ld1d / fcvtzs / st1d sequence over 16 .d lanes.
define void @fcvtzs_v16f64_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v16f64_v16i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl16
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <16 x double>, ptr %a
|
|
%res = fptosi <16 x double> %op1 to <16 x i64>
|
|
store <16 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Signed convert <32 x double> -> <32 x i64>, loaded from %a and stored to %b.
; Expected output: one predicated ld1d / fcvtzs / st1d sequence over 32 .d lanes.
define void @fcvtzs_v32f64_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
|
|
; CHECK-LABEL: fcvtzs_v32f64_v32i64:
|
|
; CHECK: // %bb.0:
|
|
; CHECK-NEXT: ptrue p0.d, vl32
|
|
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
|
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
|
|
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
|
; CHECK-NEXT: ret
|
|
%op1 = load <32 x double>, ptr %a
|
|
%res = fptosi <32 x double> %op1 to <32 x i64>
|
|
store <32 x i64> %res, ptr %b
|
|
ret void
|
|
}
|
|
|
|
; Attribute group #0, referenced by every function in this chunk: enables the
; "+sve" target feature.
attributes #0 = { "target-features"="+sve" }
|