
Currently, given:

```cpp
svuint8_t foo(uint8_t *x) {
  return svld1(svptrue_b8(), x);
}
```

We generate:

```gas
foo:
        ptrue   p0.b
        ld1b    { z0.b }, p0/z, [x0]
        ret
```

However, on little-endian targets where unaligned memory accesses are allowed, we could instead use LDR as follows:

```gas
foo:
        ldr     z0, [x0]
        ret
```

The second form avoids the predicate dependency. The same applies to other element types and to stores.
111 lines
4.3 KiB
LLVM
111 lines
4.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple arm64-- | FileCheck %s

; Data layout: little-endian ("e"), Mach-O symbol mangling ("m:o"), i64 aligned
; to 64 bits, i128 aligned to 128 bits, native integer widths of 32 and 64 bits,
; and a 128-bit natural stack alignment ("S128").
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Test the (concat_vectors (trunc), (trunc)) pattern.

; Two <2 x i64> inputs are each truncated to <2 x i16>, then joined into a
; <4 x i16> by a shufflevector whose mask (0, 1, 2, 3) selects every lane of
; both halves in order. The assertions expect a UZP1 on .4s lanes followed by
; an XTN narrowing .4s to .4h.
define <4 x i16> @test_concat_truncate_v2i64_to_v4i16(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
  %at = trunc <2 x i64> %a to <2 x i16>
  %bt = trunc <2 x i64> %b to <2 x i16>
  %shuffle = shufflevector <2 x i16> %at, <2 x i16> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %shuffle
}

; Two <2 x i64> inputs are each truncated to <2 x i32>, then joined into a
; <4 x i32> by a shufflevector with the in-order mask (0, 1, 2, 3). The
; assertions expect a single UZP1 on .4s lanes and no separate narrowing step.
define <4 x i32> @test_concat_truncate_v2i64_to_v4i32(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
entry:
  %at = trunc <2 x i64> %a to <2 x i32>
  %bt = trunc <2 x i64> %b to <2 x i32>
  %shuffle = shufflevector <2 x i32> %at, <2 x i32> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %shuffle
}

; Two <2 x i32> inputs are each truncated to <2 x i16>, then joined into a
; <4 x i16> by a shufflevector with the in-order mask (0, 1, 2, 3). The
; assertions expect a single UZP1 on .4h lanes.
define <4 x i16> @test_concat_truncate_v2i32_to_v4i16(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v2i32_to_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
entry:
  %at = trunc <2 x i32> %a to <2 x i16>
  %bt = trunc <2 x i32> %b to <2 x i16>
  %shuffle = shufflevector <2 x i16> %at, <2 x i16> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %shuffle
}

; Two <4 x i32> inputs are each truncated to <4 x i8>, then joined into an
; <8 x i8> by a shufflevector with the in-order mask (0 .. 7). The assertions
; expect a UZP1 on .8h lanes followed by an XTN narrowing .8h to .8b.
define <8 x i8> @test_concat_truncate_v4i32_to_v8i8(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: ret
entry:
  %at = trunc <4 x i32> %a to <4 x i8>
  %bt = trunc <4 x i32> %b to <4 x i8>
  %shuffle = shufflevector <4 x i8> %at, <4 x i8> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuffle
}

; Two <4 x i32> inputs are each truncated to <4 x i16>, then joined into an
; <8 x i16> by a shufflevector with the in-order mask (0 .. 7). The assertions
; expect a single UZP1 on .8h lanes.
define <8 x i16> @test_concat_truncate_v4i32_to_v8i16(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
entry:
  %at = trunc <4 x i32> %a to <4 x i16>
  %bt = trunc <4 x i32> %b to <4 x i16>
  %shuffle = shufflevector <4 x i16> %at, <4 x i16> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle
}

; Two <4 x i16> inputs are each truncated to <4 x i8>, then joined into an
; <8 x i8> by a shufflevector with the in-order mask (0 .. 7). The assertions
; expect a single UZP1 on .8b lanes.
define <8 x i8> @test_concat_truncate_v4i16_to_v8i8(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v4i16_to_v8i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
entry:
  %at = trunc <4 x i16> %a to <4 x i8>
  %bt = trunc <4 x i16> %b to <4 x i8>
  %shuffle = shufflevector <4 x i8> %at, <4 x i8> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuffle
}

; Two <8 x i16> inputs are each truncated to <8 x i8>, then joined into a
; <16 x i8> by a shufflevector with the in-order mask (0 .. 15). The assertions
; expect a single UZP1 on .16b lanes.
define <16 x i8> @test_concat_truncate_v8i16_to_v16i8(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v8i16_to_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %at = trunc <8 x i16> %a to <8 x i8>
  %bt = trunc <8 x i16> %b to <8 x i8>
  %shuffle = shufflevector <8 x i8> %at, <8 x i8> %bt, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle
}

; The concat_vectors operation in this test is introduced when splitting
; the fptrunc operation due to the split <vscale x 4 x double> input operand.
;
; The input is a splat of 1.0 (insertelement into lane 0, then a shufflevector
; with an all-zero mask), narrowed to <vscale x 4 x float> and stored through
; %ptr with 4-byte alignment. The assertions expect an FMOV of #1.0 into z0.s
; followed by an STR of z0 that takes no governing predicate.
; Despite the "v4f64_to_v4f32" in the name, the vectors here are scalable
; (<vscale x 4 x ...>), and the function uses attribute group #1.
define void @test_concat_fptrunc_v4f64_to_v4f32(ptr %ptr) #1 {
; CHECK-LABEL: test_concat_fptrunc_v4f64_to_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmov z0.s, #1.00000000
; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
entry:
  %0 = shufflevector <vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
  %1 = fptrunc <vscale x 4 x double> %0 to <vscale x 4 x float>
  store <vscale x 4 x float> %1, ptr %ptr, align 4
  ret void
}

; #0: attached to the fixed-width truncate tests; marks them nounwind.
attributes #0 = { nounwind }
; #1: attached to the scalable-vector fptrunc test; turns on the "+sve" target
; feature for that function only.
attributes #1 = { "target-features"="+sve" }